[mlpack-svn] r15573 - in mlpack/conf/jenkins-conf/benchmark: . benchmark methods/mlpack

Mon Jul 29 12:41:18 EDT 2013

Author: marcus
Date: Mon Jul 29 12:41:17 2013
New Revision: 15573

Log:
Move PYTHONPATH and LD_LIBRARY_PATH to the Makefile and add logfile support.

Modified:
   mlpack/conf/jenkins-conf/benchmark/Makefile
   mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
   mlpack/conf/jenkins-conf/benchmark/methods/mlpack/hmm_viterbi.py

Modified: mlpack/conf/jenkins-conf/benchmark/Makefile
==============================================================================

--- mlpack/conf/jenkins-conf/benchmark/Makefile	(original)
+++ mlpack/conf/jenkins-conf/benchmark/Makefile	Mon Jul 29 12:41:17 2013
@@ -6,8 +6,13 @@
 BENCHMARKDDIR := benchmark
 
 # Specify the path for the libraries.
+export MLPACK_BIN=/usr/local/bin/
+export MATLAB_BIN=/opt/matlab/bin/
+export MATLABPATH=methods/matlab/
 export WEKA_CLASSPATH=".:/opt/weka/weka-3-6-9:/opt/weka/weka-3-6-9/weka.jar"
 export SHOGUN_PATH=/opt/shogun/shogun-2.1.0-mod
+export PYTHONPATH=/opt/scikit-learn/scikit-learn-0.13.1/lib/python3.3/site-packages/:/opt/mlpy/mlpy-3.5.0/lib/python3.3/site-packages/:/opt/shogun/shogun-2.1.0/lib/python3.3/dist-packages/
+export LD_LIBRARY_PATH=/opt/shogun/shogun-2.1.0/lib/
 
 ifeq ($(PYTHON_VERSION), 0)
 	$(error Python version 2.7 required which was not found)

Modified: mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py	Mon Jul 29 12:41:17 2013
@@ -21,6 +21,7 @@
 from convert import *
 
 import argparse
+import datetime
 
 '''
 Show system informations. Are there no data available, the value is 'N/A'.
@@ -161,40 +162,31 @@
 
 @para configfile - Start the benchmark with this configuration file.
 '''
-def Main(configfile):
+def Main(configfile, blocks, log):
   # Benchmark settings.
   timeout = 23000
+  logfile = "results.log"
 
   # Read Config.
   config = Parser(configfile, verbose=False)
   streamData = config.StreamMerge()
 
   # Read the general block and set the attributes.
+
   if "general" in streamData:
     for key, value in streamData["general"]:
       if key == "timeout":
         timeout = value
-      elif key == "MLPACK_BIN":
-        os.environ["MLPACK_BIN"] = value
-      elif key == "MATLAB_BIN":
-        os.environ["MATLAB_BIN"] = value
-      elif key == "MATLABPATH":
-        os.environ["MATLABPATH"] = value
-      elif key == "PYTHONPATH":
-        try:
-          PYTHONPATH = os.environ["PYTHONPATH"]
-        except KeyError:
-          os.environ["PYTHONPATH"] = value
-        else:
-          os.environ["PYTHONPATH"] = PYTHONPATH + ":" + value
-      elif key == "LD_LIBRARY_PATH":
-        try:
-          LD_LIBRARY_PATH = os.environ["LD_LIBRARY_PATH"]
-        except KeyError:
-          os.environ["LD_LIBRARY_PATH"] = value
-        else:
-          os.environ["LD_LIBRARY_PATH"] = LD_LIBRARY_PATH + ":" + value
+      if key == "logfile":
+        logfile = value
 
+  # Open logfile if the user asked for.
+  if log:
+    fid = open(logfile, "a")
+
+  # Transform the blocks string to a list.
+  if blocks:
+    blocks = blocks.split(",")
 
   # Iterate through all libraries.
   for method, sets in streamData.items():
@@ -217,6 +209,7 @@
           range(datasetCount)] 
 
       col = 1
+      run = 0
       for libary in libraries:
         name = libary[0]
         datsets = libary[1]
@@ -224,70 +217,97 @@
         script = libary[3]
         format = libary[4]
 
-        Log.Info("Libary: " + name)
         header.append(name)
+        
+        if not blocks or name in blocks:
+          run += 1
+          Log.Info("Libary: " + name)
 
-        # Load script.
-        try:
-          module = Loader.ImportModuleFromPath(script)
-          methodCall = getattr(module, method)
-        except Exception as e:
-          Log.Fatal("Could not load the script: " + script)
-          Log.Fatal("Exception: " + str(e))
-          continue
-
-        for dataset in datsets:  
-          datasetName = NormalizeDatasetName(dataset)          
-          row = FindRightRow(dataMatrix, datasetName, datasetCount)      
-
-          dataMatrix[row][0] = NormalizeDatasetName(dataset)
-          Log.Info("Dataset: " + dataMatrix[row][0])    
-
-          modifiedDataset = GetDataset(dataset, format)
-
+          # Load script.
           try:
-            instance = methodCall(modifiedDataset[0], timeout=timeout, verbose=False)
+            module = Loader.ImportModuleFromPath(script)
+            methodCall = getattr(module, method)
           except Exception as e:
-            Log.Fatal("Could not call the constructor: " + script)
+            Log.Fatal("Could not load the script: " + script)
             Log.Fatal("Exception: " + str(e))
-            continue
+          else:
 
-          time = 0
-          for trial in range(trials + 1):
-            if trial > 0:
-              try:
-                time += instance.RunMethod(options);
+            for dataset in datsets:  
+              datasetName = NormalizeDatasetName(dataset)          
+              row = FindRightRow(dataMatrix, datasetName, datasetCount)      
+
+              dataMatrix[row][0] = NormalizeDatasetName(dataset)
+              Log.Info("Dataset: " + dataMatrix[row][0])    
+
+              modifiedDataset = GetDataset(dataset, format)
 
-                # Method unsuccessful.
-                if time < 0:
-                  break
+              try:
+                instance = methodCall(modifiedDataset[0], timeout=timeout, verbose=False)
               except Exception as e:
+                Log.Fatal("Could not call the constructor: " + script)
                 Log.Fatal("Exception: " + str(e))
+                continue
 
-          # Set time.
-          if time == -2:
-            dataMatrix[row][col] = ">" + str(timeout)
-          else:
-            dataMatrix[row][col] = "{0:.6f}".format(time / trials)
+              time = []
+              for trial in range(trials + 1):
+                if trial > 0:
+                  try:
+                    time.append(instance.RunMethod(options));
+
+                    # Method unsuccessful.
+                    if sum(time) < 0:
+                      break
+                  except Exception as e:
+                    Log.Fatal("Exception: " + str(e))
+
+              # Set time.
+              if sum(time) == -2:
+                dataMatrix[row][col] = ">" + str(timeout)
+              elif sum(time) == -1:
+                dataMatrix[row][col] = "failure"
+              else:
+                dataMatrix[row][col] = "{0:.6f}".format(sum(time) / trials)
+
+              # Save results in the logfile if the user asked for.
+              if log:
+                # Get the variance.
+                var = 0
+                if len(time) != 0:
+                  avg = sum(time) / len(time)
+                  var = sum((avg - value) ** 2 for value in time) / len(time)
+
+                logData = str(datetime.datetime.now()) + " : " + name + ":"
+                logData += method + ":" + options + ":" + dataMatrix[row][0] 
+                logData += ":" + dataMatrix[row][col] + ":" + str(var)
+                fid.write(logData + "\n")
 
-          # Remove temporary datasets.
-          RemoveDataset(modifiedDataset[1])
-          row += 1
+              # Remove temporary datasets.
+              RemoveDataset(modifiedDataset[1])
         col += 1
 
       # Show results in a table.
-      Log.Notice("\n\n")
-      Log.PrintTable(AddMatrixToTable(dataMatrix, table))
-      Log.Notice("\n\n")
+      if not log and run > 0:
+        Log.Notice("\n\n")
+        Log.PrintTable(AddMatrixToTable(dataMatrix, table))
+        Log.Notice("\n\n")
+        run = 0
+
+  # Close the logfile.
+  if log:
+    fid.close()
 
 if __name__ == '__main__':
   parser = argparse.ArgumentParser(description="""Perform the benchmark with the
       given config.""")
   parser.add_argument('-c','--config', help='Configuration file name.', 
       required=True)
+  parser.add_argument('-b','--blocks', help='Run only the specified blocks.', 
+      required=False)
+  parser.add_argument('-l','--log', help='Save the results in the logfile.', 
+      required=False, action='store_true')
 
   args = parser.parse_args()
 
   if args:
     SystemInformation()
-    Main(args.config)
+    Main(args.config, args.blocks, args.log)

Modified: mlpack/conf/jenkins-conf/benchmark/methods/mlpack/hmm_viterbi.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/mlpack/hmm_viterbi.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/mlpack/hmm_viterbi.py	Mon Jul 29 12:41:17 2013
@@ -13,9 +13,9 @@
 # Import the util path, this method even works if the path contains symlinks to
 # modules.
 cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
-	os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
 if cmd_subfolder not in sys.path:
-	sys.path.insert(0, cmd_subfolder)
+  sys.path.insert(0, cmd_subfolder)
 
 from log import *
 
@@ -30,125 +30,125 @@
 '''
 class HMMVITERBI(object):
 
-	''' 
-	Create the Hidden Markov Model Viterbi State Prediction benchmark instance, 
-	show some	informations and return the instance.
+  ''' 
+  Create the Hidden Markov Model Viterbi State Prediction benchmark instance, 
+  show some informations and return the instance.
   
   @param dataset - Input dataset to perform HMM Viterbi State Prediction on.
   @param timeout - The time until the timeout. Default no timeout.
   @param path - Path to the mlpack executable.
   @param verbose - Display informational messages.
-	'''
-	def __init__(self, dataset, timeout=0, path=os.environ["MLPACK_BIN"], 
-			verbose=True): 
-		self.verbose = verbose
-		self.dataset = dataset
-		self.path = path
-
-		# Get description from executable.
-		cmd = shlex.split(self.path + "hmm_viterbi -h")
-		try:
-			s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)	
-		except Exception as e:
-			Log.Fatal("Could not execute command: " + str(cmd))
-		else:
-			# Use regular expression pattern to get the description.
-			pattern = re.compile(br"""(.*?)Required.*?options:""", 
-					re.VERBOSE|re.MULTILINE|re.DOTALL)
-			
-			match = pattern.match(s)
-			if not match:
-				Log.Warn("Can't parse description", self.verbose)
-				description = ""
-			else:
-				description = match.group(1)
-			
-			self.description = description
-
-	'''
-	Destructor to clean up at the end. Use this method to remove created files.
-	'''
-	def __del__(self):		
-		Log.Info("Clean up.", self.verbose)
-		filelist = ["gmon.out", "output.csv"]
-		for f in filelist:
-			if os.path.isfile(f):
-				os.remove(f)				
+  '''
+  def __init__(self, dataset, timeout=0, path=os.environ["MLPACK_BIN"], 
+      verbose=True): 
+    self.verbose = verbose
+    self.dataset = dataset
+    self.path = path
+
+    # Get description from executable.
+    cmd = shlex.split(self.path + "hmm_viterbi -h")
+    try:
+      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False) 
+    except Exception as e:
+      Log.Fatal("Could not execute command: " + str(cmd))
+    else:
+      # Use regular expression pattern to get the description.
+      pattern = re.compile(br"""(.*?)Required.*?options:""", 
+          re.VERBOSE|re.MULTILINE|re.DOTALL)
+      
+      match = pattern.match(s)
+      if not match:
+        Log.Warn("Can't parse description", self.verbose)
+        description = ""
+      else:
+        description = match.group(1)
+      
+      self.description = description
+
+  '''
+  Destructor to clean up at the end. Use this method to remove created files.
+  '''
+  def __del__(self):    
+    Log.Info("Clean up.", self.verbose)
+    filelist = ["gmon.out", "output.csv"]
+    for f in filelist:
+      if os.path.isfile(f):
+        os.remove(f)        
 
-	'''
+  '''
   Perform Hidden Markov Model (HMM) Viterbi State Prediction. If the method the 
   has been successfully completed return the elapsed time in seconds.
 
   @param options - Extra options for the method.
   @return - Elapsed time in seconds or -1 if the method was not successful.
   '''
-	def RunMethod(self, options):
-		Log.Info("Perform HMM Viterbi State Prediction.", self.verbose)
-		
-		if len(self.dataset) == 2:
-			cmd = shlex.split(self.path + "hmm_viterbi -i " + self.dataset[0] + " -m " 
-					+ self.dataset[1] + " -v " + options)	
-		else:
-			Log.Fatal("Not enough input datasets.")
-			return -1
-
-		# Run command with the nessecary arguments and return its output as a byte
-		# string. We have untrusted input so we disables all shell based features.
-		try:
-			s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False, 
-					timeout=self.timeout)
-		except subprocess.TimeoutExpired as e:
+  def RunMethod(self, options):
+    Log.Info("Perform HMM Viterbi State Prediction.", self.verbose)
+    
+    if len(self.dataset) == 2:
+      cmd = shlex.split(self.path + "hmm_viterbi -i " + self.dataset[0] + " -m " 
+          + self.dataset[1] + " -v " + options) 
+    else:
+      Log.Fatal("Not enough input datasets.")
+      return -1
+
+    # Run command with the nessecary arguments and return its output as a byte
+    # string. We have untrusted input so we disables all shell based features.
+    try:
+      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False, 
+          timeout=self.timeout)
+    except subprocess.TimeoutExpired as e:
       Log.Warn(str(e))
       return -2
-		except Exception as e:
-			Log.Fatal("Could not execute command: " + str(cmd))
-			return -1
-
-		# Return the elapsed time.
-		timer = self.parseTimer(s)
-		if not timer:
-			Log.Fatal("Can't parse the timer")
-			return -1
-		else:
-			time = self.GetTime(timer)
-			Log.Info(("total time: %fs" % (time)), self.verbose)
-
-			return time
-
-	'''
-	Parse the timer data form a given string.
-
-	@param data - String to parse timer data from.
-	@return - Namedtuple that contains the timer data.
-	'''
-	def parseTimer(self, data):
-		# Compile the regular expression pattern into a regular expression object to
-		# parse the timer data.
-		pattern = re.compile(br"""
-				.*?loading_data: (?P<loading_data>.*?)s.*?
-				.*?saving_data: (?P<saving_data>.*?)s.*?
-				.*?total_time: (?P<total_time>.*?)s.*?
-				""", re.VERBOSE|re.MULTILINE|re.DOTALL)
-		
-		match = pattern.match(data)
-		if not match:
-			Log.Fatal("Can't parse the data: wrong format")
-			return -1
-		else:
-			# Create a namedtuple and return the timer data.
-			timer = collections.namedtuple("timer", ["loading_data", "saving_data", 
-					"total_time"])
-
-			return timer(float(match.group("loading_data")),
-					float(match.group("saving_data")),
-					float(match.group("total_time")))
-
-	'''
-	Return the elapsed time in seconds.
-
-	@param timer - Namedtuple that contains the timer data.
-	@return Elapsed time in seconds.
-	'''
-	def GetTime(self, timer):
-		time = timer.total_time - timer.loading_data - timer.saving_data
-		return time
+    except Exception as e:
+      Log.Fatal("Could not execute command: " + str(cmd))
+      return -1
+
+    # Return the elapsed time.
+    timer = self.parseTimer(s)
+    if not timer:
+      Log.Fatal("Can't parse the timer")
+      return -1
+    else:
+      time = self.GetTime(timer)
+      Log.Info(("total time: %fs" % (time)), self.verbose)
+
+      return time
+
+  '''
+  Parse the timer data form a given string.
+
+  @param data - String to parse timer data from.
+  @return - Namedtuple that contains the timer data.
+  '''
+  def parseTimer(self, data):
+    # Compile the regular expression pattern into a regular expression object to
+    # parse the timer data.
+    pattern = re.compile(br"""
+        .*?loading_data: (?P<loading_data>.*?)s.*?
+        .*?saving_data: (?P<saving_data>.*?)s.*?
+        .*?total_time: (?P<total_time>.*?)s.*?
+        """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+    
+    match = pattern.match(data)
+    if not match:
+      Log.Fatal("Can't parse the data: wrong format")
+      return -1
+    else:
+      # Create a namedtuple and return the timer data.
+      timer = collections.namedtuple("timer", ["loading_data", "saving_data", 
+          "total_time"])
+
+      return timer(float(match.group("loading_data")),
+          float(match.group("saving_data")),
+          float(match.group("total_time")))
+
+  '''
+  Return the elapsed time in seconds.
+
+  @param timer - Namedtuple that contains the timer data.
+  @return Elapsed time in seconds.
+  '''
+  def GetTime(self, timer):
+    time = timer.total_time - timer.loading_data - timer.saving_data
+    return time