[mlpack-svn] r15562 - in mlpack/conf/jenkins-conf/benchmark: . benchmark util

Fri Jul 26 12:26:32 EDT 2013

Author: marcus
Date: Fri Jul 26 12:26:32 2013
New Revision: 15562

Log:
Add new general block for general settings.

Modified:
   mlpack/conf/jenkins-conf/benchmark/Makefile
   mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
   mlpack/conf/jenkins-conf/benchmark/small_config.yaml
   mlpack/conf/jenkins-conf/benchmark/util/parser.py

Modified: mlpack/conf/jenkins-conf/benchmark/Makefile
==============================================================================

--- mlpack/conf/jenkins-conf/benchmark/Makefile	(original)
+++ mlpack/conf/jenkins-conf/benchmark/Makefile	Fri Jul 26 12:26:32 2013
@@ -5,12 +5,7 @@
 CONFIG := config.yaml
 BENCHMARKDDIR := benchmark
 
-# Export matlab path to execute matlab file in the methods directory.
-export MATLABPATH=methods/matlab/
-
 # Specify the path for the libraries.
-export MLPACK_BIN=/usr/local/bin/
-export MATLAB_BIN=/opt/matlab/bin/
 export WEKA_CLASSPATH=".:/opt/weka/weka-3-6-9:/opt/weka/weka-3-6-9/weka.jar"
 export SHOGUN_PATH=/opt/shogun/shogun-2.1.0-mod
 

Modified: mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py	Fri Jul 26 12:26:32 2013
@@ -37,7 +37,7 @@
 Normalize the dataset name. If the dataset is a list of datasets, take the first
 dataset as name. If necessary remove characters like '.', '_'.
 
-@para dataset - Dataset file or a list of datasets files.
+@param dataset - Dataset file or a list of datasets files.
 @return Normalized dataset name.
 '''
 def NormalizeDatasetName(dataset):
@@ -49,8 +49,8 @@
 '''
 Check if the file is available in one of the given formats.
 
-@para dataset - Datsets which should be checked.
-@para formats - List of supported file formats.
+@param dataset - Datsets which should be checked.
+@param formats - List of supported file formats.
 @return Orginal dataset or dataset with new file format.
 '''
 def CheckFileExtension(dataset, formats):
@@ -63,8 +63,8 @@
 '''
 Return a list with modified dataset.
 
-@para dataset - Datasets to be modified.
-@para format - List of file formats to be converted to.
+@param dataset - Datasets to be modified.
+@param format - List of file formats to be converted to.
 @return List of modified datasets.
 '''
 def GetDataset(dataset, format):
@@ -104,7 +104,7 @@
 '''
 This function Remove a given file or list of files.
 
-@para dataset - File or list of file which should be deleted.
+@param dataset - File or list of file which should be deleted.
 '''
 def RemoveDataset(dataset):
   if isinstance(dataset, str):
@@ -117,8 +117,8 @@
 '''
 Add all rows from a given matrix to a given table.
 
-@para matrix - 2D array contains the row.
-@para table - Table in which the rows are inserted.
+@param matrix - 2D array contains the row.
+@param table - Table in which the rows are inserted.
 @return Table with the inserted rows.
 '''
 def AddMatrixToTable(matrix, table):
@@ -129,7 +129,7 @@
 '''
 Count all datasets to determine the dataset size.
 
-@para libraries - Contains the Dataset List.
+@param libraries - Contains the Dataset List.
 @return Dataset count.
 '''
 def CountLibrariesDatasets(libraries):
@@ -146,9 +146,9 @@
 Search the correct row to insert the new data. We look at the left column for
 a free place or for the matching name.
 
-@para dataMatrix - In this Matrix we search for the right position.
-@para datasetName - Name of the dataset.
-@para datasetCount - Maximum dataset count.
+@param dataMatrix - In this Matrix we search for the right position.
+@param datasetName - Name of the dataset.
+@param datasetCount - Maximum dataset count.
 '''
 def FindRightRow(dataMatrix, datasetName, datasetCount):
   for row in range(datasetCount):
@@ -161,14 +161,30 @@
 
 @para configfile - Start the benchmark with this configuration file.
 '''
-def Main(configfile): 
+def Main(configfile):
+  # Benchmark settings.
+  timeout = 23000
 
   # Read Config.
   config = Parser(configfile, verbose=False)
   streamData = config.StreamMerge()
 
+  # Read the general block and set the attributes.
+  if "general" in streamData:
+    for key, value in streamData["general"]:
+      if key == "timeout":
+        timeout = value
+      elif key == "MLPACK_BIN":
+        os.environ["MLPACK_BIN"] = value
+      elif key == "MATLAB_BIN":
+        os.environ["MATLAB_BIN"] = value
+      elif key == "MATLABPATH":
+        os.environ["MATLABPATH"] = value
+
   # Iterate through all libraries.
   for method, sets in streamData.items():
+    if method == "general":
+      continue
     Log.Info("Method: " + method)    
     for options, libraries in sets.items():
       Log.Info('Options: ' + (options if options != '' else 'None'))
@@ -215,7 +231,7 @@
           modifiedDataset = GetDataset(dataset, format)
 
           try:
-            instance = methodCall(modifiedDataset[0], verbose=False)
+            instance = methodCall(modifiedDataset[0], timeout=timeout, verbose=False)
           except Exception as e:
             Log.Fatal("Could not call the constructor: " + script)
             Log.Fatal("Exception: " + str(e))

Modified: mlpack/conf/jenkins-conf/benchmark/small_config.yaml
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/small_config.yaml	(original)
+++ mlpack/conf/jenkins-conf/benchmark/small_config.yaml	Fri Jul 26 12:26:32 2013
@@ -1,9 +1,20 @@
+# Block for general settings.
+library: general
+settings:
+    # Time until a timeout in seconds.
+    timeout: 3
+
+    # Specify the path for the libraries.
+    MLPACK_BIN: '/usr/local/bin/'
+    MATLAB_BIN: '/opt/matlab/bin/'
+    MATLABPATH: 'methods/matlab/'
+---
 # MLPACK:
 # A Scalable C++  Machine Learning Library
 library: mlpack
 methods:
     PCA:
-        run: false
+        run: true
         iteration: 3
         script: methods/mlpack/pca.py
         format: [csv, txt]

Modified: mlpack/conf/jenkins-conf/benchmark/util/parser.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/util/parser.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/util/parser.py	Fri Jul 26 12:26:32 2013
@@ -63,6 +63,7 @@
   def GetConfigLibraryMethods(self):
     try:
       stream = next(self.streams)
+        
     except StopIteration as e:
       # We have to catch the exception to stop at the end. There exists no 
       # hasNext().
@@ -74,9 +75,12 @@
       libraryName = stream["library"]
       Log.Info("Library: " + libraryName, self.verbose)
 
-    attr = collections.namedtuple("attributes", ["libraryName", "methods"])
-      
-    return attr(libraryName, stream["methods"].items())
+    if stream["library"] == "general":
+      attr = collections.namedtuple("attributes", ["libraryName", "settings"])
+      return attr(libraryName, stream["settings"].items())
+    else:
+      attr = collections.namedtuple("attributes", ["libraryName", "methods"])      
+      return attr(libraryName, stream["methods"].items())
 
   '''
   This method return the attributes of a given method.
@@ -257,43 +261,44 @@
 
       if not "library" in stream:
         return self.KeyErrorMsg("library", streamNum)
-      elif not "methods" in stream:
-        return self.KeyErrorMsg("methods", streamNum)
-      else:
-        try:
-          for key, value in stream["methods"].items():
+      elif not "settings" in stream:
+        if not "methods" in stream:
+          return self.KeyErrorMsg("methods", streamNum)
+        else:
+          try:
+            for key, value in stream["methods"].items():
 
-            if not "script" in value:
-              return self.KeyErrorMsg("script", streamNum)              
+              if not "script" in value:
+                return self.KeyErrorMsg("script", streamNum)              
 
-            if not "format" in value:
-              return self.KeyErrorMsg("format", streamNum)
+              if not "format" in value:
+                return self.KeyErrorMsg("format", streamNum)
 
-            if not "run" in value:
-              self.KeyWarnMsg("run", streamNum)
+              if not "run" in value:
+                self.KeyWarnMsg("run", streamNum)
 
-            if not "iteration" in value:
-              self.KeyWarnMsg("iteration", streamNum)
+              if not "iteration" in value:
+                self.KeyWarnMsg("iteration", streamNum)
 
-            if "datasets" in value:
-              if not value["datasets"]:
-                return self.EmptyErrorMsg("datasets", streamNum)
-              else:
-                for dataset in value["datasets"]:
+              if "datasets" in value:
+                if not value["datasets"]:
+                  return self.EmptyErrorMsg("datasets", streamNum)
+                else:
+                  for dataset in value["datasets"]:
 
-                  if not self.CheckIfAvailable(dataset["files"]):
-                    return False
+                    if not self.CheckIfAvailable(dataset["files"]):
+                      return False
 
-                  if not "options" in dataset:
-                    self.KeyWarnMsg("options", streamNum)
-            else:
-              return self.KeyErrorMsg("datasets", streamNum)
+                    if not "options" in dataset:
+                      self.KeyWarnMsg("options", streamNum)
+              else:
+                return self.KeyErrorMsg("datasets", streamNum)
 
-            if not self.CheckIfCallable(key, value["script"]):
-              return self.CallableMethodErroMsg(key, value["script"], streamNum)
+              if not self.CheckIfCallable(key, value["script"]):
+                return self.CallableMethodErroMsg(key, value["script"], streamNum)
 
-        except AttributeError as e:
-          return self.KeyErrorMsg("methods", streamNum)
+          except AttributeError as e:
+            return self.KeyErrorMsg("methods", streamNum)
 
     Log.Info("Config file check: successful", self.verbose)
 
@@ -308,38 +313,40 @@
 
     # Iterate through all libraries.
     libraryMapping = self.GetConfigLibraryMethods()
-    while libraryMapping: 
-      # Iterate through all methods.
-      methodMapping = self.GetConfigMethod(libraryMapping.methods)
-      while methodMapping and libraryMapping:
-        # Collect data only from method with run value = true.
-        if methodMapping.run:
-          for dataset in methodMapping.datasets:     
-
-            if methodMapping.methodName in streamData:
-              tempDict = streamData[methodMapping.methodName]
-
-              if dataset["options"] in tempDict:              
-                t = (libraryMapping.libraryName, dataset["files"], 
-                  methodMapping.iteration, methodMapping.script, 
-                  methodMapping.format)  
-                tempDict[dataset["options"]].append(t)          
+    while libraryMapping:
+      if libraryMapping.libraryName == "general":
+        streamData["general"] = libraryMapping.settings
+      else:
+        # Iterate through all methods.
+        methodMapping = self.GetConfigMethod(libraryMapping.methods)
+        while methodMapping and libraryMapping:
+          # Collect data only from method with run value = true.
+          if methodMapping.run:
+            for dataset in methodMapping.datasets:     
+
+              if methodMapping.methodName in streamData:
+                tempDict = streamData[methodMapping.methodName]
+
+                if dataset["options"] in tempDict:              
+                  t = (libraryMapping.libraryName, dataset["files"], 
+                    methodMapping.iteration, methodMapping.script, 
+                    methodMapping.format)  
+                  tempDict[dataset["options"]].append(t)          
+                else:
+                  t = (libraryMapping.libraryName, dataset["files"], 
+                    methodMapping.iteration, methodMapping.script, 
+                    methodMapping.format)            
+                  tempDict[dataset["options"]] = [t]
               else:
+                d = {}
                 t = (libraryMapping.libraryName, dataset["files"], 
                   methodMapping.iteration, methodMapping.script, 
                   methodMapping.format)            
-                tempDict[dataset["options"]] = [t]
-            else:
-              d = {}
-              t = (libraryMapping.libraryName, dataset["files"], 
-                methodMapping.iteration, methodMapping.script, 
-                methodMapping.format)            
-              d[dataset["options"]] = [t]
-              streamData[methodMapping.methodName] = d          
+                d[dataset["options"]] = [t]
+                streamData[methodMapping.methodName] = d          
 
-        methodMapping = self.GetConfigMethod(libraryMapping.methods)
+          methodMapping = self.GetConfigMethod(libraryMapping.methods)
       libraryMapping = self.GetConfigLibraryMethods()
       self.mc = 0
 
     return streamData
-