[mlpack-svn] r15573 - in mlpack/conf/jenkins-conf/benchmark: . benchmark methods/mlpack
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Jul 29 12:41:18 EDT 2013
Author: marcus
Date: Mon Jul 29 12:41:17 2013
New Revision: 15573
Log:
Move PYTHONPATH and LD_LIBRARY_PATH to the Makefile and add logfile support.
Modified:
mlpack/conf/jenkins-conf/benchmark/Makefile
mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
mlpack/conf/jenkins-conf/benchmark/methods/mlpack/hmm_viterbi.py
Modified: mlpack/conf/jenkins-conf/benchmark/Makefile
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/Makefile (original)
+++ mlpack/conf/jenkins-conf/benchmark/Makefile Mon Jul 29 12:41:17 2013
@@ -6,8 +6,13 @@
BENCHMARKDDIR := benchmark
# Specify the path for the libraries.
+export MLPACK_BIN=/usr/local/bin/
+export MATLAB_BIN=/opt/matlab/bin/
+export MATLABPATH=methods/matlab/
export WEKA_CLASSPATH=".:/opt/weka/weka-3-6-9:/opt/weka/weka-3-6-9/weka.jar"
export SHOGUN_PATH=/opt/shogun/shogun-2.1.0-mod
+export PYTHONPATH=/opt/scikit-learn/scikit-learn-0.13.1/lib/python3.3/site-packages/:/opt/mlpy/mlpy-3.5.0/lib/python3.3/site-packages/:/opt/shogun/shogun-2.1.0/lib/python3.3/dist-packages/
+export LD_LIBRARY_PATH=/opt/shogun/shogun-2.1.0/lib/
ifeq ($(PYTHON_VERSION), 0)
$(error Python version 2.7 required which was not found)
Modified: mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py Mon Jul 29 12:41:17 2013
@@ -21,6 +21,7 @@
from convert import *
import argparse
+import datetime
'''
Show system informations. Are there no data available, the value is 'N/A'.
@@ -161,40 +162,31 @@
@para configfile - Start the benchmark with this configuration file.
'''
-def Main(configfile):
+def Main(configfile, blocks, log):
# Benchmark settings.
timeout = 23000
+ logfile = "results.log"
# Read Config.
config = Parser(configfile, verbose=False)
streamData = config.StreamMerge()
# Read the general block and set the attributes.
+
if "general" in streamData:
for key, value in streamData["general"]:
if key == "timeout":
timeout = value
- elif key == "MLPACK_BIN":
- os.environ["MLPACK_BIN"] = value
- elif key == "MATLAB_BIN":
- os.environ["MATLAB_BIN"] = value
- elif key == "MATLABPATH":
- os.environ["MATLABPATH"] = value
- elif key == "PYTHONPATH":
- try:
- PYTHONPATH = os.environ["PYTHONPATH"]
- except KeyError:
- os.environ["PYTHONPATH"] = value
- else:
- os.environ["PYTHONPATH"] = PYTHONPATH + ":" + value
- elif key == "LD_LIBRARY_PATH":
- try:
- LD_LIBRARY_PATH = os.environ["LD_LIBRARY_PATH"]
- except KeyError:
- os.environ["LD_LIBRARY_PATH"] = value
- else:
- os.environ["LD_LIBRARY_PATH"] = LD_LIBRARY_PATH + ":" + value
+ if key == "logfile":
+ logfile = value
+ # Open logfile if the user asked for.
+ if log:
+ fid = open(logfile, "a")
+
+ # Transform the blocks string to a list.
+ if blocks:
+ blocks = blocks.split(",")
# Iterate through all libraries.
for method, sets in streamData.items():
@@ -217,6 +209,7 @@
range(datasetCount)]
col = 1
+ run = 0
for libary in libraries:
name = libary[0]
datsets = libary[1]
@@ -224,70 +217,97 @@
script = libary[3]
format = libary[4]
- Log.Info("Libary: " + name)
header.append(name)
+
+ if not blocks or name in blocks:
+ run += 1
+ Log.Info("Libary: " + name)
- # Load script.
- try:
- module = Loader.ImportModuleFromPath(script)
- methodCall = getattr(module, method)
- except Exception as e:
- Log.Fatal("Could not load the script: " + script)
- Log.Fatal("Exception: " + str(e))
- continue
-
- for dataset in datsets:
- datasetName = NormalizeDatasetName(dataset)
- row = FindRightRow(dataMatrix, datasetName, datasetCount)
-
- dataMatrix[row][0] = NormalizeDatasetName(dataset)
- Log.Info("Dataset: " + dataMatrix[row][0])
-
- modifiedDataset = GetDataset(dataset, format)
-
+ # Load script.
try:
- instance = methodCall(modifiedDataset[0], timeout=timeout, verbose=False)
+ module = Loader.ImportModuleFromPath(script)
+ methodCall = getattr(module, method)
except Exception as e:
- Log.Fatal("Could not call the constructor: " + script)
+ Log.Fatal("Could not load the script: " + script)
Log.Fatal("Exception: " + str(e))
- continue
+ else:
- time = 0
- for trial in range(trials + 1):
- if trial > 0:
- try:
- time += instance.RunMethod(options);
+ for dataset in datsets:
+ datasetName = NormalizeDatasetName(dataset)
+ row = FindRightRow(dataMatrix, datasetName, datasetCount)
+
+ dataMatrix[row][0] = NormalizeDatasetName(dataset)
+ Log.Info("Dataset: " + dataMatrix[row][0])
+
+ modifiedDataset = GetDataset(dataset, format)
- # Method unsuccessful.
- if time < 0:
- break
+ try:
+ instance = methodCall(modifiedDataset[0], timeout=timeout, verbose=False)
except Exception as e:
+ Log.Fatal("Could not call the constructor: " + script)
Log.Fatal("Exception: " + str(e))
+ continue
- # Set time.
- if time == -2:
- dataMatrix[row][col] = ">" + str(timeout)
- else:
- dataMatrix[row][col] = "{0:.6f}".format(time / trials)
+ time = []
+ for trial in range(trials + 1):
+ if trial > 0:
+ try:
+ time.append(instance.RunMethod(options));
+
+ # Method unsuccessful.
+ if sum(time) < 0:
+ break
+ except Exception as e:
+ Log.Fatal("Exception: " + str(e))
+
+ # Set time.
+ if sum(time) == -2:
+ dataMatrix[row][col] = ">" + str(timeout)
+ elif sum(time) == -1:
+ dataMatrix[row][col] = "failure"
+ else:
+ dataMatrix[row][col] = "{0:.6f}".format(sum(time) / trials)
+
+ # Save results in the logfile if the user asked for.
+ if log:
+ # Get the variance.
+ var = 0
+ if len(time) != 0:
+ avg = sum(time) / len(time)
+ var = sum((avg - value) ** 2 for value in time) / len(time)
+
+ logData = str(datetime.datetime.now()) + " : " + name + ":"
+ logData += method + ":" + options + ":" + dataMatrix[row][0]
+ logData += ":" + dataMatrix[row][col] + ":" + str(var)
+ fid.write(logData + "\n")
- # Remove temporary datasets.
- RemoveDataset(modifiedDataset[1])
- row += 1
+ # Remove temporary datasets.
+ RemoveDataset(modifiedDataset[1])
col += 1
# Show results in a table.
- Log.Notice("\n\n")
- Log.PrintTable(AddMatrixToTable(dataMatrix, table))
- Log.Notice("\n\n")
+ if not log and run > 0:
+ Log.Notice("\n\n")
+ Log.PrintTable(AddMatrixToTable(dataMatrix, table))
+ Log.Notice("\n\n")
+ run = 0
+
+ # Close the logfile.
+ if log:
+ fid.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="""Perform the benchmark with the
given config.""")
parser.add_argument('-c','--config', help='Configuration file name.',
required=True)
+ parser.add_argument('-b','--blocks', help='Run only the specified blocks.',
+ required=False)
+ parser.add_argument('-l','--log', help='Save the results in the logfile.',
+ required=False, action='store_true')
args = parser.parse_args()
if args:
SystemInformation()
- Main(args.config)
+ Main(args.config, args.blocks, args.log)
Modified: mlpack/conf/jenkins-conf/benchmark/methods/mlpack/hmm_viterbi.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/mlpack/hmm_viterbi.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/mlpack/hmm_viterbi.py Mon Jul 29 12:41:17 2013
@@ -13,9 +13,9 @@
# Import the util path, this method even works if the path contains symlinks to
# modules.
cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
- os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+ os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
if cmd_subfolder not in sys.path:
- sys.path.insert(0, cmd_subfolder)
+ sys.path.insert(0, cmd_subfolder)
from log import *
@@ -30,125 +30,125 @@
'''
class HMMVITERBI(object):
- '''
- Create the Hidden Markov Model Viterbi State Prediction benchmark instance,
- show some informations and return the instance.
+ '''
+ Create the Hidden Markov Model Viterbi State Prediction benchmark instance,
+ show some informations and return the instance.
@param dataset - Input dataset to perform HMM Viterbi State Prediction on.
@param timeout - The time until the timeout. Default no timeout.
@param path - Path to the mlpack executable.
@param verbose - Display informational messages.
- '''
- def __init__(self, dataset, timeout=0, path=os.environ["MLPACK_BIN"],
- verbose=True):
- self.verbose = verbose
- self.dataset = dataset
- self.path = path
-
- # Get description from executable.
- cmd = shlex.split(self.path + "hmm_viterbi -h")
- try:
- s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
- except Exception as e:
- Log.Fatal("Could not execute command: " + str(cmd))
- else:
- # Use regular expression pattern to get the description.
- pattern = re.compile(br"""(.*?)Required.*?options:""",
- re.VERBOSE|re.MULTILINE|re.DOTALL)
-
- match = pattern.match(s)
- if not match:
- Log.Warn("Can't parse description", self.verbose)
- description = ""
- else:
- description = match.group(1)
-
- self.description = description
-
- '''
- Destructor to clean up at the end. Use this method to remove created files.
- '''
- def __del__(self):
- Log.Info("Clean up.", self.verbose)
- filelist = ["gmon.out", "output.csv"]
- for f in filelist:
- if os.path.isfile(f):
- os.remove(f)
+ '''
+ def __init__(self, dataset, timeout=0, path=os.environ["MLPACK_BIN"],
+ verbose=True):
+ self.verbose = verbose
+ self.dataset = dataset
+ self.path = path
+
+ # Get description from executable.
+ cmd = shlex.split(self.path + "hmm_viterbi -h")
+ try:
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+ except Exception as e:
+ Log.Fatal("Could not execute command: " + str(cmd))
+ else:
+ # Use regular expression pattern to get the description.
+ pattern = re.compile(br"""(.*?)Required.*?options:""",
+ re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+ match = pattern.match(s)
+ if not match:
+ Log.Warn("Can't parse description", self.verbose)
+ description = ""
+ else:
+ description = match.group(1)
+
+ self.description = description
+
+ '''
+ Destructor to clean up at the end. Use this method to remove created files.
+ '''
+ def __del__(self):
+ Log.Info("Clean up.", self.verbose)
+ filelist = ["gmon.out", "output.csv"]
+ for f in filelist:
+ if os.path.isfile(f):
+ os.remove(f)
- '''
+ '''
Perform Hidden Markov Model (HMM) Viterbi State Prediction. If the method the
has been successfully completed return the elapsed time in seconds.
@param options - Extra options for the method.
@return - Elapsed time in seconds or -1 if the method was not successful.
'''
- def RunMethod(self, options):
- Log.Info("Perform HMM Viterbi State Prediction.", self.verbose)
-
- if len(self.dataset) == 2:
- cmd = shlex.split(self.path + "hmm_viterbi -i " + self.dataset[0] + " -m "
- + self.dataset[1] + " -v " + options)
- else:
- Log.Fatal("Not enough input datasets.")
- return -1
-
- # Run command with the nessecary arguments and return its output as a byte
- # string. We have untrusted input so we disables all shell based features.
- try:
- s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False,
- timeout=self.timeout)
- except subprocess.TimeoutExpired as e:
+ def RunMethod(self, options):
+ Log.Info("Perform HMM Viterbi State Prediction.", self.verbose)
+
+ if len(self.dataset) == 2:
+ cmd = shlex.split(self.path + "hmm_viterbi -i " + self.dataset[0] + " -m "
+ + self.dataset[1] + " -v " + options)
+ else:
+ Log.Fatal("Not enough input datasets.")
+ return -1
+
+ # Run command with the nessecary arguments and return its output as a byte
+ # string. We have untrusted input so we disables all shell based features.
+ try:
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False,
+ timeout=self.timeout)
+ except subprocess.TimeoutExpired as e:
Log.Warn(str(e))
return -2
- except Exception as e:
- Log.Fatal("Could not execute command: " + str(cmd))
- return -1
-
- # Return the elapsed time.
- timer = self.parseTimer(s)
- if not timer:
- Log.Fatal("Can't parse the timer")
- return -1
- else:
- time = self.GetTime(timer)
- Log.Info(("total time: %fs" % (time)), self.verbose)
-
- return time
-
- '''
- Parse the timer data form a given string.
-
- @param data - String to parse timer data from.
- @return - Namedtuple that contains the timer data.
- '''
- def parseTimer(self, data):
- # Compile the regular expression pattern into a regular expression object to
- # parse the timer data.
- pattern = re.compile(br"""
- .*?loading_data: (?P<loading_data>.*?)s.*?
- .*?saving_data: (?P<saving_data>.*?)s.*?
- .*?total_time: (?P<total_time>.*?)s.*?
- """, re.VERBOSE|re.MULTILINE|re.DOTALL)
-
- match = pattern.match(data)
- if not match:
- Log.Fatal("Can't parse the data: wrong format")
- return -1
- else:
- # Create a namedtuple and return the timer data.
- timer = collections.namedtuple("timer", ["loading_data", "saving_data",
- "total_time"])
-
- return timer(float(match.group("loading_data")),
- float(match.group("saving_data")),
- float(match.group("total_time")))
-
- '''
- Return the elapsed time in seconds.
-
- @param timer - Namedtuple that contains the timer data.
- @return Elapsed time in seconds.
- '''
- def GetTime(self, timer):
- time = timer.total_time - timer.loading_data - timer.saving_data
- return time
+ except Exception as e:
+ Log.Fatal("Could not execute command: " + str(cmd))
+ return -1
+
+ # Return the elapsed time.
+ timer = self.parseTimer(s)
+ if not timer:
+ Log.Fatal("Can't parse the timer")
+ return -1
+ else:
+ time = self.GetTime(timer)
+ Log.Info(("total time: %fs" % (time)), self.verbose)
+
+ return time
+
+ '''
+ Parse the timer data form a given string.
+
+ @param data - String to parse timer data from.
+ @return - Namedtuple that contains the timer data.
+ '''
+ def parseTimer(self, data):
+ # Compile the regular expression pattern into a regular expression object to
+ # parse the timer data.
+ pattern = re.compile(br"""
+ .*?loading_data: (?P<loading_data>.*?)s.*?
+ .*?saving_data: (?P<saving_data>.*?)s.*?
+ .*?total_time: (?P<total_time>.*?)s.*?
+ """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+ match = pattern.match(data)
+ if not match:
+ Log.Fatal("Can't parse the data: wrong format")
+ return -1
+ else:
+ # Create a namedtuple and return the timer data.
+ timer = collections.namedtuple("timer", ["loading_data", "saving_data",
+ "total_time"])
+
+ return timer(float(match.group("loading_data")),
+ float(match.group("saving_data")),
+ float(match.group("total_time")))
+
+ '''
+ Return the elapsed time in seconds.
+
+ @param timer - Namedtuple that contains the timer data.
+ @return Elapsed time in seconds.
+ '''
+ def GetTime(self, timer):
+ time = timer.total_time - timer.loading_data - timer.saving_data
+ return time
More information about the mlpack-svn mailing list