[mlpack-svn] r15451 - in mlpack/conf/jenkins-conf/benchmark: benchmark util
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Jul 11 10:53:59 EDT 2013
Author: marcus
Date: Thu Jul 11 10:53:59 2013
New Revision: 15451
Log:
Add function to merge streams and use this function in the main benchmark routine.
Modified:
mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
mlpack/conf/jenkins-conf/benchmark/util/parser.py
Modified: mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py Thu Jul 11 10:53:59 2013
@@ -5,7 +5,6 @@
Perform the timing benchmark.
'''
-
import os, sys, inspect
# Import the util path, this method even works if the path contains
@@ -34,74 +33,84 @@
Log.Info('CPU Cores: ' + SystemInfo.GetCPUCores())
'''
+Normalize the dataset name. If the dataset is a list of datasets, take the first
+dataset as name. If necessary remove characters like '.', '_'.
+'''
+def NormalizeDatasetName(dataset):
+ if not isinstance(dataset, basestring):
+ return os.path.splitext(os.path.basename(dataset[0]))[0]
+ else:
+ return os.path.splitext(os.path.basename(dataset))[0]
+
+def AddMatrixToTable(matrix, table):
+ for row in matrix:
+ table.append(row)
+ return table
+
+'''
Start the main benchmark routine. The method shows some DEBUG information and
prints a table with the runtime information.
'''
-def Main(configfile):
+def Main(configfile):
+
# Read Config.
config = Parser(configfile, verbose=False)
+ streamData = config.StreamMerge()
# Iterate through all libraries.
- libraryMapping = config.GetConfigLibraryMethods()
- while libraryMapping:
+ for method, sets in streamData.items():
+ Log.Info("Method: " + method)
+ for options, libraries in sets.items():
+ Log.Info('Options: ' + (options if options != '' else 'None'))
+
+ # Create the Table.
+ table = []
+ header = ['']
+ table.append(header)
+
+ # Count the Datasets.
+ datasetCount = 0
+ for libary in libraries:
+ datasetCount = max(datasetCount, len(libary[1]))
+
+ # Create the matrix which contains the time and dataset informations.
+ dataMatrix = [['-' for x in xrange(len(libraries) + 1)] for x in
+ xrange(datasetCount)]
+
+ col = 1
+ for libary in libraries:
+ name = libary[0]
+ datsets = libary[1]
+ trials = libary[2]
+ script = libary[3]
- # Iterate through all methods.
- methodMapping = config.GetConfigMethod(libraryMapping.methods)
- while methodMapping and libraryMapping:
-
- if methodMapping.run:
-
- Log.Info('Method: ' + methodMapping.methodName)
+ Log.Info("Libary: " + name)
+ header.append(name)
# Load script.
- module = Loader.ImportModuleFromPath(methodMapping.script)
- methodCall = getattr(module, methodMapping.methodName)
-
- for dataset in methodMapping.datasets:
-
- #! TEMPORARY
- # Create table.
- table = []
- # set table header.
- header = ['', libraryMapping.libraryName, 'matlab', 'shougun']
- table.append(header)
-
- Log.Info('Options: ' + (dataset["options"] if dataset["options"] != ''
- else 'None'))
-
- for files in dataset["files"]:
-
- row = ['-'] * 4;
- # Get dataset name.
- if not isinstance(files, basestring):
- row[0] = os.path.splitext(os.path.basename(files[0]))[0]
- else:
- row[0] = os.path.splitext(os.path.basename(files))[0]
-
- if row[0].count('_') != 0:
- row[0] = row[0].split("_")[0]
-
- Log.Info('Dataset: ' + row[0])
-
- time = 0
- for num in range(methodMapping.iteration):
- instance = methodCall(files, verbose=False)
- time += instance.RunMethod(dataset["options"]);
-
- # Call the destructor.
- del instance
-
- # Set time.
- row[1] = time / methodMapping.iteration
- table.append(row)
-
- # Show results in a table.
- Log.Notice('')
- Log.PrintTable(table)
- Log.Notice('')
+ module = Loader.ImportModuleFromPath(script)
+ methodCall = getattr(module, method)
- methodMapping = config.GetConfigMethod(libraryMapping.methods)
- libraryMapping = config.GetConfigLibraryMethods()
+ row = 0
+ for dataset in datsets:
+ dataMatrix[row][0] = NormalizeDatasetName(dataset)
+ Log.Info("Dataset: " + dataMatrix[row][0])
+
+ time = 0
+ for trial in range(trials + 1):
+ instance = methodCall(dataset, verbose=False)
+ if trial > 0:
+ time += instance.RunMethod(options);
+
+ # Set time.
+ dataMatrix[row][col] = "{0:.6f}".format(time / trials)
+ row += 1
+ col += 1
+
+ # Show results in a table.
+ Log.Notice("\n\n")
+ Log.PrintTable(AddMatrixToTable(dataMatrix, table))
+ Log.Notice("\n\n")
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="""Perform the benchmark with the
Modified: mlpack/conf/jenkins-conf/benchmark/util/parser.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/util/parser.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/util/parser.py Thu Jul 11 10:53:59 2013
@@ -12,9 +12,9 @@
# Import the util path, this method even works if the path contains symlinks to
# modules.
cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
- os.path.split(inspect.getfile(inspect.currentframe()))[0], "util")))
+ os.path.split(inspect.getfile(inspect.currentframe()))[0], "util")))
if cmd_subfolder not in sys.path:
- sys.path.insert(0, cmd_subfolder)
+ sys.path.insert(0, cmd_subfolder)
from log import *
from loader import *
@@ -27,270 +27,314 @@
'''
class Parser(object):
- '''
- Create the parser instance and load the config file.
+ '''
+ Create the parser instance and load the config file.
- @param config - Contains the config path and config name.
- @param verbose - Display informational messages.
- '''
- def __init__(self, config, verbose=True):
- self.verbose = verbose
- self.config = config
-
- # Default values.
- self.RUN = True
- self.ITERATION = 1
- self.OPTIONS = ''
-
- try:
- Log.Info("Load config file: " + config, verbose)
- streams = yaml.load_all(open(config))
- self.streams = streams
-
- except IOError, e:
- Log.Fatal("Could not load config file: " + config)
- except yaml.YAMLError, exc:
- if hasattr(exc, "problem_mark"):
- mark = exc.problem_mark
- Log.Fatal("Error at position: (%s:%s)" % (mark.line+1, mark.column+1))
-
- '''
- This method return the library informations.
-
- @return Library name, methods attributes
- '''
- def GetConfigLibraryMethods(self):
- try:
- stream = self.streams.next()
- except StopIteration, e:
- # We have to catch the exception to stop at the end. There exists no
- # hasNext().
- return False
-
- if not stream.has_key("library"):
- return self.KeyErrorMsg("library", streamNum)
- else:
- libraryName = stream["library"]
- Log.Info("Library: " + libraryName, self.verbose)
-
- attr = collections.namedtuple("attributes", ["libraryName", "methods"])
-
- return attr(libraryName, stream["methods"].iteritems())
-
- '''
- This method return the attributes of a given method.
-
- @param methods - Contains the methods attributes.
- '''
- def GetConfigMethod(self, methods):
- try:
- method = methods.next()
- except StopIteration, e:
- # We have to catch the exception to stop at the end. There exists no
- # hasNext().
- return False
-
- methodName = method[0]
- Log.Info("Method: " + methodName, self.verbose)
-
- attributes = method[1]
-
- # First check the required attributes.
- if attributes.has_key("script"):
- script = attributes["script"]
- Log.Info("Script: " + script, self.verbose)
- else:
- return self.KeyErrorMsg("script")
-
- if attributes.has_key("format"):
- format = attributes["format"]
- Log.Info("Format: " + str(format), self.verbose)
- else:
- return self.gKeyErrorMsg('format')
-
- if attributes.has_key("datasets"):
- datasets = attributes['datasets']
- for dataset in datasets:
- Log.Info("Dataset: " + str(dataset["files"]), self.verbose)
- if not dataset.has_key("options"):
- dataset["options"] = self.OPTIONS
-
- else:
- return self.KeyErrorMsg("datasets")
-
- # Check the optional attributes.
- if attributes.has_key("run"):
- run = attributes["run"]
- Log.Info("Run: " + str(run), self.verbose)
- else:
- self.KeyWarnMsg("run")
- run = self.RUN
-
- if attributes.has_key("iteration"):
- iteration = attributes["iteration"]
- Log.Info("Iteration: " + str(iteration), self.verbose)
- else:
- self.KeyWarnMsg("iteration")
- iteration = self.ITERATION
-
- attr = collections.namedtuple("attributes", ["methodName", "script",
- "format", "datasets", "run", "iteration"])
-
- return attr(methodName, script, format, datasets, run, iteration)
-
- '''
- Show a key error message.
-
- @return False
- '''
- def ConfigKeyErrorMsg(self, key, streamNum = 0):
- if streamNum == 0:
- Log.Fatal("No [" + key + "] key.")
- else:
- Log.Fatal("Stream number: " + str(streamNum) + " has no [" + key +
- "] key.")
-
- return False
-
- '''
- Show a emtpy value error message.
-
- @return False
- '''
- def EmptyErrorMsg(self, key, streamNum):
- Log.Fatal("Stream number: " + str(streamNum) + " the [" + key +
- "] list is empty.")
- return False
-
- '''
- Show a value is not set warn message.
- '''
- def KeyWarnMsg(self, key, streamNum = 0):
- if streamNum == 0:
- Log.Warn("No [" + key + "] key, use default value.", self.verbose)
- else:
- Log.Warn("Stream number: " + str(streamNum) + " has no [" + key +
- "] key, use default value.", self.verbose)
-
- '''
- Show a method is not callable error message.
-
- @return False
- '''
- def CallableMethodErroMsg(self, methodName, methodScript, streamNum):
- Log.Fatal("Stream number: " + str(streamNum) + " the method: " + methodName
- + " in script: " + methodScript + " is not callable.")
- return False
-
- '''
- Show a file not available error message.
-
- @ return False
- '''
- def NotAvailableErrorMsg(self, fileName):
- Log.Fatal("The file: " + fileName + " is not available.")
- return False
-
- '''
- This function check if a script have the necessary class and the RunMethod
- function.
-
- @param methodName - Contains the method name.
- @param methodScript - Contains the script path with the script name.
- @return False in case the script dosen't exist or the RunMethod method is not
- available otherwise True.
- '''
- def CheckIfCallable(self, methodName, methodScript):
- try:
- with open(methodScript): pass
- except IOError:
- return False
-
- module = Loader.ImportModuleFromPath(methodScript)
- methodClass = getattr(module, methodName, None)
- if callable(methodClass):
- if getattr(methodClass, "RunMethod", None):
- return True
-
- return False
-
- '''
- This function checks if a file is readable.
-
- @return The function returns True if the file is readable otherwise false.
- '''
- def CheckIfAvailable(self, files):
- def CheckDataset(dataset):
- try:
- with open(dataset): pass
- except IOError:
- return self.NotAvailableErrorMsg(datasets)
- return True
-
- for datasets in files:
- # Check if the value datasets is a list of datasets.
- if not isinstance(datasets, basestring):
- for dataset in datasets:
- if not CheckDataset(dataset):
- return False
- else:
- if not CheckDataset(datasets):
- return False
-
- return True
-
- '''
- This function checks the config attributes and keys. The function checks also,
- if the script is runable and if the datasets are readable.
-
- @return The function returns False if the config file is not correct and the
- function shows some information to adjust the config. If the config is correct
- the function prints a successful message.
- '''
- def CheckConfig(self):
- Log.Info("Check config file: " + self.config, self.verbose)
- streamNum = 0
- for stream in self.streams:
- streamNum += 1
-
- if not stream.has_key("library"):
- return self.KeyErrorMsg("library", streamNum)
- elif not stream.has_key("methods"):
- return self.KeyErrorMsg("methods", streamNum)
- else:
- try:
- for key, value in stream["methods"].iteritems():
-
- if not value.has_key("script"):
- return self.KeyErrorMsg("script", streamNum)
-
- if not value.has_key("format"):
- return self.KeyErrorMsg("format", streamNum)
-
- if not value.has_key("run"):
- self.KeyWarnMsg("run", streamNum)
-
- if not value.has_key("iteration"):
- self.KeyWarnMsg("iteration", streamNum)
-
- if value.has_key("datasets"):
- if not value["datasets"]:
- return self.EmptyErrorMsg("datasets", streamNum)
- else:
- for dataset in value["datasets"]:
-
- if not self.CheckIfAvailable(dataset["files"]):
- return False
-
- if not dataset.has_key("options"):
- self.KeyWarnMsg("options", streamNum)
- else:
- return self.KeyErrorMsg("datasets", streamNum)
+ @param config - Contains the config path and config name.
+ @param verbose - Display informational messages.
+ '''
+ def __init__(self, config, verbose=True):
+ self.verbose = verbose
+ self.config = config
+
+ # Default values.
+ self.RUN = True
+ self.ITERATION = 1
+ self.OPTIONS = ''
+
+ try:
+ Log.Info("Load config file: " + config, verbose)
+ streams = yaml.load_all(open(config))
+ self.streams = streams
+
+ except IOError, e:
+ Log.Fatal("Could not load config file: " + config)
+ except yaml.YAMLError, exc:
+ if hasattr(exc, "problem_mark"):
+ mark = exc.problem_mark
+ Log.Fatal("Error at position: (%s:%s)" % (mark.line+1, mark.column+1))
+
+ '''
+ This method return the library informations.
+
+ @return Library name, methods attributes
+ '''
+ def GetConfigLibraryMethods(self):
+ try:
+ stream = self.streams.next()
+ except StopIteration, e:
+ # We have to catch the exception to stop at the end. There exists no
+ # hasNext().
+ return False
+
+ if not stream.has_key("library"):
+ return self.KeyErrorMsg("library", streamNum)
+ else:
+ libraryName = stream["library"]
+ Log.Info("Library: " + libraryName, self.verbose)
+
+ attr = collections.namedtuple("attributes", ["libraryName", "methods"])
+
+ return attr(libraryName, stream["methods"].iteritems())
+
+ '''
+ This method return the attributes of a given method.
+
+ @param methods - Contains the methods attributes.
+ '''
+ def GetConfigMethod(self, methods):
+ try:
+ method = methods.next()
+ except StopIteration, e:
+ # We have to catch the exception to stop at the end. There exists no
+ # hasNext().
+ return False
+
+ methodName = method[0]
+ Log.Info("Method: " + methodName, self.verbose)
+
+ attributes = method[1]
+
+ # First check the required attributes.
+ if attributes.has_key("script"):
+ script = attributes["script"]
+ Log.Info("Script: " + script, self.verbose)
+ else:
+ return self.KeyErrorMsg("script")
+
+ if attributes.has_key("format"):
+ format = attributes["format"]
+ Log.Info("Format: " + str(format), self.verbose)
+ else:
+ return self.gKeyErrorMsg('format')
+
+ if attributes.has_key("datasets"):
+ datasets = attributes['datasets']
+ for dataset in datasets:
+ Log.Info("Dataset: " + str(dataset["files"]), self.verbose)
+ if not dataset.has_key("options"):
+ dataset["options"] = self.OPTIONS
+
+ else:
+ return self.KeyErrorMsg("datasets")
+
+ # Check the optional attributes.
+ if attributes.has_key("run"):
+ run = attributes["run"]
+ Log.Info("Run: " + str(run), self.verbose)
+ else:
+ self.KeyWarnMsg("run")
+ run = self.RUN
+
+ if attributes.has_key("iteration"):
+ iteration = attributes["iteration"]
+ Log.Info("Iteration: " + str(iteration), self.verbose)
+ else:
+ self.KeyWarnMsg("iteration")
+ iteration = self.ITERATION
+
+ attr = collections.namedtuple("attributes", ["methodName", "script",
+ "format", "datasets", "run", "iteration"])
+
+ return attr(methodName, script, format, datasets, run, iteration)
+
+ '''
+ Show a key error message.
+
+ @return False
+ '''
+ def ConfigKeyErrorMsg(self, key, streamNum = 0):
+ if streamNum == 0:
+ Log.Fatal("No [" + key + "] key.")
+ else:
+ Log.Fatal("Stream number: " + str(streamNum) + " has no [" + key +
+ "] key.")
+
+ return False
+
+ '''
+ Show a emtpy value error message.
+
+ @return False
+ '''
+ def EmptyErrorMsg(self, key, streamNum):
+ Log.Fatal("Stream number: " + str(streamNum) + " the [" + key +
+ "] list is empty.")
+ return False
+
+ '''
+ Show a value is not set warn message.
+ '''
+ def KeyWarnMsg(self, key, streamNum = 0):
+ if streamNum == 0:
+ Log.Warn("No [" + key + "] key, use default value.", self.verbose)
+ else:
+ Log.Warn("Stream number: " + str(streamNum) + " has no [" + key +
+ "] key, use default value.", self.verbose)
+
+ '''
+ Show a method is not callable error message.
+
+ @return False
+ '''
+ def CallableMethodErroMsg(self, methodName, methodScript, streamNum):
+ Log.Fatal("Stream number: " + str(streamNum) + " the method: " + methodName
+ + " in script: " + methodScript + " is not callable.")
+ return False
+
+ '''
+ Show a file not available error message.
+
+ @ return False
+ '''
+ def NotAvailableErrorMsg(self, fileName):
+ Log.Fatal("The file: " + fileName + " is not available.")
+ return False
+
+ '''
+ This function check if a script have the necessary class and the RunMethod
+ function.
+
+ @param methodName - Contains the method name.
+ @param methodScript - Contains the script path with the script name.
+ @return False in case the script dosen't exist or the RunMethod method is not
+ available otherwise True.
+ '''
+ def CheckIfCallable(self, methodName, methodScript):
+ try:
+ with open(methodScript): pass
+ except IOError:
+ return False
+
+ module = Loader.ImportModuleFromPath(methodScript)
+ methodClass = getattr(module, methodName, None)
+ if callable(methodClass):
+ if getattr(methodClass, "RunMethod", None):
+ return True
+
+ return False
+
+ '''
+ This function checks if a file is readable.
+
+ @return The function returns True if the file is readable otherwise false.
+ '''
+ def CheckIfAvailable(self, files):
+ def CheckDataset(dataset):
+ try:
+ with open(dataset): pass
+ except IOError:
+ return self.NotAvailableErrorMsg(datasets)
+ return True
+
+ for datasets in files:
+ # Check if the value datasets is a list of datasets.
+ if not isinstance(datasets, basestring):
+ for dataset in datasets:
+ if not CheckDataset(dataset):
+ return False
+ else:
+ if not CheckDataset(datasets):
+ return False
+
+ return True
+
+ '''
+ This function checks the config attributes and keys. The function checks also,
+ if the script is runable and if the datasets are readable.
+
+ @return The function returns False if the config file is not correct and the
+ function shows some information to adjust the config. If the config is correct
+ the function prints a successful message.
+ '''
+ def CheckConfig(self):
+ Log.Info("Check config file: " + self.config, self.verbose)
+ streamNum = 0
+ for stream in self.streams:
+ streamNum += 1
+
+ if not stream.has_key("library"):
+ return self.KeyErrorMsg("library", streamNum)
+ elif not stream.has_key("methods"):
+ return self.KeyErrorMsg("methods", streamNum)
+ else:
+ try:
+ for key, value in stream["methods"].iteritems():
+
+ if not value.has_key("script"):
+ return self.KeyErrorMsg("script", streamNum)
+
+ if not value.has_key("format"):
+ return self.KeyErrorMsg("format", streamNum)
+
+ if not value.has_key("run"):
+ self.KeyWarnMsg("run", streamNum)
+
+ if not value.has_key("iteration"):
+ self.KeyWarnMsg("iteration", streamNum)
+
+ if value.has_key("datasets"):
+ if not value["datasets"]:
+ return self.EmptyErrorMsg("datasets", streamNum)
+ else:
+ for dataset in value["datasets"]:
+
+ if not self.CheckIfAvailable(dataset["files"]):
+ return False
+
+ if not dataset.has_key("options"):
+ self.KeyWarnMsg("options", streamNum)
+ else:
+ return self.KeyErrorMsg("datasets", streamNum)
+
+ if not self.CheckIfCallable(key, value["script"]):
+ return self.CallableMethodErroMsg(key, value["script"], streamNum)
+
+ except AttributeError, e:
+ return self.KeyErrorMsg("methods", streamNum)
+
+ Log.Info("Config file check: successful", self.verbose)
+
+ '''
+ This function merge the streams and creates a dictionary which contains the
+ data.
+
+ @return Dictionary with all informations.
+ '''
+ def StreamMerge(self):
+ streamData = {}
+
+ # Iterate through all libraries.
+ libraryMapping = self.GetConfigLibraryMethods()
+ while libraryMapping:
+ # Iterate through all methods.
+ methodMapping = self.GetConfigMethod(libraryMapping.methods)
+ while methodMapping and libraryMapping:
+
+ # Collect data only from method with run value = true.
+ if methodMapping.run:
+ for dataset in methodMapping.datasets:
+
+ if methodMapping.methodName in streamData:
+ tempDict = streamData[methodMapping.methodName]
+
+ if dataset["options"] in tempDict:
+ t = (libraryMapping.libraryName, dataset["files"],
+ methodMapping.iteration, methodMapping.script)
+ tempDict[dataset["options"]].append(t)
+ else:
+ t = (libraryMapping.libraryName, dataset["files"],
+ methodMapping.iteration, methodMapping.script)
+ tempDict[dataset["options"]] = [t]
+ else:
+ d = {}
+ t = (libraryMapping.libraryName, dataset["files"],
+ methodMapping.iteration, methodMapping.script)
+ d[dataset["options"]] = [t]
+ streamData[methodMapping.methodName] = d
- if not self.CheckIfCallable(key, value["script"]):
- return self.CallableMethodErroMsg(key, value["script"], streamNum)
+ methodMapping = self.GetConfigMethod(libraryMapping.methods)
+ libraryMapping = self.GetConfigLibraryMethods()
- except AttributeError, e:
- return self.KeyErrorMsg("methods", streamNum)
+ return streamData
- Log.Info("Config file check: successful", self.verbose)
\ No newline at end of file
More information about the mlpack-svn mailing list