[mlpack-svn] r15611 - in mlpack/conf/jenkins-conf/benchmark: . benchmark methods/mlpack util
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Aug 13 15:15:30 EDT 2013
Author: marcus
Date: Tue Aug 13 15:15:29 2013
New Revision: 15611
Log:
Add functions and a new table to create the memory reports.
Modified:
mlpack/conf/jenkins-conf/benchmark/Makefile
mlpack/conf/jenkins-conf/benchmark/benchmark/make_reports.py
mlpack/conf/jenkins-conf/benchmark/benchmark/memory_benchmark.py
mlpack/conf/jenkins-conf/benchmark/methods/mlpack/pca.py
mlpack/conf/jenkins-conf/benchmark/util/database.py
mlpack/conf/jenkins-conf/benchmark/util/misc.py
mlpack/conf/jenkins-conf/benchmark/util/profiler.py
mlpack/conf/jenkins-conf/benchmark/util/template.py
Modified: mlpack/conf/jenkins-conf/benchmark/Makefile
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/Makefile (original)
+++ mlpack/conf/jenkins-conf/benchmark/Makefile Tue Aug 13 15:15:29 2013
@@ -15,6 +15,8 @@
export SHOGUN_PATH=/opt/shogun/shogun-2.1.0-mod
export PYTHONPATH=/opt/scikit-learn/scikit-learn-0.13.1/lib/python3.3/site-packages/:/opt/mlpy/mlpy-3.5.0/lib/python3.3/site-packages/:/opt/shogun/shogun-2.1.0/lib/python3.3/dist-packages/
export LD_LIBRARY_PATH=/opt/shogun/shogun-2.1.0/lib/
+export MS_PRINT_BIN=/usr/bin/ms_print
+export VALGRIND_BIN=/usr/bin/valgrind
ifeq ($(PYTHON_VERSION), 0)
$(error Python version 2.7 required which was not found)
Modified: mlpack/conf/jenkins-conf/benchmark/benchmark/make_reports.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/benchmark/make_reports.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/benchmark/make_reports.py Tue Aug 13 15:15:29 2013
@@ -20,6 +20,7 @@
from database import *
from template import *
from misc import *
+from profiler import *
import argparse
import glob
@@ -95,6 +96,23 @@
return datasetTable
'''
+Create the content for the memory section.
+
+ at param results - This data structure contains the results.
+ at return A string that contains the content for the memory section.
+'''
+def CreateMemoryContent(results):
+ memoryContent = ""
+ if results:
+ for result in results:
+ memoryValues = {}
+ memoryValues["name"] = result[7]
+ memoryValues["content"] = Profiler.MassifMemoryUsageReport(str(result[5]))
+ memoryContent += memoryTemplate % memoryValues
+
+ return memoryContent
+
+'''
Create the method container with the informations from the database.
@param db - The database object.
@@ -124,6 +142,15 @@
# Generate a "unique" hash for the chart names.
chartHash = str(hash(str(method[1:]) + str(buildIds)))
+ # Create the memory content.
+ memoryContent = ""
+ mlpackMemoryId = db.GetLibrary("mlpack_memory")
+ if mlpackMemoryId:
+ mlpackMemoryBuilId = db.GetLatestBuildFromLibary(mlpackMemoryId[0][0])
+ if mlpackMemoryBuilId:
+ memoryResults = db.GetMemoryResults(mlpackMemoryBuilId, mlpackMemoryId[0][0], method[0])
+ memoryContent = CreateMemoryContent(memoryResults)
+
# Generate a "unique" name for the line chart.
lineChartName = "img/line_" + chartHash + ".png"
@@ -141,7 +168,7 @@
# Create the timing table.
header, timingTable = CreateTimingTable(timingData, methodLibararies)
- datasetTable = CreateDatasetTable(methodResults)
+ datasetTable = CreateDatasetTable(methodResults)
# Create the container.
reportValues = {}
@@ -163,6 +190,7 @@
reportValues["timingHeader"] = header
reportValues["timingTable"] = timingTable
reportValues["datasetTable"] = datasetTable
+ reportValues["memoryContent"] = memoryContent
methodsPage += methodTemplate % reportValues
Modified: mlpack/conf/jenkins-conf/benchmark/benchmark/memory_benchmark.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/benchmark/memory_benchmark.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/benchmark/memory_benchmark.py Tue Aug 13 15:15:29 2013
@@ -5,7 +5,6 @@
Perform the memory benchmark.
'''
-
import os, sys, inspect
# Import the util path, this method even works if the path contains
@@ -16,85 +15,171 @@
sys.path.insert(0, cmd_subfolder)
from log import *
-from system import *
from loader import *
from parser import *
+from convert import *
+from misc import *
+from database import *
import argparse
+import datetime
-'''
-Show system informations. Are there no data available, the value is 'N/A'.
-'''
-def SystemInformation():
-
- Log.Info('CPU Model: ' + SystemInfo.GetCPUModel())
- Log.Info('Distribution: ' + SystemInfo.GetDistribution())
- Log.Info('Platform: ' + SystemInfo.GetPlatform())
- Log.Info('Memory: ' + SystemInfo.GetMemory())
- Log.Info('CPU Cores: ' + SystemInfo.GetCPUCores())
'''
-Start the main benchmark routine. The method shows some DEBUG information and
-prints a table with the runtime information.
+Return a list with modified datasets.
+
+ at param dataset - Datasets to be modified.
+ at param format - List of file formats to be converted to.
+ at return List of modified datasets.
'''
-def Main(configfile):
+def GetDataset(dataset, format):
+ # Check if the given dataset is a list or a single dataset.
+ if not isinstance(dataset, str):
+ datasetList = []
+ modifiedList = []
+
+ for data in dataset:
+ mdata = CheckFileExtension(data, format)
+
+ # Check if the dataset is available.
+ if os.path.isfile(mdata):
+ datasetList.append(mdata)
+ else:
+ # Check if the dataset is available.
+ convert = Convert(data, format[0])
+ datasetList.append(convert.modifiedDataset)
+ modifiedList.append(convert.modifiedDataset)
+ else:
+ datasetList = ""
+ modifiedList = ""
+
+ mdataset = CheckFileExtension(dataset, format)
+
+ # Check if the dataset is available.
+ if os.path.isfile(mdataset):
+ datasetList = mdataset
+ else:
+ # Convert the Dataset.
+ convert = Convert(dataset, format[0])
+ datasetList = convert.modifiedDataset
+ modifiedList = convert.modifiedDataset
+
+ return (datasetList, modifiedList)
+
+def Main(configfile, blocks, log):
+ # Benchmark settings.
+ timeout = 23000
+ database = "reports/benchmark.db"
+
# Read Config.
config = Parser(configfile, verbose=False)
+ streamData = config.StreamMerge()
- # Iterate through all libraries.
- libraryMapping = config.GetConfigLibraryMethods()
- while libraryMapping:
+ # Read the general block and set the attributes.
+ if "general" in streamData:
+ for key, value in streamData["general"]:
+ if key == "timeout":
+ timeout = value
+ if key == "database":
+ database = value
+
+ # Temporary datastructures for the current build.
+ build = {}
+
+ # Open logfile if the user asked for.
+ if log:
+ db = Database(database)
+ db.CreateTables()
+
+ # Transform the blocks string to a list.
+ if blocks:
+ blocks = blocks.split(",")
- if libraryMapping.libraryName != "mlpack":
+ # Iterate through all libraries.
+ for method, sets in streamData.items():
+ if method == "general":
continue
+ Log.Info("Method: " + method)
+ for options, libraries in sets.items():
+ Log.Info('Options: ' + (options if options != '' else 'None'))
+
+ if log:
+ methodId = db.GetMethod(method, options)
+ methodId = methodId[0][0] if methodId else db.NewMethod(method, options)
+
+ for libary in libraries:
+ name = libary[0]
+ datsets = libary[1]
+ script = libary[3]
+ format = libary[4]
+
+ if not blocks or name in blocks:
+ Log.Info("Libary: " + name)
+
+ # Logging: create a new library record for this library.
+ if log and name not in build:
+ libaryId = db.GetLibrary(name + "_memory")
+ libaryId = libaryId[0][0] if libaryId else db.NewLibrary(name + "_memory")
+
+ build[name] = (db.NewBuild(libaryId), libaryId)
+
+ # Load script.
+ try:
+ module = Loader.ImportModuleFromPath(script)
+ methodCall = getattr(module, method)
+ except Exception as e:
+ Log.Fatal("Could not load the script: " + script)
+ Log.Fatal("Exception: " + str(e))
+ else:
+
+ for dataset in datsets:
+ datasetName = NormalizeDatasetName(dataset)
+
+          # Logging: Create a new dataset record for this dataset.
+ if log:
+ datasetId = db.GetDataset(datasetName)
+ datasetId = datasetId[0][0] if datasetId else db.NewDataset(*DatasetInfo(dataset))
+
+ Log.Info("Dataset: " + datasetName)
+ modifiedDataset = GetDataset(dataset, format)
+
+ try:
+ instance = methodCall(modifiedDataset[0], timeout=timeout,
+ verbose=False)
+ except Exception as e:
+ Log.Fatal("Could not call the constructor: " + script)
+ Log.Fatal("Exception: " + str(e))
+ continue
+
+ # Generate a "unique" name for the memory output file.
+ outputName = "reports/etc/" + str(hash(datetime.datetime.now())) + ".mout"
+
+ try:
+ instance.RunMemoryProfiling(options, outputName);
+ except Exception as e:
+ Log.Fatal("Exception: " + str(e))
+ continue
+
+ # Save results in the logfile if the user asked for.
+ if log:
+ buildId, libaryId = build[name]
+ db.NewMemory(buildId, libaryId, methodId, datasetId, outputName)
- # Iterate through all methods.
- methodMapping = config.GetConfigMethod(libraryMapping.methods)
- while methodMapping and libraryMapping:
-
- if methodMapping.run:
-
- Log.Info('Method: ' + methodMapping.methodName)
-
- # Load script.
- module = Loader.ImportModuleFromPath(methodMapping.script)
- methodCall = getattr(module, methodMapping.methodName)
-
- for dataset in methodMapping.datasets:
-
- Log.Info('Options: ' + (dataset["options"] if dataset["options"] != ''
- else 'None'))
-
- for files in dataset["files"]:
-
- # Get dataset name.
- if not isinstance(files, basestring):
- name = os.path.splitext(os.path.basename(files[0]))[0]
- else:
- name = os.path.splitext(os.path.basename(files))[0]
-
- if name.count('_') != 0:
- name = name.split("_")[0]
-
- Log.Info('Dataset: ' + name)
-
- instance = methodCall(files, verbose=True)
- instance.RunMemoryProfiling(dataset["options"]);
-
- # Call the destructor.
- del instance
-
- methodMapping = config.GetConfigMethod(libraryMapping.methods)
- libraryMapping = config.GetConfigLibraryMethods()
+ # Remove temporary datasets.
+ RemoveDataset(modifiedDataset[1])
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="""Perform the benchmark with the
given config.""")
parser.add_argument('-c','--config', help='Configuration file name.',
required=True)
+ parser.add_argument('-b','--blocks', help='Run only the specified blocks.',
+ required=False)
+ parser.add_argument('-l','--log', help='Save the results in the logfile.',
+ required=False)
args = parser.parse_args()
if args:
- SystemInformation()
- Main(args.config)
\ No newline at end of file
+ log = True if args.log == "True" else False
+ Main(args.config, args.blocks, log)
\ No newline at end of file
Modified: mlpack/conf/jenkins-conf/benchmark/methods/mlpack/pca.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/mlpack/pca.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/methods/mlpack/pca.py Tue Aug 13 15:15:29 2013
@@ -71,7 +71,7 @@
'''
def __del__(self):
Log.Info("Clean up.", self.verbose)
- filelist = ["gmon.out", "output.csv", "PCA.mout"]
+ filelist = ["gmon.out", "output.csv"]
for f in filelist:
if os.path.isfile(f):
os.remove(f)
@@ -85,14 +85,14 @@
@return Returns False if the method was not successful, if the method was
successful save the report file in the specified file.
'''
- def RunMemoryProfiling(self, methodOptions, massifOptions = "--depth=3"):
+ def RunMemoryProfiling(self, methodOptions, fileName, massifOptions="--depth=2"):
Log.Info("Perform PCA Memory Profiling.", self.verbose)
# Split the command using shell-like syntax.
cmd = shlex.split(self.path + "pca -i " + self.dataset +
" -o output.csv -v " + methodOptions)
- return Profiler.MassifMemoryUsage(cmd, "PCA.mout", options = massifOptions)
+ return Profiler.MassifMemoryUsage(cmd, fileName, options=massifOptions)
'''
Perform Principal Components Analysis. If the method has been successfully
Modified: mlpack/conf/jenkins-conf/benchmark/util/database.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/util/database.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/util/database.py Tue Aug 13 15:15:29 2013
@@ -100,6 +100,26 @@
""")
'''
+ Create a new memory table.
+ '''
+ def CreateMemoryTable(self):
+ self.con.executescript("""
+ CREATE TABLE IF NOT EXISTS memory (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ build_id INTEGER NOT NULL,
+ libary_id INTEGER NOT NULL,
+ method_id INTEGER NOT NULL,
+ dataset_id INTEGER NOT NULL,
+ memory_info TEXT NOT NULL,
+
+ FOREIGN KEY(build_id) REFERENCES builds(id) ON DELETE CASCADE,
+ FOREIGN KEY(libary_id) REFERENCES libraries(id) ON DELETE CASCADE,
+ FOREIGN KEY(dataset_id) REFERENCES datasets(id) ON DELETE CASCADE,
+ FOREIGN KEY(method_id) REFERENCES methods(id) ON DELETE CASCADE
+ );
+ """)
+
+ '''
Create a new build, libraries, datasets and results table.
'''
def CreateTables(self):
@@ -108,6 +128,7 @@
self.CreateDatasetsTable()
self.CreateMethodsTable()
self.CreateResultsTable()
+ self.CreateMemoryTable()
'''
Add a new build record to the builds table.
@@ -189,6 +210,7 @@
@param time - The mesured time of the build.
@param var - The variance of the build.
@param datasetId - The id of the dataset.
+ @param methodId - The id of the method.
'''
def NewResult(self, buildId, libaryId, time, var, datasetId, methodId):
with self.con:
@@ -304,3 +326,21 @@
str(buildId[0]) + " AND method_id=" + str(methodId))
timeSummed.append(self.cur.fetchall()[0][0])
return (buildId[0], timeSummed)
+
+ '''
+ Add a new memory record to the memory table.
+
+  @param libaryId - The id of the library.
+ @param methodId - The id of the method
+ @param datasetId - The id of the dataset.
+ @param memoryInfo - The text for the memory value.
+ '''
+ def NewMemory(self, buildId, libaryId, methodId, datasetId, memoryInfo):
+ with self.con:
+ self.cur.execute("INSERT INTO memory VALUES (NULL,?,?,?,?,?)",
+ (buildId, libaryId, methodId, datasetId, memoryInfo))
+
+ def GetMemoryResults(self, buildId, libaryId, methodId):
+ with self.con:
+ self.cur.execute("SELECT * FROM memory JOIN datasets ON memory.dataset_id = datasets.id WHERE libary_id=" + str(libaryId) + " AND build_id="+ str(buildId) + " AND method_id=" + str(methodId))
+ return self.cur.fetchall()
Modified: mlpack/conf/jenkins-conf/benchmark/util/misc.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/util/misc.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/util/misc.py Tue Aug 13 15:15:29 2013
@@ -110,3 +110,30 @@
datasetType = "real"
return (name, size, attributes, instances, datasetType)
+
+'''
+This function removes a given file or list of files.
+
+ at param dataset - File or list of file which should be deleted.
+'''
+def RemoveDataset(dataset):
+ if isinstance(dataset, str):
+ dataset = [dataset]
+
+ for f in dataset:
+ if os.path.isfile(f):
+ os.remove(f)
+
+'''
+Check if the file is available in one of the given formats.
+
+ at param dataset - Datsets which should be checked.
+ at param formats - List of supported file formats.
+ at return Orginal dataset or dataset with new file format.
+'''
+def CheckFileExtension(dataset, formats):
+ dataExtension = os.path.splitext(dataset)[1][1:]
+ if dataExtension in formats:
+ return dataset
+ else:
+ return dataset[0:len(dataset) - len(dataExtension)] + formats[0]
Modified: mlpack/conf/jenkins-conf/benchmark/util/profiler.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/util/profiler.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/util/profiler.py Tue Aug 13 15:15:29 2013
@@ -1,8 +1,8 @@
'''
- @file profiler.py
- @author Marcus Edel
+ @file profiler.py
+ @author Marcus Edel
- Contains functions to get profiling informations.
+  Contains functions to get profiling information.
'''
import os
@@ -12,9 +12,9 @@
# import the util path, this method even works if the path contains
# symlinks to modules.
cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
- os.path.split(inspect.getfile(inspect.currentframe()))[0], '')))
+ os.path.split(inspect.getfile(inspect.currentframe()))[0], '')))
if cmd_subfolder not in sys.path:
- sys.path.insert(0, cmd_subfolder)
+ sys.path.insert(0, cmd_subfolder)
from log import *
@@ -23,60 +23,79 @@
'''
class Profiler(object):
- '''
- Use valgrind massif to get memory profiling information and the save the ouput
- in the specified file.
-
- @param cmd - Method command line to profile.
- @param output - Save the report at the output path with the specified name.
- @param valgrind - Path to the valgrind binary.
- @param options - Specified massif options.
- @ return Returns False if the method was not successful, if the method was
- successful save the report file in the specified file.
- '''
- @staticmethod
- def MassifMemoryUsage(cmd, output, valgrind = "valgrind", options = ""):
- import shlex, subprocess
-
- cmd = shlex.split(("%s --tool=massif --massif-out-file=%s %s") %
- (valgrind, output, options)) + cmd
- try:
- s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
- except Exception:
- Log.Fatal("Could not execute command: " + str(cmd))
- return -1
-
- '''
- Returns the memory used by a process and his children. We don't know when the
- process is done so we have to poll to get the memory. To avoid memory overflow
- we use a ringbuffer to limit the size of the memory values.
-
- @param process - Popen instance.
- @param Buffersize - Memory value count.
- @return List of memory values.
- '''
- @staticmethod
- def SubprocessMemoryUsage(process, Buffersize = 200):
- import psutil, time, collections
-
- # Create the process list with the main process and his childrens.
- p = psutil.Process(process.pid)
- children = list(p.get_children(recursive=True)) + [p]
-
- memoryTable = collections.deque(maxlen = Buffersize)
-
- # We have to poll to get the memory values.
- while process.poll() == None:
- try:
- for p in children:
- memoryTable.append(int(p.get_memory_info()[0]))
- # Sometimes a subprocess has terminated in the time between we measure the
- # memory. In this case, we continue.
- except psutil.NoSuchProcess:
- continue
- except psutil.AccessDenied:
- continue
+ '''
+  Use valgrind massif to get memory profiling information and then save the output
+ in the specified file.
+
+ @param cmd - Method command line to profile.
+ @param output - Save the report at the output path with the specified name.
+ @param valgrind - Path to the valgrind binary.
+ @param options - Specified massif options.
+  @return Returns False if the method was not successful, if the method was
+ successful save the report file in the specified file.
+ '''
+ @staticmethod
+ def MassifMemoryUsage(command, output, valgrind=os.environ["VALGRIND_BIN"], options=""):
+ import shlex, subprocess
+
+ cmd = shlex.split(("%s --tool=massif --massif-out-file=%s %s ") %
+ (valgrind, output, options)) + command
+ try:
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+ except Exception:
+ Log.Fatal("Could not execute command: " + str(cmd))
+ return -1
+
+ '''
+ Use the valgrind ms_print script to generate the massif output.
+
+  @param fileName - The filename of the valgrind massif log file.
+ @param valgrind - The path to the ms_print script.
+ @return The ms_print output.
+ '''
+ @staticmethod
+ def MassifMemoryUsageReport(fileName, valgrind=os.environ["MS_PRINT_BIN"]):
+ import shlex, subprocess
+
+ cmd = shlex.split(valgrind + " " + fileName)
+ try:
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+ return s
+ except Exception:
+ Log.Fatal("Could not execute command: " + str(cmd))
+ return -1
+
+ '''
+ Returns the memory used by a process and his children. We don't know when the
+ process is done so we have to poll to get the memory. To avoid memory overflow
+ we use a ringbuffer to limit the size of the memory values.
+
+ @param process - Popen instance.
+ @param Buffersize - Memory value count.
+ @return List of memory values.
+ '''
+ @staticmethod
+ def SubprocessMemoryUsage(process, Buffersize=200):
+ import psutil, time, collections
+
+ # Create the process list with the main process and his childrens.
+ p = psutil.Process(process.pid)
+ children = list(p.get_children(recursive=True)) + [p]
+
+ memoryTable = collections.deque(maxlen=Buffersize)
+
+ # We have to poll to get the memory values.
+ while process.poll() == None:
+ try:
+ for p in children:
+ memoryTable.append(int(p.get_memory_info()[0]))
+ # Sometimes a subprocess has terminated in the time between we measure the
+ # memory. In this case, we continue.
+ except psutil.NoSuchProcess:
+ continue
+ except psutil.AccessDenied:
+ continue
- time.sleep(0.01)
+ time.sleep(0.01)
- return memoryTable
+ return memoryTable
Modified: mlpack/conf/jenkins-conf/benchmark/util/template.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/util/template.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/util/template.py Tue Aug 13 15:15:29 2013
@@ -129,9 +129,7 @@
<div>
<div class="panel">
<div class="panel-heading">Massif Log</div>
- <div class="row">
-
- </div>
+ %(memoryContent)s
</div>
</div>
@@ -150,4 +148,15 @@
</div>
</div>
+"""
+
+memoryTemplate = """
+<div class="accordion-group">
+<div class="accordion-heading"><a class="accordion-toggle" data-toggle="collapse" data-parent="#accordion2" href="#%(name)s">%(name)s</a></div>
+ <div id="%(name)s" class="accordion-body collapse">
+ <div class="accordion-inner">
+ %(content)s
+ </div>
+ </div>
+</div>
"""
\ No newline at end of file
More information about the mlpack-svn
mailing list