[mlpack-svn] r15509 - in mlpack/conf/jenkins-conf/benchmark: benchmark util
fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Jul 19 14:59:28 EDT 2013
Author: marcus
Date: Fri Jul 19 14:59:27 2013
New Revision: 15509
Log:
Add a function to modify datasets (e.g. add an ARFF header).
Added:
mlpack/conf/jenkins-conf/benchmark/util/convert.py
Modified:
mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
mlpack/conf/jenkins-conf/benchmark/util/parser.py
Modified: mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py Fri Jul 19 14:59:27 2013
@@ -18,6 +18,7 @@
from system import *
from loader import *
from parser import *
+from convert import *
import argparse
@@ -40,12 +41,80 @@
@return Normalized dataset name.
'''
def NormalizeDatasetName(dataset):
- if not isinstance(dataset, basestring):
+ if not isinstance(dataset, basestring):
return os.path.splitext(os.path.basename(dataset[0]))[0].split('_')[0]
else:
return os.path.splitext(os.path.basename(dataset))[0].split('_')[0]
'''
+Check if the file is available in one of the given formats.
+
+@para dataset - Dataset which should be checked.
+@para formats - List of supported file formats.
+@return Original dataset or dataset with new file format.
+'''
+def CheckFileExtension(dataset, formats):
+ dataExtension = os.path.splitext(dataset)[1][1:]
+ if dataExtension in formats:
+ return dataset
+ else:
+ return dataset[0:len(dataset) - len(dataExtension)] + formats[0]
+
+'''
+Return a list of modified datasets.
+
+@para dataset - Datasets to be modified.
+@para format - List of file formats to be converted to.
+@return List of modified datasets.
+'''
+def GetDataset(dataset, format):
+ # Check if the given dataset is a list or a single dataset.
+ if not isinstance(dataset, basestring):
+ datasetList = []
+ modifiedList = []
+
+ for data in dataset:
+ mdata = CheckFileExtension(data, format)
+
+ # Check if the dataset is available.
+ if os.path.isfile(mdata):
+ datasetList.append(mdata)
+ else:
+ # Convert the dataset.
+ convert = Convert(data, format[0])
+ datasetList.append(convert.modifiedDataset)
+ modifiedList.append(convert.modifiedDataset)
+ else:
+ datasetList = ""
+ modifiedList = ""
+
+ mdataset = CheckFileExtension(dataset, format)
+
+ # Check if the dataset is available.
+ if os.path.isfile(mdataset):
+ datasetList = mdataset
+ else:
+ # Convert the Dataset.
+ convert = Convert(dataset, format[0])
+ datasetList = convert.modifiedDataset
+ modifiedList = convert.modifiedDataset
+
+ return (datasetList, modifiedList)
+
+'''
+Remove a given file or list of files.
+
+@para dataset - File or list of files which should be deleted.
+'''
+def RemoveDataset(dataset):
+ if isinstance(dataset, basestring):
+ dataset = [dataset]
+
+ for f in dataset:
+ if os.path.isfile(f):
+ os.remove(f)
+
+'''
Add all rows from a given matrix to a given table.
@para matrix - 2D array that contains the rows.
@@ -122,29 +191,35 @@
datsets = libary[1]
trials = libary[2]
script = libary[3]
+ format = libary[4]
Log.Info("Libary: " + name)
header.append(name)
# Load script.
module = Loader.ImportModuleFromPath(script)
- methodCall = getattr(module, method)
+ methodCall = getattr(module, method)
for dataset in datsets:
datasetName = NormalizeDatasetName(dataset)
row = FindRightRow(dataMatrix, datasetName, datasetCount)
dataMatrix[row][0] = NormalizeDatasetName(dataset)
- Log.Info("Dataset: " + dataMatrix[row][0])
+ Log.Info("Dataset: " + dataMatrix[row][0])
+
+ modifiedDataset = GetDataset(dataset, format)
time = 0
for trial in range(trials + 1):
- instance = methodCall(dataset, verbose=False)
+ instance = methodCall(modifiedDataset[0], verbose=False)
if trial > 0:
time += instance.RunMethod(options);
# Set time.
dataMatrix[row][col] = "{0:.6f}".format(time / trials)
+
+ # Remove temporary datasets.
+ RemoveDataset(modifiedDataset[1])
row += 1
col += 1
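A rough sketch of how the new helpers in run_benchmark.py are meant to be
used together (the path 'datasets/iris.csv' and the format list below are
hypothetical, not part of this commit):

    # Assume the method's config lists ARFF as its only supported format,
    # so a CSV dataset gets converted on the fly. GetDataset returns the
    # usable file plus a record of any temporary file it had to create.
    format = ["arff"]
    datasetList, modifiedList = GetDataset("datasets/iris.csv", format)

    # Run the benchmark on datasetList here, then delete the temporary
    # file(s) produced by the conversion.
    RemoveDataset(modifiedList)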
Added: mlpack/conf/jenkins-conf/benchmark/util/convert.py
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/util/convert.py Fri Jul 19 14:59:27 2013
@@ -0,0 +1,91 @@
+'''
+ @file convert.py
+ @author Marcus Edel
+
+ Implementation of the Convert class.
+'''
+
+import os, sys, inspect
+
+# Import the util path; this method even works if the path contains
+# symlinks to modules.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+ os.path.split(inspect.getfile(inspect.currentframe()))[0], "")))
+if cmd_subfolder not in sys.path:
+ sys.path.insert(0, cmd_subfolder)
+
+from log import *
+
+import os.path
+
+'''
+This class implements functions to convert files.
+'''
+class Convert(object):
+ '''
+ Convert dataset to a file with the given extension.
+
+ @para dataset - Convert this dataset.
+ @para extension - Convert dataset to a new file with this extension.
+ '''
+ def __init__(self, dataset, extension):
+ self.extension = extension
+ self.modifiedDataset = ""
+
+ self.ModifyDataset(dataset, extension)
+
+ '''
+ Decide which method we have to call to modify the dataset.
+
+ @para dataset - Convert this dataset.
+ @para extension - Convert dataset to a new file with this extension.
+ '''
+ def ModifyDataset(self, dataset, extension):
+ dataExtension = os.path.splitext(dataset)[1][1:]
+ newDataset = dataset[0:len(dataset) - len(dataExtension)] + extension
+
+ if extension == "arff" and (dataExtension == "csv" or dataExtension == "txt"):
+ self.AddArffHeader(dataset, newDataset)
+ else:
+ Log.Fatal("No conversion possible.")
+ pass
+
+ '''
+ Add a header to the dataset file.
+
+ @para data - This dataset contains the information.
+ @para newData - This dataset contains the information and the header.
+ '''
+ def AddArffHeader(self, data, newData):
+ # Extract the dataset name.
+ relationName = os.path.splitext(os.path.basename(data))[0].split('_')[0]
+
+ # Read the first line to get the attribute count.
+ fid = open(data)
+ head = [fid.next() for x in xrange(1)]
+ fid.close()
+
+ # We can convert files with ' ' and ',' as separator.
+ count = max(head[0].count(","), head[0].count(" ")) + 1
+
+ # Write the header to the new file.
+ nfid = open(newData, "a")
+ nfid.write("@relation " + relationName + "\n\n")
+ for i in range(count):
+ nfid.write("@attribute " + data + "_dim" + str(i) + " NUMERIC\n")
+ nfid.write("\n@data\n")
+
+ # Append the data to the new file.
+ fid = open(data, "r")
+ while True:
+ line = fid.read(65536)
+ if line:
+ nfid.write(line)
+ else:
+ break
+
+ fid.close()
+ nfid.close()
+
+ # Store the modified dataset name.
+ self.modifiedDataset = newData
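For reference, a sketch of the header AddArffHeader generates for a
hypothetical two-column file 'data/foo.csv' (file name and values are made
up for illustration):

    @relation foo

    @attribute data/foo.csv_dim0 NUMERIC
    @attribute data/foo.csv_dim1 NUMERIC

    @data
    1.0,2.5
    3.0,4.5

The relation name comes from the file name, the attribute names are built
from the input path as passed in, and the original rows are appended
unchanged after the @data line.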
Modified: mlpack/conf/jenkins-conf/benchmark/util/parser.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/util/parser.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/util/parser.py Fri Jul 19 14:59:27 2013
@@ -310,7 +310,6 @@
# Iterate through all methods.
methodMapping = self.GetConfigMethod(libraryMapping.methods)
while methodMapping and libraryMapping:
-
# Collect data only from method with run value = true.
if methodMapping.run:
for dataset in methodMapping.datasets:
@@ -320,16 +319,19 @@
if dataset["options"] in tempDict:
t = (libraryMapping.libraryName, dataset["files"],
- methodMapping.iteration, methodMapping.script)
+ methodMapping.iteration, methodMapping.script,
+ methodMapping.format)
tempDict[dataset["options"]].append(t)
else:
t = (libraryMapping.libraryName, dataset["files"],
- methodMapping.iteration, methodMapping.script)
+ methodMapping.iteration, methodMapping.script,
+ methodMapping.format)
tempDict[dataset["options"]] = [t]
else:
d = {}
t = (libraryMapping.libraryName, dataset["files"],
- methodMapping.iteration, methodMapping.script)
+ methodMapping.iteration, methodMapping.script,
+ methodMapping.format)
d[dataset["options"]] = [t]
streamData[methodMapping.methodName] = d
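With this change each tuple handed to run_benchmark.py carries five entries
instead of four; a hypothetical entry (library name, script path and values
made up for illustration) would look like:

    # (library name, dataset files, iterations, benchmark script, formats)
    ("mlpack", ["datasets/iris.csv"], 5, "methods/mlpack/kmeans.py", ["arff"])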