[mlpack-svn] r15588 - in mlpack/conf/jenkins-conf/benchmark: benchmark util
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Aug 6 16:43:30 EDT 2013
Author: marcus
Date: Tue Aug 6 16:43:30 2013
New Revision: 15588
Log:
Add function to get dataset information.
Modified:
mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
mlpack/conf/jenkins-conf/benchmark/util/misc.py
Modified: mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/benchmark/run_benchmark.py Tue Aug 6 16:43:30 2013
@@ -20,6 +20,7 @@
from parser import *
from convert import *
from misc import *
+from database import *
import argparse
import datetime
@@ -36,19 +37,6 @@
Log.Info('CPU Cores: ' + SystemInfo.GetCPUCores())
'''
-Normalize the dataset name. If the dataset is a list of datasets, take the first
-dataset as name. If necessary remove characters like '.', '_'.
-
-@param dataset - Dataset file or a list of datasets files.
-@return Normalized dataset name.
-'''
-def NormalizeDatasetName(dataset):
- if not isinstance(dataset, str):
- return os.path.splitext(os.path.basename(dataset[0]))[0].split('_')[0]
- else:
- return os.path.splitext(os.path.basename(dataset))[0].split('_')[0]
-
-'''
Check if the file is available in one of the given formats.
@param dataset - Datsets which should be checked.
Modified: mlpack/conf/jenkins-conf/benchmark/util/misc.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/util/misc.py (original)
+++ mlpack/conf/jenkins-conf/benchmark/util/misc.py Tue Aug 6 16:43:30 2013
@@ -5,6 +5,8 @@
Supporting functions.
'''
+import os
+
'''
This function determinate if the given number is a float.
@@ -47,6 +49,19 @@
return table
'''
+Normalize the dataset name. If the dataset is a list of datasets, take the first
+dataset as name. If necessary remove characters like '.', '_'.
+
+@param dataset - Dataset file or a list of datasets files.
+@return Normalized dataset name.
+'''
+def NormalizeDatasetName(dataset):
+ if not isinstance(dataset, str):
+ return os.path.splitext(os.path.basename(dataset[0]))[0].split('_')[0]
+ else:
+ return os.path.splitext(os.path.basename(dataset))[0].split('_')[0]
+
+'''
Search the correct row to insert the new data. We look at the left column for
a free place or for the matching name.
@@ -58,3 +73,27 @@
for row in range(datasetCount):
if (dataMatrix[row][0] == datasetName) or (dataMatrix[row][0] == "-"):
return row
+
+'''
+Collect information for the given dataset.
+
+@param path - Path to the dataset.
+@return Tuple which contains the information about the given dataset.
+'''
+def DatasetInfo(path):
+ instances = 0
+ with open(path, "r") as fid:
+ for line in fid:
+ instances += 1
+
+ attributes = 0
+ with open(path, "r") as fid:
+ for line in fid:
+ attributes = line.count(",") + 1
+ break
+
+ name = NormalizeDatasetName(path)
+ size = os.path.getsize(path) / (1 << 20)
+ datasetType = "real"
+
+ return (name, size, attributes, instances, datasetType)
More information about the mlpack-svn
mailing list