[mlpack-svn] r15279 - in mlpack/conf/jenkins-conf/benchmark: datasets methods/mlpack
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Jun 20 18:46:11 EDT 2013
Author: marcus
Date: 2013-06-20 18:46:11 -0400 (Thu, 20 Jun 2013)
New Revision: 15279
Added:
mlpack/conf/jenkins-conf/benchmark/datasets/circle_data.csv
mlpack/conf/jenkins-conf/benchmark/methods/mlpack/kernel_pca.py
Log:
Add method and dataset to benchmark Kernel Principal Components Analysis (mlpack).
Added: mlpack/conf/jenkins-conf/benchmark/datasets/circle_data.csv
===================================================================
--- mlpack/conf/jenkins-conf/benchmark/datasets/circle_data.csv (rev 0)
+++ mlpack/conf/jenkins-conf/benchmark/datasets/circle_data.csv 2013-06-20 22:46:11 UTC (rev 15279)
@@ -0,0 +1,150 @@
+-6.60517775e-02, 6.04629334e-02
+ 1.19377700e-02, 3.68025145e-02
+ 5.09666010e-02, -3.40946758e-03
+-8.58372578e-02, -8.10610185e-02
+-2.57777434e-03, 1.13906075e-03
+-4.27241025e-02, -3.05499299e-03
+-6.57011728e-03, 1.03873366e-03
+ 2.68916485e-02, -1.38467228e-02
+-6.01263170e-02, 2.02106703e-02
+-1.00546418e-02, -3.48527720e-02
+-6.04774217e-02, -2.94025850e-02
+ 5.14737884e-03, 3.55849582e-02
+ 5.94569673e-02, -5.54701523e-02
+-1.60169069e-01, -6.43924335e-02
+ 3.11287207e-03, 1.74674093e-02
+-2.00517604e-02, -3.48167505e-02
+ 3.03217995e-03, 1.62991633e-01
+ 3.69941670e-02, -2.78047796e-02
+-5.02721716e-02, -7.55288818e-02
+-4.65832890e-03, -2.29852416e-03
+-1.20813639e-02, -7.19011205e-02
+-5.98596848e-03, 1.14251518e-02
+-1.03762139e-01, 4.70718349e-02
+ 8.51399308e-02, 8.94379128e-02
+ 2.66305872e-02, 3.01595721e-02
+ 9.78414625e-03, -6.77784559e-02
+-8.61814952e-02, -1.24750407e-02
+-4.81615078e-02, 3.21113455e-02
+-3.20266738e-03, 3.09902037e-02
+-5.39825686e-04, -5.59053172e-03
+-1.02980942e-01, -5.45038265e-02
+ 3.36445749e-02, 8.35639047e-02
+-4.37024976e-02, -1.60781460e-02
+-1.53344882e-01, -9.26248701e-03
+ 6.84335507e-02, -1.32158220e-01
+ 1.67174702e-01, 8.94235795e-02
+-8.83503359e-02, -7.80348058e-02
+ 1.44767494e-02, 1.06842497e-02
+-3.71152247e-02, 1.00436905e-01
+ 6.13603646e-02, -8.57530763e-02
+ 3.98495773e-02, -6.12598497e-03
+-3.03943004e-02, 1.18405684e-01
+ 1.25716134e-02, 1.66053968e-02
+ 9.64984295e-02, -1.50429775e-02
+-1.08045219e-03, -3.56202571e-02
+ 1.36595044e-02, -6.93244139e-02
+-7.35914811e-04, 7.48955100e-04
+-1.36701907e-01, 1.14917026e-01
+ 4.44488973e-03, 1.18873777e-02
+ 3.15501101e-02, -2.49107414e-02
+ 1.86037981e+00, -7.15357884e-01
+-9.99360914e-01, 1.92768521e+00
+-1.27117001e+00, 1.44629577e+00
+-1.90850144e+00, 1.84056659e-01
+-1.70298680e+00, 1.02983100e+00
+ 1.90340085e+00, -3.40699159e-01
+ 5.66310846e-01, -2.03534751e+00
+-1.32711084e+00, -1.34850537e+00
+ 1.83027974e+00, -4.51946301e-01
+-1.19674305e-01, -1.95255394e+00
+ 1.78380995e+00, -7.88219167e-01
+-2.09811428e+00, 6.37934340e-01
+-1.89898088e+00, -8.84677310e-01
+ 1.92910918e+00, -5.60031179e-01
+ 1.25039344e+00, -1.40048526e+00
+-1.69574895e+00, 1.21215105e+00
+-1.87395070e+00, 3.13411781e-01
+ 1.63098232e+00, -8.63626900e-01
+-7.43207939e-01, 1.98417943e+00
+-1.97132024e+00, 4.91607634e-01
+ 7.91942523e-01, 1.93640140e+00
+-1.93360438e+00, 6.24244367e-01
+ 1.23525431e+00, 1.68054176e+00
+-1.00378860e+00, -1.65415735e+00
+-1.08843341e+00, -1.55316198e+00
+ 1.84382035e+00, -9.36808424e-01
+ 3.88375621e-01, -1.87996148e+00
+ 1.71854130e+00, -8.80653545e-01
+-1.00699622e+00, 1.67505842e+00
+ 5.73779859e-01, 1.91775165e+00
+-1.47332606e+00, -1.29960220e+00
+-2.29074598e-01, 1.84836393e+00
+-1.00790586e+00, -1.65251955e+00
+ 1.76877903e+00, 1.77467449e-01
+-8.53751431e-01, -1.87752769e+00
+-9.68343586e-01, 1.56433805e+00
+-4.50286398e-01, 1.83512546e+00
+ 1.59097781e+00, -1.22052565e+00
+ 6.18112949e-01, 1.82416576e+00
+-2.06288577e+00, 6.20900222e-01
+-5.13728636e-02, 1.87000878e+00
+ 3.83570452e-01, 1.99007719e+00
+ 1.72862740e+00, 9.98072944e-01
+-7.75129498e-01, 1.71626958e+00
+ 7.36623324e-01, 1.91557692e+00
+-1.95275938e+00, 3.44102509e-01
+-1.99976160e+00, -5.61806345e-01
+-1.94322076e+00, -7.48382471e-01
+-1.21152657e+00, 1.85588083e+00
+ 2.56876376e-01, -2.11813332e+00
+ 2.01834438e+00, -4.63142159e+00
+ 3.60946187e+00, -3.37628677e+00
+-4.77043742e+00, -1.54437089e+00
+-2.32834371e+00, 4.38867940e+00
+-2.16023580e+00, 4.58581534e+00
+ 1.82103917e+00, 4.73110899e+00
+ 4.91305805e+00, -3.76202205e-01
+-3.30605074e+00, -3.56451982e+00
+ 3.08710327e+00, 3.72986755e+00
+ 4.82233946e+00, -1.53595154e+00
+-4.68806769e+00, 1.35915267e+00
+ 2.68839559e+00, 4.15551217e+00
+-3.86846704e+00, 3.07281705e+00
+ 5.25956130e-01, -4.96697398e+00
+-4.73335718e+00, -8.34590805e-01
+ 1.39946985e+00, -4.81981526e+00
+ 5.04516800e+00, -2.70028625e-01
+ 1.85628196e+00, 4.65217342e+00
+ 3.40930962e+00, 3.61478744e+00
+ 4.96565942e+00, -6.63115037e-01
+-4.99171106e+00, -6.95312433e-01
+ 2.19702730e+00, -4.18059218e+00
+ 5.92907714e-01, -5.16164980e+00
+ 2.92992457e-01, 5.03048412e+00
+ 6.30746054e-01, 4.89428311e+00
+ 4.86325148e+00, 9.79469301e-01
+ 4.45123067e+00, 2.38384670e+00
+ 3.71915491e+00, 3.32444245e+00
+-1.44381389e+00, -4.57448946e+00
+ 3.81852716e+00, -3.53920394e+00
+-1.17895313e+00, 4.84764387e+00
+ 1.07064123e-01, -5.10089380e+00
+-6.61943233e-01, -4.88616114e+00
+ 1.34350038e+00, 4.97543851e+00
+-4.43536267e+00, 2.36954038e+00
+-3.91159301e+00, 3.21122887e+00
+-8.48211846e-02, -4.89473978e+00
+ 4.99783406e+00, -1.11690140e+00
+ 1.16383006e+00, 4.93356628e+00
+-2.70668593e+00, 4.35805525e+00
+-2.06611610e+00, -4.48408537e+00
+ 1.57052842e+00, -4.69624608e+00
+ 4.78122166e+00, -2.12059341e+00
+ 2.90402895e+00, -3.93926850e+00
+ 2.81555028e+00, -4.11544794e+00
+ 2.68563184e+00, 4.35168912e+00
+-4.44696088e+00, -2.30702394e+00
+-4.90088351e+00, -1.25207543e+00
+-4.13160191e+00, -2.74445118e+00
+-5.30324117e-01, 5.00901005e+00
\ No newline at end of file
Added: mlpack/conf/jenkins-conf/benchmark/methods/mlpack/kernel_pca.py
===================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/mlpack/kernel_pca.py (rev 0)
+++ mlpack/conf/jenkins-conf/benchmark/methods/mlpack/kernel_pca.py 2013-06-20 22:46:11 UTC (rev 15279)
@@ -0,0 +1,115 @@
+'''
+ @file kernel_pca.py
+ @author Marcus Edel
+
+ Class to benchmark the mlpack Kernel Principal Components Analysis method.
+'''
+
+import os
+import sys
+import inspect
+
+# Import the util path; this method works even if the path contains
+# symlinks to modules.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+ os.path.split(inspect.getfile(inspect.currentframe()))[0], '../../util')))
+if cmd_subfolder not in sys.path:
+ sys.path.insert(0, cmd_subfolder)
+
+from log import *
+
+import shlex
+import subprocess
+import re
+import collections
+
+class KPCA(object):
+
+ # Create the Kernel Principal Components Analysis instance, show some
+ # information, and return the instance.
+ def __init__(self, dataset, path='/usr/local/bin/', verbose=True):
+ self.verbose = verbose
+ self.dataset = dataset
+ self.path = path
+
+ # Get description from executable.
+ cmd = shlex.split(self.path + "pca -h")
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+
+ # Use regular expression pattern to get the description.
+ pattern = re.compile(r"""(.*?)Required.*?options:""",
+ re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+ match = pattern.match(s)
+ if not match:
+ Log.Warn("Can't parse description", self.verbose)
+ description = ''
+ else:
+ description = match.group(1)
+
+ # Show method information.
+ # Log.Notice(description)
+ # Log.Notice('\n')
+
+ # Remove created files.
+ def __del__(self):
+ Log.Info('Clean up.', self.verbose)
+ filelist = ['gmon.out', 'output.csv']
+ for f in filelist:
+ if os.path.isfile(f):
+ os.remove(f)
+
+ # Perform Kernel Principal Components Analysis and return the elapsed time.
+ def RunMethod(self, options):
+ Log.Info('Perform KPCA.', self.verbose)
+
+ # Split the command using shell-like syntax.
+ cmd = shlex.split(self.path + "kernel_pca -i " + self.dataset + " -v -o output.csv " + options)
+
+ # Run the command with the necessary arguments and return its output as
+ # a byte string. We have untrusted input, so we disable all shell-based
+ # features.
+ s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+
+ # Return the elapsed time.
+ timer = self.parseTimer(s)
+ if not timer:
+ Log.Fatal("Can't parse the timer", self.verbose)
+ return 0
+ else:
+ time = self.GetTime(timer)
+ Log.Info(('total time: %fs' % (time)), self.verbose)
+
+ return time
+
+ # Parse the timer data.
+ def parseTimer(self, data):
+ # Compile the regular expression pattern into a regular expression object
+ # to parse the timer data.
+ pattern = re.compile(r"""
+ .*?loading_data: (?P<loading_time>.*?)s.*?
+ .*?saving_data: (?P<saving_time>.*?)s.*?
+ .*?total_time: (?P<total_time>.*?)s.*?
+ """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+ match = pattern.match(data)
+ if not match:
+ print "Can't parse the data: wrong format"
+ return False
+ else:
+ # Create a namedtuple and return the timer data.
+ timer = collections.namedtuple('timer', ['loading_time',
+ 'saving_time', 'total_time'])
+ if match.group("loading_time").count(".") == 1:
+ return timer(float(match.group("loading_time")),
+ float(match.group("saving_time")),
+ float(match.group("total_time")))
+ else:
+ return timer(float(match.group("loading_time").replace(",", ".")),
+ float(match.group("saving_time").replace(",", ".")),
+ float(match.group("total_time").replace(",", ".")))
+
+ # Return the elapsed time.
+ def GetTime(self, timer):
+ time = timer.total_time - timer.loading_time - timer.saving_time
+ return time
\ No newline at end of file
More information about the mlpack-svn
mailing list