[mlpack-svn] r15279 - in mlpack/conf/jenkins-conf/benchmark: datasets methods/mlpack

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Jun 20 18:46:11 EDT 2013


Author: marcus
Date: 2013-06-20 18:46:11 -0400 (Thu, 20 Jun 2013)
New Revision: 15279

Added:
   mlpack/conf/jenkins-conf/benchmark/datasets/circle_data.csv
   mlpack/conf/jenkins-conf/benchmark/methods/mlpack/kernel_pca.py
Log:
Add method and dataset to benchmark Kernel Principal Components Analysis (mlpack).

Added: mlpack/conf/jenkins-conf/benchmark/datasets/circle_data.csv
===================================================================
--- mlpack/conf/jenkins-conf/benchmark/datasets/circle_data.csv	                        (rev 0)
+++ mlpack/conf/jenkins-conf/benchmark/datasets/circle_data.csv	2013-06-20 22:46:11 UTC (rev 15279)
@@ -0,0 +1,150 @@
+-6.60517775e-02, 6.04629334e-02
+ 1.19377700e-02, 3.68025145e-02
+ 5.09666010e-02, -3.40946758e-03
+-8.58372578e-02, -8.10610185e-02
+-2.57777434e-03, 1.13906075e-03
+-4.27241025e-02, -3.05499299e-03
+-6.57011728e-03, 1.03873366e-03
+ 2.68916485e-02, -1.38467228e-02
+-6.01263170e-02, 2.02106703e-02
+-1.00546418e-02, -3.48527720e-02
+-6.04774217e-02, -2.94025850e-02
+ 5.14737884e-03, 3.55849582e-02
+ 5.94569673e-02, -5.54701523e-02
+-1.60169069e-01, -6.43924335e-02
+ 3.11287207e-03, 1.74674093e-02
+-2.00517604e-02, -3.48167505e-02
+ 3.03217995e-03, 1.62991633e-01
+ 3.69941670e-02, -2.78047796e-02
+-5.02721716e-02, -7.55288818e-02
+-4.65832890e-03, -2.29852416e-03
+-1.20813639e-02, -7.19011205e-02
+-5.98596848e-03, 1.14251518e-02
+-1.03762139e-01, 4.70718349e-02
+ 8.51399308e-02, 8.94379128e-02
+ 2.66305872e-02, 3.01595721e-02
+ 9.78414625e-03, -6.77784559e-02
+-8.61814952e-02, -1.24750407e-02
+-4.81615078e-02, 3.21113455e-02
+-3.20266738e-03, 3.09902037e-02
+-5.39825686e-04, -5.59053172e-03
+-1.02980942e-01, -5.45038265e-02
+ 3.36445749e-02, 8.35639047e-02
+-4.37024976e-02, -1.60781460e-02
+-1.53344882e-01, -9.26248701e-03
+ 6.84335507e-02, -1.32158220e-01
+ 1.67174702e-01, 8.94235795e-02
+-8.83503359e-02, -7.80348058e-02
+ 1.44767494e-02, 1.06842497e-02
+-3.71152247e-02, 1.00436905e-01
+ 6.13603646e-02, -8.57530763e-02
+ 3.98495773e-02, -6.12598497e-03
+-3.03943004e-02, 1.18405684e-01
+ 1.25716134e-02, 1.66053968e-02
+ 9.64984295e-02, -1.50429775e-02
+-1.08045219e-03, -3.56202571e-02
+ 1.36595044e-02, -6.93244139e-02
+-7.35914811e-04, 7.48955100e-04
+-1.36701907e-01, 1.14917026e-01
+ 4.44488973e-03, 1.18873777e-02
+ 3.15501101e-02, -2.49107414e-02
+ 1.86037981e+00, -7.15357884e-01
+-9.99360914e-01, 1.92768521e+00
+-1.27117001e+00, 1.44629577e+00
+-1.90850144e+00, 1.84056659e-01
+-1.70298680e+00, 1.02983100e+00
+ 1.90340085e+00, -3.40699159e-01
+ 5.66310846e-01, -2.03534751e+00
+-1.32711084e+00, -1.34850537e+00
+ 1.83027974e+00, -4.51946301e-01
+-1.19674305e-01, -1.95255394e+00
+ 1.78380995e+00, -7.88219167e-01
+-2.09811428e+00, 6.37934340e-01
+-1.89898088e+00, -8.84677310e-01
+ 1.92910918e+00, -5.60031179e-01
+ 1.25039344e+00, -1.40048526e+00
+-1.69574895e+00, 1.21215105e+00
+-1.87395070e+00, 3.13411781e-01
+ 1.63098232e+00, -8.63626900e-01
+-7.43207939e-01, 1.98417943e+00
+-1.97132024e+00, 4.91607634e-01
+ 7.91942523e-01, 1.93640140e+00
+-1.93360438e+00, 6.24244367e-01
+ 1.23525431e+00, 1.68054176e+00
+-1.00378860e+00, -1.65415735e+00
+-1.08843341e+00, -1.55316198e+00
+ 1.84382035e+00, -9.36808424e-01
+ 3.88375621e-01, -1.87996148e+00
+ 1.71854130e+00, -8.80653545e-01
+-1.00699622e+00, 1.67505842e+00
+ 5.73779859e-01, 1.91775165e+00
+-1.47332606e+00, -1.29960220e+00
+-2.29074598e-01, 1.84836393e+00
+-1.00790586e+00, -1.65251955e+00
+ 1.76877903e+00, 1.77467449e-01
+-8.53751431e-01, -1.87752769e+00
+-9.68343586e-01, 1.56433805e+00
+-4.50286398e-01, 1.83512546e+00
+ 1.59097781e+00, -1.22052565e+00
+ 6.18112949e-01, 1.82416576e+00
+-2.06288577e+00, 6.20900222e-01
+-5.13728636e-02, 1.87000878e+00
+ 3.83570452e-01, 1.99007719e+00
+ 1.72862740e+00, 9.98072944e-01
+-7.75129498e-01, 1.71626958e+00
+ 7.36623324e-01, 1.91557692e+00
+-1.95275938e+00, 3.44102509e-01
+-1.99976160e+00, -5.61806345e-01
+-1.94322076e+00, -7.48382471e-01
+-1.21152657e+00, 1.85588083e+00
+ 2.56876376e-01, -2.11813332e+00
+ 2.01834438e+00, -4.63142159e+00
+ 3.60946187e+00, -3.37628677e+00
+-4.77043742e+00, -1.54437089e+00
+-2.32834371e+00, 4.38867940e+00
+-2.16023580e+00, 4.58581534e+00
+ 1.82103917e+00, 4.73110899e+00
+ 4.91305805e+00, -3.76202205e-01
+-3.30605074e+00, -3.56451982e+00
+ 3.08710327e+00, 3.72986755e+00
+ 4.82233946e+00, -1.53595154e+00
+-4.68806769e+00, 1.35915267e+00
+ 2.68839559e+00, 4.15551217e+00
+-3.86846704e+00, 3.07281705e+00
+ 5.25956130e-01, -4.96697398e+00
+-4.73335718e+00, -8.34590805e-01
+ 1.39946985e+00, -4.81981526e+00
+ 5.04516800e+00, -2.70028625e-01
+ 1.85628196e+00, 4.65217342e+00
+ 3.40930962e+00, 3.61478744e+00
+ 4.96565942e+00, -6.63115037e-01
+-4.99171106e+00, -6.95312433e-01
+ 2.19702730e+00, -4.18059218e+00
+ 5.92907714e-01, -5.16164980e+00
+ 2.92992457e-01, 5.03048412e+00
+ 6.30746054e-01, 4.89428311e+00
+ 4.86325148e+00, 9.79469301e-01
+ 4.45123067e+00, 2.38384670e+00
+ 3.71915491e+00, 3.32444245e+00
+-1.44381389e+00, -4.57448946e+00
+ 3.81852716e+00, -3.53920394e+00
+-1.17895313e+00, 4.84764387e+00
+ 1.07064123e-01, -5.10089380e+00
+-6.61943233e-01, -4.88616114e+00
+ 1.34350038e+00, 4.97543851e+00
+-4.43536267e+00, 2.36954038e+00
+-3.91159301e+00, 3.21122887e+00
+-8.48211846e-02, -4.89473978e+00
+ 4.99783406e+00, -1.11690140e+00
+ 1.16383006e+00, 4.93356628e+00
+-2.70668593e+00, 4.35805525e+00
+-2.06611610e+00, -4.48408537e+00
+ 1.57052842e+00, -4.69624608e+00
+ 4.78122166e+00, -2.12059341e+00
+ 2.90402895e+00, -3.93926850e+00
+ 2.81555028e+00, -4.11544794e+00
+ 2.68563184e+00, 4.35168912e+00
+-4.44696088e+00, -2.30702394e+00
+-4.90088351e+00, -1.25207543e+00
+-4.13160191e+00, -2.74445118e+00
+-5.30324117e-01, 5.00901005e+00
\ No newline at end of file

Added: mlpack/conf/jenkins-conf/benchmark/methods/mlpack/kernel_pca.py
===================================================================
--- mlpack/conf/jenkins-conf/benchmark/methods/mlpack/kernel_pca.py	                        (rev 0)
+++ mlpack/conf/jenkins-conf/benchmark/methods/mlpack/kernel_pca.py	2013-06-20 22:46:11 UTC (rev 15279)
@@ -0,0 +1,115 @@
+'''
+  @file kernel_pca.py
+  @author Marcus Edel
+
+  Class to benchmark the mlpack Kernel Principal Components Analysis method.
+'''
+
+import os
+import sys
+import inspect
+
+# Put the ../../util directory (relative to this file) at the front of
+# sys.path; realpath resolves symlinks, so linked module locations work too.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(os.path.dirname(
+	inspect.getfile(inspect.currentframe())), '../../util')))
+if cmd_subfolder not in sys.path:
+	sys.path.insert(0, cmd_subfolder)
+
+from log import *
+
+import shlex
+import subprocess
+import re
+import collections
+
+class KPCA(object):
+
+	# Create the Kernel Principal Components Analysis benchmark instance.
+	#
+	# @param dataset - Path of the input dataset handed to kernel_pca via -i.
+	# @param path - Prefix (including the trailing separator) of the directory
+	# that holds the mlpack executables.
+	# @param verbose - Flag forwarded to every Log call.
+	def __init__(self, dataset, path='/usr/local/bin/', verbose=True):
+		self.verbose = verbose
+		self.dataset = dataset
+		self.path = path
+
+		# Ask the kernel_pca executable itself (not pca) for its help text, so
+		# the description belongs to the program that RunMethod benchmarks.
+		cmd = shlex.split(self.path + "kernel_pca -h")
+		s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+
+		# The description is the text in front of the "Required ... options:" line.
+		pattern = re.compile(r"""(.*?)Required.*?options:""",
+				re.VERBOSE|re.MULTILINE|re.DOTALL)
+		match = pattern.match(s)
+		if not match:
+			Log.Warn("Can't parse description", self.verbose)
+			self.description = ''
+		else:
+			self.description = match.group(1)
+
+	# Remove the files a benchmark run leaves behind in the working directory.
+	def __del__(self):
+		Log.Info('Clean up.', self.verbose)
+		for f in ('gmon.out', 'output.csv'):
+			if os.path.isfile(f):
+				os.remove(f)
+
+	# Run kernel_pca on the dataset and return the elapsed time.
+	#
+	# @param options - Extra command line options appended to the call.
+	# @return - Elapsed time (total_time minus loading and saving time), or 0
+	# if the timers could not be parsed from the output.
+	def RunMethod(self, options):
+		Log.Info('Perform KPCA.', self.verbose)
+
+		# Split the command using shell-like syntax.
+		cmd = shlex.split(self.path + "kernel_pca -i " + self.dataset +
+				" -v -o output.csv " + options)
+
+		# Run the command with the necessary arguments and return its output as
+		# a byte string. The input is untrusted, so all shell based features
+		# are disabled.
+		s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+
+		timer = self.parseTimer(s)
+		if not timer:
+			Log.Fatal("Can't parse the timer", self.verbose)
+			return 0
+
+		time = self.GetTime(timer)
+		Log.Info(('total time: %fs' % (time)), self.verbose)
+		return time
+
+	# Parse the timer data out of the program output.
+	#
+	# @param data - Output of the kernel_pca run.
+	# @return - namedtuple (loading_time, saving_time, total_time) of floats,
+	# or False if the three timers are not found in that order.
+	def parseTimer(self, data):
+		# Compile the regular expression pattern into a regular expression object
+		# to parse the timer data.
+		pattern = re.compile(r"""
+							.*?loading_data: (?P<loading_time>.*?)s.*?
+							.*?saving_data: (?P<saving_time>.*?)s.*?
+							.*?total_time: (?P<total_time>.*?)s.*?
+							""", re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+		match = pattern.match(data)
+		if not match:
+			# Report through Log like the other methods of this class.
+			Log.Warn("Can't parse the data: wrong format", self.verbose)
+			return False
+
+		# Accept a decimal comma in any field; every value is normalised on its
+		# own rather than judging all of them by loading_time alone.
+		names = ['loading_time', 'saving_time', 'total_time']
+		timer = collections.namedtuple('timer', names)
+		return timer(*[float(match.group(n).replace(",", ".")) for n in names])
+
+	# Return the elapsed time without the time spent on loading and saving data.
+	def GetTime(self, timer):
+		return timer.total_time - timer.loading_time - timer.saving_time
\ No newline at end of file




More information about the mlpack-svn mailing list