[mlpack-svn] r15389 - in mlpack/conf/jenkins-conf/benchmark: methods/shogun util

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Jul 3 10:24:21 EDT 2013


Author: marcus
Date: Wed Jul  3 10:24:21 2013
New Revision: 15389

Log:
Add Principal Components Analysis shogun method.

Added:
   mlpack/conf/jenkins-conf/benchmark/methods/shogun/
   mlpack/conf/jenkins-conf/benchmark/methods/shogun/pca.py
Modified:
   mlpack/conf/jenkins-conf/benchmark/util/timer.py

Added: mlpack/conf/jenkins-conf/benchmark/methods/shogun/pca.py
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/shogun/pca.py	Wed Jul  3 10:24:21 2013
@@ -0,0 +1,96 @@
+ '''
+  @file pca.py
+  @author Marcus Edel
+
+  Principal Components Analysis with shogun.
+'''
+
+import os
+import sys
+import inspect
+
+# Import the util path; this method works even if the path contains symlinks to
+# modules.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+if cmd_subfolder not in sys.path:
+  sys.path.insert(0, cmd_subfolder)
+
+from log import *
+from timer import *
+
+import numpy as np
+from shogun.Features import RealFeatures
+from shogun.Classifier import PCA as ShogunPCA
+
+'''
+This class implements the Principal Components Analysis benchmark.
+'''
+class PCA(object):
+
+  ''' 
+  Create the Principal Components Analysis benchmark instance.
+  
+  @param dataset - Input dataset to perform PCA on.
+  @param verbose - Display informational messages.
+  '''
+  def __init__(self, dataset, verbose=True): 
+    self.verbose = verbose
+    self.dataset = dataset
+
+  '''
+  Destructor to clean up at the end.
+  '''
+  def __del__(self):
+    pass
+
+  '''
+  Use the shogun library to implement Principal Components Analysis.
+
+  @param options - Extra options for the method.
+  @return - Elapsed time in seconds or -1 if the method was not successful.
+  '''
+  def PCAShogun(self, options):
+    totalTimer = Timer()
+    loadTimer = Timer()
+    with totalTimer:
+      # Load input dataset.
+      with loadTimer:
+        Log.Info("Loading dataset", self.verbose)
+        data = np.genfromtxt(self.dataset, delimiter=',')
+        feat = RealFeatures(data.T)
+
+      # Find out what dimension we want.
+      match = re.search('-d (\d+)', options)
+
+      if not match:
+        k = data.shape[1]
+      else:
+        k = int(match.group(1))      
+        if (k > data.shape[1]):
+          Log.Fatal("New dimensionality (" + str(k) + ") cannot be greater "
+              + "than existing dimensionality (" + str(data.shape[1]) + ")!")
+          return -1
+
+      # Get the options for running PCA.
+      s = True if options.find("-s") > -1 else False
+
+      # Perform PCA.
+      prep = ShogunPCA(s)
+      prep.set_target_dim(k)
+      prep.init(feat)
+      prep.apply_to_feature_matrix(feat)
+
+    return (totalTimer.ElapsedTime() - loadTimer.ElapsedTime())
+
+  '''
+  Perform Principal Components Analysis. If the method completes successfully,
+  return the elapsed time in seconds.
+
+  @param options - Extra options for the method.
+  @return - Elapsed time in seconds or -1 if the method was not successful.
+  '''
+  def RunMethod(self, options):
+    Log.Info("Perform PCA.", self.verbose)
+
+    return self.PCAShogun(options)

Modified: mlpack/conf/jenkins-conf/benchmark/util/timer.py
==============================================================================
--- mlpack/conf/jenkins-conf/benchmark/util/timer.py	(original)
+++ mlpack/conf/jenkins-conf/benchmark/util/timer.py	Wed Jul  3 10:24:21 2013
@@ -1,5 +1,5 @@
 '''
-  @file log.py
+  @file timer.py
   @author Marcus Edel
 
   Implementation of the timer class.



More information about the mlpack-svn mailing list