[mlpack-svn] r15387 - in mlpack/conf/jenkins-conf/benchmark/methods/weka: . src src/pca src/util

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Jul 3 06:12:07 EDT 2013


Author: marcus
Date: Wed Jul  3 06:12:06 2013
New Revision: 15387

Log:
Add Principal Components Analysis src weka method.

Added:
   mlpack/conf/jenkins-conf/benchmark/methods/weka/src/
   mlpack/conf/jenkins-conf/benchmark/methods/weka/src/pca/
   mlpack/conf/jenkins-conf/benchmark/methods/weka/src/pca/PCA.java
   mlpack/conf/jenkins-conf/benchmark/methods/weka/src/util/
   mlpack/conf/jenkins-conf/benchmark/methods/weka/src/util/Timers.java
Removed:
   mlpack/conf/jenkins-conf/benchmark/methods/weka/pca.jar

Added: mlpack/conf/jenkins-conf/benchmark/methods/weka/src/pca/PCA.java
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/src/pca/PCA.java	Wed Jul  3 06:12:06 2013
@@ -0,0 +1,110 @@
+/**
+ * @file PCA.java
+ * @author Marcus Edel
+ *
+ * Principal Components Analysis with weka.
+ */
+
+import weka.core.Instances;
+import weka.core.Utils;
+import weka.core.converters.ConverterUtils.DataSource;
+import weka.attributeSelection.AttributeSelection;
+import weka.attributeSelection.PrincipalComponents;
+import weka.attributeSelection.Ranker;
+
+/**
+ * Runs Principal Components Analysis with the weka library and times it.
+ */
+public class PCA {
+
+  /** Help text printed to stderr when the command line is invalid. */
+  private static final String USAGE =
+      "This program performs principal components analysis on the given "
+      + "dataset.\nIt will transform the data onto its principal components, "
+      + "optionally performing\ndimensionality reduction by ignoring the "
+      + "principal components with the\nsmallest eigenvalues.\n\n"
+      + "Required options:\n"
+      + "-i [string]     Input dataset to perform PCA on.\n\n"
+      + "Options:\n\n"
+      + "-d [int]    Desired dimensionality of output dataset. If -1,\n"
+      + "            no dimensionality reduction is performed.\n"
+      + "            Default value -1.\n"
+      + "-s          If set, the data will be scaled before running\n"
+      + "            PCA, such that the variance of each feature is 1.";
+
+  /**
+   * Parses the options, loads the dataset, runs PCA and prints the timers.
+   *
+   * @param args Command-line options as described by the usage text.
+   */
+  public static void main(String[] args) {
+    Timers timer = new Timers();
+    try {
+      // Get the data set path; it is the only required option.
+      String dataset = Utils.getOption('i', args);
+      if (dataset.length() == 0) {
+        throw new IllegalArgumentException();
+      }
+
+      timer.StartTimer("total_time");
+      timer.StartTimer("loading_data");
+
+      // Load input dataset.
+      Instances data = new DataSource(dataset).getDataSet();
+
+      timer.StopTimer("loading_data");
+
+      // Find out what dimension we want; without -d every attribute is kept.
+      String dimension = Utils.getOption('d', args);
+      int k = data.numAttributes();
+      if (dimension.length() != 0) {
+        k = Integer.parseInt(dimension);
+        // Validate the parameter.
+        if (k > data.numAttributes()) {
+          System.out.printf("[Fatal] New dimensionality (%d) cannot be greater "
+              + "than existing dimensionality (%d)!\n", k,
+              data.numAttributes());
+          System.exit(-1);
+        }
+      }
+
+      // Performs a principal components analysis.
+      PrincipalComponents pcaEvaluator = new PrincipalComponents();
+
+      // Sets the amount of variance to account for when retaining principal
+      // components.
+      pcaEvaluator.setVarianceCovered(1.0);
+      // Sets maximum number of attributes to include in transformed attribute
+      // names.
+      pcaEvaluator.setMaximumAttributeNames(-1);
+
+      // -s carries no value, so it is read as a flag; Utils.getOption would
+      // insist on an argument after it. When scaling is requested the data
+      // must not merely be centered.
+      boolean scale = Utils.getFlag('s', args);
+      pcaEvaluator.setCenterData(!scale);
+
+      // Ranking the attributes.
+      Ranker ranker = new Ranker();
+      // Specify the number of attributes to select from the ranked list.
+      ranker.setNumToSelect(k);
+
+      AttributeSelection selector = new AttributeSelection();
+      selector.setSearch(ranker);
+      selector.setEvaluator(pcaEvaluator);
+      selector.SelectAttributes(data);
+
+      // Transform data into eigenvector basis; only the timing is of interest.
+      selector.reduceDimensionality(data);
+
+      timer.StopTimer("total_time");
+
+      timer.PrintTimer("loading_data");
+      timer.PrintTimer("total_time");
+    } catch (IllegalArgumentException e) {
+      System.err.println(USAGE);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+  }
+}

Added: mlpack/conf/jenkins-conf/benchmark/methods/weka/src/util/Timers.java
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/src/util/Timers.java	Wed Jul  3 06:12:06 2013
@@ -0,0 +1,72 @@
+/**
+ * @file Timers.java
+ * @author Marcus Edel
+ *
+ * Class to provide timers.
+ */
+
+import java.util.HashMap;
+
+/**
+ * The timer class provides a way for methods to be timed. Its methods allow a
+ * named timer to be started and stopped, and its value to be obtained.
+ */
+public class Timers {
+  // Nanoseconds accumulated per timer, and nanoTime() of the runs in progress.
+  private final HashMap<String, Long> timers = new HashMap<String, Long>();
+  private final HashMap<String, Long> running = new HashMap<String, Long>();
+
+  /**
+   * Start the given timer.  If a timer is started, then stopped, then
+   * re-started, then re-stopped, the final value of the timer is the length of
+   * both runs.  Starting a timer that is already running restarts that run.
+   *
+   * @param timerName - Name of timer to be started.
+   */
+  public void StartTimer(final String timerName) {
+    if (!timers.containsKey(timerName)) {
+      timers.put(timerName, 0L);
+    }
+    running.put(timerName, System.nanoTime());
+  }
+
+  /**
+   * Stop the given timer and add the run to its value.  Stopping a timer that
+   * is not running (never started, or already stopped) does nothing.
+   *
+   * @param timerName - Name of timer to be stopped.
+   */
+  public void StopTimer(final String timerName) {
+    final long now = System.nanoTime();
+    final Long started = running.remove(timerName);
+    if (started != null) {
+      timers.put(timerName, timers.get(timerName) + (now - started));
+    }
+  }
+
+  /**
+   * Get the value of the given timer.
+   *
+   * @param timerName - Name of timer to return value of.
+   * @return Nanoseconds accumulated by the finished runs of the timer.
+   * @throws Exception If no timer with this name has been started.
+   */
+  public Long GetTimer(final String timerName) throws Exception {
+    Long time = timers.get(timerName);
+    if (time == null) {
+      throw new Exception("There exists no timer with this name.");
+    }
+    return time;
+  }
+
+  /**
+   * Prints the specified timer, in seconds.
+   *
+   * @param timerName The name of the timer in question.
+   * @throws Exception If no timer with this name has been started.
+   */
+  public void PrintTimer(final String timerName) throws Exception {
+    final Long time = GetTimer(timerName);
+    System.out.printf("[INFO ]   %s: %fs\n", timerName, (time / 1e9));
+  }
+}
\ No newline at end of file



More information about the mlpack-svn mailing list