[mlpack-svn] r15387 - in mlpack/conf/jenkins-conf/benchmark/methods/weka: . src src/pca src/util
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Jul 3 06:12:07 EDT 2013
Author: marcus
Date: Wed Jul 3 06:12:06 2013
New Revision: 15387
Log:
Add Principal Components Analysis src weka method.
Added:
mlpack/conf/jenkins-conf/benchmark/methods/weka/src/
mlpack/conf/jenkins-conf/benchmark/methods/weka/src/pca/
mlpack/conf/jenkins-conf/benchmark/methods/weka/src/pca/PCA.java
mlpack/conf/jenkins-conf/benchmark/methods/weka/src/util/
mlpack/conf/jenkins-conf/benchmark/methods/weka/src/util/Timers.java
Removed:
mlpack/conf/jenkins-conf/benchmark/methods/weka/pca.jar
Added: mlpack/conf/jenkins-conf/benchmark/methods/weka/src/pca/PCA.java
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/src/pca/PCA.java Wed Jul 3 06:12:06 2013
@@ -0,0 +1,110 @@
+/**
+ * @file PCA.java
+ * @author Marcus Edel
+ *
+ * Principal Components Analysis with weka.
+ */
+
+import weka.core.Instances;
+import weka.core.Utils;
+import weka.core.converters.ConverterUtils.DataSource;
+import weka.attributeSelection.AttributeSelection;
+import weka.attributeSelection.PrincipalComponents;
+import weka.attributeSelection.Ranker;
+
+/**
+ * This class uses the weka library to implement Principal Components Analysis.
+ */
+public class PCA {
+
+  private static final String USAGE =
+      "This program performs principal components analysis on the given "
+      + "dataset.\nIt will transform the data onto its principal components, "
+      + "optionally performing\ndimensionality reduction by ignoring the "
+      + "principal components with the\nsmallest eigenvalues.\n\n"
+      + "Required options:\n"
+      + "-i [string] Input dataset to perform PCA on.\n\n"
+      + "Options:\n\n"
+      + "-d [int] Desired dimensionality of output dataset. If -1,\n"
+      + " no dimensionality reduction is performed.\n"
+      + " Default value -1.\n"
+      + "-s If set, the data will be scaled before running\n"
+      + " PCA, such that the variance of each feature is 1.";
+
+  /**
+   * Runs PCA on the dataset given with -i and prints the timings.
+   */
+  public static void main(String[] args) {
+    Timers timer = new Timers();
+    try {
+      // Get the data set path.
+      String dataset = Utils.getOption('i', args);
+      if (dataset.length() == 0) {
+        throw new IllegalArgumentException();
+      }
+
+      timer.StartTimer("total_time");
+      timer.StartTimer("loading_data");
+
+      // Load input dataset.
+      DataSource source = new DataSource(dataset);
+      Instances data = source.getDataSet();
+
+      timer.StopTimer("loading_data");
+
+      // Find out what dimension we want; keep every attribute by default.
+      int k = data.numAttributes();
+      String dimension = Utils.getOption('d', args);
+      if (dimension.length() != 0) {
+        k = Integer.parseInt(dimension);
+        // Validate the parameter.
+        if (k > data.numAttributes()) {
+          System.out.printf("[Fatal] New dimensionality (%d) cannot be greater "
+              + "than existing dimensionality (%d)!\n", k,
+              data.numAttributes());
+
+          System.exit(-1);
+        }
+      }
+
+      // Performs a principal components analysis.
+      PrincipalComponents pcaEvaluator = new PrincipalComponents();
+
+      // Sets the amount of variance to account for when retaining principal
+      // components.
+      pcaEvaluator.setVarianceCovered(1.0);
+      // Sets maximum number of attributes to include in transformed attribute
+      // names.
+      pcaEvaluator.setMaximumAttributeNames(-1);
+
+      // -s is a switch without a value, so read it with getFlag(); getOption()
+      // would take the next argument as its value, or throw if -s came last.
+      // With -s, centering-only mode is switched off.
+      boolean scale = Utils.getFlag('s', args);
+      pcaEvaluator.setCenterData(!scale);
+
+      // Ranking the attributes.
+      Ranker ranker = new Ranker();
+      // Specify the number of attributes to select from the ranked list.
+      ranker.setNumToSelect(k);
+
+      AttributeSelection selector = new AttributeSelection();
+      selector.setSearch(ranker);
+      selector.setEvaluator(pcaEvaluator);
+      selector.SelectAttributes(data);
+
+      // Transform data into eigenvector basis; the result itself is not used.
+      selector.reduceDimensionality(data);
+
+      timer.StopTimer("total_time");
+
+      timer.PrintTimer("loading_data");
+      timer.PrintTimer("total_time");
+
+    } catch (IllegalArgumentException e) {
+      System.err.println(USAGE);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+  }
+}
Added: mlpack/conf/jenkins-conf/benchmark/methods/weka/src/util/Timers.java
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/src/util/Timers.java Wed Jul 3 06:12:06 2013
@@ -0,0 +1,72 @@
+/**
+ * @file Timers.java
+ * @author Marcus Edel
+ *
+ * Class to provide timers.
+ */
+
+import java.util.HashMap;
+
+/**
+ * Provides named timers for timing sections of a program. A timer's value is
+ * the total time, in nanoseconds, of all its completed runs. Not thread-safe.
+ */
+public class Timers {
+  // Total nanoseconds of completed runs, and start reading of running timers.
+  private final HashMap<String, Long> timers = new HashMap<String, Long>();
+  private final HashMap<String, Long> running = new HashMap<String, Long>();
+
+  /**
+   * Start the given timer. A timer may be started and stopped repeatedly; its
+   * value is the sum of all completed runs. Starting a timer that is already
+   * running restarts only the current run.
+   *
+   * @param timerName - Name of timer to be started.
+   */
+  public void StartTimer(final String timerName) {
+    if (!timers.containsKey(timerName)) {
+      timers.put(timerName, 0L);
+    }
+    running.put(timerName, System.nanoTime());
+  }
+
+  /**
+   * Stop the given timer and add the length of the finished run to its value.
+   * Stopping a timer that is not running has no effect.
+   *
+   * @param timerName - Name of timer to be stopped.
+   */
+  public void StopTimer(final String timerName) {
+    final long now = System.nanoTime();
+    final Long start = running.remove(timerName);
+    if (start != null) {
+      timers.put(timerName, timers.get(timerName) + (now - start));
+    }
+  }
+
+  /**
+   * Get the value of the given timer.
+   *
+   * @param timerName - Name of timer to return value of.
+   * @return Total time of the timer's completed runs, in nanoseconds.
+   * @throws Exception If no timer with this name was ever started.
+   */
+  public Long GetTimer(final String timerName) throws Exception {
+    final Long time = timers.get(timerName);
+    if (time == null) {
+      throw new Exception("There exists no timer with this name.");
+    }
+    return time;
+  }
+
+  /**
+   * Prints the specified timer, in seconds, to standard output.
+   *
+   * @param timerName The name of the timer in question.
+   * @throws Exception If no timer with this name was ever started.
+   */
+  public void PrintTimer(final String timerName) throws Exception {
+    final double seconds = GetTimer(timerName) / 1e9;
+    System.out.printf("[INFO ] %s: %fs\n", timerName, seconds);
+  }
+}
\ No newline at end of file
More information about the mlpack-svn
mailing list