[mlpack-svn] r15435 - mlpack/conf/jenkins-conf/benchmark/methods/weka/src/kmeans
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Jul 8 12:19:33 EDT 2013
Author: marcus
Date: Mon Jul 8 12:19:33 2013
New Revision: 15435
Log:
Add weka K-Means method src.
Added:
mlpack/conf/jenkins-conf/benchmark/methods/weka/src/kmeans/
mlpack/conf/jenkins-conf/benchmark/methods/weka/src/kmeans/KMeans.java
Added: mlpack/conf/jenkins-conf/benchmark/methods/weka/src/kmeans/KMeans.java
==============================================================================
--- (empty file)
+++ mlpack/conf/jenkins-conf/benchmark/methods/weka/src/kmeans/KMeans.java Mon Jul 8 12:19:33 2013
@@ -0,0 +1,101 @@
+/**
+ * @file KMeans.java
+ * @author Marcus Edel
+ *
+ * K-Means Clustering with weka.
+ */
+
+import weka.clusterers.SimpleKMeans;
+import weka.core.Instances;
+import weka.core.Utils;
+import weka.core.converters.ConverterUtils.DataSource;
+
+/**
+ * This class use the weka libary to implement K-Means Clustering.
+ */
+public class KMeans {
+
+ private static final String USAGE = String
+ .format("This program performs K-Means clustering on the given dataset.\n\n"
+ + "Required options:\n"
+ + "(-c) [int] Number of clusters to find.\n"
+ + "(-i) [string] Input dataset to perform clustering on."
+ + "-m) [int] Maximum number of iterations before K-Means\n"
+ + " terminates. Default value 1000.\n"
+ + "(-s) [int] Random seed. ");
+
+ public static void main(String args[]) {
+ Timers timer = new Timers();
+ try {
+ // Get the data set path.
+ String inputFile = Utils.getOption('i', args);
+ if (inputFile.length() == 0)
+ throw new IllegalArgumentException();
+
+ // Load input dataset.
+ DataSource source = new DataSource(inputFile);
+ Instances data = source.getDataSet();
+
+ // Create the KMeans object.
+ SimpleKMeans kmeans = new SimpleKMeans();
+
+ // Gather parameters and validation of options.
+ String maxIteration = Utils.getOption('m', args);
+ int m = 1000;
+ if (maxIteration.length() != 0)
+ {
+ m = Integer.parseInt(maxIteration);
+ if (m < 0)
+ {
+ System.out.println("[Fatal] Invalid value for maximum iterations(" +
+ maxIteration + ")! Must be greater than or equal to 0..");
+ System.exit(-1);
+ }
+ else if(m == 0)
+ {
+ m = Integer.MAX_VALUE;
+ }
+ }
+
+ String clusters = Utils.getOption('c', args);
+ if (clusters.length() == 0)
+ {
+ throw new IllegalArgumentException();
+ }
+ else
+ {
+ int c = Integer.parseInt(clusters);
+ if (c < 1)
+ {
+ System.out.println("[Fatal] Invalid number of clusters requested (" +
+ clusters + ")! Must be greater than or equal to 1.");
+ System.exit(-1);
+ }
+
+ kmeans.setNumClusters(c);
+ }
+
+ String seed = Utils.getOption('s', args);
+ if (seed.length() != 0)
+ kmeans.setSeed(Integer.parseInt(seed));
+
+ kmeans.setMaxIterations(m);
+ kmeans.setPreserveInstancesOrder(true);
+
+ // Perform K-Means clustering.
+ timer.StartTimer("total_time");
+
+ kmeans.buildClusterer(data);
+ int[] assignments = kmeans.getAssignments();
+
+ timer.StopTimer("total_time");
+ timer.PrintTimer("total_time");
+
+ } catch (IllegalArgumentException e) {
+ System.err.println(USAGE);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+}
More information about the mlpack-svn
mailing list