[mlpack-svn] r10787 - mlpack/trunk/src/mlpack/methods/kmeans

Wed Dec 14 12:23:59 EST 2011

Author: rcurtin
Date: 2011-12-14 12:23:59 -0500 (Wed, 14 Dec 2011)
New Revision: 10787

Modified:
   mlpack/trunk/src/mlpack/methods/kmeans/kmeans.hpp
   mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp
Log:
Better documentation for K-Means.


Modified: mlpack/trunk/src/mlpack/methods/kmeans/kmeans.hpp
===================================================================

--- mlpack/trunk/src/mlpack/methods/kmeans/kmeans.hpp	2011-12-14 17:16:02 UTC (rev 10786)
+++ mlpack/trunk/src/mlpack/methods/kmeans/kmeans.hpp	2011-12-14 17:23:59 UTC (rev 10787)
@@ -14,7 +14,7 @@
 #include "max_variance_new_cluster.hpp"
 
 namespace mlpack {
-namespace kmeans {
+namespace kmeans /** K-Means clustering. */ {
 
 /**
  * This class implements K-Means clustering.  This implementation supports
@@ -26,6 +26,21 @@
  * find the initial partition of the data, and the actions to be taken when an
  * empty cluster is encountered, as well as the distance metric to be used.
  *
+ * A simple example of how to run K-Means clustering is shown below.
+ *
+ * @code
+ * extern arma::mat data; // Dataset we want to run K-Means on.
+ * arma::Col<size_t> assignments; // Cluster assignments.
+ *
+ * KMeans<> k; // Default options.
+ * k.Cluster(data, 3, assignments); // 3 clusters.
+ *
+ * // Cluster using the Manhattan distance, 100 iterations maximum, and an
+ * // overclustering factor of 4.0.
+ * KMeans<metric::ManhattanDistance> manhattanK(100, 4.0);
+ * manhattanK.Cluster(data, 6, assignments); // 6 clusters.
+ * @endcode
+ *
  * @tparam DistanceMetric The distance metric to use for this KMeans; see
  *     metric::LMetric for an example.
  * @tparam InitialPartitionPolicy Initial partitioning policy; must implement a

Modified: mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp	2011-12-14 17:16:02 UTC (rev 10786)
+++ mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp	2011-12-14 17:23:59 UTC (rev 10787)
@@ -21,19 +21,21 @@
     "becomes empty, the point furthest from the centroid of the cluster with "
     "maximum variance is taken to fill that cluster.");
 
-PARAM_STRING_REQ("input_file", "Input dataset to perform clustering on.", "I");
-PARAM_INT_REQ("clusters", "Number of clusters to find.", "C");
+PARAM_STRING_REQ("input_file", "Input dataset to perform clustering on.", "i");
+PARAM_INT_REQ("clusters", "Number of clusters to find.", "c");
+
 PARAM_FLAG("in_place", "If specified, a column of the learned cluster "
-    "assignments will be added to the input dataset file.  In this case "
-    "--output_file is not necessary.", "P");
-PARAM_STRING("output_file", "File to write output labels to.", "O", "");
-PARAM_FLAG("allow_empty_clusters", "Allow empty clusters to be created.", "E");
-PARAM_FLAG("labels_only", "Only output labels into output file.", "L");
+    "assignments will be added to the input dataset file.  In this case, "
+    "--output_file is not necessary.", "p");
+PARAM_STRING("output_file", "File to write output labels or labeled data to.",
+    "o", "output.csv");
+PARAM_FLAG("allow_empty_clusters", "Allow empty clusters to be created.", "e");
+PARAM_FLAG("labels_only", "Only output labels into output file.", "l");
 PARAM_DOUBLE("overclustering", "Finds (overclustering * clusters) clusters, "
     "then merges them together until only the desired number of clusters are "
-    "left.", "C", 1.0);
+    "left.", "O", 1.0);
 PARAM_INT("max_iterations", "Maximum number of iterations before K-Means "
-    "terminates.", "M", 1000);
+    "terminates.", "m", 1000);
 
 int main(int argc, char** argv)
 {