[mlpack-svn] r10787 - mlpack/trunk/src/mlpack/methods/kmeans
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Dec 14 12:23:59 EST 2011
Author: rcurtin
Date: 2011-12-14 12:23:59 -0500 (Wed, 14 Dec 2011)
New Revision: 10787
Modified:
mlpack/trunk/src/mlpack/methods/kmeans/kmeans.hpp
mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp
Log:
Better documentation for K-Means.
Modified: mlpack/trunk/src/mlpack/methods/kmeans/kmeans.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/kmeans.hpp 2011-12-14 17:16:02 UTC (rev 10786)
+++ mlpack/trunk/src/mlpack/methods/kmeans/kmeans.hpp 2011-12-14 17:23:59 UTC (rev 10787)
@@ -14,7 +14,7 @@
#include "max_variance_new_cluster.hpp"
namespace mlpack {
-namespace kmeans {
+namespace kmeans /** K-Means clustering. */ {
/**
* This class implements K-Means clustering. This implementation supports
@@ -26,6 +26,21 @@
* find the initial partition of the data, and the actions to be taken when an
* empty cluster is encountered, as well as the distance metric to be used.
*
+ * A simple example of how to run K-Means clustering is shown below.
+ *
+ * @code
+ * extern arma::mat data; // Dataset we want to run K-Means on.
+ * arma::Col<size_t> assignments; // Cluster assignments.
+ *
+ * KMeans<> k; // Default options.
+ * k.Cluster(data, 3, assignments); // 3 clusters.
+ *
+ * // Cluster using the Manhattan distance, 100 iterations maximum, and an
+ * // overclustering factor of 4.0.
+ * KMeans<metric::ManhattanDistance> k(100, 4.0);
+ * k.Cluster(data, 6, assignments); // 6 clusters.
+ * @endcode
+ *
* @tparam DistanceMetric The distance metric to use for this KMeans; see
* metric::LMetric for an example.
* @tparam InitialPartitionPolicy Initial partitioning policy; must implement a
Modified: mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp 2011-12-14 17:16:02 UTC (rev 10786)
+++ mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp 2011-12-14 17:23:59 UTC (rev 10787)
@@ -21,19 +21,21 @@
"becomes empty, the point furthest from the centroid of the cluster with "
"maximum variance is taken to fill that cluster.");
-PARAM_STRING_REQ("input_file", "Input dataset to perform clustering on.", "I");
-PARAM_INT_REQ("clusters", "Number of clusters to find.", "C");
+PARAM_STRING_REQ("input_file", "Input dataset to perform clustering on.", "i");
+PARAM_INT_REQ("clusters", "Number of clusters to find.", "c");
+
PARAM_FLAG("in_place", "If specified, a column of the learned cluster "
- "assignments will be added to the input dataset file. In this case "
- "--output_file is not necessary.", "P");
-PARAM_STRING("output_file", "File to write output labels to.", "O", "");
-PARAM_FLAG("allow_empty_clusters", "Allow empty clusters to be created.", "E");
-PARAM_FLAG("labels_only", "Only output labels into output file.", "L");
+ "assignments will be added to the input dataset file. In this case, "
+ "--output_file is not necessary.", "p");
+PARAM_STRING("output_file", "File to write output labels or labeled data to.",
+ "o", "output.csv");
+PARAM_FLAG("allow_empty_clusters", "Allow empty clusters to be created.", "e");
+PARAM_FLAG("labels_only", "Only output labels into output file.", "l");
PARAM_DOUBLE("overclustering", "Finds (overclustering * clusters) clusters, "
"then merges them together until only the desired number of clusters are "
- "left.", "C", 1.0);
+ "left.", "O", 1.0);
PARAM_INT("max_iterations", "Maximum number of iterations before K-Means "
- "terminates.", "M", 1000);
+ "terminates.", "m", 1000);
int main(int argc, char** argv)
{
More information about the mlpack-svn
mailing list