[mlpack-git] master: Allow autodetection of number of centroids. (70deac5)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:01 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit 70deac5abb15b44746efba6d8bc23e23b964603f
Author: Ryan Curtin <ryan at ratml.org>
Date:   Wed Feb 4 17:54:53 2015 -0500

    Allow autodetection of number of centroids.


>---------------------------------------------------------------

70deac5abb15b44746efba6d8bc23e23b964603f
 src/mlpack/methods/kmeans/kmeans_main.cpp | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/methods/kmeans/kmeans_main.cpp b/src/mlpack/methods/kmeans/kmeans_main.cpp
index c163827..ff74d6d 100644
--- a/src/mlpack/methods/kmeans/kmeans_main.cpp
+++ b/src/mlpack/methods/kmeans/kmeans_main.cpp
@@ -48,7 +48,8 @@ PROGRAM_INFO("K-Means Clustering", "This program performs K-Means clustering "
 
 // Required options.
 PARAM_STRING_REQ("inputFile", "Input dataset to perform clustering on.", "i");
-PARAM_INT_REQ("clusters", "Number of clusters to find.", "c");
+PARAM_INT_REQ("clusters", "Number of clusters to find (0 autodetects from "
+    "initial centroids).", "c");
 
 // Output options.
 PARAM_FLAG("in_place", "If specified, a column containing the learned cluster "
@@ -176,11 +177,21 @@ void RunKMeans(const InitialPartitionPolicy& ipp)
 {
   // Now, do validation of input options.
   const string inputFile = CLI::GetParam<string>("inputFile");
-  const int clusters = CLI::GetParam<int>("clusters");
-  if (clusters < 1)
+  int clusters = CLI::GetParam<int>("clusters");
+  if (clusters < 0)
   {
     Log::Fatal << "Invalid number of clusters requested (" << clusters << ")! "
-        << "Must be greater than or equal to 1." << endl;
+        << "Must be greater than or equal to 0." << endl;
+  }
+  else if (clusters == 0 && CLI::HasParam("initial_centroids"))
+  {
+    Log::Info << "Detecting number of clusters automatically from input "
+        << "centroids." << endl;
+  }
+  else if (clusters == 0)
+  {
+    Log::Fatal << "Number of clusters requested is 0, and no initial centroids "
+        << "provided!" << endl;
   }
 
   const int maxIterations = CLI::GetParam<int>("max_iterations");
@@ -210,6 +221,8 @@ void RunKMeans(const InitialPartitionPolicy& ipp)
   {
     string initialCentroidsFile = CLI::GetParam<string>("initial_centroids");
     data::Load(initialCentroidsFile, centroids, true);
+    if (clusters == 0)
+      clusters = centroids.n_cols;
 
     if (CLI::HasParam("refined_start"))
       Log::Warn << "Initial centroids are specified, but will be ignored "



More information about the mlpack-git mailing list