[mlpack-git] master: Allow autodetection of number of centroids. (70deac5)
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:01 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit 70deac5abb15b44746efba6d8bc23e23b964603f
Author: Ryan Curtin <ryan at ratml.org>
Date: Wed Feb 4 17:54:53 2015 -0500
Allow autodetection of number of centroids.
>---------------------------------------------------------------
70deac5abb15b44746efba6d8bc23e23b964603f
src/mlpack/methods/kmeans/kmeans_main.cpp | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/src/mlpack/methods/kmeans/kmeans_main.cpp b/src/mlpack/methods/kmeans/kmeans_main.cpp
index c163827..ff74d6d 100644
--- a/src/mlpack/methods/kmeans/kmeans_main.cpp
+++ b/src/mlpack/methods/kmeans/kmeans_main.cpp
@@ -48,7 +48,8 @@ PROGRAM_INFO("K-Means Clustering", "This program performs K-Means clustering "
// Required options.
PARAM_STRING_REQ("inputFile", "Input dataset to perform clustering on.", "i");
-PARAM_INT_REQ("clusters", "Number of clusters to find.", "c");
+PARAM_INT_REQ("clusters", "Number of clusters to find (0 autodetects from "
+ "initial centroids).", "c");
// Output options.
PARAM_FLAG("in_place", "If specified, a column containing the learned cluster "
@@ -176,11 +177,21 @@ void RunKMeans(const InitialPartitionPolicy& ipp)
{
// Now, do validation of input options.
const string inputFile = CLI::GetParam<string>("inputFile");
- const int clusters = CLI::GetParam<int>("clusters");
- if (clusters < 1)
+ int clusters = CLI::GetParam<int>("clusters");
+ if (clusters < 0)
{
Log::Fatal << "Invalid number of clusters requested (" << clusters << ")! "
- << "Must be greater than or equal to 1." << endl;
+ << "Must be greater than or equal to 0." << endl;
+ }
+ else if (clusters == 0 && CLI::HasParam("initial_centroids"))
+ {
+ Log::Info << "Detecting number of clusters automatically from input "
+ << "centroids." << endl;
+ }
+ else if (clusters == 0)
+ {
+ Log::Fatal << "Number of clusters requested is 0, and no initial centroids "
+ << "provided!" << endl;
}
const int maxIterations = CLI::GetParam<int>("max_iterations");
@@ -210,6 +221,8 @@ void RunKMeans(const InitialPartitionPolicy& ipp)
{
string initialCentroidsFile = CLI::GetParam<string>("initial_centroids");
data::Load(initialCentroidsFile, centroids, true);
+ if (clusters == 0)
+ clusters = centroids.n_cols;
if (CLI::HasParam("refined_start"))
Log::Warn << "Initial centroids are specified, but will be ignored "
More information about the mlpack-git mailing list