[mlpack-git] master: Add --kill_empty_clusters and documentation for it. (c5b7186)
gitdub at mlpack.org
gitdub at mlpack.org
Wed Jun 8 10:05:13 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/8551a21f9821399ded164d8dbb11e453bcb33c45...e8e2ff17da5978cacf3c9a45d4aa572a4bf008e5
>---------------------------------------------------------------
commit c5b7186ab8e8e8ec30ffec8c4c8ad572f684715f
Author: Ryan Curtin <ryan at ratml.org>
Date: Sun Jun 5 20:29:53 2016 +0000
Add --kill_empty_clusters and documentation for it.
>---------------------------------------------------------------
c5b7186ab8e8e8ec30ffec8c4c8ad572f684715f
src/mlpack/methods/kmeans/kmeans_main.cpp | 26 +++++++++++++++++++++++---
1 file changed, 23 insertions(+), 3 deletions(-)
diff --git a/src/mlpack/methods/kmeans/kmeans_main.cpp b/src/mlpack/methods/kmeans/kmeans_main.cpp
index 110774e..a197f77 100644
--- a/src/mlpack/methods/kmeans/kmeans_main.cpp
+++ b/src/mlpack/methods/kmeans/kmeans_main.cpp
@@ -8,6 +8,7 @@
#include "kmeans.hpp"
#include "allow_empty_clusters.hpp"
+#include "kill_empty_clusters.hpp"
#include "refined_start.hpp"
#include "elkan_kmeans.hpp"
#include "hamerly_kmeans.hpp"
@@ -42,8 +43,19 @@ PROGRAM_INFO("K-Means Clustering", "This program performs K-Means clustering "
"('hamerly'), the dual-tree k-means algorithm ('dualtree'), and the "
"dual-tree k-means algorithm using the cover tree ('dualtree-covertree')."
"\n\n"
+ "The behavior for when an empty cluster is encountered can be modified with"
+ " the --allow_empty_clusters (-e) option. When this option is specified "
+ "and there is a cluster owning no points at the end of an iteration, that "
+ "cluster's centroid will simply remain in its position from the previous "
+ "iteration. If the --kill_empty_clusters (-E) option is specified, then "
+ "when a cluster owns no points at the end of an iteration, the cluster "
+ "centroid is simply filled with DBL_MAX, killing it and effectively "
+ "reducing k for the rest of the computation. Note that the default option "
+ "when neither empty cluster option is specified can be time-consuming to "
+ "calculate; therefore, specifying -e or -E will often accelerate runtime."
+ "\n\n"
"As of October 2014, the --overclustering option has been removed. If you "
- "want this support back, let us know -- file a bug at "
+ "want this support back, let us know---file a bug at "
"https://github.com/mlpack/mlpack/ or get in touch through another means.");
// Required options.
@@ -61,7 +73,9 @@ PARAM_STRING("centroid_file", "If specified, the centroids of each cluster will"
" be written to the given file.", "C", "");
// k-means configuration options.
-PARAM_FLAG("allow_empty_clusters", "Allow empty clusters to be created.", "e");
+PARAM_FLAG("allow_empty_clusters", "Allow empty clusters to be persist.", "e");
+PARAM_FLAG("kill_empty_clusters", "Remove empty clusters when they occur.",
+ "E");
PARAM_FLAG("labels_only", "Only output labels into output file.", "l");
PARAM_INT("max_iterations", "Maximum number of iterations before K-Means "
"terminates.", "m", 1000);
@@ -135,8 +149,14 @@ int main(int argc, char** argv)
template<typename InitialPartitionPolicy>
void FindEmptyClusterPolicy(const InitialPartitionPolicy& ipp)
{
- if (CLI::HasParam("allow_empty_clusters"))
+ if (CLI::HasParam("allow_empty_clusters") &&
+ CLI::HasParam("kill_empty_clusters"))
+ Log::Fatal << "Only one of --allow_empty_clusters (-e) or "
+ << "--kill_empty_clusters (-E) may be specified!" << endl;
+ else if (CLI::HasParam("allow_empty_clusters"))
FindLloydStepType<InitialPartitionPolicy, AllowEmptyClusters>(ipp);
+ else if (CLI::HasParam("kill_empty_clusters"))
+ FindLloydStepType<InitialPartitionPolicy, KillEmptyClusters>(ipp);
else
FindLloydStepType<InitialPartitionPolicy, MaxVarianceNewCluster>(ipp);
}
More information about the mlpack-git mailing list