[mlpack-git] master: Handle empty centroids _only_ in EmptyClusterPolicy. (428191f)

gitdub at mlpack.org gitdub at mlpack.org
Wed Jun 8 10:05:13 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/8551a21f9821399ded164d8dbb11e453bcb33c45...e8e2ff17da5978cacf3c9a45d4aa572a4bf008e5

>---------------------------------------------------------------

commit 428191f27084f76f58c3a4f7a284cd4cd1188906
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon May 2 21:30:56 2016 +0000

    Handle empty centroids _only_ in EmptyClusterPolicy.


>---------------------------------------------------------------

428191f27084f76f58c3a4f7a284cd4cd1188906
 src/mlpack/methods/kmeans/CMakeLists.txt           |  1 +
 src/mlpack/methods/kmeans/allow_empty_clusters.hpp | 14 +++++++-------
 ..._empty_clusters.hpp => kill_empty_clusters.hpp} | 22 +++++++++++-----------
 3 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/mlpack/methods/kmeans/CMakeLists.txt b/src/mlpack/methods/kmeans/CMakeLists.txt
index 2c3eafc..df295c2 100644
--- a/src/mlpack/methods/kmeans/CMakeLists.txt
+++ b/src/mlpack/methods/kmeans/CMakeLists.txt
@@ -11,6 +11,7 @@ set(SOURCES
   elkan_kmeans_impl.hpp
   hamerly_kmeans.hpp
   hamerly_kmeans_impl.hpp
+  kill_empty_clusters.hpp
   kmeans.hpp
   kmeans_impl.hpp
   max_variance_new_cluster.hpp
diff --git a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp b/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
index d8388dd..7aacdf7 100644
--- a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
+++ b/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
@@ -24,8 +24,8 @@ class AllowEmptyClusters
   AllowEmptyClusters() { }
 
   /**
-   * This function does nothing.  It is called by K-Means when K-Means detects
-   * an empty cluster.
+   * This function allows empty clusters to persist simply by leaving the empty
+   * cluster in its last position.
    *
    * @tparam MatType Type of data (arma::mat or arma::spmat).
    * @param data Dataset on which clustering is being performed.
@@ -43,15 +43,15 @@ class AllowEmptyClusters
   template<typename MetricType, typename MatType>
   static inline force_inline size_t EmptyCluster(
       const MatType& /* data */,
-      const size_t /* emptyCluster */,
-      const arma::mat& /* oldCentroids */,
-      arma::mat& /* newCentroids */,
+      const size_t emptyCluster,
+      const arma::mat& oldCentroids,
+      arma::mat& newCentroids,
       arma::Col<size_t>& /* clusterCounts */,
       MetricType& /* metric */,
       const size_t /* iteration */)
   {
-    // Empty clusters are okay!  Do nothing.
-    return 0;
+    // Take the last iteration's centroid.
+    newCentroids.col(emptyCluster) = oldCentroids.col(emptyCluster);
   }
 
   //! Serialize the empty cluster policy (nothing to do).
diff --git a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp b/src/mlpack/methods/kmeans/kill_empty_clusters.hpp
similarity index 73%
copy from src/mlpack/methods/kmeans/allow_empty_clusters.hpp
copy to src/mlpack/methods/kmeans/kill_empty_clusters.hpp
index d8388dd..9b0038e 100644
--- a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
+++ b/src/mlpack/methods/kmeans/kill_empty_clusters.hpp
@@ -5,8 +5,8 @@
  * This very simple policy is used when K-Means is allowed to return empty
  * clusters.
  */
-#ifndef MLPACK_METHODS_KMEANS_ALLOW_EMPTY_CLUSTERS_HPP
-#define MLPACK_METHODS_KMEANS_ALLOW_EMPTY_CLUSTERS_HPP
+#ifndef __MLPACK_METHODS_KMEANS_KILL_EMPTY_CLUSTERS_HPP
+#define __MLPACK_METHODS_KMEANS_KILL_EMPTY_CLUSTERS_HPP
 
 #include <mlpack/core.hpp>
 
@@ -14,18 +14,18 @@ namespace mlpack {
 namespace kmeans {
 
 /**
- * Policy which allows K-Means to create empty clusters without any error being
- * reported.
+ * Policy which allows K-Means to "kill" empty clusters without any error being
+ * reported.  This means the centroids will be filled with DBL_MAX.
  */
-class AllowEmptyClusters
+class KillEmptyClusters
 {
  public:
   //! Default constructor required by EmptyClusterPolicy policy.
   AllowEmptyClusters() { }
 
   /**
-   * This function does nothing.  It is called by K-Means when K-Means detects
-   * an empty cluster.
+   * This function sets an empty cluster found during k-means to all DBL_MAX
+   * (i.e. an invalid "dead" cluster).
    *
    * @tparam MatType Type of data (arma::mat or arma::spmat).
    * @param data Dataset on which clustering is being performed.
@@ -43,15 +43,15 @@ class AllowEmptyClusters
   template<typename MetricType, typename MatType>
   static inline force_inline size_t EmptyCluster(
       const MatType& /* data */,
-      const size_t /* emptyCluster */,
+      const size_t emptyCluster,
       const arma::mat& /* oldCentroids */,
-      arma::mat& /* newCentroids */,
+      arma::mat& newCentroids,
       arma::Col<size_t>& /* clusterCounts */,
       MetricType& /* metric */,
       const size_t /* iteration */)
   {
-    // Empty clusters are okay!  Do nothing.
-    return 0;
+    // Kill the empty cluster.
+    newCentroids.col(emptyCluster).fill(DBL_MAX);
   }
 
   //! Serialize the empty cluster policy (nothing to do).




More information about the mlpack-git mailing list