[mlpack-git] master: Handle empty centroids _only_ in EmptyClusterPolicy. (428191f)
gitdub at mlpack.org
gitdub at mlpack.org
Wed Jun 8 10:05:13 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/8551a21f9821399ded164d8dbb11e453bcb33c45...e8e2ff17da5978cacf3c9a45d4aa572a4bf008e5
>---------------------------------------------------------------
commit 428191f27084f76f58c3a4f7a284cd4cd1188906
Author: Ryan Curtin <ryan at ratml.org>
Date: Mon May 2 21:30:56 2016 +0000
Handle empty centroids _only_ in EmptyClusterPolicy.
>---------------------------------------------------------------
428191f27084f76f58c3a4f7a284cd4cd1188906
src/mlpack/methods/kmeans/CMakeLists.txt | 1 +
src/mlpack/methods/kmeans/allow_empty_clusters.hpp | 14 +++++++-------
..._empty_clusters.hpp => kill_empty_clusters.hpp} | 22 +++++++++++-----------
3 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/src/mlpack/methods/kmeans/CMakeLists.txt b/src/mlpack/methods/kmeans/CMakeLists.txt
index 2c3eafc..df295c2 100644
--- a/src/mlpack/methods/kmeans/CMakeLists.txt
+++ b/src/mlpack/methods/kmeans/CMakeLists.txt
@@ -11,6 +11,7 @@ set(SOURCES
elkan_kmeans_impl.hpp
hamerly_kmeans.hpp
hamerly_kmeans_impl.hpp
+ kill_empty_clusters.hpp
kmeans.hpp
kmeans_impl.hpp
max_variance_new_cluster.hpp
diff --git a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp b/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
index d8388dd..7aacdf7 100644
--- a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
+++ b/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
@@ -24,8 +24,8 @@ class AllowEmptyClusters
AllowEmptyClusters() { }
/**
- * This function does nothing. It is called by K-Means when K-Means detects
- * an empty cluster.
+ * This function allows empty clusters to persist simply by leaving the empty
+ * cluster in its last position.
*
* @tparam MatType Type of data (arma::mat or arma::spmat).
* @param data Dataset on which clustering is being performed.
@@ -43,15 +43,15 @@ class AllowEmptyClusters
template<typename MetricType, typename MatType>
static inline force_inline size_t EmptyCluster(
const MatType& /* data */,
- const size_t /* emptyCluster */,
- const arma::mat& /* oldCentroids */,
- arma::mat& /* newCentroids */,
+ const size_t emptyCluster,
+ const arma::mat& oldCentroids,
+ arma::mat& newCentroids,
arma::Col<size_t>& /* clusterCounts */,
MetricType& /* metric */,
const size_t /* iteration */)
{
- // Empty clusters are okay! Do nothing.
- return 0;
+ // Take the last iteration's centroid.
+ newCentroids.col(emptyCluster) = oldCentroids.col(emptyCluster);
}
//! Serialize the empty cluster policy (nothing to do).
diff --git a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp b/src/mlpack/methods/kmeans/kill_empty_clusters.hpp
similarity index 73%
copy from src/mlpack/methods/kmeans/allow_empty_clusters.hpp
copy to src/mlpack/methods/kmeans/kill_empty_clusters.hpp
index d8388dd..9b0038e 100644
--- a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
+++ b/src/mlpack/methods/kmeans/kill_empty_clusters.hpp
@@ -5,8 +5,8 @@
* This very simple policy is used when K-Means is allowed to return empty
* clusters.
*/
-#ifndef MLPACK_METHODS_KMEANS_ALLOW_EMPTY_CLUSTERS_HPP
-#define MLPACK_METHODS_KMEANS_ALLOW_EMPTY_CLUSTERS_HPP
+#ifndef __MLPACK_METHODS_KMEANS_KILL_EMPTY_CLUSTERS_HPP
+#define __MLPACK_METHODS_KMEANS_KILL_EMPTY_CLUSTERS_HPP
#include <mlpack/core.hpp>
@@ -14,18 +14,18 @@ namespace mlpack {
namespace kmeans {
/**
- * Policy which allows K-Means to create empty clusters without any error being
- * reported.
+ * Policy which allows K-Means to "kill" empty clusters without any error being
+ * reported. This means the centroids will be filled with DBL_MAX.
*/
-class AllowEmptyClusters
+class KillEmptyClusters
{
public:
//! Default constructor required by EmptyClusterPolicy policy.
AllowEmptyClusters() { }
/**
- * This function does nothing. It is called by K-Means when K-Means detects
- * an empty cluster.
+ * This function sets an empty cluster found during k-means to all DBL_MAX
+ * (i.e. an invalid "dead" cluster).
*
* @tparam MatType Type of data (arma::mat or arma::spmat).
* @param data Dataset on which clustering is being performed.
@@ -43,15 +43,15 @@ class AllowEmptyClusters
template<typename MetricType, typename MatType>
static inline force_inline size_t EmptyCluster(
const MatType& /* data */,
- const size_t /* emptyCluster */,
+ const size_t emptyCluster,
const arma::mat& /* oldCentroids */,
- arma::mat& /* newCentroids */,
+ arma::mat& newCentroids,
arma::Col<size_t>& /* clusterCounts */,
MetricType& /* metric */,
const size_t /* iteration */)
{
- // Empty clusters are okay! Do nothing.
- return 0;
+ // Kill the empty cluster.
+ newCentroids.col(emptyCluster).fill(DBL_MAX);
}
//! Serialize the empty cluster policy (nothing to do).
More information about the mlpack-git mailing list