[mlpack-git] master: Remove any handling of empty clusters from LloydStepTypes. (1c87ed9)

gitdub at mlpack.org gitdub at mlpack.org
Wed Jun 8 10:05:13 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/8551a21f9821399ded164d8dbb11e453bcb33c45...e8e2ff17da5978cacf3c9a45d4aa572a4bf008e5

>---------------------------------------------------------------

commit 1c87ed95c5f7a00425b5c0e1bf36f8c991c97916
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon May 2 21:31:37 2016 +0000

    Remove any handling of empty clusters from LloydStepTypes.


>---------------------------------------------------------------

1c87ed95c5f7a00425b5c0e1bf36f8c991c97916
 src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp    | 1 -
 src/mlpack/methods/kmeans/elkan_kmeans_impl.hpp        | 2 --
 src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp      | 2 --
 src/mlpack/methods/kmeans/naive_kmeans.hpp             | 5 ++++-
 src/mlpack/methods/kmeans/naive_kmeans_impl.hpp        | 2 --
 src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp | 6 +-----
 6 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 9bcf464..ed21c4c 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -179,7 +179,6 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
   {
     if (counts[c] == 0)
     {
-      newCentroids.col(c).fill(DBL_MAX);
       clusterDistances[c] = 0;
     }
     else
diff --git a/src/mlpack/methods/kmeans/elkan_kmeans_impl.hpp b/src/mlpack/methods/kmeans/elkan_kmeans_impl.hpp
index 90659a5..27751d8 100644
--- a/src/mlpack/methods/kmeans/elkan_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/elkan_kmeans_impl.hpp
@@ -155,8 +155,6 @@ double ElkanKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
   {
     if (counts[c] > 0)
       newCentroids.col(c) /= counts[c];
-    else
-      newCentroids.col(c).fill(DBL_MAX); // Fill with invalid value.
 
     moveDistances(c) = metric.Evaluate(newCentroids.col(c), centroids.col(c));
     cNorm += std::pow(moveDistances(c), 2.0);
diff --git a/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp b/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
index 244faaa..1c3ac79 100644
--- a/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
@@ -132,8 +132,6 @@ double HamerlyKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
   {
     if (counts(c) > 0)
       newCentroids.col(c) /= counts(c);
-    else
-      newCentroids.col(c).fill(DBL_MAX); // Empty cluster.
 
     // Calculate movement.
     const double movement = metric.Evaluate(centroids.col(c),
diff --git a/src/mlpack/methods/kmeans/naive_kmeans.hpp b/src/mlpack/methods/kmeans/naive_kmeans.hpp
index ee4f2fc..abb5655 100644
--- a/src/mlpack/methods/kmeans/naive_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/naive_kmeans.hpp
@@ -35,10 +35,13 @@ class NaiveKMeans
 
   /**
    * Run a single iteration of the Lloyd algorithm, updating the given centroids
-   * into the newCentroids matrix.
+   * into the newCentroids matrix.  If any cluster is empty (that is, if any
+   * cluster has no points assigned to it), then the centroid associated with
+   * that cluster may be filled with invalid data (it will be corrected later).
    *
    * @param centroids Current cluster centroids.
    * @param newCentroids New cluster centroids.
+   * @param counts Number of points in each cluster at the end of the iteration.
    */
   double Iterate(const arma::mat& centroids,
                  arma::mat& newCentroids,
diff --git a/src/mlpack/methods/kmeans/naive_kmeans_impl.hpp b/src/mlpack/methods/kmeans/naive_kmeans_impl.hpp
index 2457a59..239169c 100644
--- a/src/mlpack/methods/kmeans/naive_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/naive_kmeans_impl.hpp
@@ -61,8 +61,6 @@ double NaiveKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
   for (size_t i = 0; i < centroids.n_cols; ++i)
     if (counts(i) != 0)
       newCentroids.col(i) /= counts(i);
-    else
-      newCentroids.col(i).fill(DBL_MAX); // Invalid value.
 
   distanceCalculations += centroids.n_cols * dataset.n_cols;
 
diff --git a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
index daca5ea..8403b07 100644
--- a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
@@ -61,11 +61,7 @@ double PellegMooreKMeans<MetricType, MatType>::Iterate(
   double residual = 0.0;
   for (size_t c = 0; c < centroids.n_cols; ++c)
   {
-    if (counts[c] == 0)
-    {
-      newCentroids.col(c).fill(DBL_MAX); // Should have happened anyway I think.
-    }
-    else
+    if (counts[c] > 0)
     {
       newCentroids.col(c) /= counts(c);
       residual += std::pow(metric.Evaluate(centroids.col(c),




More information about the mlpack-git mailing list