[mlpack-git] master: Remove any handling of empty clusters from LloydStepTypes. (1c87ed9)
gitdub at mlpack.org
gitdub at mlpack.org
Wed Jun 8 10:05:13 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/8551a21f9821399ded164d8dbb11e453bcb33c45...e8e2ff17da5978cacf3c9a45d4aa572a4bf008e5
>---------------------------------------------------------------
commit 1c87ed95c5f7a00425b5c0e1bf36f8c991c97916
Author: Ryan Curtin <ryan at ratml.org>
Date: Mon May 2 21:31:37 2016 +0000
Remove any handling of empty clusters from LloydStepTypes.
>---------------------------------------------------------------
1c87ed95c5f7a00425b5c0e1bf36f8c991c97916
src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp | 1 -
src/mlpack/methods/kmeans/elkan_kmeans_impl.hpp | 2 --
src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp | 2 --
src/mlpack/methods/kmeans/naive_kmeans.hpp | 5 ++++-
src/mlpack/methods/kmeans/naive_kmeans_impl.hpp | 2 --
src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp | 6 +-----
6 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 9bcf464..ed21c4c 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -179,7 +179,6 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
{
if (counts[c] == 0)
{
- newCentroids.col(c).fill(DBL_MAX);
clusterDistances[c] = 0;
}
else
diff --git a/src/mlpack/methods/kmeans/elkan_kmeans_impl.hpp b/src/mlpack/methods/kmeans/elkan_kmeans_impl.hpp
index 90659a5..27751d8 100644
--- a/src/mlpack/methods/kmeans/elkan_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/elkan_kmeans_impl.hpp
@@ -155,8 +155,6 @@ double ElkanKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
{
if (counts[c] > 0)
newCentroids.col(c) /= counts[c];
- else
- newCentroids.col(c).fill(DBL_MAX); // Fill with invalid value.
moveDistances(c) = metric.Evaluate(newCentroids.col(c), centroids.col(c));
cNorm += std::pow(moveDistances(c), 2.0);
diff --git a/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp b/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
index 244faaa..1c3ac79 100644
--- a/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
@@ -132,8 +132,6 @@ double HamerlyKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
{
if (counts(c) > 0)
newCentroids.col(c) /= counts(c);
- else
- newCentroids.col(c).fill(DBL_MAX); // Empty cluster.
// Calculate movement.
const double movement = metric.Evaluate(centroids.col(c),
diff --git a/src/mlpack/methods/kmeans/naive_kmeans.hpp b/src/mlpack/methods/kmeans/naive_kmeans.hpp
index ee4f2fc..abb5655 100644
--- a/src/mlpack/methods/kmeans/naive_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/naive_kmeans.hpp
@@ -35,10 +35,13 @@ class NaiveKMeans
/**
* Run a single iteration of the Lloyd algorithm, updating the given centroids
- * into the newCentroids matrix.
+ * into the newCentroids matrix. If any cluster is empty (that is, if any
+ * cluster has no points assigned to it), then the centroid associated with
+ * that cluster may be filled with invalid data (it will be corrected later).
*
* @param centroids Current cluster centroids.
* @param newCentroids New cluster centroids.
+ * @param counts Number of points in each cluster at the end of the iteration.
*/
double Iterate(const arma::mat& centroids,
arma::mat& newCentroids,
diff --git a/src/mlpack/methods/kmeans/naive_kmeans_impl.hpp b/src/mlpack/methods/kmeans/naive_kmeans_impl.hpp
index 2457a59..239169c 100644
--- a/src/mlpack/methods/kmeans/naive_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/naive_kmeans_impl.hpp
@@ -61,8 +61,6 @@ double NaiveKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
for (size_t i = 0; i < centroids.n_cols; ++i)
if (counts(i) != 0)
newCentroids.col(i) /= counts(i);
- else
- newCentroids.col(i).fill(DBL_MAX); // Invalid value.
distanceCalculations += centroids.n_cols * dataset.n_cols;
diff --git a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
index daca5ea..8403b07 100644
--- a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
@@ -61,11 +61,7 @@ double PellegMooreKMeans<MetricType, MatType>::Iterate(
double residual = 0.0;
for (size_t c = 0; c < centroids.n_cols; ++c)
{
- if (counts[c] == 0)
- {
- newCentroids.col(c).fill(DBL_MAX); // Should have happened anyway I think.
- }
- else
+ if (counts[c] > 0)
{
newCentroids.col(c) /= counts(c);
residual += std::pow(metric.Evaluate(centroids.col(c),
More information about the mlpack-git mailing list