[mlpack-git] master: Use a sane strategy for normalizing variances. (cd08c87)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Mon Jun 1 15:10:34 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/7011dd89f6a9cf8cf7b66e1b42c9147b606551c2...cd08c87d14fa5fe4e4e917feb7ea787cd05f6fcd

>---------------------------------------------------------------

commit cd08c87d14fa5fe4e4e917feb7ea787cd05f6fcd
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon Jun 1 15:10:05 2015 -0400

    Use a sane strategy for normalizing variances.


>---------------------------------------------------------------

cd08c87d14fa5fe4e4e917feb7ea787cd05f6fcd
 src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp b/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
index 7bc4f76..2e6c117 100644
--- a/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
+++ b/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
@@ -109,13 +109,14 @@ void MaxVarianceNewCluster::Precalculate(const MatType& data,
         centroids.col(closestCluster)), 2.0);
   }
 
-  // Divide by the number of points in the cluster to produce the variance.
-  // Although a -nan will occur here for the empty cluster(s), this doesn't
-  // matter because variances.max() won't pick it up.  If the number of points
-  // in the cluster is 1, we ensure that cluster is not selected by forcing the
-  // variance to 0.
+  // Divide by the number of points in the cluster to produce the variance,
+  // unless the cluster is empty or contains only one point, in which case we
+  // set the variance to 0.
   for (size_t i = 0; i < clusterCounts.n_elem; ++i)
-    variances[i] /= (clusterCounts[i] == 1) ? DBL_MAX : clusterCounts[i];
+    if (clusterCounts[i] <= 1)
+      variances[i] = 0;
+    else
+      variances[i] /= clusterCounts[i];
 }
 
 }; // namespace kmeans



More information about the mlpack-git mailing list