[mlpack-git] master: Use a sane strategy for normalizing variances. (cd08c87)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Mon Jun 1 15:10:34 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/7011dd89f6a9cf8cf7b66e1b42c9147b606551c2...cd08c87d14fa5fe4e4e917feb7ea787cd05f6fcd
>---------------------------------------------------------------
commit cd08c87d14fa5fe4e4e917feb7ea787cd05f6fcd
Author: Ryan Curtin <ryan at ratml.org>
Date: Mon Jun 1 15:10:05 2015 -0400
Use a sane strategy for normalizing variances.
>---------------------------------------------------------------
cd08c87d14fa5fe4e4e917feb7ea787cd05f6fcd
src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp b/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
index 7bc4f76..2e6c117 100644
--- a/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
+++ b/src/mlpack/methods/kmeans/max_variance_new_cluster_impl.hpp
@@ -109,13 +109,14 @@ void MaxVarianceNewCluster::Precalculate(const MatType& data,
centroids.col(closestCluster)), 2.0);
}
- // Divide by the number of points in the cluster to produce the variance.
- // Although a -nan will occur here for the empty cluster(s), this doesn't
- // matter because variances.max() won't pick it up. If the number of points
- // in the cluster is 1, we ensure that cluster is not selected by forcing the
- // variance to 0.
+ // Divide by the number of points in the cluster to produce the variance,
+ // unless the cluster is empty or contains only one point, in which case we
+ // set the variance to 0.
for (size_t i = 0; i < clusterCounts.n_elem; ++i)
- variances[i] /= (clusterCounts[i] == 1) ? DBL_MAX : clusterCounts[i];
+ if (clusterCounts[i] <= 1)
+ variances[i] = 0;
+ else
+ variances[i] /= clusterCounts[i];
}
}; // namespace kmeans
More information about the mlpack-git mailing list