[mlpack-git] master: Fix a bug; now this algorithm is much faster. (1283992)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 22:01:08 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 128399241ca1a71842335b809e9c34dc1ceb8a1c
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sat Oct 11 01:43:52 2014 +0000

    Fix a bug; now this algorithm is much faster.


>---------------------------------------------------------------

128399241ca1a71842335b809e9c34dc1ceb8a1c
 src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp b/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
index 5292eab..7441acf 100644
--- a/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
@@ -45,7 +45,8 @@ double HamerlyKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
   {
     for (size_t j = i + 1; j < centroids.n_cols; ++j)
     {
-      const double dist = metric.Evaluate(centroids.col(i), centroids.col(j));
+      const double dist = metric.Evaluate(centroids.col(i), centroids.col(j)) /
+          2.0;
       ++distanceCalculations;
 
       // Update bounds, if this intra-cluster distance is smaller.
@@ -58,7 +59,7 @@ double HamerlyKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
 
   for (size_t i = 0; i < dataset.n_cols; ++i)
   {
-    const double m = std::max(minClusterDistances(assignments[i]) / 2.0,
+    const double m = std::max(minClusterDistances(assignments[i]),
                               lowerBounds(i));
 
     // First bound test.
@@ -84,13 +85,14 @@ double HamerlyKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
 
     // The bounds failed.  So test against all other clusters.
     // This is Hamerly's Point-All-Ctrs() function from the paper.
+    // We have to reset the lower bound first.
+    lowerBounds(i) = DBL_MAX;
     for (size_t c = 0; c < centroids.n_cols; ++c)
     {
       if (c == assignments[i])
         continue;
 
       const double dist = metric.Evaluate(dataset.col(i), centroids.col(c));
-      ++distanceCalculations;
 
       // Is this a better cluster?  At this point, upperBounds[i] = d(i, c(i)).
       if (dist < upperBounds(i))
@@ -106,6 +108,7 @@ double HamerlyKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
         lowerBounds(i) = dist;
       }
     }
+    distanceCalculations += centroids.n_cols - 1;
 
     // Update new centroids.
     newCentroids.col(assignments[i]) += dataset.col(i);



More information about the mlpack-git mailing list