[mlpack-git] master: Recalculate upper bound before giving up on prune. This is done by Hamerly's algorithm and we obtain minor speedup as a result. (e4b3d87)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:32 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit e4b3d877560bd80575ffd841d3657325765c2cc4
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon Feb 2 21:06:51 2015 -0500

    Recalculate upper bound before giving up on prune. This is done by Hamerly's algorithm and we obtain minor speedup as a result.


>---------------------------------------------------------------

e4b3d877560bd80575ffd841d3657325765c2cc4
 src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 27 ++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index 407b16c..8db18c7 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -124,7 +124,6 @@ double DTNNKMeans<MetricType, MatType, TreeType>::Iterate(
   if (iteration != 0)
   {
     // Do the tree update for the previous iteration.
-    Log::Warn << "Performing tree update.\n";
 
     // Reset centroids and counts for things we will collect during pruning.
     prunedCentroids.zeros(centroids.n_rows, centroids.n_cols);
@@ -545,7 +544,31 @@ prunedPoints[index] << ", lastOwner " << lastOwners[index] << ": invalid "
       }
       else
       {
-        prunedPoints[index] = false;
+        // Attempt to tighten the lower bound.
+        upperBounds[index] = metric.Evaluate(centroids.col(owner),
+                                             dataset.col(index));
+        if (upperBounds[index] < lowerSecondBounds[index] -
+            clusterDistances[centroids.n_cols])
+        {
+          prunedPoints[index] = true;
+          lastOwners[index] = owner;
+          lowerSecondBounds[index] -= clusterDistances[centroids.n_cols];
+          prunedCentroids.col(owner) += dataset.col(index);
+          prunedCounts(owner)++;
+        }
+        else if (upperBounds[index] < 0.5 *
+                  interclusterDistances[newFromOldCentroids[owner]])
+        {
+          prunedPoints[index] = true;
+          lastOwners[index] = owner;
+          lowerSecondBounds[index] -= clusterDistances[centroids.n_cols];
+          prunedCentroids.col(owner) += dataset.col(index);
+          prunedCounts(owner)++;
+        }
+        else
+        {
+          prunedPoints[index] = false;
+        }
       }
     }
   }



More information about the mlpack-git mailing list