[mlpack-git] master: Be more careful about when to take the last SCB. This gives a trivial speedup, but hey, speedup! Hamerly prunes are still not working properly, but I'm getting there. (abfc0b8)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:02:04 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit abfc0b8e4dcaaa0b700444623f5c945855a09ff2
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon Jan 26 15:47:22 2015 -0500

    Be more careful about when to take the last second-closest bound (SCB). This gives a trivial speedup, but hey, speedup! Hamerly prunes are still not working properly, but I'm getting there.


>---------------------------------------------------------------

abfc0b8e4dcaaa0b700444623f5c945855a09ff2
 src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 592a175..938fc52 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -220,12 +220,14 @@ void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
     // Re-set second closest bound if necessary.
     if (node->Stat().SecondClosestBound() == DBL_MAX)
     {
+      if (node->Begin() == 34654)
+        Log::Warn << "r34654c" << node->Begin() << " scb is DBL_MAX!\n";
+
       if (node->Parent() == NULL)
         node->Stat().SecondClosestBound() = 0.0; // Don't prune the root.
+    }
 
-      else
-      {
-        if (node->Parent()->Stat().SecondClosestBound() != DBL_MAX &&
+    if (node->Parent()->Stat().SecondClosestBound() != DBL_MAX &&
 node->Stat().LastSecondClosestBound() != DBL_MAX)
           node->Stat().SecondClosestBound() =
 std::max(node->Parent()->Stat().SecondClosestBound(),
@@ -234,14 +236,12 @@ node->Stat().LastSecondClosestBound());
           node->Stat().SecondClosestBound() =
 std::min(node->Parent()->Stat().SecondClosestBound(),
 node->Stat().LastSecondClosestBound());
-      }
 //      if (node->Begin() == 35871)
 //        Log::Warn << "Update second closest bound for r35871c" <<
 //node->Count() << " to " << node->Stat().SecondClosestBound() << ", which could "
 //      << "have been parent's (" << node->Parent()->Stat().SecondClosestBound()
 //<< ") or adjusted last iteration's (" << node->Stat().LastSecondClosestBound()
 //<< ").\n";
-    }
 
 //    if (node->Begin() == 35871)
 //      Log::Warn << "r35871c" << node->Count() << " has second bound " <<
@@ -268,6 +268,15 @@ node->Parent()->Stat().SecondClosestBound() < node->Stat().SecondClosestBound())
 node->Parent()->Stat().SecondClosestBound();
     }
 
+    if (node->Begin() == 34654)
+    {
+      Log::Warn << "Attempt Hamerly prune on r34654c" << node->Count() <<
+          " with MQND " << node->Stat().MaxQueryNodeDistance() << ", scb "
+          << node->Stat().SecondClosestBound() << ", owner " <<
+node->Stat().Owner() << ", and clusterDistances " << clusterDistances[clusters]
+<< ".\n";
+    }
+
     if (node->Stat().MaxQueryNodeDistance() < node->Stat().SecondClosestBound()
         - clusterDistances[clusters])
     {



More information about the mlpack-git mailing list