[mlpack-git] master: Count number of points that are Hamerly pruned. (27f3a36)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:02:24 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit 27f3a36e93c29db4e4a0c67f0d63fc3ce22eca0c
Author: Ryan Curtin <ryan at ratml.org>
Date:   Tue Jan 27 17:36:01 2015 -0500

    Count number of points that are Hamerly pruned.


>---------------------------------------------------------------

27f3a36e93c29db4e4a0c67f0d63fc3ce22eca0c
 src/mlpack/methods/kmeans/dual_tree_kmeans.hpp      |  3 ++-
 src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp | 18 +++++++++++++-----
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
index 9948b10..9e0c17a 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
@@ -67,7 +67,8 @@ class DualTreeKMeans
                   const arma::Col<size_t>& assignments,
                   const arma::mat& oldCentroids,
                   const arma::mat& dataset,
-                  const std::vector<size_t>& oldFromNew);
+                  const std::vector<size_t>& oldFromNew,
+                  size_t& hamerlyPruned);
 };
 
 template<typename MetricType, typename MatType>
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 22dd0ad..083dcdd 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -126,8 +126,9 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
   }
 
   // Update the tree with the centroid movement information.
+  size_t hamerlyPruned = 0;
   TreeUpdate(tree, centroids.n_cols, clusterDistances, assignments,
-      oldCentroids, dataset, oldFromNewCentroids);
+      oldCentroids, dataset, oldFromNewCentroids, hamerlyPruned);
 
   delete centroidTree;
 
@@ -178,7 +179,8 @@ void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
     const arma::Col<size_t>& assignments,
     const arma::mat& centroids,
     const arma::mat& dataset,
-    const std::vector<size_t>& oldFromNew)
+    const std::vector<size_t>& oldFromNew,
+    size_t& hamerlyPruned)
 {
   // This is basically IterationUpdate(), but pulled out to be separate from the
   // actual dual-tree algorithm.
@@ -453,8 +455,11 @@ node->Stat().SecondClosestBound() << " is too loose! -- " << secondClosestDist
       node->Stat().HamerlyPruned() = true;
 //      if (node->Begin() == 16954)
       if (!node->Parent()->Stat().HamerlyPruned())
-        Log::Warn << "Mark r" << node->Begin() << "c" << node->Count() << " as "
-            << "Hamerly pruned.\n";
+      {
+//        Log::Warn << "Mark r" << node->Begin() << "c" << node->Count() << " as "
+//            << "Hamerly pruned.\n";
+        hamerlyPruned += node->NumDescendants();
+      }
     }
 //    else
 //    {
@@ -499,13 +504,16 @@ node->Stat().SecondClosestBound() << " is too loose! -- " << secondClosestDist
 //  if (!node->Stat().HamerlyPruned())
     for (size_t i = 0; i < node->NumChildren(); ++i)
       TreeUpdate(&node->Child(i), clusters, clusterDistances, assignments,
-          centroids, dataset, oldFromNew);
+          centroids, dataset, oldFromNew, hamerlyPruned);
 
   node->Stat().LastSecondClosestBound() = node->Stat().SecondClosestBound() -
       clusterDistances[clusters];
   // This should change later, but I'm not yet sure how to do it.
   node->Stat().SecondClosestBound() = DBL_MAX;
   node->Stat().SecondClosestQueryNode() = NULL;
+
+  if (node->Parent() == NULL)
+    Log::Info << "Total Hamerly pruned points: " << hamerlyPruned << ".\n";
 }
 
 } // namespace kmeans



More information about the mlpack-git mailing list