[mlpack-git] master: A start at implementing pointwise Hamerly bounds. (cf56ad4)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:07 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit cf56ad4a2a051ca0abeadfe59cc139a4d62bf057
Author: Ryan Curtin <ryan at ratml.org>
Date:   Thu Jan 29 17:32:07 2015 -0500

    A start at implementing pointwise Hamerly bounds.


>---------------------------------------------------------------

cf56ad4a2a051ca0abeadfe59cc139a4d62bf057
 src/mlpack/methods/kmeans/dual_tree_kmeans.hpp            |  1 +
 src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp       |  2 ++
 src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp      |  2 ++
 src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp | 11 +++++++----
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
index 68714cd..ab036fa 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
@@ -52,6 +52,7 @@ class DualTreeKMeans
   arma::vec distances;
   arma::Col<size_t> visited;
   arma::Col<size_t> distanceIteration;
+  arma::vec hamerlyBounds;
 
   //! The current iteration.
   size_t iteration;
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 9d31609..8887acb 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -32,6 +32,8 @@ DualTreeKMeans<MetricType, MatType, TreeType>::DualTreeKMeans(
   assignments.zeros(dataset.n_cols);
   visited.zeros(dataset.n_cols);
   distanceIteration.zeros(dataset.n_cols);
+  hamerlyBounds.set_size(dataset.n_cols);
+  hamerlyBounds.fill(DBL_MAX);
 
   Timer::Start("tree_building");
 
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
index 43d88d8..4c960be 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
@@ -25,6 +25,7 @@ class DualTreeKMeansRules
                       arma::Col<size_t>& assignments,
                       arma::Col<size_t>& visited,
                       arma::Col<size_t>& distanceIteration,
+                      arma::vec& hamerlyBounds,
                       const arma::mat& interclusterDistances,
                       MetricType& metric);
 
@@ -62,6 +63,7 @@ class DualTreeKMeansRules
   arma::Col<size_t>& assignments;
   arma::Col<size_t>& visited;
   arma::Col<size_t>& distanceIteration;
+  arma::vec& hamerlyBounds;
   const arma::mat& interclusterDistances;
   MetricType& metric;
 
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
index 04b82ae..21d526d 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
@@ -26,6 +26,7 @@ DualTreeKMeansRules<MetricType, TreeType>::DualTreeKMeansRules(
     arma::Col<size_t>& assignments,
     arma::Col<size_t>& visited,
     arma::Col<size_t>& distanceIteration,
+    arma::vec& hamerlyBounds,
     const arma::mat& interclusterDistances,
     MetricType& metric) :
     dataset(dataset),
@@ -39,6 +40,7 @@ DualTreeKMeansRules<MetricType, TreeType>::DualTreeKMeansRules(
     assignments(assignments),
     visited(visited),
     distanceIteration(distanceIteration),
+    hamerlyBounds(hamerlyBounds),
     interclusterDistances(interclusterDistances),
     metric(metric),
     distanceCalculations(0)
@@ -56,10 +58,6 @@ inline force_inline double DualTreeKMeansRules<MetricType, TreeType>::BaseCase(
 
   // It's possible that the reference node has been pruned before we got to the
   // base case.  In that case, don't do the base case, and just return.
-//  if (referenceIndex == 37447)
-//    Log::Warn << "Visit " << referenceIndex << ", q" << queryIndex << ".  " <<
-//traversalInfo.LastReferenceNode()->Stat().ClustersPruned() +
-//visited[referenceIndex] << ".\n";
   if (traversalInfo.LastReferenceNode()->Stat().ClustersPruned() +
       visited[referenceIndex] == centroids.n_cols)
     return 0.0;
@@ -75,12 +73,17 @@ inline force_inline double DualTreeKMeansRules<MetricType, TreeType>::BaseCase(
     distanceIteration[referenceIndex] = iteration;
     distances[referenceIndex] = distance;
     assignments[referenceIndex] = mappings[queryIndex];
+    hamerlyBounds[referenceIndex] = DBL_MAX; // Not sure about this one.
   }
   else if (distance < distances[referenceIndex])
   {
     distances[referenceIndex] = distance;
     assignments[referenceIndex] = mappings[queryIndex];
   }
+  else if (distance < hamerlyBounds[referenceIndex])
+  {
+    hamerlyBounds[referenceIndex] = distance; // Not yet done.
+  }
 
   ++visited[referenceIndex];
 



More information about the mlpack-git mailing list