[mlpack-git] master: Set FirstBound() after the allknn. Now, checking if FirstBound() == DBL_MAX is unnecessary, although this doesn't (yet) seem to give any additional prunes. (ee7eb43)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:02:59 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit ee7eb43f0197e50ed6aab9720daaff0b61f9807d
Author: Ryan Curtin <ryan at ratml.org>
Date:   Tue Jan 13 17:35:03 2015 -0500

    Set FirstBound() after the allknn. Now, checking if FirstBound() == DBL_MAX is unnecessary, although this doesn't (yet) seem to give any additional prunes.


>---------------------------------------------------------------

ee7eb43f0197e50ed6aab9720daaff0b61f9807d
 src/mlpack/methods/kmeans/dual_tree_kmeans.hpp     |  2 ++
 .../methods/kmeans/dual_tree_kmeans_impl.hpp       | 35 +++++-----------------
 .../methods/kmeans/dual_tree_kmeans_rules_impl.hpp |  4 ---
 3 files changed, 10 insertions(+), 31 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
index c1e6070..ebeb0ed 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
@@ -58,6 +58,8 @@ class DualTreeKMeans
   //! Track distance calculations.
   size_t distanceCalculations;
 
+  void ClusterTreeUpdate(TreeType* node);
+
   void TreeUpdate(TreeType* node,
                   const size_t clusters,
                   const arma::vec& clusterDistances);
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 0065a61..88c4e96 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -80,6 +80,9 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
   distanceCalculations += nns.BaseCases();
   distanceCalculations += nns.Scores();
 
+  // Update FirstBound().
+  ClusterTreeUpdate(centroidTree);
+
   // Now run the dual-tree algorithm.
   typedef DualTreeKMeansRules<MetricType, TreeType> RulesType;
   RulesType rules(dataset, centroids, newCentroids, counts, oldFromNewCentroids,
@@ -128,43 +131,21 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
   return std::sqrt(residual);
 }
 
-/*
 template<typename MetricType, typename MatType, typename TreeType>
 void DualTreeKMeans<MetricType, MatType, TreeType>::ClusterTreeUpdate(
     TreeType* node)
 {
-  // We will abuse stat.owner to hold the cluster with the most change.
-  // stat.minQueryNodeDistance will hold the distance.
-  double maxChange = 0.0;
-  size_t maxChangeCluster = 0;
-
+  // Just update the first bound, after recursing to the bottom.
+  double firstBound = 0.0;
   for (size_t i = 0; i < node->NumChildren(); ++i)
   {
     ClusterTreeUpdate(&node->Child(i));
-
-    const double nodeChange = node->Child(i).Stat().MinQueryNodeDistance();
-    if (nodeChange > maxChange)
-    {
-      maxChange = nodeChange;
-      maxChangeCluster = node->Child(i).Stat().Owner();
-    }
+    if (node->Child(i).Stat().FirstBound() >= firstBound)
+      firstBound = node->Child(i).Stat().FirstBound();
   }
 
-  for (size_t i = 0; i < node->NumPoints(); ++i)
-  {
-    const size_t cluster = oldFromNewCentroids[node->Point(i)];
-    const double pointChange = clusterDistances[cluster];
-    if (pointChange > maxChange)
-    {
-      maxChange = pointChange;
-      maxChangeCluster = cluster;
-    }
-  }
-
-  node->Stat().Owner() = maxChangeCluster;
-  node->Stat().MinQueryNodeDistance() = maxChange;
+  node->Stat().FirstBound() = firstBound;
 }
-*/
 
 template<typename MetricType, typename MatType, typename TreeType>
 void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
index 53382eb..d456c83 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
@@ -215,10 +215,6 @@ double DualTreeKMeansRules<MetricType, TreeType>::ElkanTypeScore(
   if (queryNode.NumDescendants() > 1)
   {
     const double minQueryDistance = queryNode.Stat().FirstBound();
-    if (minQueryDistance == DBL_MAX)
-      return 0.0;
-    else
-      Log::Warn << "Not DBL_MAX!\n";
     const double score = ElkanTypeScore(queryNode, referenceNode,
         minQueryDistance);
     return score;



More information about the mlpack-git mailing list