[mlpack-git] master: Don't set CQN to NULL before recursing. This was an error in TreeUpdate() that caused... some problems that I'm only vaguely aware of as problems. Also refactor one of the second bound rules and comment it so that it makes some semblance of sense. (ca4066a)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:02:16 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit ca4066a3a84a73534092ad244a0fb8001a8d017e
Author: Ryan Curtin <ryan at ratml.org>
Date:   Tue Jan 27 20:22:57 2015 -0500

    Don't set CQN to NULL before recursing. This was an error in TreeUpdate() that caused... some problems that I'm only vaguely aware of as problems. Also refactor one of the second bound rules and comment it so that it makes some semblance of sense.


>---------------------------------------------------------------

ca4066a3a84a73534092ad244a0fb8001a8d017e
 .../methods/kmeans/dual_tree_kmeans_impl.hpp       | 56 ++++++++++------------
 1 file changed, 26 insertions(+), 30 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index cf4c8e8..a9193e8 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -334,8 +334,8 @@ void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
 //node->Stat().SecondClosestBound() << ", " << secondClosest << ".\n";
       }
 
-      if (node->Parent() != NULL &&
-node->Parent()->Stat().SecondClosestQueryNode() != NULL)
+//      if (node->Parent() != NULL &&
+//node->Parent()->Stat().SecondClosestQueryNode() != NULL)
 //        if (node->Begin() == 16954)
 //          Log::Warn << "Parent's (r" << node->Parent()->Begin() << "c"
 //<< node->Parent()->Count() << ") second closest query node is q" << ((TreeType*)
@@ -343,31 +343,26 @@ node->Parent()->Stat().SecondClosestQueryNode() != NULL)
 //node->Parent()->Stat().SecondClosestQueryNode())->Count() << ", with scb " <<
 //node->Parent()->Stat().SecondClosestBound() << ".\n";
 
-      if (node->Parent() != NULL && 
-          node->Stat().SecondClosestQueryNode() != NULL &&
-          node->Parent()->Stat().SecondClosestQueryNode() != NULL && !IsDescendantOf((*(TreeType*)
-node->Parent()->Stat().SecondClosestQueryNode()), (*(TreeType*)
-node->Stat().SecondClosestQueryNode())))
+      // Suppose that the true second closest query node was pruned by the
+      // parent, and thus was never seen by this node.  To ensure the
+      // correctness of the second bound in this situation, we'll take the
+      // parent's second closest bound only if the parent's second closest query
+      // node is on a separate subtree than the node's second closest query node
+      // _and_ the node's closest query node.
+      TreeType* parent = (TreeType*) node->Parent();
+      TreeType* scqn = (TreeType*) node->Stat().SecondClosestQueryNode();
+      TreeType* parentScqn = (parent == NULL) ? NULL :
+          (TreeType*) parent->Stat().SecondClosestQueryNode();
+      TreeType* parentCqn = (parent == NULL) ? NULL :
+          (TreeType*) parent->Stat().ClosestQueryNode();
+      if (scqn != NULL && parentScqn != NULL &&
+          !IsDescendantOf(*parentScqn, *scqn) &&
+          !IsDescendantOf(*parentCqn, *scqn) &&
+          (parent->Stat().SecondClosestBound() <
+              node->Stat().SecondClosestBound()))
       {
-        // Does this even work?
-        const double minDistance = node->MinDistance((TreeType*)
-            node->Parent()->Stat().SecondClosestQueryNode());
-//        if (node->Begin() == 16954)
-//          Log::Warn << "r16954c" << node->Count() << " has min distance " <<
-//minDistance << " to SCQN of parent (q" << ((TreeType*)
-//node->Parent()->Stat().SecondClosestQueryNode())->Begin() << "c" << ((TreeType*)
-//node->Parent()->Stat().SecondClosestQueryNode())->Count() << ")\n";
-
-        if (minDistance < node->Stat().SecondClosestBound())
-        {
-          node->Stat().SecondClosestBound() =
-              node->Parent()->Stat().SecondClosestBound();
-          node->Stat().SecondClosestQueryNode() =
-              node->Parent()->Stat().SecondClosestQueryNode();
-//          if (node->Begin() == 16954)
-//            Log::Warn << "r16954c" << node->Count() << " has recalculated scb "
-//                << node->Stat().SecondClosestBound() << ".\n";
-        }
+        node->Stat().SecondClosestBound() = parent->Stat().SecondClosestBound();
+        node->Stat().SecondClosestQueryNode() = parentScqn;
       }
     }
 
@@ -411,6 +406,7 @@ node->Stat().SecondClosestBound() << " is too loose! -- " << secondClosestDist
       }
     }
 
+
     if (node->Stat().MaxQueryNodeDistance() < node->Stat().SecondClosestBound()
         - clusterDistances[clusters])
     {
@@ -445,16 +441,16 @@ node->Stat().SecondClosestBound() << " is too loose! -- " << secondClosestDist
     node->Stat().Owner() = centroids.n_cols;
   }
 
+  for (size_t i = 0; i < node->NumChildren(); ++i)
+    TreeUpdate(&node->Child(i), clusters, clusterDistances, assignments,
+        centroids, dataset, oldFromNew, hamerlyPruned);
+
   node->Stat().Iteration() = iteration;
   node->Stat().ClustersPruned() = (node->Parent() == NULL) ? 0 : -1;
   // We have to set the closest query node to NULL because the cluster tree will
   // be rebuilt.
   node->Stat().ClosestQueryNode() = NULL;
 
-  for (size_t i = 0; i < node->NumChildren(); ++i)
-    TreeUpdate(&node->Child(i), clusters, clusterDistances, assignments,
-        centroids, dataset, oldFromNew, hamerlyPruned);
-
   node->Stat().LastSecondClosestBound() = node->Stat().SecondClosestBound() -
       clusterDistances[clusters];
   // This should change later, but I'm not yet sure how to do it.



More information about the mlpack-git mailing list