[mlpack-svn] r12805 - mlpack/trunk/src/mlpack/core/tree

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Sun May 27 15:39:16 EDT 2012


Author: rcurtin
Date: 2012-05-27 15:39:16 -0400 (Sun, 27 May 2012)
New Revision: 12805

Modified:
   mlpack/trunk/src/mlpack/core/tree/cover_tree.hpp
   mlpack/trunk/src/mlpack/core/tree/cover_tree_impl.hpp
Log:
Store furthest child distance and furthest descendant distance.  This can allow
better pruning rules.


Modified: mlpack/trunk/src/mlpack/core/tree/cover_tree.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree.hpp	2012-05-27 19:32:44 UTC (rev 12804)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree.hpp	2012-05-27 19:39:16 UTC (rev 12805)
@@ -239,6 +239,13 @@
   //! Returns true: this tree does have self-children.
   static bool HasSelfChildren() { return true; }
 
+  //! Get the distance to the furthest child.
+  double FurthestChildDistance() const { return furthestChildDistance; }
+
+  //! Get the distance to teh furthest descendant.
+  double FurthestDescendantDistance() const
+  { return furthestDescendantDistance; }
+
  private:
   //! Reference to the matrix which this tree is built on.
   const arma::mat& dataset;
@@ -261,6 +268,12 @@
   //! The instantiated metric.  Either the user passes it in, or we build it.
   MetricType* metric;
 
+  //! Distance to the furthest child.
+  double furthestChildDistance;
+
+  //! Distance to the furthest descendant.
+  double furthestDescendantDistance;
+
   /**
    * Fill the vector of distances with the distances between the point specified
    * by pointIndex and each point in the indices array.  The distances of the

Modified: mlpack/trunk/src/mlpack/core/tree/cover_tree_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree_impl.hpp	2012-05-27 19:32:44 UTC (rev 12804)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree_impl.hpp	2012-05-27 19:39:16 UTC (rev 12805)
@@ -170,7 +170,9 @@
     dataset(dataset),
     point(pointIndex),
     scale(scale),
-    expansionConstant(expansionConstant)
+    expansionConstant(expansionConstant),
+    furthestChildDistance(0), // This stays the case if this is a leaf.
+    furthestDescendantDistance(0)
 {
   // If the size of the near set is 0, this is a leaf.
   if (nearSetSize == 0)
@@ -182,7 +184,8 @@
   //   maxDistance > pow(ec, i)
   // and using this for the scale factor should guarantee we are not creating an
   // implicit node.  If the maximum distance is 0, every point in the near set
-  // will be created as a leaf, and a child to this node.
+  // will be created as a leaf, and a child to this node.  We also do not need
+  // to change the furthestChildDistance or furthestDescendantDistance.
   const double maxDistance = max(distances.rows(0, nearSetSize + farSetSize - 1));
   if (maxDistance == 0)
   {
@@ -229,6 +232,10 @@
       nextScale, indices, distances, childNearSetSize, childFarSetSize,
       childUsedSetSize));
 
+  // The self-child can't modify the furthestChildDistance away from 0, but it
+  // can modify the furthestDescendantDistance.
+  furthestDescendantDistance = children[0]->FurthestDescendantDistance();
+
   // If we created an implicit node, take its self-child instead (this could
   // happen multiple times).
   while (children[children.size() - 1]->NumChildren() == 1)
@@ -281,25 +288,20 @@
       distances[0] = tempDist;
     }
 
+    // Will this be a new furthest child?
+    if (distances[0] > furthestChildDistance)
+      furthestChildDistance = distances[0];
+
     // If there's only one point left, we don't need this crap.
-    if (nearSetSize == 1)
+    if ((nearSetSize == 1) && (farSetSize == 0))
     {
       size_t childNearSetSize = 0;
       children.push_back(new CoverTree(dataset, expansionConstant,
           indices[0], nextScale, indices, distances, childNearSetSize,
           farSetSize, usedSetSize));
 
-      // Move last point to the used set.  Because nearSetSize == 1, we can
-      // simplify a little bit...
-      size_t tempIndex = indices[farSetSize];
-      double tempDist = distances[farSetSize];
-
-      indices[farSetSize] = indices[0];
-      distances[farSetSize] = distances[0];
-
-      indices[0] = tempIndex;
-      distances[0] = tempDist;
-
+      // Because the far set size is 0, we don't have to do any swapping to
+      // move the point into the used set.
       ++usedSetSize;
       --nearSetSize;
 
@@ -363,6 +365,9 @@
     MoveToUsedSet(indices, distances, nearSetSize, farSetSize, usedSetSize,
         childIndices, childFarSetSize, childUsedSetSize);
   }
+
+  Log::Assert(furthestChildDistance <= pow(expansionConstant, scale));
+  Log::Assert(furthestDescendantDistance <= pow(expansionConstant, scale + 1));
 }
 
 template<typename MetricType, typename RootPointPolicy, typename StatisticType>
@@ -562,9 +567,13 @@
       if (childIndices[childFarSetSize + j] == indices[i])
       {
         // We have found a point; a swap is necessary.
+
+        // Check if this point is a new furthest descendant.
+        if (distances[i] > furthestDescendantDistance)
+          furthestDescendantDistance = distances[i];
+
         // Since this point is from the near set, to preserve the near set, we
         // must do a swap.
-
         if (farSetSize > 0)
         {
           if ((nearSetSize - 1) != i)
@@ -645,6 +654,12 @@
       if (childIndices[childFarSetSize + j] == indices[i + nearSetSize])
       {
         // We have found a point to swap.
+        
+        // Check if this point is a new furthest descendant.
+        if (distances[i] > furthestDescendantDistance)
+          furthestDescendantDistance = distances[i];
+
+        // Perform the swap.
         size_t tempIndex = indices[nearSetSize + farSetSize - 1];
         double tempDist = distances[nearSetSize + farSetSize - 1];
 




More information about the mlpack-svn mailing list