[mlpack-git] master, mlpack-1.0.x: change name of leafSize to maxLeafSize. more stuff for the R-tree. Some name changes, some more node splitting, a start on traversal. (69556a0)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:48:23 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 69556a09eb69e7c19f0d11d42ec8253b00ce3152
Author: andrewmw94 <andrewmw94 at gmail.com>
Date:   Tue Jun 3 20:29:00 2014 +0000

    change name of leafSize to maxLeafSize.  more stuff for the R-tree.  Some name changes, some more node splitting, a start on traversal.


>---------------------------------------------------------------

69556a09eb69e7c19f0d11d42ec8253b00ce3152
 .../tree/binary_space_tree/binary_space_tree.hpp   | 44 +++++------
 .../binary_space_tree/binary_space_tree_impl.hpp   | 42 +++++------
 .../core/tree/rectangle_tree/r_tree_split.hpp      |  6 ++
 .../core/tree/rectangle_tree/r_tree_split_impl.hpp | 86 +++++++++++++++++-----
 .../core/tree/rectangle_tree/rectangle_tree.hpp    | 27 +++----
 .../tree/rectangle_tree/rectangle_tree_impl.hpp    | 11 +--
 .../rectangle_tree/rectangle_tree_traverser.hpp    | 60 +++++++++++++++
 7 files changed, 196 insertions(+), 80 deletions(-)

diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
index 6d83a65..62131d0 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
@@ -25,7 +25,7 @@ namespace tree /** Trees and tree-building procedures. */ {
  * rebuild the tree entirely.
  *
  * This tree does take one runtime parameter in the constructor, which is the
- * leaf size to be used.
+ * max leaf size to be used.
  *
  * @tparam BoundType The bound used for each node.  The valid types of bounds
  *     and the necessary skeleton interface for this class can be found in
@@ -56,8 +56,8 @@ class BinarySpaceTree
   //! The number of points of the dataset contained in this node (and its
   //! children).
   size_t count;
-  //! The leaf size.
-  size_t leafSize;
+  //! The max leaf size.
+  size_t maxLeafSize;
   //! The bound object for this node.
   BoundType bound;
   //! Any extra data contained in the node.
@@ -89,9 +89,9 @@ class BinarySpaceTree
    * dataset.  This will modify the ordering of the points in the dataset!
    *
    * @param data Dataset to create tree from.  This will be modified!
-   * @param leafSize Size of each leaf in the tree.
+   * @param maxLeafSize Size of each leaf in the tree.
    */
-  BinarySpaceTree(MatType& data, const size_t leafSize = 20);
+  BinarySpaceTree(MatType& data, const size_t maxLeafSize = 20);
 
   /**
    * Construct this as the root node of a binary space tree using the given
@@ -101,11 +101,11 @@ class BinarySpaceTree
    * @param data Dataset to create tree from.  This will be modified!
    * @param oldFromNew Vector which will be filled with the old positions for
    *     each new point.
-   * @param leafSize Size of each leaf in the tree.
+   * @param maxLeafSize Size of each leaf in the tree.
    */
   BinarySpaceTree(MatType& data,
                   std::vector<size_t>& oldFromNew,
-                  const size_t leafSize = 20);
+                  const size_t maxLeafSize = 20);
 
   /**
    * Construct this as the root node of a binary space tree using the given
@@ -118,12 +118,12 @@ class BinarySpaceTree
    *     each new point.
    * @param newFromOld Vector which will be filled with the new positions for
    *     each old point.
-   * @param leafSize Size of each leaf in the tree.
+   * @param maxLeafSize Size of each leaf in the tree.
    */
   BinarySpaceTree(MatType& data,
                   std::vector<size_t>& oldFromNew,
                   std::vector<size_t>& newFromOld,
-                  const size_t leafSize = 20);
+                  const size_t maxLeafSize = 20);
 
   /**
    * Construct this node on a subset of the given matrix, starting at column
@@ -134,13 +134,13 @@ class BinarySpaceTree
    * @param data Dataset to create tree from.  This will be modified!
    * @param begin Index of point to start tree construction with.
    * @param count Number of points to use to construct tree.
-   * @param leafSize Size of each leaf in the tree.
+   * @param maxLeafSize Size of each leaf in the tree.
    */
   BinarySpaceTree(MatType& data,
                   const size_t begin,
                   const size_t count,
                   BinarySpaceTree* parent = NULL,
-                  const size_t leafSize = 20);
+                  const size_t maxLeafSize = 20);
 
   /**
    * Construct this node on a subset of the given matrix, starting at column
@@ -158,14 +158,14 @@ class BinarySpaceTree
    * @param count Number of points to use to construct tree.
    * @param oldFromNew Vector which will be filled with the old positions for
    *     each new point.
-   * @param leafSize Size of each leaf in the tree.
+   * @param maxLeafSize Size of each leaf in the tree.
    */
   BinarySpaceTree(MatType& data,
                   const size_t begin,
                   const size_t count,
                   std::vector<size_t>& oldFromNew,
                   BinarySpaceTree* parent = NULL,
-                  const size_t leafSize = 20);
+                  const size_t maxLeafSize = 20);
 
   /**
    * Construct this node on a subset of the given matrix, starting at column
@@ -186,7 +186,7 @@ class BinarySpaceTree
    *     each new point.
    * @param newFromOld Vector which will be filled with the new positions for
    *     each old point.
-   * @param leafSize Size of each leaf in the tree.
+   * @param maxLeafSize Size of each leaf in the tree.
    */
   BinarySpaceTree(MatType& data,
                   const size_t begin,
@@ -194,7 +194,7 @@ class BinarySpaceTree
                   std::vector<size_t>& oldFromNew,
                   std::vector<size_t>& newFromOld,
                   BinarySpaceTree* parent = NULL,
-                  const size_t leafSize = 20);
+                  const size_t maxLeafSize = 20);
 
   /**
    * Create a binary space tree by copying the other tree.  Be careful!  This
@@ -251,10 +251,10 @@ class BinarySpaceTree
   //! Return whether or not this node is a leaf (true if it has no children).
   bool IsLeaf() const;
 
-  //! Return the leaf size.
-  size_t LeafSize() const { return leafSize; }
-  //! Modify the leaf size.
-  size_t& LeafSize() { return leafSize; }
+  //! Return the max leaf size.
+  size_t MaxLeafSize() const { return maxLeafSize; }
+  //! Modify the max leaf size.
+  size_t& MaxLeafSize() { return maxLeafSize; }
 
   //! Fills the tree to the specified level.
   size_t ExtendTree(const size_t level);
@@ -440,18 +440,18 @@ class BinarySpaceTree
                   const size_t count,
                   BoundType bound,
                   StatisticType stat,
-                  const int leafSize = 20) :
+                  const int maxLeafSize = 20) :
       left(NULL),
       right(NULL),
       begin(begin),
       count(count),
       bound(bound),
       stat(stat),
-      leafSize(leafSize) { }
+      maxLeafSize(maxLeafSize) { }
 
   BinarySpaceTree* CopyMe()
   {
-    return new BinarySpaceTree(begin, count, bound, stat, leafSize);
+    return new BinarySpaceTree(begin, count, bound, stat, maxLeafSize);
   }
 
   /**
diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
index 506900a..77d949b 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
@@ -24,13 +24,13 @@ template<typename BoundType,
          typename SplitType>
 BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
     MatType& data,
-    const size_t leafSize) :
+    const size_t maxLeafSize) :
     left(NULL),
     right(NULL),
     parent(NULL),
     begin(0), /* This root node starts at index 0, */
     count(data.n_cols), /* and spans all of the dataset. */
-    leafSize(leafSize),
+    maxLeafSize(maxLeafSize),
     bound(data.n_rows),
     parentDistance(0), // Parent distance for the root is 0: it has no parent.
     dataset(data)
@@ -49,13 +49,13 @@ template<typename BoundType,
 BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
     MatType& data,
     std::vector<size_t>& oldFromNew,
-    const size_t leafSize) :
+    const size_t maxLeafSize) :
     left(NULL),
     right(NULL),
     parent(NULL),
     begin(0),
     count(data.n_cols),
-    leafSize(leafSize),
+    maxLeafSize(maxLeafSize),
     bound(data.n_rows),
     parentDistance(0), // Parent distance for the root is 0: it has no parent.
     dataset(data)
@@ -80,13 +80,13 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
     MatType& data,
     std::vector<size_t>& oldFromNew,
     std::vector<size_t>& newFromOld,
-    const size_t leafSize) :
+    const size_t maxLeafSize) :
     left(NULL),
     right(NULL),
     parent(NULL),
     begin(0),
     count(data.n_cols),
-    leafSize(leafSize),
+    maxLeafSize(maxLeafSize),
     bound(data.n_rows),
     parentDistance(0), // Parent distance for the root is 0: it has no parent.
     dataset(data)
@@ -117,13 +117,13 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
     const size_t begin,
     const size_t count,
     BinarySpaceTree* parent,
-    const size_t leafSize) :
+    const size_t maxLeafSize) :
     left(NULL),
     right(NULL),
     parent(parent),
     begin(begin),
     count(count),
-    leafSize(leafSize),
+    maxLeafSize(maxLeafSize),
     bound(data.n_rows),
     dataset(data)
 {
@@ -144,13 +144,13 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
     const size_t count,
     std::vector<size_t>& oldFromNew,
     BinarySpaceTree* parent,
-    const size_t leafSize) :
+    const size_t maxLeafSize) :
     left(NULL),
     right(NULL),
     parent(parent),
     begin(begin),
     count(count),
-    leafSize(leafSize),
+    maxLeafSize(maxLeafSize),
     bound(data.n_rows),
     dataset(data)
 {
@@ -176,13 +176,13 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
     std::vector<size_t>& oldFromNew,
     std::vector<size_t>& newFromOld,
     BinarySpaceTree* parent,
-    const size_t leafSize) :
+    const size_t maxLeafSize) :
     left(NULL),
     right(NULL),
     parent(parent),
     begin(begin),
     count(count),
-    leafSize(leafSize),
+    maxLeafSize(maxLeafSize),
     bound(data.n_rows),
     dataset(data)
 {
@@ -212,7 +212,7 @@ BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree() :
     count(0),
     bound(),
     stat(),
-    leafSize(20) // Default leaf size is 20.
+    maxLeafSize(20) // Default max leaf size is 20.
 {
   // Nothing to do.
 }*/
@@ -232,7 +232,7 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
     parent(other.parent),
     begin(other.begin),
     count(other.count),
-    leafSize(other.leafSize),
+    maxLeafSize(other.maxLeafSize),
     bound(other.bound),
     stat(other.stat),
     splitDimension(other.splitDimension),
@@ -561,7 +561,7 @@ void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
   furthestDescendantDistance = 0.5 * bound.Diameter();
 
   // Now, check if we need to split at all.
-  if (count <= leafSize)
+  if (count <= maxLeafSize)
     return; // We can't split this.
 
   // splitCol denotes the two partitions of the dataset after the split. The
@@ -582,9 +582,9 @@ void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
   // Now that we know the split column, we will recursively split the children
   // by calling their constructors (which perform this splitting process).
   left = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, begin,
-      splitCol - begin, this, leafSize);
+      splitCol - begin, this, maxLeafSize);
   right = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, splitCol,
-      begin + count - splitCol, this, leafSize);
+      begin + count - splitCol, this, maxLeafSize);
 
   // Calculate parent distances for those two nodes.
   arma::vec centroid, leftCentroid, rightCentroid;
@@ -617,7 +617,7 @@ void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
   furthestDescendantDistance = 0.5 * bound.Diameter();
 
   // First, check if we need to split at all.
-  if (count <= leafSize)
+  if (count <= maxLeafSize)
     return; // We can't split this.
 
   // splitCol denotes the two partitions of the dataset after the split. The
@@ -639,9 +639,9 @@ void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
   // Now that we know the split column, we will recursively split the children
   // by calling their constructors (which perform this splitting process).
   left = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, begin,
-      splitCol - begin, oldFromNew, this, leafSize);
+      splitCol - begin, oldFromNew, this, maxLeafSize);
   right = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, splitCol,
-      begin + count - splitCol, oldFromNew, this, leafSize);
+      begin + count - splitCol, oldFromNew, this, maxLeafSize);
 
   // Calculate parent distances for those two nodes.
   arma::vec centroid, leftCentroid, rightCentroid;
@@ -676,7 +676,7 @@ std::string BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::
   convert << mlpack::util::Indent(bound.ToString(), 2);
   convert << "  Statistic: " << std::endl;
   convert << mlpack::util::Indent(stat.ToString(), 2);
-  convert << "  Leaf size: " << leafSize << std::endl;
+  convert << "  Max leaf size: " << maxLeafSize << std::endl;
   convert << "  Split dimension: " << splitDimension << std::endl;
 
   // How many levels should we print?  This will print the top two tree levels.
diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
index c1ee80a..bb2b75f 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
@@ -68,6 +68,12 @@ static void AssignNodeDestNode(
     const int intI,
     const int intJ);
 
+/**
+  * Insert a node into another node.
+  */
+static void insertNodeIntoTree(
+    RectangleTree& destTree,
+    RectangleTree& srcNode);
 
 }; // namespace tree
 }; // namespace mlpack
diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
index 03ced27..79979aa 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
@@ -8,6 +8,7 @@
 #define __MLPACK_CORE_TREE_RECTANGLE_TREE_R_TREE_SPLIT_IMPL_HPP
 
 #include "r_tree_split.hpp"
+#include <mlpack/core/math/range.hpp>
 
 namespace mlpack {
 namespace tree {
@@ -65,7 +66,8 @@ void RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
  * We call GetBoundSeeds to get the two new nodes that this one will be broken
  * into.  Then we call AssignNodeDestNode to move the children of this node
  * into either of those two nodes.  Finally, we delete the now unused information
- * and recurse up the tree if necessary.
+ * and recurse up the tree if necessary.  We don't need to worry about the bounds
+ * higher up the tree because they were already updated if necessary.
  */
 bool RTreeSplit<MatType>::SplitNonLeafNode(const RectangleTree& tree)
 {
@@ -182,12 +184,15 @@ void RTreeSplit<MatType>::AssignPointDestNode(
   treeTwo.insertPoint(oldTree.dataset.col(intJ));
   oldTree.dataset.col(intJ) = oldTree.dataset.col(--end); // decrement end
   
-  int index = 0;
+
+  int numAssignedOne = 1;
+  int numAssignedTwo = 1;
 
   // In each iteration, we go through all points and find the one that causes the least
   // increase of volume when added to one of the rectangles.  We then add it to that
-  // rectangle.  
-  while() {
+  // rectangle.  We stop when we run out of points or when all of the remaining points
+  // need to be assigned to the same rectangle to satisfy the minimum fill requirement.
+  while(end > 1 && end < oldTree.minLeafSize() - std::min(numAssignedOne, numAssignedTwo)) {
     int bestIndex = 0;
     double bestScore = 0;
     int bestRect = 0;
@@ -200,7 +205,9 @@ void RTreeSplit<MatType>::AssignPointDestNode(
       volTwo *= treeTwo.bound[i].width();
     }
 
-    for(int j = 0; j < end; j++) {
+    // Find the point that, when assigned to one of the two new rectangles, minimizes the increase
+    // in volume.
+    for(int index = 0; index < end; index++) {
       double newVolOne = 1.0;
       double newVolTwo = 1.0;
       for(int i = 0; i < bound.Dim(); i++) {
@@ -211,6 +218,7 @@ void RTreeSplit<MatType>::AssignPointDestNode(
 	  (c < treeTwo.bound[i].low() ? (high - c) : (c - low));
       }
     
+      // Choose the rectangle that requires the lesser increase in volume.
       if((newVolOne - volOne) < (newVolTwo - volTwo)) {
 	if(newVolOne - volOne < bestScore) {
 	  bestScore = newVolOne - volOne;
@@ -235,6 +243,19 @@ void RTreeSplit<MatType>::AssignPointDestNode(
 
     oldTree.dataset.col(bestIndex) = oldTree.dataset.col(--end); // decrement end.
   }
+
+  // See if we need to satisfy the minimum fill.
+  if(end > 1) {
+    if(numAssignedOne < numAssignedTwo) {
+      for(int i = 0; i < end; i++) {
+        treeOne.insertPoint(oldTree.dataset(i);
+      }
+    } else {
+      for(int i = 0; i < end; i++) {
+        treeTwo.insertPoint(oldTree.dataset(i);
+      }
+    }
+  }
 }
 
 void RTreeSplit<MatType>::AssignNodeDestNode(
@@ -253,17 +274,18 @@ void RTreeSplit<MatType>::AssignNodeDestNode(
   treeTwo.getChildren()[0] = oldTree.getChildren()[intJ];
   oldTree.getChildren()[intJ] = oldTree.getChildren()[--end]; // decrement end
  
-  int index = 0;
+  int numAssignTreeOne = 1;
+  int numAssignTreeTwo = 1;
 
   // In each iteration, we go through all of the nodes and find the one that causes the least
   // increase of volume when added to one of the two new rectangles.  We then add it to that
   // rectangle.
-  while() {
+  while(end > 1 && end < oldTree.getMinNumChildren() - std::min(numAssignTreeOne, numAssignTreeTwo)) {
     int bestIndex = 0;
     double bestScore = 0;
     int bestRect = 0;
 
-    // Calculate the increase in volume for assigning this point to each rectangle.
+    // Calculate the increase in volume for assigning this node to each of the new rectangles.
     double volOne = 1.0;
     double volTwo = 1.0;
     for(int i = 0; i < bound.Dim(); i++) {
@@ -271,17 +293,22 @@ void RTreeSplit<MatType>::AssignNodeDestNode(
       volTwo *= treeTwo.bound[i].width();
     }
 
-    for(int j = 0; j < end; j++) {
+    for(int index = 0; index < end; index++) {
       double newVolOne = 1.0;
       double newVolTwo = 1.0;
       for(int i = 0; i < bound.Dim(); i++) {
-	double c = oldTree.dataset.col(index)[i];      
-	newVolOne *= treeOne.bound[i].contains(c) ? treeOne.bound[i].width() :
-	  (c < treeOne.bound[i].low() ? (high - c) : (c - low));
-	newVolTwo *= treeTwo.bound[i].contains(c) ? treeTwo.bound[i].width() :
-	  (c < treeTwo.bound[i].low() ? (high - c) : (c - low));
+	// For each of the new rectangles, find the width in this dimension if we add the rectangle at index to
+	// the new rectangle.
+	math::range range = oldTree.getChildren()[index].Bound(i);
+	newVolOne *= treeOne.Bound(i).Contains(range) ? treeOne.bound[i].width() :
+	  (range.Contains(treeOne.Bound(i)) ? range.width : (range.lo() < treeOne.Bound(i).lo() ? (treeOne.Bound(i).hi() - range.lo()) : 
+		(range.hi() - treeOne.Bound(i).lo())))
+	newVolTwo *= treeTwo.Bound(i).Contains(range) ? treeTwo.bound[i].width() :
+	  (range.Contains(treeTwo.Bound(i)) ? range.width : (range.lo() < treeTwo.Bound(i).lo() ? (treeTwo.Bound(i).hi() - range.lo()) : 
+		(range.hi() - treeTwo.Bound(i).lo())));
       }
     
+      // Choose the rectangle that requires the lesser increase in volume.
       if((newVolOne - volOne) < (newVolTwo - volTwo)) {
 	if(newVolOne - volOne < bestScore) {
 	  bestScore = newVolOne - volOne;
@@ -297,17 +324,38 @@ void RTreeSplit<MatType>::AssignNodeDestNode(
       }
     }
 
-    // Assign the point that causes the least increase in volume 
+    // Assign the rectangle that causes the least increase in volume 
     // to the appropriate rectangle.
     if(bestRect == 1)
-      treeOne.insertPoint(oldTree.dataset(bestIndex);
+      insertNodeIntoTree(treeOne, oldTree.Children()[bestIndex];
     else
-      treeTwo.insertPoint(oldTree.dataset(bestIndex);
+      insertNodeIntoTree(treeTwo, oldTree.Children()[bestIndex];
 
-    oldTree.dataset.col(bestIndex) = oldTree.dataset.col(--end); // decrement end.
+    oldTree.Children()[bestIndex] = oldTree.Children()[--end]; // Decrement end.
   }
+  // See if we need to satisfy the minimum fill.
+  if(end > 1) {
+    if(numAssignedOne < numAssignedTwo) {
+      for(int i = 0; i < end; i++) {
+        insertNodeIntoTree(treeOne, oldTree.Children()[i]);
+      }
+    } else {
+      for(int i = 0; i < end; i++) {
+        insertNodeIntoTree(treeTwo, oldTree.Children()[i]);
+      }
+    }
+  }
+}
 
-
+/**
+  * Insert a node into another node.  Expanding the bounds and updating the numberOfChildren.
+  */
+static void insertNodeIntoTree(
+    RectangleTree& destTree,
+    RectangleTree& srcNode)
+{
+  destTree.Bound() |= srcNode.Bound();
+  destTree.Children()[destTree.getNumOfChildren()++] = &srcNode;
 }
 
 
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
index b4c6ff2..b143311 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
@@ -1,5 +1,6 @@
 /**
  * @file rectangle_tree.hpp
+ * @author Andrew Wells
  *
  * Definition of generalized rectangle type trees (r_tree, r_star_tree, x_tree, and hilbert_r_tree).
  */
@@ -52,8 +53,8 @@ class RectangleTree
   //! The number of points in the dataset contained in this node (and its
   //! children).  
   size_t count;
-  //! The leaf size.  (Maximum allowable leaf size.)
-  size_t leafSize;
+  //! The max leaf size.
+  size_t maxLeafSize;
   //! The minimum leaf size.
   size_t minLeafSize;
   //! The bound object for this node.
@@ -81,10 +82,10 @@ class RectangleTree
    * dataset.  This will modify the ordering of the points in the dataset!
    *
    * @param data Dataset from which to create the tree.  This will be modified!
-   * @param leafSize Size of each leaf in the tree;
+   * @param maxLeafSize Maximum size of each leaf in the tree;
    * @param maxNumChildren The maximum number of child nodes a non-leaf node may have.
    */
-  RectangleTree(MatType& data, const size_t leafSize = 20, const size_t maxNumChildren = 4);
+  RectangleTree(MatType& data, const size_t maxLeafSize = 20, const size_t maxNumChildren = 4);
 
   //TODO implement the oldFromNew stuff if applicable.
 
@@ -149,10 +150,10 @@ class RectangleTree
   //! Return whether or not this node is a leaf (true if it has no children).
   bool IsLeaf() const;
 
-  //! Return the leaf size.
-  size_t LeafSize() const { return leafSize; }
-  //! Modify the leaf size.
-  size_t& LeafSize() { return leafSize; }
+  //! Return the max leaf size.
+  size_t MaxLeafSize() const { return maxLeafSize; }
+  //! Modify the max leaf size.
+  size_t& MaxLeafSize() { return maxLeafSize; }
 
   //! Gets the parent of this node.
   RectangleTree* Parent() const { return parent; }
@@ -181,9 +182,9 @@ class RectangleTree
   size_t& getNumOfChildren() { return numOfChildren; }
 
   //! Get the children of this node.
-  const std::vector<RectangleTree*>& getChildren() const { return children; }
+  const std::vector<RectangleTree*>& Children() const { return children; }
   //! Modify the children of this node.
-  std::vector<RectangleTree*>& getChildren() { return children; }
+  std::vector<RectangleTree*>& Children() { return children; }
 
   /**
    * Return the furthest distance to a point held in this node.  If this is not
@@ -327,16 +328,16 @@ class RectangleTree
                   const size_t count,
                   HRectBound bound,
                   StatisticType stat,
-                  const int leafSize = 20) :
+                  const int maxLeafSize = 20) :
       begin(begin),
       count(count),
       bound(bound),
       stat(stat),
-      leafSize(leafSize) { }
+      maxLeafSize(maxLeafSize) { }
 
   RectangleTree* CopyMe()
   {
-    return new RectangleTree(begin, count, bound, stat, leafSize);
+    return new RectangleTree(begin, count, bound, stat, maxLeafSize);
   }
 
   /**
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
index d454c46..7059081 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
@@ -1,5 +1,6 @@
 /**
  * @file rectangle_tree_impl.hpp
+ * @author Andrew Wells
  *
  * Implementation of generalized rectangle tree.
  */
@@ -22,7 +23,7 @@ template<typename StatisticType,
 	 typename DescentType>
 RectangleTree<StatisticType, MatType, SplitType, DescentType>::RectangleTree(
     MatType& data,
-    const size_t leafSize,
+    const size_t maxLeafSize,
     const size_t minLeafSize,
     const size_t maxNumChildren,
     const size_t minNumChildren):
@@ -34,13 +35,13 @@ RectangleTree<StatisticType, MatType, SplitType, DescentType>::RectangleTree(
   this.parent = NULL;
   this.begin = 0;
   this.count = 0;
-  this.leafSize = leafSize;
+  this.maxLeafSize = maxLeafSize;
   this.minLeafSize = minLeafSize;
   this.bound = new HRectBound(data.n_rows);
   this.stat = EmptyStatistic;
   this.parentDistance = 0.0;
   this.furthestDescendantDistance = 0.0;
-  this.dataset = new MatType(leafSize+1); // Add one to make splitting the node simpler
+  this.dataset = new MatType(maxLeafSize+1); // Add one to make splitting the node simpler
 
   // For now, just insert the points in order.
   // This won't actually work for any meaningful size of data since the root changes.
@@ -296,7 +297,7 @@ void RectangleTree<StatisticType, MatType, SplitType, DescentType>::SplitNode(
   boost::assert(numChildren == 0);
 
   // See if we are full.
-  if(points < leafSize)
+  if(points < maxLeafSize)
     return;
   
   // If we are full, then we need to move up the tree.  The SplitType takes
@@ -322,7 +323,7 @@ std::string BinarySpaceTree<StatisticType, MatType, SplitType, DescentType>::ToS
   convert << mlpack::util::Indent(bound.ToString(), 2);
   convert << "  Statistic: " << std::endl;
   convert << mlpack::util::Indent(stat.ToString(), 2);
-  convert << "  Leaf size: " << leafSize << std::endl;
+  convert << "  Max leaf size: " << maxLeafSize << std::endl;
   convert << "  Split dimension: " << splitDimension << std::endl;
 
   // How many levels should we print?  This will print the root and it's children.
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser.hpp
index 8d1c8b6..5dbea6f 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser.hpp
@@ -1 +1,61 @@
+/**
+  * @file rectangle_tree_traverser.hpp
+  * @author Andrew Wells
+  *
+  * A class for traversing rectangle type trees with a given set of rules
+  * which indicate the branches to prune and the order in which to recurse.
+  * This is a depth-first traverser.
+  */
+#ifndef __MLPACK_CORE_TREE_RECTANGLE_TREE_RECTANGLE_TREE_TRAVERSER_HPP
+#define __MLPACK_CORE_TREE_RECTANGLE_TREE_RECTANGLE_TREE_TRAVERSER_HPP
 
+#include <mlpack/core.hpp>
+
+#include "rectangle_tree.hpp"
+
+namespace mlpack {
+namespace tree {
+
+template<typename Statistic Type,
+    typename MatType,
+    typename SplitType>
+template<typename RuleType>
+class RectangleTree<StatisticType, MatType, SplitType>::
+    RectangleTreeTraverser
+{
+ public:
+  /**
+    * Instantiate the traverser with the given rule set.
+    */
+    RectangleTreeTraverser(RuleType& rule);
+
+  /**
+    * Traverse the tree with the given point.
+    *
+    * @param queryIndex The index of the point in the query set which is being
+    *     used as the query point.
+    * @param referenceNode The tree node to be traversed.
+    */
+  void Traverse(const size_t queryIndex, const RectangleTree& referenceNode);
+
+  //! Get the number of prunes.
+  size_t NumPrunes() const { return numPrunes; }
+  //! Modify the number of prunes.
+  size_t& NumPrunes() { return numPrunes; }
+
+ private:
+  //! Reference to the rules with which the tree will be traversed.
+  RuleType& rule;
+
+  //! The number of nodes which have been prenud during traversal.
+  size_t numPrunes;
+}
+
+
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "rectangle_tree_traverser_impl.hpp"
+
+#endif
\ No newline at end of file



More information about the mlpack-git mailing list