[mlpack-git] master, mlpack-1.0.x: Fix/update some comments, almost finish the splitting algorithm. Several miscellaneous changes. (27e4131)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:48:21 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 27e413139ecdcce45256d783d9a8a5243d7de77c
Author: andrewmw94 <andrewmw94 at gmail.com>
Date:   Mon Jun 2 22:58:12 2014 +0000

    Fix/update some comments, almost finish the splitting algorithm.  Several miscellaneous changes.


>---------------------------------------------------------------

27e413139ecdcce45256d783d9a8a5243d7de77c
 .../core/tree/rectangle_tree/r_tree_split.hpp      |  30 ++---
 .../core/tree/rectangle_tree/r_tree_split_impl.hpp | 135 +++++++++++++++++++--
 .../core/tree/rectangle_tree/rectangle_tree.hpp    |  23 +++-
 3 files changed, 154 insertions(+), 34 deletions(-)

diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
index 949803e..c1ee80a 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
@@ -24,15 +24,17 @@ class RTreeSplit
 public:
 
 /**
- * Split a leaf node using the "default" algorithm.  The methods for splitting non-leaf
- * nodes are private since they should only be called if a leaf node overflows.
+ * Split a leaf node using the "default" algorithm.  If necessary, this split will propagate
+ * upwards through the tree.  The methods for splitting non-leaf nodes are private since
+ * they should only be called if a leaf node overflows.
  */
 static bool SplitLeafNode(const RectangleTree& tree);
 
 private:
 
 /**
- * Split a non-leaf node using the "default" algorithm.
+ * Split a non-leaf node using the "default" algorithm.  If this is the root node and
+ * we need to move up the tree, a new root node is created.
  */
 static bool SplitNonLeafNode(const RectangleTree& tree);
 
@@ -56,21 +58,15 @@ static void AssignPointDestNode(
     const int intI,
     const int intJ);
 
-static int GetBoundDestNode(
+/**
+ * Assign nodes to the two new nodes.
+ */
+static void AssignNodeDestNode(
     const RectangleTree& oldTree,
-    const RectangleTree& treeOne,
-    const RectangleTree& treeTwo,
-    const int index);
-
-};
-
-static double getPointToBoundScore(const HRectBound& bound, const arma::vec& point)
-{
-  double score = 1.0;
-  for(int i = 0; i < bound.dimensions; i++) {
-    return score;
-  }
-}
+    RectangleTree& treeOne,
+    RectangleTree& treeTwo,
+    const int intI,
+    const int intJ);
 
 
 }; // namespace tree
diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
index 252080f..03ced27 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
@@ -12,6 +12,12 @@
 namespace mlpack {
 namespace tree {
 
+/**
+ * We call GetPointSeeds to get the two points which will be the initial points in the new nodes
+ * We then call AssignPointDestNode to assign the remaining points to the two new nodes.
+ * Finally, we delete the old node and insert the new nodes into the tree, spliting the parent
+ * if necessary.
+ */
 template<typename MatType>
 void RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
 {
@@ -22,13 +28,15 @@ void RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
   int j = 0;
   GetPointSeeds(tree, &i, &j);
 
-  RectangleTree treeOne = new RectangleTree();
-  treeOne.insertPoint(tree.dataset[i]);
-  RectangleTree treeTwo = new RectangleTree();
-  treeTwo.insertPoint(tree.dataset[j]);
-
+  // This will assign the ith and jth point appropriately.
   AssignPointDestNode(tree, treeOne, treeTwo, i, j);
   
+  // create the parent node if necessary
+  if(par == NULL) {
+    
+  }
+  
+  //Remove this node and insert treeOne and treeTwo
   RectangleTree* par = tree.parent();
   int index = 0;
   for(int i = 0; i < par.numOfChildren(); i++) {
@@ -40,6 +48,7 @@ void RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
   par.getChildren()[i] = treeOne;
   par.getChildren()[par.end++] = treeTwo;
 
+  //because we copied the points to treeOne and treeTwo, we can just delete this node
   delete tree;
 
   // we only add one at a time, so should only need to test for equality
@@ -52,16 +61,50 @@ void RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
   return;
 }
 
+/**
+ * We call GetBoundSeeds to get the two new nodes that this one will be broken
+ * into.  Then we call AssignNodeDestNode to move the children of this node
+ * into either of those two nodes.  Finally, we delete the now unused information
+ * and recurse up the tree if necessary.
+ */
 bool RTreeSplit<MatType>::SplitNonLeafNode(const RectangleTree& tree)
 {
   int i = 0;
   int j = 0;
   GetBoundSeeds(tree, &i, &j);
   
-  RectangleTree treeOne = new RectangleTree();
-  treeOne.insertRectangle()
-  RectangleTree treeTwo = new RectangleTree();
+  // This will assign the ith and jth rectangles appropriately.
+  AssignNodeDestNode(tree, treeOne, treeTwo, i, j);
+
+  // create the parent node if necessary
+  if(par == NULL) {
+    
+  }
   
+  //Remove this node and insert treeOne and treeTwo
+  RectangleTree* par = tree.parent();
+  int index = 0;
+  for(int i = 0; i < par.numOfChildren(); i++) {
+    if(par.getChildren()[i] == this) {
+      index = i;
+      break;
+    }
+  }
+  par.getChildren()[i] = treeOne;
+  par.getChildren()[par.end++] = treeTwo;
+
+  // Because we now have pointers to the information stored under this tree,
+  // we need to delete this node carefully.
+  tree.softDelete();
+
+  // we only add one at a time, so should only need to test for equality
+  // just in case, we use an assert.
+  boost::assert(numChildren <= maxNumChildren);
+
+  if(par.numOfChildren() == par.maxNumChildren) {
+    SplitNonLeafNode(par);
+  }
+  return;
 }
 
 /**
@@ -124,7 +167,6 @@ void RTreeSplit<MatType>::GetBoundSeeds(const RectangleTree& tree, int* iRet, in
   return;
 }
 
-
 void RTreeSplit<MatType>::AssignPointDestNode(
     const RectangleTree& oldTree,
     RectangleTree& treeOne,
@@ -132,9 +174,12 @@ void RTreeSplit<MatType>::AssignPointDestNode(
     const int intI,
     const int intJ)
 {
-  Log::assert(end > 1); // If this isn't true, the tree is really weird.
   int end = oldTree.count;
+  Log::assert(end > 1); // If this isn't true, the tree is really weird.
+
+  treeOne.insertPoint(oldTree.dataset.col(intI));
   oldTree.dataset.col(intI) = oldTree.dataset.col(--end); // decrement end
+  treeTwo.insertPoint(oldTree.dataset.col(intJ));
   oldTree.dataset.col(intJ) = oldTree.dataset.col(--end); // decrement end
   
   int index = 0;
@@ -190,12 +235,80 @@ void RTreeSplit<MatType>::AssignPointDestNode(
 
     oldTree.dataset.col(bestIndex) = oldTree.dataset.col(--end); // decrement end.
   }
+}
 
+void RTreeSplit<MatType>::AssignNodeDestNode(
+    const RectangleTree& oldTree,
+    RectangleTree& treeOne,
+    RectangleTree& treeTwo,
+    const int intI,
+    const int intJ)
+{
   
-}
+  int end = oldTree.getNumChildren();
+  Log::assert(end > 1); // If this isn't true, the tree is really weird.
 
+  treeOne.getChildren()[0] = oldTree.getChildren()[intI];
+  oldTree.getChildren[intI] = oldTree.getChildren()[--end]; // decrement end
+  treeTwo.getChildren()[0] = oldTree.getChildren()[intJ];
+  oldTree.getChildren()[intJ] = oldTree.getChildren()[--end]; // decrement end
  
+  int index = 0;
 
+  // In each iteration, we go through all of the nodes and find the one that causes the least
+  // increase of volume when added to one of the two new rectangles.  We then add it to that
+  // rectangle.
+  while() {
+    int bestIndex = 0;
+    double bestScore = 0;
+    int bestRect = 0;
+
+    // Calculate the increase in volume for assigning this point to each rectangle.
+    double volOne = 1.0;
+    double volTwo = 1.0;
+    for(int i = 0; i < bound.Dim(); i++) {
+      volOne *= treeOne.bound[i].width();
+      volTwo *= treeTwo.bound[i].width();
+    }
+
+    for(int j = 0; j < end; j++) {
+      double newVolOne = 1.0;
+      double newVolTwo = 1.0;
+      for(int i = 0; i < bound.Dim(); i++) {
+	double c = oldTree.dataset.col(index)[i];      
+	newVolOne *= treeOne.bound[i].contains(c) ? treeOne.bound[i].width() :
+	  (c < treeOne.bound[i].low() ? (high - c) : (c - low));
+	newVolTwo *= treeTwo.bound[i].contains(c) ? treeTwo.bound[i].width() :
+	  (c < treeTwo.bound[i].low() ? (high - c) : (c - low));
+      }
+    
+      if((newVolOne - volOne) < (newVolTwo - volTwo)) {
+	if(newVolOne - volOne < bestScore) {
+	  bestScore = newVolOne - volOne;
+	  bestIndex = index;
+	  bestRect = 1;
+	}
+      } else {
+	if(newVolTwo - volTwo < bestScore) {
+	  bestScore = newVolTwo - volTwo;
+	  bestIndex = index;
+	  bestRect = 2;
+	}
+      }
+    }
+
+    // Assign the point that causes the least increase in volume 
+    // to the appropriate rectangle.
+    if(bestRect == 1)
+      treeOne.insertPoint(oldTree.dataset(bestIndex);
+    else
+      treeTwo.insertPoint(oldTree.dataset(bestIndex);
+
+    oldTree.dataset.col(bestIndex) = oldTree.dataset.col(--end); // decrement end.
+  }
+
+
+}
 
 
 }; // namespace tree
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
index bdde1d4..b4c6ff2 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
@@ -45,12 +45,14 @@ class RectangleTree
   //! The parent node (NULL if this is the root of the tree).
   RectangleTree* parent;
   //! The index of the first point in the dataset contained in this node (and
-  //! its children).
+  //! its children).  THIS IS ALWAYS 0 AT THE MOMENT.  IT EXISTS MERELY IN CASE
+  //! I THINK OF A WAY TO CHANGE THAT.  IN OTHER WORDS, IT WILL PROBABLY BE
+  //! REMOVED.
   size_t begin;
   //! The number of points in the dataset contained in this node (and its
   //! children).  
   size_t count;
-  //! The leaf size.
+  //! The leaf size.  (Maximum allowable leaf size.)
   size_t leafSize;
   //! The minimum leaf size.
   size_t minLeafSize;
@@ -94,8 +96,17 @@ class RectangleTree
   ~RectangleTree();
   
   /**
+   * Delete this node of the tree, but leave the stuff contained in it intact.
+   * This is used when splitting a node, where the data in this tree is moved to two
+   * other trees.
+   */
+  void softDelete();
+
+  /**
    * Inserts a point into the tree. The point will be copied to the data matrix
-   * of the leaf node where it is finally inserted.
+   * of the leaf node where it is finally inserted, but we pass by reference since
+   * it may be passed many times before it actually reaches a leaf.
+   * @param point The point (arma::vec&) to be inserted.
    */
   void InsertPoint(const arma::vec& point);
 
@@ -127,12 +138,12 @@ class RectangleTree
 
   //! Return the bound object for this node.
   const HRectBound& Bound() const { return bound; }
-  //! Return the bound object for this node.
+  //! Modify the bound object for this node.
   HRectBound& Bound() { return bound; }
 
   //! Return the statistic object for this node.
   const StatisticType& Stat() const { return stat; }
-  //! Return the statistic object for this node.
+  //! Modify the statistic object for this node.
   StatisticType& Stat() { return stat; }
 
   //! Return whether or not this node is a leaf (true if it has no children).
@@ -337,7 +348,7 @@ class RectangleTree
 
   /**
    * Splits the current node, recursing up the tree.
-   * CURRENTLY IT DOES NOT Also returns a list of the changed indices.
+   * CURRENTLY IT DOES NOT Also returns a list of the changed indices (because there are none).
    *
    * @param data Dataset which we are using.
    * @param oldFromNew Vector holding permuted indices NOT IMPLEMENTED.



More information about the mlpack-git mailing list