[mlpack-svn] r16608 - mlpack/trunk/src/mlpack/core/tree/rectangle_tree

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Jun 2 18:58:12 EDT 2014


Author: andrewmw94
Date: Mon Jun  2 18:58:12 2014
New Revision: 16608

Log:
Fix/update some comments, almost finish the splitting algorithm.  Several miscellaneous changes.

Modified:
   mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
   mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
   mlpack/trunk/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp

Modified: mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp	(original)
+++ mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp	Mon Jun  2 18:58:12 2014
@@ -24,15 +24,17 @@
 public:
 
 /**
- * Split a leaf node using the "default" algorithm.  The methods for splitting non-leaf
- * nodes are private since they should only be called if a leaf node overflows.
+ * Split a leaf node using the "default" algorithm.  If necessary, this split will propagate
+ * upwards through the tree.  The methods for splitting non-leaf nodes are private since
+ * they should only be called if a leaf node overflows.
  */
 static bool SplitLeafNode(const RectangleTree& tree);
 
 private:
 
 /**
- * Split a non-leaf node using the "default" algorithm.
+ * Split a non-leaf node using the "default" algorithm.  If this is the root node and
+ * we need to move up the tree, a new root node is created.
  */
 static bool SplitNonLeafNode(const RectangleTree& tree);
 
@@ -56,21 +58,15 @@
     const int intI,
     const int intJ);
 
-static int GetBoundDestNode(
+/**
+ * Assign nodes to the two new nodes.
+ */
+static void AssignNodeDestNode(
     const RectangleTree& oldTree,
-    const RectangleTree& treeOne,
-    const RectangleTree& treeTwo,
-    const int index);
-
-};
-
-static double getPointToBoundScore(const HRectBound& bound, const arma::vec& point)
-{
-  double score = 1.0;
-  for(int i = 0; i < bound.dimensions; i++) {
-    return score;
-  }
-}
+    RectangleTree& treeOne,
+    RectangleTree& treeTwo,
+    const int intI,
+    const int intJ);
 
 
 }; // namespace tree

Modified: mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp	(original)
+++ mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp	Mon Jun  2 18:58:12 2014
@@ -12,6 +12,12 @@
 namespace mlpack {
 namespace tree {
 
+/**
+ * We call GetPointSeeds to get the two points which will be the initial points in the new nodes
+ * We then call AssignPointDestNode to assign the remaining points to the two new nodes.
+ * Finally, we delete the old node and insert the new nodes into the tree, spliting the parent
+ * if necessary.
+ */
 template<typename MatType>
 void RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
 {
@@ -22,13 +28,15 @@
   int j = 0;
   GetPointSeeds(tree, &i, &j);
 
-  RectangleTree treeOne = new RectangleTree();
-  treeOne.insertPoint(tree.dataset[i]);
-  RectangleTree treeTwo = new RectangleTree();
-  treeTwo.insertPoint(tree.dataset[j]);
-
+  // This will assign the ith and jth point appropriately.
   AssignPointDestNode(tree, treeOne, treeTwo, i, j);
   
+  // create the parent node if necessary
+  if(par == NULL) {
+    
+  }
+  
+  //Remove this node and insert treeOne and treeTwo
   RectangleTree* par = tree.parent();
   int index = 0;
   for(int i = 0; i < par.numOfChildren(); i++) {
@@ -40,6 +48,7 @@
   par.getChildren()[i] = treeOne;
   par.getChildren()[par.end++] = treeTwo;
 
+  //because we copied the points to treeOne and treeTwo, we can just delete this node
   delete tree;
 
   // we only add one at a time, so should only need to test for equality
@@ -52,16 +61,50 @@
   return;
 }
 
+/**
+ * We call GetBoundSeeds to get the two new nodes that this one will be broken
+ * into.  Then we call AssignNodeDestNode to move the children of this node
+ * into either of those two nodes.  Finally, we delete the now unused information
+ * and recurse up the tree if necessary.
+ */
 bool RTreeSplit<MatType>::SplitNonLeafNode(const RectangleTree& tree)
 {
   int i = 0;
   int j = 0;
   GetBoundSeeds(tree, &i, &j);
   
-  RectangleTree treeOne = new RectangleTree();
-  treeOne.insertRectangle()
-  RectangleTree treeTwo = new RectangleTree();
+  // This will assign the ith and jth rectangles appropriately.
+  AssignNodeDestNode(tree, treeOne, treeTwo, i, j);
+
+  // create the parent node if necessary
+  if(par == NULL) {
+    
+  }
   
+  //Remove this node and insert treeOne and treeTwo
+  RectangleTree* par = tree.parent();
+  int index = 0;
+  for(int i = 0; i < par.numOfChildren(); i++) {
+    if(par.getChildren()[i] == this) {
+      index = i;
+      break;
+    }
+  }
+  par.getChildren()[i] = treeOne;
+  par.getChildren()[par.end++] = treeTwo;
+
+  // Because we now have pointers to the information stored under this tree,
+  // we need to delete this node carefully.
+  tree.softDelete();
+
+  // we only add one at a time, so should only need to test for equality
+  // just in case, we use an assert.
+  boost::assert(numChildren <= maxNumChildren);
+
+  if(par.numOfChildren() == par.maxNumChildren) {
+    SplitNonLeafNode(par);
+  }
+  return;
 }
 
 /**
@@ -124,7 +167,6 @@
   return;
 }
 
-
 void RTreeSplit<MatType>::AssignPointDestNode(
     const RectangleTree& oldTree,
     RectangleTree& treeOne,
@@ -132,11 +174,14 @@
     const int intI,
     const int intJ)
 {
-  Log::assert(end > 1); // If this isn't true, the tree is really weird.
   int end = oldTree.count;
+  Log::assert(end > 1); // If this isn't true, the tree is really weird.
+
+  treeOne.insertPoint(oldTree.dataset.col(intI));
   oldTree.dataset.col(intI) = oldTree.dataset.col(--end); // decrement end
+  treeTwo.insertPoint(oldTree.dataset.col(intJ));
   oldTree.dataset.col(intJ) = oldTree.dataset.col(--end); // decrement end
-    
+  
   int index = 0;
 
   // In each iteration, we go through all points and find the one that causes the least
@@ -190,12 +235,80 @@
 
     oldTree.dataset.col(bestIndex) = oldTree.dataset.col(--end); // decrement end.
   }
+}
 
+void RTreeSplit<MatType>::AssignNodeDestNode(
+    const RectangleTree& oldTree,
+    RectangleTree& treeOne,
+    RectangleTree& treeTwo,
+    const int intI,
+    const int intJ)
+{
+  
+  int end = oldTree.getNumChildren();
+  Log::assert(end > 1); // If this isn't true, the tree is really weird.
 
-}
+  treeOne.getChildren()[0] = oldTree.getChildren()[intI];
+  oldTree.getChildren[intI] = oldTree.getChildren()[--end]; // decrement end
+  treeTwo.getChildren()[0] = oldTree.getChildren()[intJ];
+  oldTree.getChildren()[intJ] = oldTree.getChildren()[--end]; // decrement end
+ 
+  int index = 0;
 
+  // In each iteration, we go through all of the nodes and find the one that causes the least
+  // increase of volume when added to one of the two new rectangles.  We then add it to that
+  // rectangle.
+  while() {
+    int bestIndex = 0;
+    double bestScore = 0;
+    int bestRect = 0;
+
+    // Calculate the increase in volume for assigning this point to each rectangle.
+    double volOne = 1.0;
+    double volTwo = 1.0;
+    for(int i = 0; i < bound.Dim(); i++) {
+      volOne *= treeOne.bound[i].width();
+      volTwo *= treeTwo.bound[i].width();
+    }
+
+    for(int j = 0; j < end; j++) {
+      double newVolOne = 1.0;
+      double newVolTwo = 1.0;
+      for(int i = 0; i < bound.Dim(); i++) {
+	double c = oldTree.dataset.col(index)[i];      
+	newVolOne *= treeOne.bound[i].contains(c) ? treeOne.bound[i].width() :
+	  (c < treeOne.bound[i].low() ? (high - c) : (c - low));
+	newVolTwo *= treeTwo.bound[i].contains(c) ? treeTwo.bound[i].width() :
+	  (c < treeTwo.bound[i].low() ? (high - c) : (c - low));
+      }
+    
+      if((newVolOne - volOne) < (newVolTwo - volTwo)) {
+	if(newVolOne - volOne < bestScore) {
+	  bestScore = newVolOne - volOne;
+	  bestIndex = index;
+	  bestRect = 1;
+	}
+      } else {
+	if(newVolTwo - volTwo < bestScore) {
+	  bestScore = newVolTwo - volTwo;
+	  bestIndex = index;
+	  bestRect = 2;
+	}
+      }
+    }
+
+    // Assign the point that causes the least increase in volume 
+    // to the appropriate rectangle.
+    if(bestRect == 1)
+      treeOne.insertPoint(oldTree.dataset(bestIndex);
+    else
+      treeTwo.insertPoint(oldTree.dataset(bestIndex);
+
+    oldTree.dataset.col(bestIndex) = oldTree.dataset.col(--end); // decrement end.
+  }
 
 
+}
 
 
 }; // namespace tree

Modified: mlpack/trunk/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp	(original)
+++ mlpack/trunk/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp	Mon Jun  2 18:58:12 2014
@@ -45,12 +45,14 @@
   //! The parent node (NULL if this is the root of the tree).
   RectangleTree* parent;
   //! The index of the first point in the dataset contained in this node (and
-  //! its children).
+  //! its children).  THIS IS ALWAYS 0 AT THE MOMENT.  IT EXISTS MERELY IN CASE
+  //! I THINK OF A WAY TO CHANGE THAT.  IN OTHER WORDS, IT WILL PROBABLY BE
+  //! REMOVED.
   size_t begin;
   //! The number of points in the dataset contained in this node (and its
-  //! children).
+  //! children).  
   size_t count;
-  //! The leaf size.
+  //! The leaf size.  (Maximum allowable leaf size.)
   size_t leafSize;
   //! The minimum leaf size.
   size_t minLeafSize;
@@ -64,7 +66,7 @@
   double furthestDescendantDistance;
   //! The dataset.
   MatType& dataset;
-
+  
  public:
   //! So other classes can use TreeType::Mat.
   typedef MatType Mat;
@@ -92,10 +94,19 @@
    * to any nodes which are children of this one.
    */
   ~RectangleTree();
+  
+  /**
+   * Delete this node of the tree, but leave the stuff contained in it intact.
+   * This is used when splitting a node, where the data in this tree is moved to two
+   * other trees.
+   */
+  void softDelete();
 
   /**
    * Inserts a point into the tree. The point will be copied to the data matrix
-   * of the leaf node where it is finally inserted.
+   * of the leaf node where it is finally inserted, but we pass by reference since
+   * it may be passed many times before it actually reaches a leaf.
+   * @param point The point (arma::vec&) to be inserted.
    */
   void InsertPoint(const arma::vec& point);
 
@@ -127,12 +138,12 @@
 
   //! Return the bound object for this node.
   const HRectBound& Bound() const { return bound; }
-  //! Return the bound object for this node.
+  //! Modify the bound object for this node.
   HRectBound& Bound() { return bound; }
 
   //! Return the statistic object for this node.
   const StatisticType& Stat() const { return stat; }
-  //! Return the statistic object for this node.
+  //! Modify the statistic object for this node.
   StatisticType& Stat() { return stat; }
 
   //! Return whether or not this node is a leaf (true if it has no children).
@@ -337,7 +348,7 @@
 
   /**
    * Splits the current node, recursing up the tree.
-   * CURRENTLY IT DOES NOT Also returns a list of the changed indices.
+   * CURRENTLY IT DOES NOT Also returns a list of the changed indices (because there are none).
    *
    * @param data Dataset which we are using.
    * @param oldFromNew Vector holding permuted indices NOT IMPLEMENTED.



More information about the mlpack-svn mailing list