[mlpack-git] master,mlpack-1.0.x: R tree stuff (fa627db)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:48:15 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit fa627db1ebdc4b092e02a149a2b7bd1285811ac7
Author: andrewmw94 <andrewmw94 at gmail.com>
Date:   Fri May 30 20:36:27 2014 +0000

    R tree stuff


>---------------------------------------------------------------

fa627db1ebdc4b092e02a149a2b7bd1285811ac7
 .../core/tree/rectangle_tree/r_tree_split.hpp      |  20 +++-
 .../core/tree/rectangle_tree/r_tree_split_impl.hpp | 127 +++++++++++++++++++--
 .../core/tree/rectangle_tree/rectangle_tree.hpp    |   6 +-
 .../tree/rectangle_tree/rectangle_tree_impl.hpp    |  34 +++++-
 4 files changed, 165 insertions(+), 22 deletions(-)

diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
index 85a3687..949803e 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
@@ -47,13 +47,14 @@ static void GetPointSeeds(const RectangleTree& tree, int &i, int &j);
 static void GetBoundSeeds(const RectangleTree& tree, int &i, int &j);
 
 /**
- * Get the index of the tree 
+ * Assign points to the two new nodes.
  */
-static int GetPointDestNode(
+static void AssignPointDestNode(
     const RectangleTree& oldTree,
-    const RectangleTree& treeOne,
-    const RectangleTree& treeTwo,
-    const int index);
+    RectangleTree& treeOne,
+    RectangleTree& treeTwo,
+    const int intI,
+    const int intJ);
 
 static int GetBoundDestNode(
     const RectangleTree& oldTree,
@@ -63,6 +64,15 @@ static int GetBoundDestNode(
 
 };
 
+static double getPointToBoundScore(const HRectBound& bound, const arma::vec& point)
+{
+  double score = 1.0;
+  for(int i = 0; i < bound.dimensions; i++) {
+    return score;
+  }
+}
+
+
 }; // namespace tree
 }; // namespace mlpack
 
diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
index d182fec..babd973 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
@@ -23,19 +23,23 @@ bool RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
   GetPointSeeds(tree, &i, &j);
 
   RectangleTree treeOne = new RectangleTree();
-  treeOne.insertPoint(tree.points[i]);
+  treeOne.insertPoint(tree.dataset[i]);
   RectangleTree treeTwo = new RectangleTree();
-  treeTwo.insertPoint(tree.points[j]);
-
-  int treeOneCount = 1;
-  int treeTwoCount = 1;
-
-  
+  treeTwo.insertPoint(tree.dataset[j]);
 
+  AssignPointDestNode(tree, treeOne, treeTwo, i, j);
+  return true;
 }
 
 bool RTreeSplit<MatType>::SplitNonLeafNode(const RectangleTree& tree)
 {
+  int i = 0;
+  int j = 0;
+  GetBoundSeeds(tree, &i, &j);
+  
+  RectangleTree treeOne = new RectangleTree();
+  treeOne.insertRectangle()
+  RectangleTree treeTwo = new RectangleTree();
   
 }
 
@@ -52,10 +56,39 @@ void RTreeSplit<MatType>::GetPointSeeds(const RectangleTree& tree, int* iRet, in
   int worstI = 0;
   int worstJ = 0;
   for(int i = 0; i < tree.count; i++) {
-    for(int j = i; j < tree.count; j++) {
+    for(int j = i+1; j < tree.count; j++) {
+      double score = 1.0;
+      for(int k = 0; k < dimensions; k++) {
+	score *= std::abs(tree.dataset[i][k] - tree.dataset[j][k]);
+      }
+      if(score > worstPairScore) {
+	worstPairScore = score;
+	worstI = i;
+	worstJ = j;
+      }
+    }
+  }
+
+  *iRet = worstI;
+  *jRet = worstJ;
+  return;
+}
+
+/**
+ * Get the two bounds that will be used as seeds for the split of the node.
+ * The indices of the bounds will be stored in iRet and jRet.
+ */
+void RTreeSplit<MatType>::GetBoundSeeds(const RectangleTree& tree, int* iRet, int* jRet)
+{
+  double worstPairScore = 0.0;
+  int worstI = 0;
+  int worstJ = 0;
+  for(int i = 0; i < tree.numChildren; i++) {
+    for(int j = i+1; j < tree.numChildren; j++) {
       double score = 1.0;
       for(int k = 0; k < dimensions; k++) {
-	score *= std::abs(tree.points[i][k] - tree.points[j][k]);
+	score *= std::max(tree.children[i].bound[k].hi(), tree.children[j].bound[k].hi) - 
+	  std::min(tree.children[i].bound[k].low(), tree.children[j].bound[k].low());
       }
       if(score > worstPairScore) {
 	worstPairScore = score;
@@ -65,11 +98,83 @@ void RTreeSplit<MatType>::GetPointSeeds(const RectangleTree& tree, int* iRet, in
     }
   }
 
-  *iRet = i;
-  *jRet = j;
+  *iRet = worstI;
+  *jRet = worstJ;
   return;
 }
 
+void RTreeSplit<MatType>::AssignPointDestNode(
+    const RectangleTree& oldTree,
+    RectangleTree& treeOne,
+    RectangleTree& treeTwo,
+    const int intI,
+    const int intJ)
+{
+  int end = oldTree.count;
+  oldTree.data
+
+
+
+    
+  int index = 0;
+  while() {
+    int bestIndex = 0;
+    double bestScore = 0;
+    int bestRect = 0;
+
+    // Calculate the increase in volume for assigning this point to each rectangle.
+    double volOne = 1.0;
+    double volTwo = 1.0;
+    for(int i = 0; i < bound.Dim(); i++) {
+      volOne *= treeOne.bound[i].width();
+      volTwo *= treeTwo.bound[i].width();
+    }
+
+    for(int j = 0; j < end; j++) {
+      double newVolOne = 1.0;
+      double newVolTwo = 1.0;
+      for(int i = 0; i < bound.Dim(); i++) {
+	double c = oldTree.dataset.col(index)[i];      
+	newVolOne *= treeOne.bound[i].contains(c) ? treeOne.bound[i].width() :
+	  (c < treeOne.bound[i].low() ? (high - c) : (c - low));
+	newVolTwo *= treeTwo.bound[i].contains(c) ? treeTwo.bound[i].width() :
+	  (c < treeTwo.bound[i].low() ? (high - c) : (c - low));
+      }
+    
+      if((newVolOne - volOne) < (newVolTwo - volTwo)) {
+	if(newVolOne - volOne < bestScore) {
+	  bestScore = newVolOne - volOne;
+	  bestIndex = index;
+	  bestRect = 1;
+	}
+      } else {
+	if(newVolTwo - volTwo < bestScore) {
+	  bestScore = newVolTwo - volTwo;
+	  bestIndex = index;
+	  bestRect = 2;
+	}
+      }
+    }
+
+    // Assign the point that causes the least increase in volume 
+    // to the appropriate rectangle.
+    if(bestRect == 1)
+      treeOne.insertPoint(oldTree.dataset(bestIndex);
+    else
+      treeTwo.insertPoint(oldTree.dataset(bestIndex);
+
+    // I have the imaginary removePoint here.  I need to find out more about how fast doing 
+    // various things with arma::mat is before I decide how to "remove" the points from the
+    // dataset.  Make sure this is not accidentally made the function for deleting points.
+    oldTree.removePoint(index);
+    end--;
+  }
+
+
+}
+
+
+
 
 
 }; // namespace tree
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
index 3bf26a0..5f89aac 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
@@ -34,8 +34,10 @@ template<typename StatisticType = EmptyStatistic,
 class RectangleTree
 {
  private:
-  //! The max number of child nodes an non-leaf node can have.
+  //! The max number of child nodes a non-leaf node can have.
   size_t maxNumChildren;
+  //! The minimum number of child nodes a non-leaf node can have.
+  size_t minNumChildren;
   //! The number of child nodes actually in use (0 if this is a leaf node).
   size_t numOfChildren;
   //! The child nodes (Starting at 0 and ending at (numOfChildren-1) ).
@@ -50,6 +52,8 @@ class RectangleTree
   size_t count;
   //! The leaf size.
   size_t leafSize;
+  //! The minimum leaf size.
+  size_t minLeafSize;
   //! The bound object for this node.
   HRectBound bound;
   //! Any extra data contained in the node.
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
index 1643f85..d454c46 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
@@ -22,11 +22,31 @@ template<typename StatisticType,
 	 typename DescentType>
 RectangleTree<StatisticType, MatType, SplitType, DescentType>::RectangleTree(
     MatType& data,
-    const size_t leafSize):
+    const size_t leafSize,
+    const size_t minLeafSize,
+    const size_t maxNumChildren,
+    const size_t minNumChildren):
 
 {
-  //Do the actual stuff here
-
+  this.maxNumChildren = maxNumChildren;
+  this.minNumChildren = minNumChildren;
+  this.numChildren = 0;
+  this.parent = NULL;
+  this.begin = 0;
+  this.count = 0;
+  this.leafSize = leafSize;
+  this.minLeafSize = minLeafSize;
+  this.bound = new HRectBound(data.n_rows);
+  this.stat = EmptyStatistic;
+  this.parentDistance = 0.0;
+  this.furthestDescendantDistance = 0.0;
+  this.dataset = new MatType(leafSize+1); // Add one to make splitting the node simpler
+
+  // For now, just insert the points in order.
+  // This won't actually work for any meaningful size of data since the root changes.
+  for(int i = 0; i < n_cols; i++) {
+    insertPoint(data.col(i));
+  }
 }
 
 /**
@@ -305,8 +325,12 @@ std::string BinarySpaceTree<StatisticType, MatType, SplitType, DescentType>::ToS
   convert << "  Leaf size: " << leafSize << std::endl;
   convert << "  Split dimension: " << splitDimension << std::endl;
 
-  // How many levels should we print?  This will print the top two tree levels.
-  for(
+  // How many levels should we print?  This will print the root and it's children.
+  if(parent == NULL) {
+    for(int i = 0; i < numChildren; i++) {
+      children[i].ToString();
+    }
+  }
 }
 
 }; //namespace tree



More information about the mlpack-git mailing list