[mlpack-svn] r17053 - mlpack/trunk/src/mlpack/core/tree/rectangle_tree
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Sun Aug 17 03:00:45 EDT 2014
Author: rcurtin
Date: Sun Aug 17 03:00:45 2014
New Revision: 17053
Log:
First pass on R* tree split code; make things 80 columns, use convenience
typedef, minor style changes.
Modified:
mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
Modified: mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp (original)
+++ mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp Sun Aug 17 03:00:45 2014
@@ -19,49 +19,49 @@
* as necessary.
*/
template<typename DescentType,
- typename StatisticType,
- typename MatType>
+ typename StatisticType,
+ typename MatType>
class RStarTreeSplit
{
-public:
-
-/**
- * Split a leaf node using the algorithm described in "The R*-tree: An Efficient and Robust Access method
- * for Points and Rectangles." If necessary, this split will propagate
- * upwards through the tree.
- */
-static void SplitLeafNode(RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* tree, std::vector<bool>& relevels);
-
-/**
- * Split a non-leaf node using the "default" algorithm. If this is a root node, the
- * tree increases in depth.
- */
-static bool SplitNonLeafNode(RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* tree, std::vector<bool>& relevels);
-
-private:
-/**
- * Class to allow for faster sorting.
- */
-class sortStruct {
-public:
- double d;
- int n;
-};
-
-/**
- * Comparator for sorting with sortStruct.
- */
-static bool structComp(const sortStruct& s1, const sortStruct& s2) {
- return s1.d < s2.d;
-}
-
-/**
- * Insert a node into another node.
- */
-static void InsertNodeIntoTree(
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* destTree,
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* srcNode);
-
+ public:
+ typedef RectangleTree<RStarTreeSplit, DescentType, StatisticType, MatType>
+ TreeType;
+
+ /**
+ * Split a leaf node using the algorithm described in "The R*-tree: An
+ * Efficient and Robust Access method for Points and Rectangles." If
+ * necessary, this split will propagate upwards through the tree.
+ */
+ static void SplitLeafNode(TreeType* tree, std::vector<bool>& relevels);
+
+ /**
+ * Split a non-leaf node using the "default" algorithm. If this is a root
+ * node, the tree increases in depth.
+ */
+ static bool SplitNonLeafNode(TreeType* tree, std::vector<bool>& relevels);
+
+ private:
+ /**
+ * Class to allow for faster sorting.
+ */
+ struct SortStruct
+ {
+ double d;
+ int n;
+ };
+
+ /**
+ * Comparator for sorting with SortStruct.
+ */
+ static bool StructComp(const SortStruct& s1, const SortStruct& s2)
+ {
+ return s1.d < s2.d;
+ }
+
+ /**
+ * Insert a node into another node.
+ */
+ static void InsertNodeIntoTree(TreeType* destTree, TreeType* srcNode);
};
}; // namespace tree
Modified: mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp (original)
+++ mlpack/trunk/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp Sun Aug 17 03:00:45 2014
@@ -9,133 +9,153 @@
#include "r_star_tree_split.hpp"
#include "rectangle_tree.hpp"
+
#include <mlpack/core/math/range.hpp>
namespace mlpack {
namespace tree {
/**
- * We call GetPointSeeds to get the two points which will be the initial points in the new nodes
- * We then call AssignPointDestNode to assign the remaining points to the two new nodes.
- * Finally, we delete the old node and insert the new nodes into the tree, spliting the parent
- * if necessary.
+ * We call GetPointSeeds to get the two points which will be the initial points
+ * in the new nodes We then call AssignPointDestNode to assign the remaining
+ * points to the two new nodes. Finally, we delete the old node and insert the
+ * new nodes into the tree, spliting the parent if necessary.
*/
template<typename DescentType,
-typename StatisticType,
-typename MatType>
+ typename StatisticType,
+ typename MatType>
void RStarTreeSplit<DescentType, StatisticType, MatType>::SplitLeafNode(
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* tree,
- std::vector<bool>& relevels)
+ TreeType* tree,
+ std::vector<bool>& relevels)
{
- // If we are splitting the root node, we need will do things differently so that the constructor
- // and other methods don't confuse the end user by giving an address of another node.
- if (tree->Parent() == NULL) {
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* copy =
- new RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>(*tree, false); // We actually want to copy this way. Pointers and everything.
-
+ // If we are splitting the root node, we need will do things differently so
+ // that the constructor and other methods don't confuse the end user by giving
+ // an address of another node.
+ if (tree->Parent() == NULL)
+ {
+ // We actually want to copy this way. Pointers and everything.
+ TreeType* copy = new TreeType(*tree, false);
copy->Parent() = tree;
tree->Count() = 0;
tree->NullifyData();
- tree->Children()[(tree->NumChildren())++] = copy; // Because this was a leaf node, numChildren must be 0.
+ // Because this was a leaf node, numChildren must be 0.
+ tree->Children()[(tree->NumChildren())++] = copy;
assert(tree->NumChildren() == 1);
- RStarTreeSplit<DescentType, StatisticType, MatType>::SplitLeafNode(copy, relevels);
+
+ SplitLeafNode(copy, relevels);
+ return;
+ }
+
+ // If we haven't yet reinserted on this level, we try doing so now.
+ if (relevels[tree->TreeDepth()])
+ {
+ relevels[tree->TreeDepth()] = false;
+
+ // We sort the points by decreasing distance to the centroid of the bound.
+ // We then remove the first p entries and reinsert them at the root.
+ TreeType* root = tree;
+ while(root->Parent() != NULL)
+ root = root->Parent();
+ size_t p = tree->MaxLeafSize() * 0.3; // The paper says this works the best.
+ if (p == 0)
+ {
+ SplitLeafNode(tree, relevels);
+ return;
+ }
+
+ std::vector<SortStruct> sorted(tree->Count());
+ arma::vec centroid;
+ tree->Bound().Centroid(centroid); // Modifies centroid.
+ for (size_t i = 0; i < sorted.size(); i++)
+ {
+ sorted[i].d = tree->Bound().Metric().Evaluate(centroid,
+ tree->LocalDataset().col(i));
+ sorted[i].n = i;
+ }
+
+ std::sort(sorted.begin(), sorted.end(), StructComp);
+ std::vector<int> pointIndices(p);
+ for (size_t i = 0; i < p; i++)
+ {
+ // We start from the end of sorted.
+ pointIndices[i] = tree->Points()[sorted[sorted.size() - 1 - i].n];
+ root->DeletePoint(tree->Points()[sorted[sorted.size() - 1 - i].n],
+ relevels);
+ }
+
+ for (size_t i = 0; i < p; i++)
+ {
+ // We reverse the order again to reinsert the closest points first.
+ root->InsertPoint(pointIndices[p - 1 - i], relevels);
+ }
+
return;
}
-
- // If we haven't yet reinserted on this level, we try doing so now.
- if(relevels[tree->TreeDepth()]) {
- relevels[tree->TreeDepth()] = false;
- // We sort the points by decreasing distance to the centroid of the bound.
- // We then remove the first p entries and reinsert them at the root.
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* root = tree;
- while(root->Parent() != NULL)
- root = root->Parent();
- size_t p = tree->MaxLeafSize() * 0.3; // The paper says this works the best.
- if(p == 0) {
- SplitLeafNode(tree, relevels);
- return;
- }
-
- std::vector<sortStruct> sorted(tree->Count());
- arma::vec centroid;
- tree->Bound().Centroid(centroid); // Modifies centroid.
- for(size_t i = 0; i < sorted.size(); i++) {
- sorted[i].d = tree->Bound().Metric().Evaluate(centroid, tree->LocalDataset().col(i));
- sorted[i].n = i;
- }
-
- std::sort(sorted.begin(), sorted.end(), structComp);
- std::vector<int> pointIndices(p);
- for(size_t i = 0; i < p; i++) {
- pointIndices[i] = tree->Points()[sorted[sorted.size()-1-i].n]; // We start from the end of sorted.
- root->DeletePoint(tree->Points()[sorted[sorted.size()-1-i].n], relevels);
- }
- for(size_t i = 0; i < p; i++) { // We reverse the order again to reinsert the closest points first.
- root->InsertPoint(pointIndices[p-1-i], relevels);
- }
-
-// // If we went below min fill, delete this node and reinsert all points.
-// if(tree->Count() < tree->MinLeafSize()) {
-// std::vector<int> pointIndices(tree->Count());
-// for(size_t i = 0; i < tree->Count(); i++) {
-// pointIndices[i] = tree->Points()[i];
-// }
-// root->RemoveNode(tree, relevels);
-// for(size_t i = 0; i < pointIndices.size(); i++) {
-// root->InsertPoint(pointIndices[i], relevels);
-// }
-// //tree->SoftDelete();
-// }
- return;
- }
-
int bestOverlapIndexOnBestAxis = 0;
int bestAreaIndexOnBestAxis = 0;
bool tiedOnOverlap = false;
int bestAxis = 0;
double bestAxisScore = DBL_MAX;
- for (int j = 0; j < tree->Bound().Dim(); j++) {
+
+ for (int j = 0; j < tree->Bound().Dim(); j++)
+ {
double axisScore = 0.0;
// Since we only have points in the leaf nodes, we only need to sort once.
- std::vector<sortStruct> sorted(tree->Count());
- for (int i = 0; i < sorted.size(); i++) {
+ std::vector<SortStruct> sorted(tree->Count());
+ for (int i = 0; i < sorted.size(); i++)
+ {
sorted[i].d = tree->LocalDataset().col(i)[j];
sorted[i].n = i;
}
- std::sort(sorted.begin(), sorted.end(), structComp);
+ std::sort(sorted.begin(), sorted.end(), StructComp);
// We'll store each of the three scores for each distribution.
- std::vector<double> areas(tree->MaxLeafSize() - 2 * tree->MinLeafSize() + 2);
- std::vector<double> margins(tree->MaxLeafSize() - 2 * tree->MinLeafSize() + 2);
- std::vector<double> overlapedAreas(tree->MaxLeafSize() - 2 * tree->MinLeafSize() + 2);
- for (int i = 0; i < areas.size(); i++) {
+ std::vector<double> areas(tree->MaxLeafSize() - 2 * tree->MinLeafSize() +
+ 2);
+ std::vector<double> margins(tree->MaxLeafSize() - 2 * tree->MinLeafSize() +
+ 2);
+ std::vector<double> overlapedAreas(tree->MaxLeafSize() -
+ 2 * tree->MinLeafSize() + 2);
+
+ for (int i = 0; i < areas.size(); i++)
+ {
areas[i] = 0.0;
margins[i] = 0.0;
overlapedAreas[i] = 0.0;
}
- for (int i = 0; i < areas.size(); i++) {
- // The ith arrangement is obtained by placing the first tree->MinLeafSize() + i
- // points in one rectangle and the rest in another. Then we calculate the three
- // scores for that distribution.
+
+ for (int i = 0; i < areas.size(); i++)
+ {
+ // The ith arrangement is obtained by placing the first
+ // tree->MinLeafSize() + i points in one rectangle and the rest in
+ // another. Then we calculate the three scores for that distribution.
int cutOff = tree->MinLeafSize() + i;
+
// We'll calculate the max and min in each dimension by hand to save time.
std::vector<double> maxG1(tree->Bound().Dim());
std::vector<double> minG1(maxG1.size());
std::vector<double> maxG2(maxG1.size());
std::vector<double> minG2(maxG1.size());
- for (int k = 0; k < tree->Bound().Dim(); k++) {
+ for (int k = 0; k < tree->Bound().Dim(); k++)
+ {
minG1[k] = maxG1[k] = tree->LocalDataset().col(sorted[0].n)[k];
- minG2[k] = maxG2[k] = tree->LocalDataset().col(sorted[sorted.size() - 1].n)[k];
- for (int l = 1; l < tree->Count() - 1; l++) {
- if (l < cutOff) {
+ minG2[k] = maxG2[k] =
+ tree->LocalDataset().col(sorted[sorted.size() - 1].n)[k];
+
+ for (int l = 1; l < tree->Count() - 1; l++)
+ {
+ if (l < cutOff)
+ {
if (tree->LocalDataset().col(sorted[l].n)[k] < minG1[k])
minG1[k] = tree->LocalDataset().col(sorted[l].n)[k];
else if (tree->LocalDataset().col(sorted[l].n)[k] > maxG1[k])
maxG1[k] = tree->LocalDataset().col(sorted[l].n)[k];
- } else {
+ }
+ else
+ {
if (tree->LocalDataset().col(sorted[l].n)[k] < minG2[k])
minG2[k] = tree->LocalDataset().col(sorted[l].n)[k];
else if (tree->LocalDataset().col(sorted[l].n)[k] > maxG2[k])
@@ -143,30 +163,41 @@
}
}
}
+
double area1 = 1.0, area2 = 1.0;
double oArea = 1.0;
- for (int k = 0; k < maxG1.size(); k++) {
+ for (int k = 0; k < maxG1.size(); k++)
+ {
margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
area1 *= maxG1[k] - minG1[k];
area2 *= maxG2[k] - minG2[k];
- oArea *= maxG1[k] < minG2[k] || maxG2[k] < minG1[k] ? 0.0 : std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]);
+ oArea *= (maxG1[k] < minG2[k] || maxG2[k] < minG1[k]) ? 0.0 :
+ (std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]));
}
+
areas[i] += area1 + area2;
overlapedAreas[i] += oArea;
axisScore += margins[i];
}
- if (axisScore < bestAxisScore) {
+ if (axisScore < bestAxisScore)
+ {
bestAxisScore = axisScore;
bestAxis = j;
bestOverlapIndexOnBestAxis = 0;
bestAreaIndexOnBestAxis = 0;
- for (int i = 1; i < areas.size(); i++) {
- if (overlapedAreas[i] < overlapedAreas[bestOverlapIndexOnBestAxis]) {
+
+ for (int i = 1; i < areas.size(); i++)
+ {
+ if (overlapedAreas[i] < overlapedAreas[bestOverlapIndexOnBestAxis])
+ {
tiedOnOverlap = false;
bestAreaIndexOnBestAxis = i;
bestOverlapIndexOnBestAxis = i;
- } else if (overlapedAreas[i] == overlapedAreas[bestOverlapIndexOnBestAxis]) {
+ }
+ else if (overlapedAreas[i] ==
+ overlapedAreas[bestOverlapIndexOnBestAxis])
+ {
tiedOnOverlap = true;
if (areas[i] < areas[bestAreaIndexOnBestAxis])
bestAreaIndexOnBestAxis = i;
@@ -175,54 +206,53 @@
}
}
- std::vector<sortStruct> sorted(tree->Count());
- for (int i = 0; i < sorted.size(); i++) {
+ std::vector<SortStruct> sorted(tree->Count());
+ for (int i = 0; i < sorted.size(); i++)
+ {
sorted[i].d = tree->LocalDataset().col(i)[bestAxis];
sorted[i].n = i;
}
- std::sort(sorted.begin(), sorted.end(), structComp);
+ std::sort(sorted.begin(), sorted.end(), StructComp);
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType> *treeOne = new
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>(tree->Parent());
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType> *treeTwo = new
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>(tree->Parent());
+ TreeType* treeOne = new TreeType(tree->Parent());
+ TreeType* treeTwo = new TreeType(tree->Parent());
- if (tiedOnOverlap) {
- for (int i = 0; i < tree->Count(); i++) {
+ if (tiedOnOverlap)
+ {
+ for (int i = 0; i < tree->Count(); i++)
+ {
if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
treeOne->InsertPoint(tree->Points()[sorted[i].n]);
else
treeTwo->InsertPoint(tree->Points()[sorted[i].n]);
}
- } else {
- for (int i = 0; i < tree->Count(); i++) {
+ }
+ else
+ {
+ for (int i = 0; i < tree->Count(); i++)
+ {
if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
treeOne->InsertPoint(tree->Points()[sorted[i].n]);
else
treeTwo->InsertPoint(tree->Points()[sorted[i].n]);
}
- }
-
- //Remove this node and insert treeOne and treeTwo
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* par = tree->Parent();
- int index = -1;
- for (int i = 0; i < par->NumChildren(); i++) {
- if (par->Children()[i] == tree) {
- index = i;
- break;
- }
}
+
+ // Remove this node and insert treeOne and treeTwo.
+ TreeType* par = tree->Parent();
+ size_t index = 0;
+ while (par->Children()[index] != tree) { index++; }
+
assert(index != -1);
par->Children()[index] = treeOne;
par->Children()[par->NumChildren()++] = treeTwo;
- // we only add one at a time, so we should only need to test for equality
+ // We only add one at a time, so we should only need to test for equality
// just in case, we use an assert.
- assert(par->NumChildren() <= par->MaxNumChildren()+1);
- if (par->NumChildren() == par->MaxNumChildren()+1) {
+ assert(par->NumChildren() <= par->MaxNumChildren() + 1);
+ if (par->NumChildren() == par->MaxNumChildren() + 1)
SplitNonLeafNode(par, relevels);
- }
assert(treeOne->Parent()->NumChildren() <= treeOne->MaxNumChildren());
assert(treeOne->Parent()->NumChildren() >= treeOne->MinNumChildren());
@@ -230,35 +260,36 @@
assert(treeTwo->Parent()->NumChildren() >= treeTwo->MinNumChildren());
tree->SoftDelete();
-
- return;
}
/**
* We call GetBoundSeeds to get the two new nodes that this one will be broken
- * into. Then we call AssignNodeDestNode to move the children of this node
- * into either of those two nodes. Finally, we delete the now unused information
- * and recurse up the tree if necessary. We don't need to worry about the bounds
+ * into. Then we call AssignNodeDestNode to move the children of this node into
+ * either of those two nodes. Finally, we delete the now unused information and
+ * recurse up the tree if necessary. We don't need to worry about the bounds
* higher up the tree because they were already updated if necessary.
*/
template<typename DescentType,
-typename StatisticType,
-typename MatType>
+ typename StatisticType,
+ typename MatType>
bool RStarTreeSplit<DescentType, StatisticType, MatType>::SplitNonLeafNode(
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* tree,
- std::vector<bool>& relevels)
+ TreeType* tree,
+ std::vector<bool>& relevels)
{
- // If we are splitting the root node, we need will do things differently so that the constructor
- // and other methods don't confuse the end user by giving an address of another node.
- if (tree->Parent() == NULL) {
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* copy =
- new RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>(*tree, false); // We actually want to copy this way. Pointers and everything.
+ // If we are splitting the root node, we need will do things differently so
+ // that the constructor and other methods don't confuse the end user by giving
+ // an address of another node.
+ if (tree->Parent() == NULL)
+ {
+ // We actually want to copy this way. Pointers and everything.
+ TreeType* copy = new TreeType(*tree, false);
copy->Parent() = tree;
tree->NumChildren() = 0;
tree->NullifyData();
tree->Children()[(tree->NumChildren())++] = copy;
- RStarTreeSplit<DescentType, StatisticType, MatType>::SplitNonLeafNode(copy, relevels);
+
+ SplitNonLeafNode(copy, relevels);
return true;
}
@@ -276,7 +307,7 @@
SplitNonLeafNode(tree, relevels);
return false;
}
-
+
std::vector<sortStruct> sorted(tree->NumChildren());
arma::vec c1;
tree->Bound().Centroid(c1); // Modifies c1.
@@ -287,24 +318,24 @@
sorted[i].n = i;
}
std::sort(sorted.begin(), sorted.end(), structComp);
-
+
//bool startBelowMinFill = tree->NumChildren() < tree->MinNumChildren();
-
+
std::vector<RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>*> removedNodes(p);
for(size_t i =0; i < p; i++) {
removedNodes[i] = tree->Children()[sorted[sorted.size()-1-i].n]; // We start from the end of sorted.
root->RemoveNode(tree->Children()[sorted[sorted.size()-1-i].n], relevels);
}
-
+
for(size_t i = 0; i < p; i++) {
root->InsertNode(removedNodes[p-1-i], tree->TreeDepth(), relevels); // We reverse the order again to reinsert the closest nodes first.
}
-
+
// If we went below min fill, delete this node and reinsert all children.
//SOMETHING IS WRONG. SHOULD NOT GO BELOW MIN FILL.
// if(!startBelowMinFill && tree->NumChildren() < tree->MinNumChildren())
// std::cout<<"MINFILLERROR "<< p << ", " << tree->NumChildren() << "; " << tree->MaxNumChildren()<<std::endl;
-
+
// if(tree->NumChildren() < tree->MinNumChildren()) {
// std::vector<RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>*> rmNodes(tree->NumChildren());
// for(size_t i = 0; i < rmNodes.size(); i++) {
@@ -315,15 +346,14 @@
// root->InsertNode(rmNodes[i], tree->TreeDepth(), relevels);
// }
// // tree->SoftDelete();
-// }
+// }
// assert(tree->NumChildren() >= tree->MinNumChildren());
// assert(tree->NumChildren() <= tree->MaxNumChildren());
-
+
return false;
}
*/
-
int bestOverlapIndexOnBestAxis = 0;
int bestAreaIndexOnBestAxis = 0;
@@ -331,51 +361,68 @@
bool lowIsBest = true;
int bestAxis = 0;
double bestAxisScore = DBL_MAX;
- for (int j = 0; j < tree->Bound().Dim(); j++) {
+ for (int j = 0; j < tree->Bound().Dim(); j++)
+ {
double axisScore = 0.0;
// We'll do Bound().Lo() now and use Bound().Hi() later.
- std::vector<sortStruct> sorted(tree->NumChildren());
- for (int i = 0; i < sorted.size(); i++) {
+ std::vector<SortStruct> sorted(tree->NumChildren());
+ for (int i = 0; i < sorted.size(); i++)
+ {
sorted[i].d = tree->Children()[i]->Bound()[j].Lo();
sorted[i].n = i;
}
- std::sort(sorted.begin(), sorted.end(), structComp);
+ std::sort(sorted.begin(), sorted.end(), StructComp);
// We'll store each of the three scores for each distribution.
- std::vector<double> areas(tree->MaxNumChildren() - 2 * tree->MinNumChildren() + 2);
- std::vector<double> margins(tree->MaxNumChildren() - 2 * tree->MinNumChildren() + 2);
- std::vector<double> overlapedAreas(tree->MaxNumChildren() - 2 * tree->MinNumChildren() + 2);
- for (int i = 0; i < areas.size(); i++) {
+ std::vector<double> areas(tree->MaxNumChildren() -
+ 2 * tree->MinNumChildren() + 2);
+ std::vector<double> margins(tree->MaxNumChildren() -
+ 2 * tree->MinNumChildren() + 2);
+ std::vector<double> overlapedAreas(tree->MaxNumChildren() -
+ 2 * tree->MinNumChildren() + 2);
+
+ for (int i = 0; i < areas.size(); i++)
+ {
areas[i] = 0.0;
margins[i] = 0.0;
overlapedAreas[i] = 0.0;
}
- for (int i = 0; i < areas.size(); i++) {
- // The ith arrangement is obtained by placing the first tree->MinNumChildren() + i
- // points in one rectangle and the rest in another. Then we calculate the three
- // scores for that distribution.
+ for (int i = 0; i < areas.size(); i++)
+ {
+ // The ith arrangement is obtained by placing the first
+ // tree->MinNumChildren() + i points in one rectangle and the rest in
+ // another. Then we calculate the three scores for that distribution.
int cutOff = tree->MinNumChildren() + i;
+
// We'll calculate the max and min in each dimension by hand to save time.
std::vector<double> maxG1(tree->Bound().Dim());
std::vector<double> minG1(maxG1.size());
std::vector<double> maxG2(maxG1.size());
std::vector<double> minG2(maxG1.size());
- for (int k = 0; k < tree->Bound().Dim(); k++) {
+ for (int k = 0; k < tree->Bound().Dim(); k++)
+ {
minG1[k] = tree->Children()[sorted[0].n]->Bound()[k].Lo();
maxG1[k] = tree->Children()[sorted[0].n]->Bound()[k].Hi();
- minG2[k] = tree->Children()[sorted[sorted.size() - 1].n]->Bound()[k].Lo();
- maxG2[k] = tree->Children()[sorted[sorted.size() - 1].n]->Bound()[k].Hi();
- for (int l = 1; l < tree->NumChildren() - 1; l++) {
- if (l < cutOff) {
+ minG2[k] =
+ tree->Children()[sorted[sorted.size() - 1].n]->Bound()[k].Lo();
+ maxG2[k] =
+ tree->Children()[sorted[sorted.size() - 1].n]->Bound()[k].Hi();
+
+ for (int l = 1; l < tree->NumChildren() - 1; l++)
+ {
+ if (l < cutOff)
+ {
if (tree->Children()[sorted[l].n]->Bound()[k].Lo() < minG1[k])
minG1[k] = tree->Children()[sorted[l].n]->Bound()[k].Lo();
else if (tree->Children()[sorted[l].n]->Bound()[k].Hi() > maxG1[k])
maxG1[k] = tree->Children()[sorted[l].n]->Bound()[k].Hi();
- } else {
+ }
+ else
+ {
if (tree->Children()[sorted[l].n]->Bound()[k].Lo() < minG2[k])
minG2[k] = tree->Children()[sorted[l].n]->Bound()[k].Lo();
else if (tree->Children()[sorted[l].n]->Bound()[k].Hi() > maxG2[k])
@@ -383,29 +430,40 @@
}
}
}
+
double area1 = 1.0, area2 = 1.0;
double oArea = 1.0;
- for (int k = 0; k < maxG1.size(); k++) {
+ for (int k = 0; k < maxG1.size(); k++)
+ {
margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
area1 *= maxG1[k] - minG1[k];
area2 *= maxG2[k] - minG2[k];
- oArea *= maxG1[k] < minG2[k] || maxG2[k] < minG1[k] ? 0.0 : std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]);
+ oArea *= (maxG1[k] < minG2[k] || maxG2[k] < minG1[k]) ? 0.0 :
+ (std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]));
}
+
areas[i] += area1 + area2;
overlapedAreas[i] += oArea;
axisScore += margins[i];
}
- if (axisScore < bestAxisScore) {
+
+ if (axisScore < bestAxisScore)
+ {
bestAxisScore = axisScore;
bestAxis = j;
double bestOverlapIndexOnBestAxis = 0;
double bestAreaIndexOnBestAxis = 0;
- for (int i = 1; i < areas.size(); i++) {
- if (overlapedAreas[i] < overlapedAreas[bestOverlapIndexOnBestAxis]) {
+ for (int i = 1; i < areas.size(); i++)
+ {
+ if (overlapedAreas[i] < overlapedAreas[bestOverlapIndexOnBestAxis])
+ {
tiedOnOverlap = false;
bestAreaIndexOnBestAxis = i;
bestOverlapIndexOnBestAxis = i;
- } else if (overlapedAreas[i] == overlapedAreas[bestOverlapIndexOnBestAxis]) {
+ }
+ else if (overlapedAreas[i] ==
+ overlapedAreas[bestOverlapIndexOnBestAxis])
+ {
tiedOnOverlap = true;
if (areas[i] < areas[bestAreaIndexOnBestAxis])
bestAreaIndexOnBestAxis = i;
@@ -414,52 +472,70 @@
}
}
- //Now we do the same thing using Bound().Hi() and choose the best of the two.
- for (int j = 0; j < tree->Bound().Dim(); j++) {
+ // Now we do the same thing using Bound().Hi() and choose the best of the two.
+ for (int j = 0; j < tree->Bound().Dim(); j++)
+ {
double axisScore = 0.0;
// We'll do Bound().Lo() now and use Bound().Hi() later.
- std::vector<sortStruct> sorted(tree->NumChildren());
- for (int i = 0; i < sorted.size(); i++) {
+ std::vector<SortStruct> sorted(tree->NumChildren());
+ for (int i = 0; i < sorted.size(); i++)
+ {
sorted[i].d = tree->Children()[i]->Bound()[j].Hi();
sorted[i].n = i;
}
- std::sort(sorted.begin(), sorted.end(), structComp);
+ std::sort(sorted.begin(), sorted.end(), StructComp);
// We'll store each of the three scores for each distribution.
- std::vector<double> areas(tree->MaxNumChildren() - 2 * tree->MinNumChildren() + 2);
- std::vector<double> margins(tree->MaxNumChildren() - 2 * tree->MinNumChildren() + 2);
- std::vector<double> overlapedAreas(tree->MaxNumChildren() - 2 * tree->MinNumChildren() + 2);
- for (int i = 0; i < areas.size(); i++) {
+ std::vector<double> areas(tree->MaxNumChildren() -
+ 2 * tree->MinNumChildren() + 2);
+ std::vector<double> margins(tree->MaxNumChildren() -
+ 2 * tree->MinNumChildren() + 2);
+ std::vector<double> overlapedAreas(tree->MaxNumChildren() -
+ 2 * tree->MinNumChildren() + 2);
+
+ for (int i = 0; i < areas.size(); i++)
+ {
areas[i] = 0.0;
margins[i] = 0.0;
overlapedAreas[i] = 0.0;
}
- for (int i = 0; i < areas.size(); i++) {
- // The ith arrangement is obtained by placing the first tree->MinNumChildren() + i
- // points in one rectangle and the rest in another. Then we calculate the three
- // scores for that distribution.
+ for (int i = 0; i < areas.size(); i++)
+ {
+ // The ith arrangement is obtained by placing the first
+ // tree->MinNumChildren() + i points in one rectangle and the rest in
+ // another. Then we calculate the three scores for that distribution.
int cutOff = tree->MinNumChildren() + i;
+
// We'll calculate the max and min in each dimension by hand to save time.
std::vector<double> maxG1(tree->Bound().Dim());
std::vector<double> minG1(maxG1.size());
std::vector<double> maxG2(maxG1.size());
std::vector<double> minG2(maxG1.size());
- for (int k = 0; k < tree->Bound().Dim(); k++) {
+
+ for (int k = 0; k < tree->Bound().Dim(); k++)
+ {
minG1[k] = tree->Children()[sorted[0].n]->Bound()[k].Lo();
maxG1[k] = tree->Children()[sorted[0].n]->Bound()[k].Hi();
- minG2[k] = tree->Children()[sorted[sorted.size() - 1].n]->Bound()[k].Lo();
- maxG2[k] = tree->Children()[sorted[sorted.size() - 1].n]->Bound()[k].Hi();
- for (int l = 1; l < tree->NumChildren() - 1; l++) {
- if (l < cutOff) {
+ minG2[k] =
+ tree->Children()[sorted[sorted.size() - 1].n]->Bound()[k].Lo();
+ maxG2[k] =
+ tree->Children()[sorted[sorted.size() - 1].n]->Bound()[k].Hi();
+
+ for (int l = 1; l < tree->NumChildren() - 1; l++)
+ {
+ if (l < cutOff)
+ {
if (tree->Children()[sorted[l].n]->Bound()[k].Lo() < minG1[k])
minG1[k] = tree->Children()[sorted[l].n]->Bound()[k].Lo();
else if (tree->Children()[sorted[l].n]->Bound()[k].Hi() > maxG1[k])
maxG1[k] = tree->Children()[sorted[l].n]->Bound()[k].Hi();
- } else {
+ }
+ else
+ {
if (tree->Children()[sorted[l].n]->Bound()[k].Lo() < minG2[k])
minG2[k] = tree->Children()[sorted[l].n]->Bound()[k].Lo();
else if (tree->Children()[sorted[l].n]->Bound()[k].Hi() > maxG2[k])
@@ -467,30 +543,43 @@
}
}
}
+
double area1 = 1.0, area2 = 1.0;
double oArea = 1.0;
- for (int k = 0; k < maxG1.size(); k++) {
+
+ for (int k = 0; k < maxG1.size(); k++)
+ {
margins[i] += maxG1[k] - minG1[k] + maxG2[k] - minG2[k];
area1 *= maxG1[k] - minG1[k];
area2 *= maxG2[k] - minG2[k];
- oArea *= maxG1[k] < minG2[k] || maxG2[k] < minG1[k] ? 0.0 : std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]);
+ oArea *= (maxG1[k] < minG2[k] || maxG2[k] < minG1[k]) ? 0.0 :
+ (std::min(maxG1[k], maxG2[k]) - std::max(minG1[k], minG2[k]));
}
+
areas[i] += area1 + area2;
overlapedAreas[i] += oArea;
axisScore += margins[i];
}
- if (axisScore < bestAxisScore) {
+
+ if (axisScore < bestAxisScore)
+ {
bestAxisScore = axisScore;
bestAxis = j;
lowIsBest = false;
double bestOverlapIndexOnBestAxis = 0;
double bestAreaIndexOnBestAxis = 0;
- for (int i = 1; i < areas.size(); i++) {
- if (overlapedAreas[i] < overlapedAreas[bestOverlapIndexOnBestAxis]) {
+
+ for (int i = 1; i < areas.size(); i++)
+ {
+ if (overlapedAreas[i] < overlapedAreas[bestOverlapIndexOnBestAxis])
+ {
tiedOnOverlap = false;
bestAreaIndexOnBestAxis = i;
bestOverlapIndexOnBestAxis = i;
- } else if (overlapedAreas[i] == overlapedAreas[bestOverlapIndexOnBestAxis]) {
+ }
+ else if (overlapedAreas[i] ==
+ overlapedAreas[bestOverlapIndexOnBestAxis])
+ {
tiedOnOverlap = true;
if (areas[i] < areas[bestAreaIndexOnBestAxis])
bestAreaIndexOnBestAxis = i;
@@ -499,35 +588,43 @@
}
}
- std::vector<sortStruct> sorted(tree->NumChildren());
- if (lowIsBest) {
- for (int i = 0; i < sorted.size(); i++) {
+ std::vector<SortStruct> sorted(tree->NumChildren());
+ if (lowIsBest)
+ {
+ for (int i = 0; i < sorted.size(); i++)
+ {
sorted[i].d = tree->Children()[i]->Bound()[bestAxis].Lo();
sorted[i].n = i;
}
- } else {
- for (int i = 0; i < sorted.size(); i++) {
+ }
+ else
+ {
+ for (int i = 0; i < sorted.size(); i++)
+ {
sorted[i].d = tree->Children()[i]->Bound()[bestAxis].Hi();
sorted[i].n = i;
}
}
- std::sort(sorted.begin(), sorted.end(), structComp);
+ std::sort(sorted.begin(), sorted.end(), StructComp);
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType> *treeOne = new
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>(tree->Parent());
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType> *treeTwo = new
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>(tree->Parent());
+ TreeType* treeOne = new TreeType(tree->Parent());
+ TreeType* treeTwo = new TreeType(tree->Parent());
- if (tiedOnOverlap) {
- for (int i = 0; i < tree->NumChildren(); i++) {
+ if (tiedOnOverlap)
+ {
+ for (int i = 0; i < tree->NumChildren(); i++)
+ {
if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
InsertNodeIntoTree(treeOne, tree->Children()[sorted[i].n]);
else
InsertNodeIntoTree(treeTwo, tree->Children()[sorted[i].n]);
}
- } else {
- for (int i = 0; i < tree->NumChildren(); i++) {
+ }
+ else
+ {
+ for (int i = 0; i < tree->NumChildren(); i++)
+ {
if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
InsertNodeIntoTree(treeOne, tree->Children()[sorted[i].n]);
else
@@ -535,56 +632,53 @@
}
}
- //Remove this node and insert treeOne and treeTwo
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* par = tree->Parent();
- int index = 0;
- for (int i = 0; i < par->NumChildren(); i++) {
- if (par->Children()[i] == tree) {
- index = i;
- break;
- }
- }
+ // Remove this node and insert treeOne and treeTwo
+ TreeType* par = tree->Parent();
+ size_t index = 0;
+ while (par->Children()[index] != tree) { index++; }
+
par->Children()[index] = treeOne;
par->Children()[par->NumChildren()++] = treeTwo;
- // we only add one at a time, so we should only need to test for equality
+ // We only add one at a time, so we should only need to test for equality
// just in case, we use an assert.
- assert(par->NumChildren() <= par->MaxNumChildren()+1);
- if (par->NumChildren() == par->MaxNumChildren()+1) {
+ assert(par->NumChildren() <= par->MaxNumChildren() + 1);
+ if (par->NumChildren() == par->MaxNumChildren() + 1)
+ {
SplitNonLeafNode(par, relevels);
}
-
- // We have to update the children of each of these new nodes so that they record the
- // correct parent.
- for (int i = 0; i < treeOne->NumChildren(); i++) {
+
+ // We have to update the children of each of these new nodes so that they
+ // record the correct parent.
+ for (int i = 0; i < treeOne->NumChildren(); i++)
treeOne->Children()[i]->Parent() = treeOne;
- }
- for (int i = 0; i < treeTwo->NumChildren(); i++) {
+
+ for (int i = 0; i < treeTwo->NumChildren(); i++)
treeTwo->Children()[i]->Parent() = treeTwo;
- }
assert(treeOne->Parent()->NumChildren() <= treeOne->MaxNumChildren());
assert(treeOne->Parent()->NumChildren() >= treeOne->MinNumChildren());
assert(treeTwo->Parent()->NumChildren() <= treeTwo->MaxNumChildren());
assert(treeTwo->Parent()->NumChildren() >= treeTwo->MinNumChildren());
-
+
assert(treeOne->MaxNumChildren() < 7);
assert(treeTwo->MaxNumChildren() < 7);
tree->SoftDelete();
-
+
return false;
}
/**
- * Insert a node into another node. Expanding the bounds and updating the numberOfChildren.
+ * Insert a node into another node. Expanding the bounds and updating the
+ * numberOfChildren.
*/
template<typename DescentType,
typename StatisticType,
typename MatType>
void RStarTreeSplit<DescentType, StatisticType, MatType>::InsertNodeIntoTree(
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* destTree,
- RectangleTree<RStarTreeSplit<DescentType, StatisticType, MatType>, DescentType, StatisticType, MatType>* srcNode)
+ TreeType* destTree,
+ TreeType* srcNode)
{
destTree->Bound() |= srcNode->Bound();
destTree->Children()[destTree->NumChildren()++] = srcNode;
More information about the mlpack-svn
mailing list