[mlpackgit] master: Include overlapping points in each child's bounding box. (a4ac0b2)
gitdub at mlpack.org
gitdub at mlpack.org
Thu Aug 18 13:39:59 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/0f4b25acd6aaa14294c044874ba6cc0751712baa...0a19d07bd39e6223991976474bc79671ba8aa0f0
>
commit a4ac0b2b67ceac6f348cc878da150eeaded4cc0f
Author: MarcosPividori <marcos.pividori at gmail.com>
Date: Mon Aug 1 17:10:36 2016 0300
Include overlapping points in each child's bounding box.
>
a4ac0b2b67ceac6f348cc878da150eeaded4cc0f
src/mlpack/core/tree/spill_tree/spill_tree.hpp  16 +
.../core/tree/spill_tree/spill_tree_impl.hpp  29 +++++++
2 files changed, 10 insertions(+), 35 deletions()
diff git a/src/mlpack/core/tree/spill_tree/spill_tree.hpp b/src/mlpack/core/tree/spill_tree/spill_tree.hpp
index c48fbd5..29208ce 100644
 a/src/mlpack/core/tree/spill_tree/spill_tree.hpp
+++ b/src/mlpack/core/tree/spill_tree/spill_tree.hpp
@@ 156,14 +156,12 @@ class SpillTree
*
* @param parent Parent of this node. Its dataset will be modified!
* @param points Vector of indexes of points to be included in this node.
 * @param overlapIndex Index where the list of overlapping points starts.
* @param tau Overlapping size.
* @param maxLeafSize Size of each leaf in the tree.
* @param rho Balance threshold.
*/
SpillTree(SpillTree* parent,
std::vector<size_t>& points,
 const size_t overlapIndex,
const double tau = 0,
const size_t maxLeafSize = 20,
const double rho = 0.7);
@@ 368,13 +366,11 @@ class SpillTree
* Splits the current node, assigning its left and right children recursively.
*
* @param points Vector of indexes of points to be included in this node.
 * @param overlapIndex Index where the list of overlapping points starts.
* @param maxLeafSize Maximum number of points held in a leaf.
* @param tau Overlapping size.
* @param rho Balance threshold.
*/
void SplitNode(std::vector<size_t>& points,
 const size_t overlapIndex,
const size_t maxLeafSize,
const double tau,
const double rho);
@@ 389,14 +385,6 @@ class SpillTree
* @param points Vector of indexes of points to be included.
* @param leftPoints Indexes of points to be included in left child.
* @param rightPoints Indexes of points to be included in right child.
 * @param overlapIndexLeft Index in leftPoints where the list of overlapping
 points starts ( [overlapIndexLeft, leftPoints.size()) represents the
 indexes of the points from the right node that are included in the left
 node).
 * @param overlapIndexRight Index in rightPoints where the list of overlapping
 points starts ( [overlapIndexRight, rightPoints.size()) represents the
 indexes of the points from the left node that are included in the right
 node).
* @return Flag to know if the overlapping buffer was included.
*/
bool SplitPoints(const size_t splitDimension,
@@ 405,9 +393,7 @@ class SpillTree
const double rho,
const std::vector<size_t>& points,
std::vector<size_t>& leftPoints,
 std::vector<size_t>& rightPoints,
 size_t& overlapIndexLeft,
 size_t& overlapIndexRight);
+ std::vector<size_t>& rightPoints);
protected:
/**
* A default constructor. This is meant to only be used with
diff git a/src/mlpack/core/tree/spill_tree/spill_tree_impl.hpp b/src/mlpack/core/tree/spill_tree/spill_tree_impl.hpp
index e2f0f93..b8c7598 100644
 a/src/mlpack/core/tree/spill_tree/spill_tree_impl.hpp
+++ b/src/mlpack/core/tree/spill_tree/spill_tree_impl.hpp
@@ 42,7 +42,7 @@ SpillTree(
points.push_back(i);
// Do the actual splitting of this node.
 SplitNode(points, points.size(), maxLeafSize, tau, rho);
+ SplitNode(points, maxLeafSize, tau, rho);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ 78,7 +78,7 @@ SpillTree(
points.push_back(i);
// Do the actual splitting of this node.
 SplitNode(points, points.size(), maxLeafSize, tau, rho);
+ SplitNode(points, maxLeafSize, tau, rho);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ 93,7 +93,6 @@ SpillTree<MetricType, StatisticType, MatType, SplitType>::
SpillTree(
SpillTree* parent,
std::vector<size_t>& points,
 const size_t overlapIndex,
const double tau,
const size_t maxLeafSize,
const double rho) :
@@ 110,7 +109,7 @@ SpillTree(
localDataset(false)
{
// Perform the actual splitting.
 SplitNode(points, overlapIndex, maxLeafSize, tau, rho);
+ SplitNode(points, maxLeafSize, tau, rho);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ 448,14 +447,12 @@ template<typename MetricType,
class SplitType>
void SpillTree<MetricType, StatisticType, MatType, SplitType>::
SplitNode(std::vector<size_t>& points,
 const size_t overlapIndex,
const size_t maxLeafSize,
const double tau,
const double rho)
{
 // We need to expand the bounds of this node properly, ignoring overlapping
 // points (they will be included in the bound of the other node).
 for (size_t i = 0; i < overlapIndex; i++)
+ // We need to expand the bounds of this node properly.
+ for (size_t i = 0; i < points.size(); i++)
bound = dataset>cols(points[i], points[i]);
// Calculate the furthest descendant distance.
@@ 483,20 +480,17 @@ void SpillTree<MetricType, StatisticType, MatType, SplitType>::
}
std::vector<size_t> leftPoints, rightPoints;
 size_t overlapIndexLeft, overlapIndexRight;
// Split the node.
overlappingNode = SplitPoints(splitDimension, splitValue, tau, rho, points,
 leftPoints, rightPoints, overlapIndexLeft, overlapIndexRight);
+ leftPoints, rightPoints);
// We don't need the information in points, so lets clean it.
std::vector<size_t>().swap(points);
// Now we will recursively split the children by calling their constructors
// (which perform this splitting process).
 left = new SpillTree(this, leftPoints, overlapIndexLeft, tau, maxLeafSize,
 rho);
 right = new SpillTree(this, rightPoints, overlapIndexRight, tau, maxLeafSize,
 rho);
+ left = new SpillTree(this, leftPoints, tau, maxLeafSize, rho);
+ right = new SpillTree(this, rightPoints, tau, maxLeafSize, rho);
// Update count number, to represent the number of descendant points.
count = left>NumDescendants() + right>NumDescendants();
@@ 527,9 +521,7 @@ bool SpillTree<MetricType, StatisticType, MatType, SplitType>::
const double rho,
const std::vector<size_t>& points,
std::vector<size_t>& leftPoints,
 std::vector<size_t>& rightPoints,
 size_t& overlapIndexLeft,
 size_t& overlapIndexRight)
+ std::vector<size_t>& rightPoints)
{
std::vector<size_t> leftFrontier, rightFrontier;
@@ 556,9 +548,6 @@ bool SpillTree<MetricType, StatisticType, MatType, SplitType>::
const double p2 = double (rightPoints.size() + leftFrontier.size()) /
points.size();
 overlapIndexLeft = leftPoints.size();
 overlapIndexRight = rightPoints.size();

if ((p1 <= rho  rightFrontier.empty()) && (p2 <= rho  leftFrontier.empty()))
{
leftPoints.insert(leftPoints.end(), rightFrontier.begin(),
More information about the mlpackgit
mailing list