[mlpack-git] master: A first pass at refactoring BinarySpaceTree. (4329aa9)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Fri Jul 10 19:00:05 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/4a97187bbba7ce8a6191b714949dd818ef0f37d2...e5905e62c15d1bcff21e6359b11efcd7ab6d7ca0
>---------------------------------------------------------------
commit 4329aa9cb7a2c771f96c8f301e12b1d7661753f0
Author: ryan <ryan at ratml.org>
Date: Wed Apr 22 16:53:24 2015 -0400
A first pass at refactoring BinarySpaceTree.
>---------------------------------------------------------------
4329aa9cb7a2c771f96c8f301e12b1d7661753f0
.../tree/binary_space_tree/binary_space_tree.hpp | 78 +++++++++----------
.../binary_space_tree/binary_space_tree_impl.hpp | 88 +++++++++++-----------
2 files changed, 83 insertions(+), 83 deletions(-)
diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
index 9af121b..d17532d 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
@@ -67,8 +67,9 @@ class BinarySpaceTree
double furthestDescendantDistance;
//! The minimum distance from the center to any edge of the bound.
double minimumBoundDistance;
- //! The dataset.
- MatType& dataset;
+ //! The dataset. If we are the root of the tree, we own the dataset and must
+ //! delete it.
+ MatType* dataset;
public:
//! So other classes can use TreeType::Mat.
@@ -88,92 +89,95 @@ class BinarySpaceTree
/**
* Construct this as the root node of a binary space tree using the given
- * dataset. This will modify the ordering of the points in the dataset!
+ * dataset. This will copy the input matrix; if you don't want this, consider
+ * using the constructor that takes an rvalue reference and use std::move().
*
- * @param data Dataset to create tree from. This will be modified!
+ * @param data Dataset to create tree from. This will be copied!
* @param maxLeafSize Size of each leaf in the tree.
*/
- BinarySpaceTree(MatType& data, const size_t maxLeafSize = 20);
+ BinarySpaceTree(const MatType& data, const size_t maxLeafSize = 20);
/**
* Construct this as the root node of a binary space tree using the given
- * dataset. This will modify the ordering of points in the dataset! A
- * mapping of the old point indices to the new point indices is filled.
+ * dataset. This will copy the input matrix and modify its ordering; a
+ * mapping of the old point indices to the new point indices is filled. If
+ * you don't want the matrix to be copied, consider using the constructor that
+ * takes an rvalue reference and use std::move().
*
- * @param data Dataset to create tree from. This will be modified!
+ * @param data Dataset to create tree from. This will be copied!
* @param oldFromNew Vector which will be filled with the old positions for
* each new point.
* @param maxLeafSize Size of each leaf in the tree.
*/
- BinarySpaceTree(MatType& data,
+ BinarySpaceTree(const MatType& data,
std::vector<size_t>& oldFromNew,
const size_t maxLeafSize = 20);
/**
* Construct this as the root node of a binary space tree using the given
- * dataset. This will modify the ordering of points in the dataset! A
+ * dataset. This will copy the input matrix and modify its ordering; a
* mapping of the old point indices to the new point indices is filled, as
- * well as a mapping of the new point indices to the old point indices.
+ * well as a mapping of the new point indices to the old point indices. If
+ * you don't want the matrix to be copied, consider using the constructor that
+ * takes an rvalue reference and use std::move().
*
- * @param data Dataset to create tree from. This will be modified!
+ * @param data Dataset to create tree from. This will be copied!
* @param oldFromNew Vector which will be filled with the old positions for
* each new point.
* @param newFromOld Vector which will be filled with the new positions for
* each old point.
* @param maxLeafSize Size of each leaf in the tree.
*/
- BinarySpaceTree(MatType& data,
+ BinarySpaceTree(const MatType& data,
std::vector<size_t>& oldFromNew,
std::vector<size_t>& newFromOld,
const size_t maxLeafSize = 20);
/**
- * Construct this node on a subset of the given matrix, starting at column
- * begin and using count points. The ordering of that subset of points
- * will be modified! This is used for recursive tree-building by the other
- * constructors which don't specify point indices.
+ * Construct this node as a child of the given parent, starting at column
+ * begin_in and using count_in points. The ordering of that subset of points
+ * in the parent's data matrix will be modified! This is used for recursive
+ * tree-building by the other constructors which don't specify point indices.
*
- * @param data Dataset to create tree from. This will be modified!
+ * @param parent Parent of this node. Its dataset will be modified!
* @param begin Index of point to start tree construction with.
* @param count Number of points to use to construct tree.
* @param maxLeafSize Size of each leaf in the tree.
*/
- BinarySpaceTree(MatType& data,
+ BinarySpaceTree(BinarySpaceTree* parent,
const size_t begin,
const size_t count,
- BinarySpaceTree* parent = NULL,
const size_t maxLeafSize = 20);
/**
- * Construct this node on a subset of the given matrix, starting at column
+ * Construct this node as a child of the given parent, starting at column
* begin_in and using count_in points. The ordering of that subset of points
- * will be modified! This is used for recursive tree-building by the other
- * constructors which don't specify point indices.
+ * in the parent's data matrix will be modified! This is used for recursive
+ * tree-building by the other constructors which don't specify point indices.
*
* A mapping of the old point indices to the new point indices is filled, but
* it is expected that the vector is already allocated with size greater than
* or equal to (begin_in + count_in), and if that is not true, invalid memory
* reads (and writes) will occur.
*
- * @param data Dataset to create tree from. This will be modified!
+ * @param parent Parent of this node. Its dataset will be modified!
* @param begin Index of point to start tree construction with.
* @param count Number of points to use to construct tree.
* @param oldFromNew Vector which will be filled with the old positions for
* each new point.
* @param maxLeafSize Size of each leaf in the tree.
*/
- BinarySpaceTree(MatType& data,
+ BinarySpaceTree(BinarySpaceTree* parent,
const size_t begin,
const size_t count,
std::vector<size_t>& oldFromNew,
- BinarySpaceTree* parent = NULL,
const size_t maxLeafSize = 20);
/**
- * Construct this node on a subset of the given matrix, starting at column
+ * Construct this node as a child of the given parent, starting at column
* begin_in and using count_in points. The ordering of that subset of points
- * will be modified! This is used for recursive tree-building by the other
- * constructors which don't specify point indices.
+ * in the parent's data matrix will be modified! This is used for recursive
+ * tree-building by the other constructors which don't specify point indices.
*
* A mapping of the old point indices to the new point indices is filled, as
* well as a mapping of the new point indices to the old point indices. It is
@@ -181,7 +185,7 @@ class BinarySpaceTree
* equal to (begin_in + count_in), and if that is not true, invalid memory
* reads (and writes) will occur.
*
- * @param data Dataset to create tree from. This will be modified!
+ * @param parent Parent of this node. Its dataset will be modified!
* @param begin Index of point to start tree construction with.
* @param count Number of points to use to construct tree.
* @param oldFromNew Vector which will be filled with the old positions for
@@ -190,12 +194,11 @@ class BinarySpaceTree
* each old point.
* @param maxLeafSize Size of each leaf in the tree.
*/
- BinarySpaceTree(MatType& data,
+ BinarySpaceTree(BinarySpaceTree* parent,
const size_t begin,
const size_t count,
std::vector<size_t>& oldFromNew,
std::vector<size_t>& newFromOld,
- BinarySpaceTree* parent = NULL,
const size_t maxLeafSize = 20);
/**
@@ -269,9 +272,9 @@ class BinarySpaceTree
BinarySpaceTree*& Parent() { return parent; }
//! Get the dataset which the tree is built on.
- const MatType& Dataset() const { return dataset; }
+ const MatType& Dataset() const { return *dataset; }
//! Modify the dataset which the tree is built on. Be careful!
- MatType& Dataset() { return dataset; }
+ MatType& Dataset() { return *dataset; }
//! Get the metric which the tree uses.
typename BoundType::MetricType Metric() const { return bound.Metric(); }
@@ -425,11 +428,9 @@ class BinarySpaceTree
/**
* Splits the current node, assigning its left and right children recursively.
*
- * @param data Dataset which we are using.
* @param maxLeafSize Maximum number of points held in a leaf.
*/
- void SplitNode(MatType& data,
- const size_t maxLeafSize);
+ void SplitNode(const size_t maxLeafSize);
/**
* Splits the current node, assigning its left and right children recursively.
@@ -439,8 +440,7 @@ class BinarySpaceTree
* @param oldFromNew Vector holding permuted indices.
* @param maxLeafSize Maximum number of points held in a leaf.
*/
- void SplitNode(MatType& data,
- std::vector<size_t>& oldFromNew,
+ void SplitNode(std::vector<size_t>& oldFromNew,
const size_t maxLeafSize);
public:
diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
index 9764578..ebc4c23 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
@@ -23,7 +23,7 @@ template<typename BoundType,
typename MatType,
typename SplitType>
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
- MatType& data,
+ const MatType& data,
const size_t maxLeafSize) :
left(NULL),
right(NULL),
@@ -32,10 +32,10 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
count(data.n_cols), /* and spans all of the dataset. */
bound(data.n_rows),
parentDistance(0), // Parent distance for the root is 0: it has no parent.
- dataset(data)
+ dataset(new MatType(data)) // Copies the dataset.
{
// Do the actual splitting of this node.
- SplitNode(data, maxLeafSize);
+ SplitNode(maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ -46,7 +46,7 @@ template<typename BoundType,
typename MatType,
typename SplitType>
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
- MatType& data,
+ const MatType& data,
std::vector<size_t>& oldFromNew,
const size_t maxLeafSize) :
left(NULL),
@@ -56,7 +56,7 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
count(data.n_cols),
bound(data.n_rows),
parentDistance(0), // Parent distance for the root is 0: it has no parent.
- dataset(data)
+ dataset(new MatType(data)) // Copies the dataset.
{
// Initialize oldFromNew correctly.
oldFromNew.resize(data.n_cols);
@@ -64,7 +64,7 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
oldFromNew[i] = i; // Fill with unharmed indices.
// Now do the actual splitting.
- SplitNode(data, oldFromNew, maxLeafSize);
+ SplitNode(oldFromNew, maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ -75,7 +75,7 @@ template<typename BoundType,
typename MatType,
typename SplitType>
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
- MatType& data,
+ const MatType& data,
std::vector<size_t>& oldFromNew,
std::vector<size_t>& newFromOld,
const size_t maxLeafSize) :
@@ -86,7 +86,7 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
count(data.n_cols),
bound(data.n_rows),
parentDistance(0), // Parent distance for the root is 0: it has no parent.
- dataset(data)
+ dataset(new MatType(data)) // Copies the dataset.
{
// Initialize the oldFromNew vector correctly.
oldFromNew.resize(data.n_cols);
@@ -94,7 +94,7 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
oldFromNew[i] = i; // Fill with unharmed indices.
// Now do the actual splitting.
- SplitNode(data, oldFromNew, maxLeafSize);
+ SplitNode(oldFromNew, maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ -110,21 +110,20 @@ template<typename BoundType,
typename MatType,
typename SplitType>
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
- MatType& data,
+ BinarySpaceTree* parent,
const size_t begin,
const size_t count,
- BinarySpaceTree* parent,
const size_t maxLeafSize) :
left(NULL),
right(NULL),
parent(parent),
begin(begin),
count(count),
- bound(data.n_rows),
- dataset(data)
+ bound(parent->Dataset().n_rows),
+ dataset(&parent->Dataset()) // Point to the parent's dataset.
{
// Perform the actual splitting.
- SplitNode(data, maxLeafSize);
+ SplitNode(maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ -135,26 +134,25 @@ template<typename BoundType,
typename MatType,
typename SplitType>
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
- MatType& data,
+ BinarySpaceTree* parent,
const size_t begin,
const size_t count,
std::vector<size_t>& oldFromNew,
- BinarySpaceTree* parent,
const size_t maxLeafSize) :
left(NULL),
right(NULL),
parent(parent),
begin(begin),
count(count),
- bound(data.n_rows),
- dataset(data)
+ bound(parent->Dataset().n_rows),
+ dataset(&parent->Dataset())
{
// Hopefully the vector is initialized correctly! We can't check that
// entirely but we can do a minor sanity check.
- assert(oldFromNew.size() == data.n_cols);
+ assert(oldFromNew.size() == dataset->n_cols);
// Perform the actual splitting.
- SplitNode(data, oldFromNew, maxLeafSize);
+ SplitNode(oldFromNew, maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ -165,34 +163,33 @@ template<typename BoundType,
typename MatType,
typename SplitType>
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
- MatType& data,
+ BinarySpaceTree* parent,
const size_t begin,
const size_t count,
std::vector<size_t>& oldFromNew,
std::vector<size_t>& newFromOld,
- BinarySpaceTree* parent,
const size_t maxLeafSize) :
left(NULL),
right(NULL),
parent(parent),
begin(begin),
count(count),
- bound(data.n_rows),
- dataset(data)
+ bound(parent->Dataset()->n_rows),
+ dataset(&parent->Dataset())
{
// Hopefully the vector is initialized correctly! We can't check that
// entirely but we can do a minor sanity check.
- Log::Assert(oldFromNew.size() == data.n_cols);
+ Log::Assert(oldFromNew.size() == dataset->n_cols);
// Perform the actual splitting.
- SplitNode(data, oldFromNew, maxLeafSize);
+ SplitNode(oldFromNew, maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
// Map the newFromOld indices correctly.
- newFromOld.resize(data.n_cols);
- for (size_t i = 0; i < data.n_cols; i++)
+ newFromOld.resize(dataset->n_cols);
+ for (size_t i = 0; i < dataset->n_cols; i++)
newFromOld[oldFromNew[i]] = i;
}
@@ -215,7 +212,7 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
stat(other.stat),
parentDistance(other.parentDistance),
furthestDescendantDistance(other.furthestDescendantDistance),
- dataset(other.dataset)
+ dataset(new MatType(*other.dataset)) // Copy matrix.
{
// Create left and right children (if any).
if (other.Left())
@@ -247,6 +244,10 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::
delete left;
if (right)
delete right;
+
+ // If we're the root, delete the matrix.
+ if (!parent)
+ delete dataset;
}
/**
@@ -514,11 +515,10 @@ template<typename BoundType,
typename MatType,
typename SplitType>
void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
- MatType& data,
const size_t maxLeafSize)
{
// We need to expand the bounds of this node properly.
- bound |= data.cols(begin, begin + count - 1);
+ bound |= dataset->cols(begin, begin + count - 1);
// Calculate the furthest descendant distance.
furthestDescendantDistance = 0.5 * bound.Diameter();
@@ -534,7 +534,8 @@ void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
// Split the node. The elements of 'data' are reordered by the splitting
// algorithm. This function call updates splitCol.
- const bool split = SplitType::SplitNode(bound, data, begin, count, splitCol);
+ const bool split = SplitType::SplitNode(bound, *dataset, begin, count,
+ splitCol);
// The node may not be always split. For instance, if all the points are the
// same, we can't split them.
@@ -543,10 +544,10 @@ void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
// Now that we know the split column, we will recursively split the children
// by calling their constructors (which perform this splitting process).
- left = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, begin,
- splitCol - begin, this, maxLeafSize);
- right = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, splitCol,
- begin + count - splitCol, this, maxLeafSize);
+ left = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, begin,
+ splitCol - begin, maxLeafSize);
+ right = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, splitCol,
+ begin + count - splitCol, maxLeafSize);
// Calculate parent distances for those two nodes.
arma::vec centroid, leftCentroid, rightCentroid;
@@ -568,13 +569,12 @@ template<typename BoundType,
typename MatType,
typename SplitType>
void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
- MatType& data,
std::vector<size_t>& oldFromNew,
const size_t maxLeafSize)
{
// This should be a single function for Bound.
// We need to expand the bounds of this node properly.
- bound |= data.cols(begin, begin + count - 1);
+ bound |= dataset->cols(begin, begin + count - 1);
// Calculate the furthest descendant distance.
furthestDescendantDistance = 0.5 * bound.Diameter();
@@ -590,8 +590,8 @@ void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
// Split the node. The elements of 'data' are reordered by the splitting
// algorithm. This function call updates splitCol and oldFromNew.
- const bool split = SplitType::SplitNode(bound, data, begin, count, splitCol,
- oldFromNew);
+ const bool split = SplitType::SplitNode(bound, *dataset, begin, count,
+ splitCol, oldFromNew);
// The node may not be always split. For instance, if all the points are the
// same, we can't split them.
@@ -600,10 +600,10 @@ void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
// Now that we know the split column, we will recursively split the children
// by calling their constructors (which perform this splitting process).
- left = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, begin,
- splitCol - begin, oldFromNew, this, maxLeafSize);
- right = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, splitCol,
- begin + count - splitCol, oldFromNew, this, maxLeafSize);
+ left = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, begin,
+ splitCol - begin, oldFromNew, maxLeafSize);
+ right = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, splitCol,
+ begin + count - splitCol, oldFromNew, maxLeafSize);
// Calculate parent distances for those two nodes.
arma::vec centroid, leftCentroid, rightCentroid;
More information about the mlpack-git
mailing list