[mlpack-git] master: Remove unnecessary splitDimension and maxLeafSize. This results in a pretty trivial speedup (like, 0.5%?), but it makes the tree require less memory. Also I've removed a couple of entirely unused functions. (46e038c)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Tue Apr 7 17:27:56 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/81fe638aa0c8d9592bbf7b434110140eb9bb86e7...46e038cdb082ccbd44e0583e1bae6d6da03e26d1
>---------------------------------------------------------------
commit 46e038cdb082ccbd44e0583e1bae6d6da03e26d1
Author: ryan <ryan at ratml.org>
Date: Tue Apr 7 17:27:12 2015 -0400
Remove unnecessary splitDimension and maxLeafSize.
This results in a pretty trivial speedup (like, 0.5%?), but it makes the tree require less memory.
Also I've removed a couple of entirely unused functions.
>---------------------------------------------------------------
46e038cdb082ccbd44e0583e1bae6d6da03e26d1
.../tree/binary_space_tree/binary_space_tree.hpp | 53 ++------------
.../binary_space_tree/binary_space_tree_impl.hpp | 83 +++++-----------------
.../core/tree/binary_space_tree/mean_split.hpp | 2 -
.../tree/binary_space_tree/mean_split_impl.hpp | 6 +-
src/mlpack/methods/neighbor_search/allknn_main.cpp | 7 ++
5 files changed, 32 insertions(+), 119 deletions(-)
diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
index 6f98c6c..9af121b 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
@@ -56,14 +56,10 @@ class BinarySpaceTree
//! The number of points of the dataset contained in this node (and its
//! children).
size_t count;
- //! The max leaf size.
- size_t maxLeafSize;
//! The bound object for this node.
BoundType bound;
//! Any extra data contained in the node.
StatisticType stat;
- //! The dimension this node split on if it is a parent.
- size_t splitDimension;
//! The distance from the centroid of this node to the centroid of the parent.
double parentDistance;
//! The worst possible distance to the furthest descendant, cached to speed
@@ -257,14 +253,6 @@ class BinarySpaceTree
//! Return whether or not this node is a leaf (true if it has no children).
bool IsLeaf() const;
- //! Return the max leaf size.
- size_t MaxLeafSize() const { return maxLeafSize; }
- //! Modify the max leaf size.
- size_t& MaxLeafSize() { return maxLeafSize; }
-
- //! Fills the tree to the specified level.
- size_t ExtendTree(const size_t level);
-
//! Gets the left child of this node.
BinarySpaceTree* Left() const { return left; }
//! Modify the left child of this node.
@@ -280,11 +268,6 @@ class BinarySpaceTree
//! Modify the parent of this node.
BinarySpaceTree*& Parent() { return parent; }
- //! Get the split dimension for this node.
- size_t SplitDimension() const { return splitDimension; }
- //! Modify the split dimension for this node.
- size_t& SplitDimension() { return splitDimension; }
-
//! Get the dataset which the tree is built on.
const MatType& Dataset() const { return dataset; }
//! Modify the dataset which the tree is built on. Be careful!
@@ -410,11 +393,6 @@ class BinarySpaceTree
}
/**
- * Returns the dimension this parent's children are split on.
- */
- size_t GetSplitDimension() const;
-
- /**
* Obtains the number of nodes in the tree, starting with this.
*/
size_t TreeSize() const;
@@ -445,33 +423,13 @@ class BinarySpaceTree
private:
/**
- * Private copy constructor, available only to fill (pad) the tree to a
- * specified level.
- */
- BinarySpaceTree(const size_t begin,
- const size_t count,
- BoundType bound,
- StatisticType stat,
- const int maxLeafSize = 20) :
- left(NULL),
- right(NULL),
- begin(begin),
- count(count),
- bound(bound),
- stat(stat),
- maxLeafSize(maxLeafSize) { }
-
- BinarySpaceTree* CopyMe()
- {
- return new BinarySpaceTree(begin, count, bound, stat, maxLeafSize);
- }
-
- /**
* Splits the current node, assigning its left and right children recursively.
*
* @param data Dataset which we are using.
+ * @param maxLeafSize Maximum number of points held in a leaf.
*/
- void SplitNode(MatType& data);
+ void SplitNode(MatType& data,
+ const size_t maxLeafSize);
/**
* Splits the current node, assigning its left and right children recursively.
@@ -479,8 +437,11 @@ class BinarySpaceTree
*
* @param data Dataset which we are using.
* @param oldFromNew Vector holding permuted indices.
+ * @param maxLeafSize Maximum number of points held in a leaf.
*/
- void SplitNode(MatType& data, std::vector<size_t>& oldFromNew);
+ void SplitNode(MatType& data,
+ std::vector<size_t>& oldFromNew,
+ const size_t maxLeafSize);
public:
/**
diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
index 35db29a..a068899 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
@@ -30,13 +30,12 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
parent(NULL),
begin(0), /* This root node starts at index 0, */
count(data.n_cols), /* and spans all of the dataset. */
- maxLeafSize(maxLeafSize),
bound(data.n_rows),
parentDistance(0), // Parent distance for the root is 0: it has no parent.
dataset(data)
{
// Do the actual splitting of this node.
- SplitNode(data);
+ SplitNode(data, maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ -55,7 +54,6 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
parent(NULL),
begin(0),
count(data.n_cols),
- maxLeafSize(maxLeafSize),
bound(data.n_rows),
parentDistance(0), // Parent distance for the root is 0: it has no parent.
dataset(data)
@@ -66,7 +64,7 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
oldFromNew[i] = i; // Fill with unharmed indices.
// Now do the actual splitting.
- SplitNode(data, oldFromNew);
+ SplitNode(data, oldFromNew, maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ -86,7 +84,6 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
parent(NULL),
begin(0),
count(data.n_cols),
- maxLeafSize(maxLeafSize),
bound(data.n_rows),
parentDistance(0), // Parent distance for the root is 0: it has no parent.
dataset(data)
@@ -97,7 +94,7 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
oldFromNew[i] = i; // Fill with unharmed indices.
// Now do the actual splitting.
- SplitNode(data, oldFromNew);
+ SplitNode(data, oldFromNew, maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ -123,12 +120,11 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
parent(parent),
begin(begin),
count(count),
- maxLeafSize(maxLeafSize),
bound(data.n_rows),
dataset(data)
{
// Perform the actual splitting.
- SplitNode(data);
+ SplitNode(data, maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ -150,7 +146,6 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
parent(parent),
begin(begin),
count(count),
- maxLeafSize(maxLeafSize),
bound(data.n_rows),
dataset(data)
{
@@ -159,7 +154,7 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
assert(oldFromNew.size() == data.n_cols);
// Perform the actual splitting.
- SplitNode(data, oldFromNew);
+ SplitNode(data, oldFromNew, maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ -182,7 +177,6 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
parent(parent),
begin(begin),
count(count),
- maxLeafSize(maxLeafSize),
bound(data.n_rows),
dataset(data)
{
@@ -191,7 +185,7 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
Log::Assert(oldFromNew.size() == data.n_cols);
// Perform the actual splitting.
- SplitNode(data, oldFromNew);
+ SplitNode(data, oldFromNew, maxLeafSize);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@ -202,21 +196,6 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
newFromOld[oldFromNew[i]] = i;
}
-/*
-template<typename BoundType, typename StatisticType, typename MatType>
-BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree() :
- left(NULL),
- right(NULL),
- parent(NULL),
- begin(0),
- count(0),
- bound(),
- stat(),
- maxLeafSize(20) // Default max leaf size is 20.
-{
- // Nothing to do.
-}*/
-
/**
* Create a binary space tree by copying the other tree. Be careful! This can
* take a long time and use a lot of memory.
@@ -232,10 +211,8 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
parent(other.parent),
begin(other.begin),
count(other.count),
- maxLeafSize(other.maxLeafSize),
bound(other.bound),
stat(other.stat),
- splitDimension(other.splitDimension),
parentDistance(other.parentDistance),
furthestDescendantDistance(other.furthestDescendantDistance),
dataset(other.dataset)
@@ -340,32 +317,6 @@ BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::FindByBeginCount(
return NULL;
}
-template<typename BoundType,
- typename StatisticType,
- typename MatType,
- typename SplitType>
-size_t BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::ExtendTree(
- size_t level)
-{
- --level;
- // Return the number of nodes duplicated.
- size_t nodesDuplicated = 0;
- if (level > 0)
- {
- if (!left)
- {
- left = CopyMe();
- ++nodesDuplicated;
- }
- nodesDuplicated += left->ExtendTree(level);
- if (right)
- {
- nodesDuplicated += right->ExtendTree(level);
- }
- }
- return nodesDuplicated;
-}
-
/* TODO: we can likely calculate this earlier, then store the
* result in a private member variable; for now, we can
* just calculate as needed...
@@ -563,7 +514,8 @@ template<typename BoundType,
typename MatType,
typename SplitType>
void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
- MatType& data)
+ MatType& data,
+ const size_t maxLeafSize)
{
// We need to expand the bounds of this node properly.
bound |= data.cols(begin, begin + count - 1);
@@ -581,9 +533,8 @@ void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
size_t splitCol;
// Split the node. The elements of 'data' are reordered by the splitting
- // algorithm. This function call updates splitDimension and splitCol.
- const bool split = SplitType::SplitNode(bound, data, begin, count,
- splitDimension, splitCol);
+ // algorithm. This function call updates splitCol.
+ const bool split = SplitType::SplitNode(bound, data, begin, count, splitCol);
// The node may not be always split. For instance, if all the points are the
// same, we can't split them.
@@ -618,7 +569,8 @@ template<typename BoundType,
typename SplitType>
void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
MatType& data,
- std::vector<size_t>& oldFromNew)
+ std::vector<size_t>& oldFromNew,
+ const size_t maxLeafSize)
{
// This should be a single function for Bound.
// We need to expand the bounds of this node properly.
@@ -637,10 +589,9 @@ void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
size_t splitCol;
// Split the node. The elements of 'data' are reordered by the splitting
- // algorithm. This function call updates splitDimension, splitCol and
- // oldFromNew.
- const bool split = SplitType::SplitNode(bound, data, begin, count,
- splitDimension, splitCol, oldFromNew);
+ // algorithm. This function call updates splitCol and oldFromNew.
+ const bool split = SplitType::SplitNode(bound, data, begin, count, splitCol,
+ oldFromNew);
// The node may not be always split. For instance, if all the points are the
// same, we can't split them.
@@ -686,9 +637,7 @@ std::string BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::
convert << " Bound: " << std::endl;
convert << mlpack::util::Indent(bound.ToString(), 2);
convert << " Statistic: " << std::endl;
- convert << mlpack::util::Indent(stat.ToString(), 2);
- convert << " Max leaf size: " << maxLeafSize << std::endl;
- convert << " Split dimension: " << splitDimension << std::endl;
+ convert << mlpack::util::Indent(stat->ToString(), 2);
// How many levels should we print? This will print the top two tree levels.
if (left != NULL && parent == NULL)
diff --git a/src/mlpack/core/tree/binary_space_tree/mean_split.hpp b/src/mlpack/core/tree/binary_space_tree/mean_split.hpp
index c6e3c04..219cc8c 100644
--- a/src/mlpack/core/tree/binary_space_tree/mean_split.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/mean_split.hpp
@@ -41,7 +41,6 @@ class MeanSplit
MatType& data,
const size_t begin,
const size_t count,
- size_t& splitDimension,
size_t& splitCol);
/**
@@ -64,7 +63,6 @@ class MeanSplit
MatType& data,
const size_t begin,
const size_t count,
- size_t& splitDimension,
size_t& splitCol,
std::vector<size_t>& oldFromNew);
diff --git a/src/mlpack/core/tree/binary_space_tree/mean_split_impl.hpp b/src/mlpack/core/tree/binary_space_tree/mean_split_impl.hpp
index d4c638b..f9d07fe 100644
--- a/src/mlpack/core/tree/binary_space_tree/mean_split_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/mean_split_impl.hpp
@@ -18,10 +18,9 @@ bool MeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
MatType& data,
const size_t begin,
const size_t count,
- size_t& splitDimension,
size_t& splitCol)
{
- splitDimension = data.n_rows; // Indicate invalid.
+ size_t splitDimension = data.n_rows; // Indicate invalid.
double maxWidth = -1;
// Find the split dimension.
@@ -56,11 +55,10 @@ bool MeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
MatType& data,
const size_t begin,
const size_t count,
- size_t& splitDimension,
size_t& splitCol,
std::vector<size_t>& oldFromNew)
{
- splitDimension = data.n_rows; // Indicate invalid.
+ size_t splitDimension = data.n_rows; // Indicate invalid.
double maxWidth = -1;
// Find the split dimension.
diff --git a/src/mlpack/methods/neighbor_search/allknn_main.cpp b/src/mlpack/methods/neighbor_search/allknn_main.cpp
index 06ebc28..3aaf45e 100644
--- a/src/mlpack/methods/neighbor_search/allknn_main.cpp
+++ b/src/mlpack/methods/neighbor_search/allknn_main.cpp
@@ -68,6 +68,13 @@ int main(int argc, char *argv[])
// Give CLI the command line parameters the user passed in.
CLI::ParseCommandLine(argc, argv);
+ Log::Info << "sizeof(BinarySpaceTree<>): " << sizeof(BinarySpaceTree<bound::HRectBound<2>>) << ".\n";
+ Log::Info << "sizeof(HRectBound<2>): " << sizeof(bound::HRectBound<2>) << ".\n";
+ Log::Info << "sizeof(NeighborSearchStat): " << sizeof(NeighborSearchStat<NearestNeighborSort>) << ".\n";
+ Log::Info << "sizeof(TreeType): " <<
+sizeof(BinarySpaceTree<bound::HRectBound<2>,
+NeighborSearchStat<NearestNeighborSort>>) << ".\n";
+
if (CLI::GetParam<int>("seed") != 0)
math::RandomSeed((size_t) CLI::GetParam<int>("seed"));
else
More information about the mlpack-git
mailing list