[mlpack-git] master: Merge branch 'serialize' of https://github.com/rcurtin/mlpack into rcurtin-serialize (d470910)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Fri Jul 10 19:00:30 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/4a97187bbba7ce8a6191b714949dd818ef0f37d2...e5905e62c15d1bcff21e6359b11efcd7ab6d7ca0
>---------------------------------------------------------------
commit d470910697bdd1fec66f049438d8fc6a73a7a5e7
Merge: 4a97187 5611e1f
Author: Ryan Curtin <ryan at ratml.org>
Date: Fri Jul 10 22:57:34 2015 +0000
Merge branch 'serialize' of https://github.com/rcurtin/mlpack into rcurtin-serialize
>---------------------------------------------------------------
d470910697bdd1fec66f049438d8fc6a73a7a5e7
CMakeLists.txt | 1 +
src/mlpack/core/arma_extend/Mat_extra_bones.hpp | 4 +
src/mlpack/core/arma_extend/Mat_extra_meat.hpp | 32 +
src/mlpack/core/arma_extend/SpMat_extra_bones.hpp | 6 +-
src/mlpack/core/arma_extend/SpMat_extra_meat.hpp | 34 +-
src/mlpack/core/arma_extend/arma_extend.hpp | 5 +
src/mlpack/core/data/CMakeLists.txt | 3 +
src/mlpack/core/data/extension.hpp | 33 +
src/mlpack/core/data/format.hpp | 26 +
src/mlpack/core/data/load.hpp | 42 +-
src/mlpack/core/data/load_impl.hpp | 109 +++-
src/mlpack/core/data/save.hpp | 40 +-
src/mlpack/core/data/save_impl.hpp | 95 ++-
src/mlpack/core/data/serialization_shim.hpp | 575 +++++++++++++++++
src/mlpack/core/dists/discrete_distribution.hpp | 11 +-
src/mlpack/core/dists/gaussian_distribution.cpp | 3 +-
src/mlpack/core/dists/gaussian_distribution.hpp | 16 +
src/mlpack/core/dists/laplace_distribution.hpp | 10 +
src/mlpack/core/dists/regression_distribution.hpp | 23 +-
src/mlpack/core/kernels/cosine_distance.hpp | 4 +
src/mlpack/core/kernels/epanechnikov_kernel.hpp | 6 +
.../core/kernels/epanechnikov_kernel_impl.hpp | 9 +
src/mlpack/core/kernels/example_kernel.hpp | 7 +
src/mlpack/core/kernels/gaussian_kernel.hpp | 8 +
.../core/kernels/hyperbolic_tangent_kernel.hpp | 8 +
src/mlpack/core/kernels/laplacian_kernel.hpp | 7 +
src/mlpack/core/kernels/linear_kernel.hpp | 4 +
src/mlpack/core/kernels/polynomial_kernel.hpp | 8 +
src/mlpack/core/kernels/spherical_kernel.hpp | 8 +
src/mlpack/core/kernels/triangular_kernel.hpp | 7 +
src/mlpack/core/math/range.hpp | 6 +
src/mlpack/core/math/range_impl.hpp | 8 +
src/mlpack/core/metrics/ip_metric.hpp | 16 +-
src/mlpack/core/metrics/ip_metric_impl.hpp | 33 +-
src/mlpack/core/metrics/lmetric.hpp | 4 +
src/mlpack/core/metrics/mahalanobis_distance.hpp | 4 +
.../core/metrics/mahalanobis_distance_impl.hpp | 9 +
src/mlpack/core/tree/ballbound.hpp | 18 +-
src/mlpack/core/tree/ballbound_impl.hpp | 21 +
.../tree/binary_space_tree/binary_space_tree.hpp | 115 ++--
.../binary_space_tree/binary_space_tree_impl.hpp | 225 +++++--
src/mlpack/core/tree/hrectbound.hpp | 6 +
src/mlpack/core/tree/hrectbound_impl.hpp | 20 +
src/mlpack/core/tree/statistic.hpp | 8 +-
src/mlpack/core/util/sfinae_utility.hpp | 1 -
.../linear_regression/linear_regression.hpp | 20 +-
.../methods/neighbor_search/neighbor_search.hpp | 11 +-
.../neighbor_search/neighbor_search_impl.hpp | 56 +-
.../neighbor_search/neighbor_search_stat.hpp | 18 +-
.../methods/range_search/range_search_stat.hpp | 9 +-
src/mlpack/prereqs.hpp | 5 +
src/mlpack/tests/CMakeLists.txt | 1 +
src/mlpack/tests/allkfn_test.cpp | 7 +-
src/mlpack/tests/allknn_test.cpp | 7 +-
src/mlpack/tests/load_save_test.cpp | 106 ++++
src/mlpack/tests/range_search_test.cpp | 1 -
src/mlpack/tests/serialization_test.cpp | 688 +++++++++++++++++++++
src/mlpack/tests/tree_test.cpp | 84 ++-
58 files changed, 2378 insertions(+), 273 deletions(-)
diff --cc src/mlpack/core/kernels/spherical_kernel.hpp
index a89e266,c8b4763..978082b
--- a/src/mlpack/core/kernels/spherical_kernel.hpp
+++ b/src/mlpack/core/kernels/spherical_kernel.hpp
@@@ -95,10 -95,15 +95,18 @@@ class SphericalKerne
{
return (t <= bandwidth) ? 1.0 : 0.0;
}
+ double Gradient(double t) {
+ return t == bandwidth ? arma::datum::nan : 0.0;
+ }
+ //! Serialize the object.
+ template<typename Archive>
+ void Serialize(Archive& ar, const unsigned int /* version */)
+ {
+ ar & data::CreateNVP(bandwidth, "bandwidth");
+ ar & data::CreateNVP(bandwidthSquared, "bandwidthSquared");
+ }
+
//! Return a string representation of the kernel.
std::string ToString() const
{
diff --cc src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
index c358ec0,6e2e21f..c90b0de
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
@@@ -65,10 -65,9 +65,11 @@@ class BinarySpaceTre
//! The worst possible distance to the furthest descendant, cached to speed
//! things up.
double furthestDescendantDistance;
+ //! The minimum distance from the center to any edge of the bound.
+ double minimumBoundDistance;
- //! The dataset.
- MatType& dataset;
+ //! The dataset. If we are the root of the tree, we own the dataset and must
+ //! delete it.
+ MatType* dataset;
public:
//! So other classes can use TreeType::Mat.
@@@ -128,28 -132,26 +134,27 @@@
const size_t maxLeafSize = 20);
/**
- * Construct this node on a subset of the given matrix, starting at column
- * begin and using count points. The ordering of that subset of points
- * will be modified! This is used for recursive tree-building by the other
- * constructors which don't specify point indices.
+ * Construct this node as a child of the given parent, starting at column
- * begin_in and using count_in points. The ordering of that subset of points
- * in the parent's data matrix will be modified! This is used for recursive
++ * begin and using count points. The ordering of that subset of points in the
++ * parent's data matrix will be modified! This is used for recursive
+ * tree-building by the other constructors which don't specify point indices.
*
- * @param data Dataset to create tree from. This will be modified!
+ * @param parent Parent of this node. Its dataset will be modified!
* @param begin Index of point to start tree construction with.
* @param count Number of points to use to construct tree.
* @param maxLeafSize Size of each leaf in the tree.
*/
- BinarySpaceTree(MatType& data,
+ BinarySpaceTree(BinarySpaceTree* parent,
const size_t begin,
const size_t count,
+ SplitType& splitter,
- BinarySpaceTree* parent = NULL,
const size_t maxLeafSize = 20);
/**
- * Construct this node on a subset of the given matrix, starting at column
- * begin_in and using count_in points. The ordering of that subset of points
- * will be modified! This is used for recursive tree-building by the other
- * constructors which don't specify point indices.
+ * Construct this node as a child of the given parent, starting at column
- * begin_in and using count_in points. The ordering of that subset of points
- * in the parent's data matrix will be modified! This is used for recursive
++ * begin and using count points. The ordering of that subset of points in the
++ * parent's data matrix will be modified! This is used for recursive
+ * tree-building by the other constructors which don't specify point indices.
*
* A mapping of the old point indices to the new point indices is filled, but
* it is expected that the vector is already allocated with size greater than
@@@ -167,15 -169,13 +172,14 @@@
const size_t begin,
const size_t count,
std::vector<size_t>& oldFromNew,
+ SplitType& splitter,
- BinarySpaceTree* parent = NULL,
const size_t maxLeafSize = 20);
/**
- * Construct this node on a subset of the given matrix, starting at column
- * begin_in and using count_in points. The ordering of that subset of points
- * will be modified! This is used for recursive tree-building by the other
- * constructors which don't specify point indices.
+ * Construct this node as a child of the given parent, starting at column
- * begin_in and using count_in points. The ordering of that subset of points
- * in the parent's data matrix will be modified! This is used for recursive
++ * begin and using count points. The ordering of that subset of points in the
++ * parent's data matrix will be modified! This is used for recursive
+ * tree-building by the other constructors which don't specify point indices.
*
* A mapping of the old point indices to the new point indices is filled, as
* well as a mapping of the new point indices to the old point indices. It is
@@@ -197,8 -197,6 +201,7 @@@
const size_t count,
std::vector<size_t>& oldFromNew,
std::vector<size_t>& newFromOld,
+ SplitType& splitter,
- BinarySpaceTree* parent = NULL,
const size_t maxLeafSize = 20);
/**
@@@ -210,9 -208,26 +213,19 @@@
BinarySpaceTree(const BinarySpaceTree& other);
/**
+ * Initialize the tree from a boost::serialization archive.
+ *
- * @param ar Archive to load tree from. Must be an iarchive, not an oarchive!
++ * @param ar Archive to load tree from. Must be an iarchive, not an oarchive.
+ */
+ template<typename Archive>
+ BinarySpaceTree(
+ Archive& ar,
+ const typename boost::enable_if<typename Archive::is_loading>::type* = 0);
+
+ /**
* Deletes this node, deallocating the memory for the children and calling
* their destructors in turn. This will invalidate any pointers or references
- * to any nodes which are children of this one. Also, if this is the root of
- * the tree (specifically, if Parent() == NULL), it will delete the dataset.
- *
- * So, if you want to preserve other parts of the tree, the easiest way is
- * probably to set Left() or Right() to NULL and maintain those branches
- * yourself. If you want to preserve the dataset, the easiest way is probably
- * to set Parent() to NULL before deleting the object. Either way, if you
- * find yourself doing that, exercise care and caution.
+ * to any nodes which are children of this one.
*/
~BinarySpaceTree();
@@@ -428,36 -443,49 +441,50 @@@
/**
* Splits the current node, assigning its left and right children recursively.
*
- * @param data Dataset which we are using.
* @param maxLeafSize Maximum number of points held in a leaf.
++ * @param splitter Instantiated SplitType object.
*/
- void SplitNode(MatType& data,
- const size_t maxLeafSize,
- SplitType& splitter);
- void SplitNode(const size_t maxLeafSize);
++ void SplitNode(const size_t maxLeafSize, SplitType& splitter);
/**
* Splits the current node, assigning its left and right children recursively.
* Also returns a list of the changed indices.
*
-- * @param data Dataset which we are using.
* @param oldFromNew Vector holding permuted indices.
* @param maxLeafSize Maximum number of points held in a leaf.
++ * @param splitter Instantiated SplitType object.
*/
- void SplitNode(MatType& data,
- std::vector<size_t>& oldFromNew,
+ void SplitNode(std::vector<size_t>& oldFromNew,
- const size_t maxLeafSize);
+ const size_t maxLeafSize,
+ SplitType& splitter);
+ protected:
+ /**
+ * A default constructor. This is meant to only be used with
+ * boost::serialization, which is allowed with the friend declaration below.
+ * This does not return a valid tree! The method must be protected, so that
+ * the serialization shim can work with the default constructor.
+ */
+ BinarySpaceTree();
+
+ //! Friend access is given for the default constructor.
+ friend class boost::serialization::access;
+
public:
/**
+ * Serialize the tree.
+ */
+ template<typename Archive>
+ void Serialize(Archive& ar, const unsigned int version);
+
+ /**
* Returns a string representation of this object.
*/
std::string ToString() const;
--
};
-}; // namespace tree
-}; // namespace mlpack
+} // namespace tree
+} // namespace mlpack
// Include implementation.
#include "binary_space_tree_impl.hpp"
diff --cc src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
index 61f43da,db2ad36..7ad8298
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
@@@ -32,11 -33,10 +33,11 @@@ BinarySpaceTree<BoundType, StatisticTyp
count(data.n_cols), /* and spans all of the dataset. */
bound(data.n_rows),
parentDistance(0), // Parent distance for the root is 0: it has no parent.
- dataset(data)
+ dataset(new MatType(data)) // Copies the dataset.
{
// Do the actual splitting of this node.
- SplitNode(maxLeafSize);
+ SplitType splitter;
- SplitNode(data, maxLeafSize, splitter);
++ SplitNode(maxLeafSize, splitter);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@@ -65,8 -65,7 +66,8 @@@ BinarySpaceTree<BoundType, StatisticTyp
oldFromNew[i] = i; // Fill with unharmed indices.
// Now do the actual splitting.
- SplitNode(oldFromNew, maxLeafSize);
+ SplitType splitter;
- SplitNode(data, oldFromNew, maxLeafSize, splitter);
++ SplitNode(oldFromNew, maxLeafSize, splitter);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@@ -96,8 -95,7 +97,8 @@@ BinarySpaceTree<BoundType, StatisticTyp
oldFromNew[i] = i; // Fill with unharmed indices.
// Now do the actual splitting.
- SplitNode(oldFromNew, maxLeafSize);
+ SplitType splitter;
- SplitNode(data, oldFromNew, maxLeafSize, splitter);
++ SplitNode(oldFromNew, maxLeafSize, splitter);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@@ -113,22 -111,20 +114,21 @@@ template<typename BoundType
typename MatType,
typename SplitType>
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
- MatType& data,
+ BinarySpaceTree* parent,
const size_t begin,
const size_t count,
+ SplitType& splitter,
- BinarySpaceTree* parent,
const size_t maxLeafSize) :
left(NULL),
right(NULL),
parent(parent),
begin(begin),
count(count),
- bound(data.n_rows),
- dataset(data)
+ bound(parent->Dataset().n_rows),
+ dataset(&parent->Dataset()) // Point to the parent's dataset.
{
// Perform the actual splitting.
- SplitNode(data, maxLeafSize, splitter);
- SplitNode(maxLeafSize);
++ SplitNode(maxLeafSize, splitter);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@@ -143,8 -139,6 +143,7 @@@ BinarySpaceTree<BoundType, StatisticTyp
const size_t begin,
const size_t count,
std::vector<size_t>& oldFromNew,
+ SplitType& splitter,
- BinarySpaceTree* parent,
const size_t maxLeafSize) :
left(NULL),
right(NULL),
@@@ -156,10 -150,10 +155,10 @@@
{
// Hopefully the vector is initialized correctly! We can't check that
// entirely but we can do a minor sanity check.
- assert(oldFromNew.size() == data.n_cols);
+ assert(oldFromNew.size() == dataset->n_cols);
// Perform the actual splitting.
- SplitNode(data, oldFromNew, maxLeafSize, splitter);
- SplitNode(oldFromNew, maxLeafSize);
++ SplitNode(oldFromNew, maxLeafSize, splitter);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@@ -175,8 -169,6 +174,7 @@@ BinarySpaceTree<BoundType, StatisticTyp
const size_t count,
std::vector<size_t>& oldFromNew,
std::vector<size_t>& newFromOld,
+ SplitType& splitter,
- BinarySpaceTree* parent,
const size_t maxLeafSize) :
left(NULL),
right(NULL),
@@@ -188,10 -180,10 +186,10 @@@
{
// Hopefully the vector is initialized correctly! We can't check that
// entirely but we can do a minor sanity check.
- Log::Assert(oldFromNew.size() == data.n_cols);
+ Log::Assert(oldFromNew.size() == dataset->n_cols);
// Perform the actual splitting.
- SplitNode(data, oldFromNew, maxLeafSize, splitter);
- SplitNode(oldFromNew, maxLeafSize);
++ SplitNode(oldFromNew, maxLeafSize, splitter);
// Create the statistic depending on if we are a leaf or not.
stat = StatisticType(*this);
@@@ -520,12 -557,10 +563,11 @@@ template<typename BoundType
typename MatType,
typename SplitType>
void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
- MatType& data,
- const size_t maxLeafSize)
+ const size_t maxLeafSize,
+ SplitType& splitter)
{
// We need to expand the bounds of this node properly.
- bound |= data.cols(begin, begin + count - 1);
+ bound |= dataset->cols(begin, begin + count - 1);
// Calculate the furthest descendant distance.
furthestDescendantDistance = 0.5 * bound.Diameter();
@@@ -541,7 -576,8 +583,8 @@@
// Split the node. The elements of 'data' are reordered by the splitting
// algorithm. This function call updates splitCol.
- const bool split = splitter.SplitNode(bound, data, begin, count, splitCol);
- const bool split = SplitType::SplitNode(bound, *dataset, begin, count,
++ const bool split = splitter.SplitNode(bound, *dataset, begin, count,
+ splitCol);
// The node may not be always split. For instance, if all the points are the
// same, we can't split them.
@@@ -550,10 -586,10 +593,10 @@@
// Now that we know the split column, we will recursively split the children
// by calling their constructors (which perform this splitting process).
- left = new BinarySpaceTree(data, begin, splitCol - begin, splitter, this,
- left = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, begin,
- splitCol - begin, maxLeafSize);
- right = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, splitCol,
- begin + count - splitCol, maxLeafSize);
++ left = new BinarySpaceTree(this, begin, splitCol - begin, splitter,
+ maxLeafSize);
- right = new BinarySpaceTree(data, splitCol, begin + count - splitCol,
- splitter, this, maxLeafSize);
++ right = new BinarySpaceTree(this, splitCol, begin + count - splitCol,
++ splitter, maxLeafSize);
// Calculate parent distances for those two nodes.
arma::vec centroid, leftCentroid, rightCentroid;
@@@ -575,10 -611,8 +618,9 @@@ template<typename BoundType
typename MatType,
typename SplitType>
void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
- MatType& data,
std::vector<size_t>& oldFromNew,
- const size_t maxLeafSize)
+ const size_t maxLeafSize,
+ SplitType& splitter)
{
// This should be a single function for Bound.
// We need to expand the bounds of this node properly.
@@@ -598,8 -632,8 +640,8 @@@
// Split the node. The elements of 'data' are reordered by the splitting
// algorithm. This function call updates splitCol and oldFromNew.
- const bool split = splitter.SplitNode(bound, data, begin, count, splitCol,
- const bool split = SplitType::SplitNode(bound, *dataset, begin, count,
- splitCol, oldFromNew);
++ const bool split = splitter.SplitNode(bound, *dataset, begin, count, splitCol,
+ oldFromNew);
// The node may not be always split. For instance, if all the points are the
// same, we can't split them.
@@@ -608,10 -642,10 +650,10 @@@
// Now that we know the split column, we will recursively split the children
// by calling their constructors (which perform this splitting process).
- left = new BinarySpaceTree(data, begin, splitCol - begin, oldFromNew,
- splitter, this, maxLeafSize);
- right = new BinarySpaceTree(data, splitCol, begin + count - splitCol,
- oldFromNew, splitter, this, maxLeafSize);
- left = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, begin,
- splitCol - begin, oldFromNew, maxLeafSize);
- right = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, splitCol,
- begin + count - splitCol, oldFromNew, maxLeafSize);
++ left = new BinarySpaceTree(this, begin, splitCol - begin, oldFromNew,
++ splitter, maxLeafSize);
++ right = new BinarySpaceTree(this, splitCol, begin + count - splitCol,
++ oldFromNew, splitter, maxLeafSize);
// Calculate parent distances for those two nodes.
arma::vec centroid, leftCentroid, rightCentroid;
diff --cc src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
index 6251e8f,c6dc565..48a65fa
--- a/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
+++ b/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
@@@ -81,16 -60,13 +63,16 @@@ NeighborSearch(const typename TreeType:
}
// Construct the object.
-template<typename SortPolicy, typename MetricType, typename TreeType>
-NeighborSearch<SortPolicy, MetricType, TreeType>::NeighborSearch(
- TreeType* referenceTree,
- const bool singleMode,
- const MetricType metric) :
+template<typename SortPolicy,
+ typename MetricType,
+ typename TreeType,
+ template<typename> class TraversalType>
+NeighborSearch<SortPolicy, MetricType, TreeType, TraversalType>::
+NeighborSearch(TreeType* referenceTree,
+ const bool singleMode,
+ const MetricType metric) :
- referenceSet(referenceTree->Dataset()),
referenceTree(referenceTree),
+ referenceSet(referenceTree->Dataset()),
treeOwner(false),
naive(false),
singleMode(singleMode),
@@@ -204,8 -165,12 +178,12 @@@ void NeighborSearch<SortPolicy, MetricT
Timer::Stop("tree_building");
Timer::Start("computing_neighbors");
+ // Create the helper object for the tree traversal.
+ RuleType rules(referenceSet, queryTree->Dataset(), *neighborPtr,
+ *distancePtr, metric);
+
// Create the traverser.
- typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);
+ TraversalType<RuleType> traverser(rules);
traverser.Traverse(*queryTree, *referenceTree);
More information about the mlpack-git
mailing list