[mlpack-git] master: Merge branch 'serialize' of https://github.com/rcurtin/mlpack into rcurtin-serialize (d470910)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Fri Jul 10 19:00:30 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/4a97187bbba7ce8a6191b714949dd818ef0f37d2...e5905e62c15d1bcff21e6359b11efcd7ab6d7ca0

>---------------------------------------------------------------

commit d470910697bdd1fec66f049438d8fc6a73a7a5e7
Merge: 4a97187 5611e1f
Author: Ryan Curtin <ryan at ratml.org>
Date:   Fri Jul 10 22:57:34 2015 +0000

    Merge branch 'serialize' of https://github.com/rcurtin/mlpack into rcurtin-serialize


>---------------------------------------------------------------

d470910697bdd1fec66f049438d8fc6a73a7a5e7
 CMakeLists.txt                                     |   1 +
 src/mlpack/core/arma_extend/Mat_extra_bones.hpp    |   4 +
 src/mlpack/core/arma_extend/Mat_extra_meat.hpp     |  32 +
 src/mlpack/core/arma_extend/SpMat_extra_bones.hpp  |   6 +-
 src/mlpack/core/arma_extend/SpMat_extra_meat.hpp   |  34 +-
 src/mlpack/core/arma_extend/arma_extend.hpp        |   5 +
 src/mlpack/core/data/CMakeLists.txt                |   3 +
 src/mlpack/core/data/extension.hpp                 |  33 +
 src/mlpack/core/data/format.hpp                    |  26 +
 src/mlpack/core/data/load.hpp                      |  42 +-
 src/mlpack/core/data/load_impl.hpp                 | 109 +++-
 src/mlpack/core/data/save.hpp                      |  40 +-
 src/mlpack/core/data/save_impl.hpp                 |  95 ++-
 src/mlpack/core/data/serialization_shim.hpp        | 575 +++++++++++++++++
 src/mlpack/core/dists/discrete_distribution.hpp    |  11 +-
 src/mlpack/core/dists/gaussian_distribution.cpp    |   3 +-
 src/mlpack/core/dists/gaussian_distribution.hpp    |  16 +
 src/mlpack/core/dists/laplace_distribution.hpp     |  10 +
 src/mlpack/core/dists/regression_distribution.hpp  |  23 +-
 src/mlpack/core/kernels/cosine_distance.hpp        |   4 +
 src/mlpack/core/kernels/epanechnikov_kernel.hpp    |   6 +
 .../core/kernels/epanechnikov_kernel_impl.hpp      |   9 +
 src/mlpack/core/kernels/example_kernel.hpp         |   7 +
 src/mlpack/core/kernels/gaussian_kernel.hpp        |   8 +
 .../core/kernels/hyperbolic_tangent_kernel.hpp     |   8 +
 src/mlpack/core/kernels/laplacian_kernel.hpp       |   7 +
 src/mlpack/core/kernels/linear_kernel.hpp          |   4 +
 src/mlpack/core/kernels/polynomial_kernel.hpp      |   8 +
 src/mlpack/core/kernels/spherical_kernel.hpp       |   8 +
 src/mlpack/core/kernels/triangular_kernel.hpp      |   7 +
 src/mlpack/core/math/range.hpp                     |   6 +
 src/mlpack/core/math/range_impl.hpp                |   8 +
 src/mlpack/core/metrics/ip_metric.hpp              |  16 +-
 src/mlpack/core/metrics/ip_metric_impl.hpp         |  33 +-
 src/mlpack/core/metrics/lmetric.hpp                |   4 +
 src/mlpack/core/metrics/mahalanobis_distance.hpp   |   4 +
 .../core/metrics/mahalanobis_distance_impl.hpp     |   9 +
 src/mlpack/core/tree/ballbound.hpp                 |  18 +-
 src/mlpack/core/tree/ballbound_impl.hpp            |  21 +
 .../tree/binary_space_tree/binary_space_tree.hpp   | 115 ++--
 .../binary_space_tree/binary_space_tree_impl.hpp   | 225 +++++--
 src/mlpack/core/tree/hrectbound.hpp                |   6 +
 src/mlpack/core/tree/hrectbound_impl.hpp           |  20 +
 src/mlpack/core/tree/statistic.hpp                 |   8 +-
 src/mlpack/core/util/sfinae_utility.hpp            |   1 -
 .../linear_regression/linear_regression.hpp        |  20 +-
 .../methods/neighbor_search/neighbor_search.hpp    |  11 +-
 .../neighbor_search/neighbor_search_impl.hpp       |  56 +-
 .../neighbor_search/neighbor_search_stat.hpp       |  18 +-
 .../methods/range_search/range_search_stat.hpp     |   9 +-
 src/mlpack/prereqs.hpp                             |   5 +
 src/mlpack/tests/CMakeLists.txt                    |   1 +
 src/mlpack/tests/allkfn_test.cpp                   |   7 +-
 src/mlpack/tests/allknn_test.cpp                   |   7 +-
 src/mlpack/tests/load_save_test.cpp                | 106 ++++
 src/mlpack/tests/range_search_test.cpp             |   1 -
 src/mlpack/tests/serialization_test.cpp            | 688 +++++++++++++++++++++
 src/mlpack/tests/tree_test.cpp                     |  84 ++-
 58 files changed, 2378 insertions(+), 273 deletions(-)

diff --cc src/mlpack/core/kernels/spherical_kernel.hpp
index a89e266,c8b4763..978082b
--- a/src/mlpack/core/kernels/spherical_kernel.hpp
+++ b/src/mlpack/core/kernels/spherical_kernel.hpp
@@@ -95,10 -95,15 +95,18 @@@ class SphericalKerne
    {
      return (t <= bandwidth) ? 1.0 : 0.0;
    }
 +  double Gradient(double t) {
 +    return t == bandwidth ? arma::datum::nan : 0.0;
 +  }
  
+   //! Serialize the object.
+   template<typename Archive>
+   void Serialize(Archive& ar, const unsigned int /* version */)
+   {
+     ar & data::CreateNVP(bandwidth, "bandwidth");
+     ar & data::CreateNVP(bandwidthSquared, "bandwidthSquared");
+   }
+ 
    //! Return a string representation of the kernel.
    std::string ToString() const
    {
diff --cc src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
index c358ec0,6e2e21f..c90b0de
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
@@@ -65,10 -65,9 +65,11 @@@ class BinarySpaceTre
    //! The worst possible distance to the furthest descendant, cached to speed
    //! things up.
    double furthestDescendantDistance;
 +  //! The minimum distance from the center to any edge of the bound.
 +  double minimumBoundDistance;
-   //! The dataset.
-   MatType& dataset;
+   //! The dataset.  If we are the root of the tree, we own the dataset and must
+   //! delete it.
+   MatType* dataset;
  
   public:
    //! So other classes can use TreeType::Mat.
@@@ -128,28 -132,26 +134,27 @@@
                    const size_t maxLeafSize = 20);
  
    /**
-    * Construct this node on a subset of the given matrix, starting at column
-    * begin and using count points.  The ordering of that subset of points
-    * will be modified!  This is used for recursive tree-building by the other
-    * constructors which don't specify point indices.
+    * Construct this node as a child of the given parent, starting at column
 -   * begin_in and using count_in points.  The ordering of that subset of points
 -   * in the parent's data matrix will be modified!  This is used for recursive
++   * begin and using count points.  The ordering of that subset of points in the
++   * parent's data matrix will be modified!  This is used for recursive
+    * tree-building by the other constructors which don't specify point indices.
     *
-    * @param data Dataset to create tree from.  This will be modified!
+    * @param parent Parent of this node.  Its dataset will be modified!
     * @param begin Index of point to start tree construction with.
     * @param count Number of points to use to construct tree.
     * @param maxLeafSize Size of each leaf in the tree.
     */
-   BinarySpaceTree(MatType& data,
+   BinarySpaceTree(BinarySpaceTree* parent,
                    const size_t begin,
                    const size_t count,
 +                  SplitType& splitter,
-                   BinarySpaceTree* parent = NULL,
                    const size_t maxLeafSize = 20);
  
    /**
-    * Construct this node on a subset of the given matrix, starting at column
-    * begin_in and using count_in points.  The ordering of that subset of points
-    * will be modified!  This is used for recursive tree-building by the other
-    * constructors which don't specify point indices.
+    * Construct this node as a child of the given parent, starting at column
 -   * begin_in and using count_in points.  The ordering of that subset of points
 -   * in the parent's data matrix will be modified!  This is used for recursive
++   * begin and using count points.  The ordering of that subset of points in the
++   * parent's data matrix will be modified!  This is used for recursive
+    * tree-building by the other constructors which don't specify point indices.
     *
     * A mapping of the old point indices to the new point indices is filled, but
     * it is expected that the vector is already allocated with size greater than
@@@ -167,15 -169,13 +172,14 @@@
                    const size_t begin,
                    const size_t count,
                    std::vector<size_t>& oldFromNew,
 +                  SplitType& splitter,
-                   BinarySpaceTree* parent = NULL,
                    const size_t maxLeafSize = 20);
  
    /**
-    * Construct this node on a subset of the given matrix, starting at column
-    * begin_in and using count_in points.  The ordering of that subset of points
-    * will be modified!  This is used for recursive tree-building by the other
-    * constructors which don't specify point indices.
+    * Construct this node as a child of the given parent, starting at column
 -   * begin_in and using count_in points.  The ordering of that subset of points
 -   * in the parent's data matrix will be modified!  This is used for recursive
++   * begin and using count points.  The ordering of that subset of points in the
++   * parent's data matrix will be modified!  This is used for recursive
+    * tree-building by the other constructors which don't specify point indices.
     *
     * A mapping of the old point indices to the new point indices is filled, as
     * well as a mapping of the new point indices to the old point indices.  It is
@@@ -197,8 -197,6 +201,7 @@@
                    const size_t count,
                    std::vector<size_t>& oldFromNew,
                    std::vector<size_t>& newFromOld,
 +                  SplitType& splitter,
-                   BinarySpaceTree* parent = NULL,
                    const size_t maxLeafSize = 20);
  
    /**
@@@ -210,9 -208,26 +213,19 @@@
    BinarySpaceTree(const BinarySpaceTree& other);
  
    /**
+    * Initialize the tree from a boost::serialization archive.
+    *
 -   * @param ar Archive to load tree from.  Must be an iarchive, not an oarchive!
++   * @param ar Archive to load tree from.  Must be an iarchive, not an oarchive.
+    */
+   template<typename Archive>
+   BinarySpaceTree(
+       Archive& ar,
+       const typename boost::enable_if<typename Archive::is_loading>::type* = 0);
+ 
+   /**
     * Deletes this node, deallocating the memory for the children and calling
     * their destructors in turn.  This will invalidate any pointers or references
 -   * to any nodes which are children of this one.  Also, if this is the root of
 -   * the tree (specifically, if Parent() == NULL), it will delete the dataset.
 -   *
 -   * So, if you want to preserve other parts of the tree, the easiest way is
 -   * probably to set Left() or Right() to NULL and maintain those branches
 -   * yourself.  If you want to preserve the dataset, the easiest way is probably
 -   * to set Parent() to NULL before deleting the object.  Either way, if you
 -   * find yourself doing that, exercise care and caution.
 +   * to any nodes which are children of this one.
     */
    ~BinarySpaceTree();
  
@@@ -428,36 -443,49 +441,50 @@@
    /**
     * Splits the current node, assigning its left and right children recursively.
     *
-    * @param data Dataset which we are using.
     * @param maxLeafSize Maximum number of points held in a leaf.
++   * @param splitter Instantiated SplitType object.
     */
-   void SplitNode(MatType& data,
-                  const size_t maxLeafSize,
-                  SplitType& splitter);
 -  void SplitNode(const size_t maxLeafSize);
++  void SplitNode(const size_t maxLeafSize, SplitType& splitter);
  
    /**
     * Splits the current node, assigning its left and right children recursively.
     * Also returns a list of the changed indices.
     *
--   * @param data Dataset which we are using.
     * @param oldFromNew Vector holding permuted indices.
     * @param maxLeafSize Maximum number of points held in a leaf.
++   * @param splitter Instantiated SplitType object.
     */
-   void SplitNode(MatType& data,
-                  std::vector<size_t>& oldFromNew,
+   void SplitNode(std::vector<size_t>& oldFromNew,
 -                 const size_t maxLeafSize);
 +                 const size_t maxLeafSize,
 +                 SplitType& splitter);
  
+  protected:
+   /**
+    * A default constructor.  This is meant to only be used with
+    * boost::serialization, which is allowed with the friend declaration below.
+    * This does not return a valid tree!  The method must be protected, so that
+    * the serialization shim can work with the default constructor.
+    */
+   BinarySpaceTree();
+ 
+   //! Friend access is given for the default constructor.
+   friend class boost::serialization::access;
+ 
   public:
    /**
+    * Serialize the tree.
+    */
+   template<typename Archive>
+   void Serialize(Archive& ar, const unsigned int version);
+ 
+   /**
     * Returns a string representation of this object.
     */
    std::string ToString() const;
--
  };
  
 -}; // namespace tree
 -}; // namespace mlpack
 +} // namespace tree
 +} // namespace mlpack
  
  // Include implementation.
  #include "binary_space_tree_impl.hpp"
diff --cc src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
index 61f43da,db2ad36..7ad8298
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
@@@ -32,11 -33,10 +33,11 @@@ BinarySpaceTree<BoundType, StatisticTyp
      count(data.n_cols), /* and spans all of the dataset. */
      bound(data.n_rows),
      parentDistance(0), // Parent distance for the root is 0: it has no parent.
-     dataset(data)
+     dataset(new MatType(data)) // Copies the dataset.
  {
    // Do the actual splitting of this node.
 -  SplitNode(maxLeafSize);
 +  SplitType splitter;
-   SplitNode(data, maxLeafSize, splitter);
++  SplitNode(maxLeafSize, splitter);
  
    // Create the statistic depending on if we are a leaf or not.
    stat = StatisticType(*this);
@@@ -65,8 -65,7 +66,8 @@@ BinarySpaceTree<BoundType, StatisticTyp
      oldFromNew[i] = i; // Fill with unharmed indices.
  
    // Now do the actual splitting.
 -  SplitNode(oldFromNew, maxLeafSize);
 +  SplitType splitter;
-   SplitNode(data, oldFromNew, maxLeafSize, splitter);
++  SplitNode(oldFromNew, maxLeafSize, splitter);
  
    // Create the statistic depending on if we are a leaf or not.
    stat = StatisticType(*this);
@@@ -96,8 -95,7 +97,8 @@@ BinarySpaceTree<BoundType, StatisticTyp
      oldFromNew[i] = i; // Fill with unharmed indices.
  
    // Now do the actual splitting.
 -  SplitNode(oldFromNew, maxLeafSize);
 +  SplitType splitter;
-   SplitNode(data, oldFromNew, maxLeafSize, splitter);
++  SplitNode(oldFromNew, maxLeafSize, splitter);
  
    // Create the statistic depending on if we are a leaf or not.
    stat = StatisticType(*this);
@@@ -113,22 -111,20 +114,21 @@@ template<typename BoundType
           typename MatType,
           typename SplitType>
  BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
-     MatType& data,
+     BinarySpaceTree* parent,
      const size_t begin,
      const size_t count,
 +    SplitType& splitter,
-     BinarySpaceTree* parent,
      const size_t maxLeafSize) :
      left(NULL),
      right(NULL),
      parent(parent),
      begin(begin),
      count(count),
-     bound(data.n_rows),
-     dataset(data)
+     bound(parent->Dataset().n_rows),
+     dataset(&parent->Dataset()) // Point to the parent's dataset.
  {
    // Perform the actual splitting.
-   SplitNode(data, maxLeafSize, splitter);
 -  SplitNode(maxLeafSize);
++  SplitNode(maxLeafSize, splitter);
  
    // Create the statistic depending on if we are a leaf or not.
    stat = StatisticType(*this);
@@@ -143,8 -139,6 +143,7 @@@ BinarySpaceTree<BoundType, StatisticTyp
      const size_t begin,
      const size_t count,
      std::vector<size_t>& oldFromNew,
 +    SplitType& splitter,
-     BinarySpaceTree* parent,
      const size_t maxLeafSize) :
      left(NULL),
      right(NULL),
@@@ -156,10 -150,10 +155,10 @@@
  {
    // Hopefully the vector is initialized correctly!  We can't check that
    // entirely but we can do a minor sanity check.
-   assert(oldFromNew.size() == data.n_cols);
+   assert(oldFromNew.size() == dataset->n_cols);
  
    // Perform the actual splitting.
-   SplitNode(data, oldFromNew, maxLeafSize, splitter);
 -  SplitNode(oldFromNew, maxLeafSize);
++  SplitNode(oldFromNew, maxLeafSize, splitter);
  
    // Create the statistic depending on if we are a leaf or not.
    stat = StatisticType(*this);
@@@ -175,8 -169,6 +174,7 @@@ BinarySpaceTree<BoundType, StatisticTyp
      const size_t count,
      std::vector<size_t>& oldFromNew,
      std::vector<size_t>& newFromOld,
 +    SplitType& splitter,
-     BinarySpaceTree* parent,
      const size_t maxLeafSize) :
      left(NULL),
      right(NULL),
@@@ -188,10 -180,10 +186,10 @@@
  {
    // Hopefully the vector is initialized correctly!  We can't check that
    // entirely but we can do a minor sanity check.
-   Log::Assert(oldFromNew.size() == data.n_cols);
+   Log::Assert(oldFromNew.size() == dataset->n_cols);
  
    // Perform the actual splitting.
-   SplitNode(data, oldFromNew, maxLeafSize, splitter);
 -  SplitNode(oldFromNew, maxLeafSize);
++  SplitNode(oldFromNew, maxLeafSize, splitter);
  
    // Create the statistic depending on if we are a leaf or not.
    stat = StatisticType(*this);
@@@ -520,12 -557,10 +563,11 @@@ template<typename BoundType
           typename MatType,
           typename SplitType>
  void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
-     MatType& data,
 -    const size_t maxLeafSize)
 +    const size_t maxLeafSize,
 +    SplitType& splitter)
  {
    // We need to expand the bounds of this node properly.
-   bound |= data.cols(begin, begin + count - 1);
+   bound |= dataset->cols(begin, begin + count - 1);
  
    // Calculate the furthest descendant distance.
    furthestDescendantDistance = 0.5 * bound.Diameter();
@@@ -541,7 -576,8 +583,8 @@@
  
    // Split the node. The elements of 'data' are reordered by the splitting
    // algorithm. This function call updates splitCol.
-   const bool split = splitter.SplitNode(bound, data, begin, count, splitCol);
 -  const bool split = SplitType::SplitNode(bound, *dataset, begin, count,
++  const bool split = splitter.SplitNode(bound, *dataset, begin, count,
+       splitCol);
  
    // The node may not be always split. For instance, if all the points are the
    // same, we can't split them.
@@@ -550,10 -586,10 +593,10 @@@
  
    // Now that we know the split column, we will recursively split the children
    // by calling their constructors (which perform this splitting process).
-   left = new BinarySpaceTree(data, begin, splitCol - begin, splitter, this,
 -  left = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, begin,
 -      splitCol - begin, maxLeafSize);
 -  right = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, splitCol,
 -      begin + count - splitCol, maxLeafSize);
++  left = new BinarySpaceTree(this, begin, splitCol - begin, splitter,
 +      maxLeafSize);
-   right = new BinarySpaceTree(data, splitCol, begin + count - splitCol,
-       splitter, this, maxLeafSize);
++  right = new BinarySpaceTree(this, splitCol, begin + count - splitCol,
++      splitter, maxLeafSize);
  
    // Calculate parent distances for those two nodes.
    arma::vec centroid, leftCentroid, rightCentroid;
@@@ -575,10 -611,8 +618,9 @@@ template<typename BoundType
           typename MatType,
           typename SplitType>
  void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
-     MatType& data,
      std::vector<size_t>& oldFromNew,
 -    const size_t maxLeafSize)
 +    const size_t maxLeafSize,
 +    SplitType& splitter)
  {
    // This should be a single function for Bound.
    // We need to expand the bounds of this node properly.
@@@ -598,8 -632,8 +640,8 @@@
  
    // Split the node. The elements of 'data' are reordered by the splitting
    // algorithm. This function call updates splitCol and oldFromNew.
-   const bool split = splitter.SplitNode(bound, data, begin, count, splitCol,
 -  const bool split = SplitType::SplitNode(bound, *dataset, begin, count,
 -      splitCol, oldFromNew);
++  const bool split = splitter.SplitNode(bound, *dataset, begin, count, splitCol,
 +      oldFromNew);
  
    // The node may not be always split. For instance, if all the points are the
    // same, we can't split them.
@@@ -608,10 -642,10 +650,10 @@@
  
    // Now that we know the split column, we will recursively split the children
    // by calling their constructors (which perform this splitting process).
-   left = new BinarySpaceTree(data, begin, splitCol - begin, oldFromNew,
-       splitter, this, maxLeafSize);
-   right = new BinarySpaceTree(data, splitCol, begin + count - splitCol,
-       oldFromNew, splitter, this, maxLeafSize);
 -  left = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, begin,
 -      splitCol - begin, oldFromNew, maxLeafSize);
 -  right = new BinarySpaceTree<BoundType, StatisticType, MatType>(this, splitCol,
 -      begin + count - splitCol, oldFromNew, maxLeafSize);
++  left = new BinarySpaceTree(this, begin, splitCol - begin, oldFromNew,
++      splitter, maxLeafSize);
++  right = new BinarySpaceTree(this, splitCol, begin + count - splitCol,
++      oldFromNew, splitter, maxLeafSize);
  
    // Calculate parent distances for those two nodes.
    arma::vec centroid, leftCentroid, rightCentroid;
diff --cc src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
index 6251e8f,c6dc565..48a65fa
--- a/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
+++ b/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
@@@ -81,16 -60,13 +63,16 @@@ NeighborSearch(const typename TreeType:
  }
  
  // Construct the object.
 -template<typename SortPolicy, typename MetricType, typename TreeType>
 -NeighborSearch<SortPolicy, MetricType, TreeType>::NeighborSearch(
 -    TreeType* referenceTree,
 -    const bool singleMode,
 -    const MetricType metric) :
 +template<typename SortPolicy,
 +         typename MetricType,
 +         typename TreeType,
 +         template<typename> class TraversalType>
 +NeighborSearch<SortPolicy, MetricType, TreeType, TraversalType>::
 +NeighborSearch(TreeType* referenceTree,
 +               const bool singleMode,
 +               const MetricType metric) :
-     referenceSet(referenceTree->Dataset()),
      referenceTree(referenceTree),
+     referenceSet(referenceTree->Dataset()),
      treeOwner(false),
      naive(false),
      singleMode(singleMode),
@@@ -204,8 -165,12 +178,12 @@@ void NeighborSearch<SortPolicy, MetricT
      Timer::Stop("tree_building");
      Timer::Start("computing_neighbors");
  
+     // Create the helper object for the tree traversal.
+     RuleType rules(referenceSet, queryTree->Dataset(), *neighborPtr,
+         *distancePtr, metric);
+ 
      // Create the traverser.
 -    typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);
 +    TraversalType<RuleType> traverser(rules);
  
      traverser.Traverse(*queryTree, *referenceTree);
  



More information about the mlpack-git mailing list