[mlpack-git] master: Add a constructor that takes ownership of the data. (f8ceffa)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Jul 29 16:42:38 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/f8ceffae0613b350f4d6bdd46c6c8633a40b4897...6ee21879488fe98612a4619b17f8b51e8da5215b

>---------------------------------------------------------------

commit f8ceffae0613b350f4d6bdd46c6c8633a40b4897
Author: Ryan Curtin <ryan at ratml.org>
Date:   Tue Jul 28 03:58:28 2015 +0000

    Add a constructor that takes ownership of the data.


>---------------------------------------------------------------

f8ceffae0613b350f4d6bdd46c6c8633a40b4897
 .../tree/binary_space_tree/binary_space_tree.hpp   | 48 +++++++++++++
 .../binary_space_tree/binary_space_tree_impl.hpp   | 82 ++++++++++++++++++++++
 2 files changed, 130 insertions(+)

diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
index 61bcee2..80d1be2 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
@@ -138,6 +138,54 @@ class BinarySpaceTree
                   const size_t maxLeafSize = 20);
 
   /**
+   * Construct this as the root node of a binary space tree using the given
+   * dataset.  This will take ownership of the data matrix; if you don't want
+   * this, consider using the constructor that takes a const reference to a
+   * dataset.
+   *
+   * @param data Dataset to create tree from.  It is passed by rvalue reference
+   *     (e.g. via std::move()), so do not rely on its contents afterwards.
+   * @param maxLeafSize Size of each leaf in the tree.
+   */
+  BinarySpaceTree(MatType&& data,
+                  const size_t maxLeafSize = 20);
+
+  /**
+   * Construct this as the root node of a binary space tree using the given
+   * dataset.  This will take ownership of the data matrix; a mapping from each
+   * new point index to its old point index is filled.  If you don't want the
+   * matrix to have its ownership taken, consider using the constructor that
+   * takes a const reference to a dataset.
+   *
+   * @param data Dataset to create tree from.  It is passed by rvalue reference
+   *     (e.g. via std::move()), so do not rely on its contents afterwards.
+   * @param oldFromNew Vector which will be filled with the old positions for
+   *     each new point.
+   * @param maxLeafSize Size of each leaf in the tree.
+   */
+  BinarySpaceTree(MatType&& data,
+                  std::vector<size_t>& oldFromNew,
+                  const size_t maxLeafSize = 20);
+
+  /**
+   * Construct this as the root node of a binary space tree using the given
+   * dataset.  This will take ownership of the data matrix; two index mappings
+   * are filled: oldFromNew gives the old index of each new point, and
+   * newFromOld gives the new index of each old point.  If you don't want the
+   * matrix to have its ownership taken, consider using the constructor that
+   * takes a const reference to a dataset.
+   *
+   * @param data Dataset to create tree from.  It is passed by rvalue reference
+   *     (e.g. via std::move()), so do not rely on its contents afterwards.
+   * @param oldFromNew Vector which will be filled with the old positions for
+   *     each new point.
+   * @param newFromOld Vector which will be filled with the new positions for
+   *     each old point.
+   * @param maxLeafSize Size of each leaf in the tree.
+   */
+  BinarySpaceTree(MatType&& data,
+                  std::vector<size_t>& oldFromNew,
+                  std::vector<size_t>& newFromOld,
+                  const size_t maxLeafSize = 20);
+
+  /**
    * Construct this node as a child of the given parent, starting at column
    * begin and using count points.  The ordering of that subset of points in the
    * parent's data matrix will be modified!  This is used for recursive
diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
index 9129b59..08dc7f7 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
@@ -121,6 +121,88 @@ template<typename MetricType,
          template<typename BoundMetricType> class BoundType,
          template<typename BoundType, typename MatType> class SplitType>
 BinarySpaceTree<MetricType, StatisticType, MatType, BoundType, SplitType>::
+BinarySpaceTree(MatType&& data, const size_t maxLeafSize) :
+    left(NULL),
+    right(NULL),
+    parent(NULL),
+    begin(0),
+    count(data.n_cols),
+    bound(data.n_rows),
+    parentDistance(0), // Parent distance for the root is 0: it has no parent.
+    dataset(std::move(data))
+{
+  // Do the actual splitting of this node.
+  SplitType<BoundType<MetricType>, MatType> splitter;
+  SplitNode(maxLeafSize, splitter);
+
+  // Create the statistic depending on if we are a leaf or not.
+  stat = StatisticType(*this);
+}
+
+// Root-node constructor that takes the dataset by rvalue reference and fills
+// oldFromNew (old index of each new point).  oldFromNew is resized to the
+// number of columns of the dataset before the node is split.
+template<typename MetricType,
+         typename StatisticType,
+         typename MatType,
+         template<typename BoundMetricType> class BoundType,
+         template<typename BoundType, typename MatType> class SplitType>
+BinarySpaceTree<MetricType, StatisticType, MatType, BoundType, SplitType>::
+BinarySpaceTree(
+    MatType&& data,
+    std::vector<size_t>& oldFromNew,
+    const size_t maxLeafSize) :
+    left(NULL),
+    right(NULL),
+    parent(NULL),
+    begin(0),
+    count(data.n_cols),
+    bound(data.n_rows),
+    parentDistance(0), // Parent distance for the root is 0: it has no parent.
+    dataset(std::move(data))
+{
+  // The initializer list has already passed 'data' through std::move() into
+  // 'dataset', so 'data' may be in a moved-from state here and must not be
+  // read.  Take the number of points from 'dataset' instead.
+  oldFromNew.resize(dataset.n_cols);
+  for (size_t i = 0; i < dataset.n_cols; i++)
+    oldFromNew[i] = i; // Fill with unharmed indices.
+
+  // Now do the actual splitting.
+  SplitType<BoundType<MetricType>, MatType> splitter;
+  SplitNode(oldFromNew, maxLeafSize, splitter);
+
+  // Create the statistic depending on if we are a leaf or not.
+  stat = StatisticType(*this);
+}
+
+// Root-node constructor that takes the dataset by rvalue reference and fills
+// both index mappings: oldFromNew (old index of each new point) and
+// newFromOld (new index of each old point).
+template<typename MetricType,
+         typename StatisticType,
+         typename MatType,
+         template<typename BoundMetricType> class BoundType,
+         template<typename BoundType, typename MatType> class SplitType>
+BinarySpaceTree<MetricType, StatisticType, MatType, BoundType, SplitType>::
+BinarySpaceTree(
+    MatType&& data,
+    std::vector<size_t>& oldFromNew,
+    std::vector<size_t>& newFromOld,
+    const size_t maxLeafSize) :
+    left(NULL),
+    right(NULL),
+    parent(NULL),
+    begin(0),
+    count(data.n_cols),
+    bound(data.n_rows),
+    parentDistance(0), // Parent distance for the root is 0: it has no parent.
+    dataset(std::move(data))
+{
+  // The initializer list has already passed 'data' through std::move() into
+  // 'dataset', so 'data' may be in a moved-from state here and must not be
+  // read.  Take the number of points from 'dataset' instead.
+  oldFromNew.resize(dataset.n_cols);
+  for (size_t i = 0; i < dataset.n_cols; i++)
+    oldFromNew[i] = i; // Fill with unharmed indices.
+
+  // Now do the actual splitting; oldFromNew is handed to SplitNode() so it
+  // can record the final ordering of the points.
+  SplitType<BoundType<MetricType>, MatType> splitter;
+  SplitNode(oldFromNew, maxLeafSize, splitter);
+
+  // Create the statistic depending on if we are a leaf or not.
+  stat = StatisticType(*this);
+
+  // Map the newFromOld indices correctly.  This inverts oldFromNew, so it
+  // must run only after oldFromNew has been sized and filled above.
+  newFromOld.resize(dataset.n_cols);
+  for (size_t i = 0; i < dataset.n_cols; i++)
+    newFromOld[oldFromNew[i]] = i;
+}
+
+template<typename MetricType,
+         typename StatisticType,
+         typename MatType,
+         template<typename BoundMetricType> class BoundType,
+         template<typename BoundType, typename MatType> class SplitType>
+BinarySpaceTree<MetricType, StatisticType, MatType, BoundType, SplitType>::
 BinarySpaceTree(
     BinarySpaceTree* parent,
     const size_t begin,



More information about the mlpack-git mailing list