[mlpack-git] master, mlpack-1.0.x: Add files and some preliminary code for R tree (2cbe7ef)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:47:08 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 2cbe7ef1615dacea41d4344cb22ebc255c34f077
Author: andrewmw94 <andrewmw94 at gmail.com>
Date:   Mon May 19 17:32:08 2014 +0000

    Add files and some preliminary code for R tree


>---------------------------------------------------------------

2cbe7ef1615dacea41d4344cb22ebc255c34f077
 .../tree/binary_space_tree/binary_space_tree.hpp   |   8 +-
 src/mlpack/core/tree/rectangle_tree.hpp            |  19 ++
 .../core/tree/rectangle_tree/rectangle_tree.hpp    | 337 +++++++++++++++++++++
 .../tree/rectangle_tree/rectangle_tree_impl.hpp    |  21 ++
 .../rectangle_tree/rectangle_tree_traverser.hpp    |   1 +
 .../rectangle_tree_traverser_impl.hpp              |   1 +
 src/mlpack/core/tree/rectangle_tree/traits.hpp     |  56 ++++
 7 files changed, 438 insertions(+), 5 deletions(-)

diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
index 183c711..6b38b6f 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
@@ -24,8 +24,6 @@ namespace tree /** Trees and tree-building procedures. */ {
  * from it.  If you need to add or delete a node, the better procedure is to
  * rebuild the tree entirely.
  *
- * This tree does take one parameter, which is the leaf size to be used.
- *
  * @tparam BoundType The bound used for each node.  The valid types of bounds
  *     and the necessary skeleton interface for this class can be found in
  *     bounds/.
@@ -307,12 +305,12 @@ class BinarySpaceTree
    */
   double FurthestDescendantDistance() const;
 
-  //! Modify the distance from the center of this node to the center of the
-  //! parent node.
-  double& ParentDistance() { return parentDistance; }
   //! Return the distance from the center of this node to the center of the
   //! parent node.
   double ParentDistance() const { return parentDistance; }
+  //! Modify the distance from the center of this node to the center of the
+  //! parent node.
+  double& ParentDistance() { return parentDistance; }
 
   /**
    * Return the specified child (0 will be left, 1 will be right).  If the index
diff --git a/src/mlpack/core/tree/rectangle_tree.hpp b/src/mlpack/core/tree/rectangle_tree.hpp
new file mode 100644
index 0000000..12f30fd
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree.hpp
@@ -0,0 +1,19 @@
+/**
+ * @file rectangle_tree.hpp
+ * @author Andrew Wells
+ * 
+ * Include all the necessary filse to use the Rectangle Type Trees (RTree, RStarTree, XTree,
+ * and HilbertRTree.)
+ */
+#ifndef __MLPACK_CORE_TREE_RECTINGLINEAR_TREE_RECTANGLINEAR_TREE_HPP
+#define __MLPACK_CORE_TREE_RECTINGLINEAR_TREE_RECTANGLINEAR_TREE_HPP
+
+/* we include bounds.hpp since it gives us the necessary files.
+ * However, we will not use the "ballbounds" option.
+ */ 
+#include "bounds.hpp"
+#include "rectangle_tree/rectangle_tree.hpp"
+#include "rectangle_tree/rectangle_tree_traverser.hpp"
+#include "rectangle_tree/traits.hpp"
+
+#endif
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
new file mode 100644
index 0000000..5ed3348
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
@@ -0,0 +1,337 @@
+/**
+ * @file rectangle_tree.hpp
+ *
+ * Definition of generalized rectangle type trees (r_tree, r_star_tree, x_tree, and hilbert_r_tree).
+ */
+#ifndef __MLPACK_CORE_TREE_RECTINGLE_TREE_RECTANGLE_TREE_HPP
+#define __MLPACK_CORE_TREE_RECTINGLE_TREE_RECTANGLE_TREE_HPP
+
+#include <mlpack/core.hpp>
+
+#include "../statistic.hpp"
+
+namespace mlpack {
+namespace tree /** Trees and tree-building procedures. */ {
+
+/**
+ * A rectangle type tree tree, such as an R-tree or X-tree.  Once the
+ * bound and type of dataset is defined, the tree will construct itself.  Call
+ * the constructor with the dataset to build the tree on, and the entire tree
+ * will be built.
+ *
+ * This tree does allow growth, so you can add and delete nodes
+ * from it.
+ *
+ * @tparam StatisticType Extra data contained in the node.  See statistic.hpp
+ *     for the necessary skeleton interface.
+ * @tparam MatType The dataset class.
+ */   
+
+template<typename StatisticType = EmptyStatistic,
+	   typename MatType = arma::mat>
+class RectangleTree
+{
+ private:
+  //! The max number of child nodes an non-leaf node can have.
+  size_t maxNumChildren;
+  //! The number of child nodes actually in use (0 if this is a leaf node).
+  size_t numOfChildren;
+  //! The child nodes (Starting at 0 and ending at (numOfChildren-1) ).
+  std::vector<RectangleTree*> children;
+  //! The parent node (NULL if this is the root of the tree).
+  RectangleTree* parent;
+  //! The index of the first point in the dataset contained in this node (and
+  //! its children).
+  size_t begin;
+  //! The number of points in the dataset contained in this node (and its
+  //! children).
+  size_t count;
+  //! The leaf size.
+  size_t leafSize;
+  //! The bound object for this node.
+  HRectBound bound;
+  //! Any extra data contained in the node.
+  StatisticType stat;
+  //! The distance from the centroid of this node to the centroid of the parent.
+  double parentDistance;
+  //! The discance to the furthest descendant, cached to speed things up.
+  double furthestDescendantDistance;
+  //! The dataset.
+  MatType& dataset;
+
+ public:
+  //! So other classes can use TreeType::Mat.
+  typedef MatType Mat;
+
+  //! A traverser for rectangle type trees; see
+  //! rectangle_tree_traverser.hpp for implementation.
+  template<typename RuleType>
+  class RectangleTreeTraverser;
+
+  /**
+   * Construct this as the root node of a rectangle type tree using the given
+   * dataset.  This will modify the ordering of the points in the dataset!
+   *
+   * @param data Dataset from which to create the tree.  This will be modified!
+   * @param leafSize Size of each leaf in the tree;
+   * @param maxNumChildren The maximum number of child nodes a non-leaf node may have.
+   */
+  RectangleTree(MatType& data, const size_t leafSize = 20, const size_t maxNumChildren = 5);
+
+  //TODO implement the oldFromNew stuff if applicable.
+
+  /**
+   * Deletes this node, deallocating the memory for the children and calling
+   * their destructors in turn.  This will invalidate any younters or references
+   * to any nodes which are children of this one.
+   */
+  ~RectangleTree();
+
+  /**
+   * Find a node in this tree by its begin and count (const).
+   *
+   * Every node is uniquely identified by these two numbers.
+   * This is useful for communicating position over the network,
+   * when pointers would be invalid.
+   *
+   * @param begin The begin() of the node to find.
+   * @param count The count() of the node to find.
+   * @return The found node, or NULL if not found.
+   */
+  const RectangleTree* FindByBeginCount(size_t begin, size_t count) const;
+
+  /**
+   * Find a node in this tree by its begin and count.
+   *
+   * Every node is uniquely identified by these two numbers.
+   * This is useful for communicating position over the network,
+   * when pointers would be invalid.
+   *
+   * @param begin The begin() of the node to find.
+   * @param count The count() of the node to find.
+   * @return The found node, or NULL if not found.
+   */
+  RectangleTree* FindByBeginCount(size_t begin, size_t count);
+
+  //! Return the bound object for this node.
+  const HRectBound& Bound() const { return bound; }
+  //! Return the bound object for this node.
+  HRectBound& Bound() { return bound; }
+
+  //! Return the statistic object for this node.
+  const StatisticType& Stat() const { return stat; }
+  //! Return the statistic object for this node.
+  StatisticType& Stat() { return stat; }
+
+  //! Return whether or not this node is a leaf (true if it has no children).
+  bool IsLeaf() const;
+
+  //! Return the leaf size.
+  size_t LeafSize() const { return leafSize; }
+  //! Modify the leaf size.
+  size_t& LeafSize() { return leafSize; }
+
+  //! Gets the parent of this node.
+  RectangleTree* Parent() const { return parent; }
+  //! Modify the parent of this node.
+  RectangleTree*& Parent() { return parent; }
+
+  //! Get the dataset which the tree is built on.
+  const arma::mat& Dataset() const { return dataset; }
+  //! Modify the dataset which the tree is built on.  Be careful!
+  arma::mat& Dataset() { return dataset; }
+
+  //! Get the metric which the tree uses.
+  typename BoundType::MetricType Metric() const { return bound.Metric(); }
+
+  //! Get the centroid of the node and store it in the given vector.
+  void Centroid(arma::vec& centroid) { bound.Centroid(centroid); }
+
+  //! Return the number of children in this node.
+  size_t NumChildren() const;
+
+  /**
+   * Return the furthest distance to a point held in this node.  If this is not
+   * a leaf node, then the distance is 0 because the node holds no points.
+   */
+  double FurthestPointDistance() const;
+
+  /**
+   * Return the furthest possible descendant distance.  This returns the maximum
+   * distance from the centroid to the edge of the bound and not the empirical
+   * quantity which is the actual furthest descendant distance.  So the actual
+   * furthest descendant distance may be less than what this method returns (but
+   * it will never be greater than this).
+   */
+  double FurthestDescendantDistance() const;
+
+  //! Return the distance from the center of this node to the center of the
+  //! parent node.
+  double ParentDistance() const { return parentDistance; }
+  //! Modify the distance from the center of this node to the center of the
+  //! parent node.
+  double& ParentDistance() { return parentDistance; }
+
+  /**
+   * Return the specified child.
+   *
+   * @param child Index of child to return.
+   */
+  RectangleTree& Child(const size_t child) const;
+
+  //! Return the number of points in this node (0 if not a leaf).
+  size_t NumPoints() const;
+
+  /**
+   * Return the number of descendants of this node.  For a non-leaf in a binary
+   * space tree, this is the number of points at the descendant leaves.  For a
+   * leaf, this is the number of points in the leaf.
+   */
+  size_t NumDescendants() const;
+
+  /**
+   * Return the index (with reference to the dataset) of a particular descendant
+   * of this node.  The index should be greater than zero but less than the
+   * number of descendants.
+   *
+   * @param index Index of the descendant.
+   */
+  size_t Descendant(const size_t index) const;
+
+  /**
+   * Return the index (with reference to the dataset) of a particular point in
+   * this node.  This will happily return invalid indices if the given index is
+   * greater than the number of points in this node (obtained with NumPoints())
+   * -- be careful.
+   *
+   * @param index Index of point for which a dataset index is wanted.
+   */
+  size_t Point(const size_t index) const;
+
+  //! Return the minimum distance to another node.
+  double MinDistance(const RectangleTree* other) const
+  {
+    return bound.MinDistance(other->Bound());
+  }
+
+  //! Return the maximum distance to another node.
+  double MaxDistance(const RectangleTree* other) const
+  {
+    return bound.MaxDistance(other->Bound());
+  }
+
+  //! Return the minimum and maximum distance to another node.
+  math::Range RangeDistance(const RectangleTree* other) const
+  {
+    return bound.RangeDistance(other->Bound());
+  }
+
+
+  //! Return the minimum distance to another point.
+  template<typename VecType>
+  double MinDistance(const VecType& point,
+                     typename boost::enable_if<IsVector<VecType> >::type* = 0)
+      const
+  {
+    return bound.MinDistance(point);
+  }
+
+  //! Return the maximum distance to another point.
+  template<typename VecType>
+  double MaxDistance(const VecType& point,
+                     typename boost::enable_if<IsVector<VecType> >::type* = 0)
+      const
+  {
+    return bound.MaxDistance(point);
+  }
+
+  //! Return the minimum and maximum distance to another point.
+  template<typename VecType>
+  math::Range
+  RangeDistance(const VecType& point,
+                typename boost::enable_if<IsVector<VecType> >::type* = 0) const
+  {
+    return bound.RangeDistance(point);
+  }
+
+ /**
+   * Obtains the number of nodes in the tree, starting with this.
+   */
+  size_t TreeSize() const;
+
+  /**
+   * Obtains the number of levels below this node in the tree, starting with
+   * this.
+   */
+  size_t TreeDepth() const;
+
+  //! Return the index of the beginning point of this subset.
+  size_t Begin() const { return begin; }
+  //! Modify the index of the beginning point of this subset.
+  size_t& Begin() { return begin; }
+
+  /**
+   * Gets the index one beyond the last index in the subset.
+   */
+  size_t End() const;
+
+  //! Return the number of points in this subset.
+  size_t Count() const { return count; }
+  //! Modify the number of points in this subset.
+  size_t& Count() { return count; }
+
+  //! Returns false: this tree type does not have self children.
+  static bool HasSelfChildren() { return false; }
+
+ private:
+  /**
+   * Private copy constructor, available only to fill (pad) the tree to a
+   * specified level.
+   */
+  RectangleTree(const size_t begin,
+                  const size_t count,
+                  HRectBound bound,
+                  StatisticType stat,
+                  const int leafSize = 20) :
+      begin(begin),
+      count(count),
+      bound(bound),
+      stat(stat),
+      leafSize(leafSize) { }
+
+  RectangleTree* CopyMe()
+  {
+    return new RectangleTree(begin, count, bound, stat, leafSize);
+  }
+
+  /**
+   * Splits the current node, assigning its left and right children recursively.
+   *
+   * @param data Dataset which we are using.
+   */
+  void SplitNode(MatType& data);
+
+  /**
+   * Splits the current node, assigning its left and right children recursively.
+   * Also returns a list of the changed indices.
+   *
+   * @param data Dataset which we are using.
+   * @param oldFromNew Vector holding permuted indices.
+   */
+  void SplitNode(MatType& data, std::vector<size_t>& oldFromNew);
+
+ public:
+  /**
+   * Returns a string representation of this object.
+   */
+  std::string ToString() const;
+
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "rectangle_tree_impl.hpp"
+
+#endif
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
new file mode 100644
index 0000000..8dea9f3
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
@@ -0,0 +1,21 @@
+/**
+ * @file rectangle_tree_impl.hpp
+ *
+ * Implementation of generalized rectangle tree.
+ */
+#ifndef __MLPACK_CORE_TREE_RECTANGLE_TREE_RECTANGLE_TREE_IMPL_HPP
+#define __MLPACK_CORE_TREE_RECTANGLE_TREE_RECTANGLE_TREE_IMPL_HPP
+
+// In case it wasn't included already for sem reason.
+#include "rectangle_tree.hpp"
+
+namespace mlpack {
+namespace tree {
+
+template<typename StatisticType,
+	   typename MatType>
+RectangleTree<StatisticType, MatType>::RectangleTree()
+
+
+}; //namespace tree
+}; //namespace mlpack
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser.hpp
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser.hpp
@@ -0,0 +1 @@
+ 
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser_impl.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser_impl.hpp
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser_impl.hpp
@@ -0,0 +1 @@
+ 
diff --git a/src/mlpack/core/tree/rectangle_tree/traits.hpp b/src/mlpack/core/tree/rectangle_tree/traits.hpp
new file mode 100644
index 0000000..5bef337
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree/traits.hpp
@@ -0,0 +1,56 @@
+/**
+ * @file traits.hpp
+ * @author Andrew Wells
+ *
+ * Specialization of the TreeTraits class for the RectangleTree type of tree.
+ */
+#ifndef __MLPACK_CORE_TREE_RECTANGLE_TREE_TRAITS_HPP
+#define __MLPACK_CORE_TREE_RECTANGLE_TREE_TRAITS_HPP
+
+#include <mlpack/core/tree/tree_traits.hpp>
+
+namespace mlpack {
+namespace tree {
+
+/**
+ * This is a specialization of the TreeType class to the RectangleTree tree
+ * type.  It defines characteristics of the rectangle type trees, and is used to
+ * help write tree-independent (but still optimized) tree-based algorithms.  See
+ * mlpack/core/tree/tree_traits.hpp for more information.
+ */
+template<typename StatisticType,
+         typename MatType>
+class TreeTraits<RectangleTree<StatisticType, MatType> >
+{
+ public:
+  /**
+   * The R-tree cannot easily calculate the distance from a node to
+   * its parent; so RectangleTree<...>::ParentDistance() does not exist.
+   */
+  static const bool HasParentDistance = false;
+
+  /**
+   * An R-tree can have overlapping children.
+   */
+  static const bool HasOverlappingChildren = true;
+
+  /**
+   * There is no guarantee that the first point in a node is its centroid.
+   */
+  static const bool FirstPointIsCentroid = false;
+
+  /**
+   * Points are not contained at multiple levels of the R-tree.
+   */
+  static const bool HasSelfChildren = false;
+
+  /**
+   * Points are rearranged during building of the tree.
+   */
+  static const bool RearrangesDataset = true;
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif



More information about the mlpack-git mailing list