[mlpack-git] master, mlpack-1.0.x: Example tree. Not really to be used, but for documentation. (8ca8c38)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:43:53 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 8ca8c3858f4da34182a519d11011b1310558c82f
Author: Ryan Curtin <ryan at ratml.org>
Date:   Wed Feb 12 18:47:38 2014 +0000

    Example tree.  Not really to be used, but for documentation.


>---------------------------------------------------------------

8ca8c3858f4da34182a519d11011b1310558c82f
 src/mlpack/core/tree/example_tree.hpp      | 238 +++++++++++++++++++++++++++++
 src/mlpack/core/tree/example_tree_impl.hpp | 164 ++++++++++++++++++++
 2 files changed, 402 insertions(+)

diff --git a/src/mlpack/core/tree/example_tree.hpp b/src/mlpack/core/tree/example_tree.hpp
new file mode 100644
index 0000000..db4b9db
--- /dev/null
+++ b/src/mlpack/core/tree/example_tree.hpp
@@ -0,0 +1,238 @@
+/**
+ * @file example_tree.hpp
+ * @author Ryan Curtin
+ *
+ * An example tree.  This contains all the functions that mlpack trees must
+ * implement (although the actual implementations here don't make any sense
+ * because this is just an example).
+ */
+#ifndef __MLPACK_CORE_TREE_EXAMPLE_TREE_HPP
+#define __MLPACK_CORE_TREE_EXAMPLE_TREE_HPP
+
+namespace mlpack {
+namespace tree {
+
+/**
+ * This is not an actual space tree but instead an example tree that exists to
+ * show and document all the functions that mlpack trees must implement.  For a
+ * better overview of trees, see @ref trees.  Also be aware that the
+ * implementations of each of the methods in this example tree are entirely fake
+ * and do not work; this example tree exists for its API, not its
+ * implementation.
+ *
+ * Note that trees often have different properties.  These properties are known
+ * at compile-time through the mlpack::tree::TreeTraits class, and some
+ * properties may imply the existence (or non-existence) of certain functions.
+ * Refer to the TreeTraits for more documentation on that.
+ *
+ * The three template parameters below must be template parameters to the tree,
+ * in the order given below.  More template parameters are fine, but they must
+ * come after the first three.
+ *
+ * @tparam MetricType This defines the space in which the tree will be built.
+ *      For some trees, arbitrary metrics cannot be used, and a template
+ *      metaprogramming approach should be used to issue a compile-time error if
+ *      a metric cannot be used with a specific tree type.  One example is the
+ *      tree::BinarySpaceTree tree type, which cannot work with the
+ *      metric::IPMetric class.
+ * @tparam StatisticType A tree node can hold a statistic, which is sometimes
+ *      useful for various dual-tree algorithms.  The tree itself does not need
+ *      to know anything about how the statistic works, but it needs to hold a
+ *      StatisticType in each node.  It can be assumed that the StatisticType
+ *      class has a constructor StatisticType(const ExampleTree&).
+ * @tparam MatType A tree could be built on a dense matrix or a sparse matrix.
+ *      All mlpack trees should be able to support any Armadillo-compatible
+ *      matrix type.  When the tree is written it should be assumed that MatType
+ *      has the same functionality as arma::mat.
+ */
+template<typename MetricType = metric::LMetric<2, true>,
+         typename StatisticType = EmptyStatistic,
+         typename MatType = arma::mat>
+class ExampleTree
+{
+ public:
+  /**
+   * This constructor will build the tree given a dataset and an instantiated
+   * metric.  Note that the parameter is a MatType& and not an arma::mat&.  The
+   * dataset is not modified by the tree-building process (if it is, see the
+   * documentation for mlpack::tree::TreeTraits::RearrangesDataset for how to
+   * deal with that situation).  The MetricType parameter is necessary even
+   * though some metrics do not hold any state.  This is so that the tree does
+   * not have to worry about instantiating the metric (if the tree had to worry
+   * about this, this would almost certainly incur additional runtime complexity
+   * and a larger runtime size of the tree node objects, which is to be
+   * avoided).  The metric can't be const, in case MetricType::Evaluate() is
+   * non-const.
+   *
+   * When this constructor is finished, the entire tree will be built and ready
+   * to use.  The constructor should call the constructor of the statistic for
+   * each node that is built (see tree::EmptyStatistic for more information).
+   *
+   * @param dataset The dataset that the tree will be built on.
+   * @param metric The instantiated metric to use to build the dataset.
+   */
+  ExampleTree(const MatType& dataset,
+              MetricType& metric);
+
+  //! Return the number of children of this node.
+  size_t NumChildren() const;
+
+  //! Return a particular child of this node.
+  const ExampleTree& Child(const size_t i) const;
+  //! Modify a particular child of this node.
+  ExampleTree& Child(const size_t i);
+
+  //! Return the parent node (NULL if this is the root of the tree).
+  ExampleTree* Parent() const;
+
+  //! Return the number of points held in this node.
+  size_t NumPoints() const;
+
+  /**
+   * Return the index of a particular point of this node.  mlpack trees do not,
+   * in general, hold the actual dataset, and instead just hold the indices of
+   * the points they contain.  Thus, you might use this function in code like
+   * this:
+   *
+   * @code
+   * arma::vec thirdPoint = dataset.col(treeNode.Point(2));
+   * @endcode
+   */
+  size_t Point(const size_t i) const;
+
+  /**
+   * Get the number of descendant points.  This is the number of unique points
+   * held in this node plus the number of points held in all descendant nodes.
+   * This could be calculated at build-time and cached, or could be calculated
+   * at run-time.  This may be harder to calculate for trees that may hold
+   * points in multiple nodes (like cover trees and spill trees, for instance).
+   */
+  size_t NumDescendants() const;
+
+  /**
+   * Get the index of a particular descendant point.  The ordering of the
+   * descendants does not matter, as long as calling Descendant(0) through
+   * Descendant(NumDescendants() - 1) will return the indices of every
+   * unique descendant point of the node.
+   */
+  size_t Descendant(const size_t i) const;
+
+  //! Get the statistic for this node.
+  const StatisticType& Stat() const;
+  //! Modify the statistic for this node.
+  StatisticType& Stat();
+
+  //! Get the instantiated metric for this node.
+  const MetricType& Metric() const;
+  //! Modify the instantiated metric for this node.
+  MetricType& Metric();
+
+  /**
+   * Return the minimum distance between this node and a point.  It is not
+   * required that the exact minimum distance between the node and the point is
+   * returned but instead a lower bound on the minimum distance will suffice.
+   * See the definitions in @ref trees for more information.
+   *
+   * @param point Point to return [lower bound on] minimum distance to.
+   */
+  double MinDistance(const MatType& point) const;
+
+  /**
+   * Return the minimum distance between this node and another node.  It is not
+   * required that the exact minimum distance between the two nodes be returned
+   * but instead a lower bound on the minimum distance will suffice.  See the
+   * definitions in @ref trees for more information.
+   *
+   * @param node Node to return [lower bound on] minimum distance to.
+   */
+  double MinDistance(const ExampleTree& other) const;
+
+  /**
+   * Return the maximum distance between this node and a point.  It is not
+   * required that the exact maximum distance between the node and the point is
+   * returned but instead an upper bound on the maximum distance will suffice.
+   * See the definitions in @ref trees for more information.
+   *
+   * @param point Point to return [upper bound on] maximum distance to.
+   */
+  double MaxDistance(const MatType& point) const;
+
+  /**
+   * Return the maximum distance between this node and another node.  It is not
+   * required that the exact maximum distance between the two nodes be returned
+   * but instead an upper bound on the maximum distance will suffice.  See the
+   * definitions in @ref trees for more information.
+   *
+   * @param node Node to return [upper bound on] maximum distance to.
+   */
+  double MaxDistance(const ExampleTree& other) const;
+
+  /**
+   * Return both the minimum and maximum distances between this node and a point
+   * as a math::Range object.  This overload is given because it is possible
+   * that, for some tree types, calculation of both at once is faster than a
+   * call to MinDistance() then MaxDistance().  It is not necessary that the
+   * minimum and maximum distances be exact; it is sufficient to return a lower
+   * bound on the minimum distance and an upper bound on the maximum distance.
+   * See the definitions in @ref trees for more information.
+   *
+   * @param point Point to return [bounds on] minimum and maximum distances to.
+   */
+  math::Range RangeDistance(const MatType& point) const;
+
+  /**
+   * Return both the minimum and maximum distances between this node and another
+   * node as a math::Range object.  This overload is given because it is
+   * possible that, for some tree types, calculation of both at once is faster
+   * than a call to MinDistance() then MaxDistance().  It is not necessary that
+   * the minimum and maximum distances be exact; it is sufficient to return a
+   * lower bound on the minimum distance and an upper bound on the maximum
+   * distance.  See the definitions in @ref trees for more information.
+   *
+   * @param node Node to return [bounds on] minimum and maximum distances to.
+   */
+  math::Range RangeDistance(const ExampleTree& other) const;
+
+  /**
+   * Fill the given vector with the center of the node.
+   *
+   * @param centroid Vector to be filled with the center of the node.
+   */
+  void Centroid(arma::vec& centroid) const;
+
+  /**
+   * Get the distance from the center of the node to the furthest descendant
+   * point of this node.  This does not necessarily need to be the exact
+   * furthest descendant distance but instead can be an upper bound.  See the
+   * definitions in @ref trees for more information.
+   */
+  double FurthestDescendantDistance() const;
+
+  /**
+   * Get the distance from the center of this node to the center of the parent
+   * node.
+   */
+  double ParentDistance() const;
+
+ private:
+  //! This member is just here so the ExampleTree compiles without warnings.  It
+  //! is not required to be a member in every type of tree.
+  StatisticType stat;
+
+  /**
+   * This member is just here so the ExampleTree compiles without warnings.  It
+   * is not required to be a member in every type of tree.  Be aware that
+   * storing the metric as a member and not a reference may mean that for some
+   * metrics (such as metric::MahalanobisDistance in high dimensionality) may
+   * incur lots of unnecessary matrix copying.
+   */
+  MetricType& metric;
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+//! Include implementation (which does not make any sense!).
+#include "example_tree_impl.hpp"
+
+#endif
diff --git a/src/mlpack/core/tree/example_tree_impl.hpp b/src/mlpack/core/tree/example_tree_impl.hpp
new file mode 100644
index 0000000..856f3a3
--- /dev/null
+++ b/src/mlpack/core/tree/example_tree_impl.hpp
@@ -0,0 +1,164 @@
+/**
+ * @file example_tree_impl.hpp
+ * @author Ryan Curtin
+ *
+ * A fake implementation of the functions defined in ExampleTree.  Do not refer
+ * to these as guidelines and do not use ExampleTree in your work because it
+ * *will* *not* *work*.  The reason these implementations are here is so that we
+ * can make tests that ensure mlpack dual-tree algorithms can compile with
+ * *only* the functions specified in ExampleTree.
+ */
+#ifndef __MLPACK_CORE_TREE_EXAMPLE_TREE_IMPL_HPP
+#define __MLPACK_CORE_TREE_EXAMPLE_TREE_IMPL_HPP
+
+namespace mlpack {
+namespace tree {
+
+template<typename MetricType, typename StatisticType, typename MatType>
+ExampleTree<MetricType, StatisticType, MatType>::ExampleTree(
+    const MatType& dataset, MetricType& metric) : metric(metric), stat(*this)
+{ }
+
+template<typename MetricType, typename StatisticType, typename MatType>
+size_t ExampleTree<MetricType, StatisticType, MatType>::NumChildren() const
+{
+  return 0;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+const ExampleTree<MetricType, StatisticType, MatType>&
+ExampleTree<MetricType, StatisticType, MatType>::Child(const size_t i) const
+{
+  return *this;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+ExampleTree<MetricType, StatisticType, MatType>&
+ExampleTree<MetricType, StatisticType, MatType>::Child(const size_t i)
+{
+  return *this;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+ExampleTree<MetricType, StatisticType, MatType>*
+ExampleTree<MetricType, StatisticType, MatType>::Parent() const
+{
+  return NULL;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+size_t ExampleTree<MetricType, StatisticType, MatType>::NumPoints() const
+{
+  return 0;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+size_t ExampleTree<MetricType, StatisticType, MatType>::Point(const size_t i)
+    const
+{
+  return 0;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+size_t ExampleTree<MetricType, StatisticType, MatType>::NumDescendants() const
+{
+  return 0;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+size_t ExampleTree<MetricType, StatisticType, MatType>::Descendant(
+    const size_t i) const
+{
+  return 0;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+const StatisticType& ExampleTree<MetricType, StatisticType, MatType>::Stat()
+    const
+{
+  return stat;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+StatisticType& ExampleTree<MetricType, StatisticType, MatType>::Stat()
+{
+  return stat;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+const MetricType& ExampleTree<MetricType, StatisticType, MatType>::Metric()
+    const
+{
+  return metric;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+MetricType& ExampleTree<MetricType, StatisticType, MatType>::Metric()
+{
+  return metric;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+double ExampleTree<MetricType, StatisticType, MatType>::MinDistance(
+    const MatType& point) const
+{
+  return 0.0;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+double ExampleTree<MetricType, StatisticType, MatType>::MinDistance(
+    const ExampleTree& node) const
+{
+  return 0.0;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+double ExampleTree<MetricType, StatisticType, MatType>::MaxDistance(
+    const MatType& point) const
+{
+  return 0.0;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+double ExampleTree<MetricType, StatisticType, MatType>::MaxDistance(
+    const ExampleTree& node) const
+{
+  return 0.0;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+math::Range ExampleTree<MetricType, StatisticType, MatType>::RangeDistance(
+    const MatType& point) const
+{
+  return math::Range(0.0, 0.0);
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+math::Range ExampleTree<MetricType, StatisticType, MatType>::RangeDistance(
+    const ExampleTree& node) const
+{
+  return math::Range(0.0, 0.0);
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+void ExampleTree<MetricType, StatisticType, MatType>::Centroid(
+    arma::vec& centroid) const
+{ }
+
+template<typename MetricType, typename StatisticType, typename MatType>
+double ExampleTree<MetricType, StatisticType, MatType>::
+    FurthestDescendantDistance() const
+{
+  return 0.0;
+}
+
+template<typename MetricType, typename StatisticType, typename MatType>
+double ExampleTree<MetricType, StatisticType, MatType>::ParentDistance() const
+{
+  return 0.0;
+}
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif



More information about the mlpack-git mailing list