[mlpack-git] master: Add documentation for tree typedefs. (7485dfc)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Fri Aug 14 18:06:58 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/80adf7a6898dea87fa3569464b34d760b0642228...7485dfc808f2e65caf9b9a857347186518380e07

>---------------------------------------------------------------

commit 7485dfc808f2e65caf9b9a857347186518380e07
Author: Ryan Curtin <ryan at ratml.org>
Date:   Fri Aug 14 18:06:45 2015 -0400

    Add documentation for tree typedefs.


>---------------------------------------------------------------

7485dfc808f2e65caf9b9a857347186518380e07
 src/mlpack/core/tree/binary_space_tree/typedef.hpp | 89 +++++++++++++++++++++-
 src/mlpack/core/tree/cover_tree/typedef.hpp        | 18 +++++
 src/mlpack/core/tree/rectangle_tree/typedef.hpp    | 38 ++++++++-
 3 files changed, 139 insertions(+), 6 deletions(-)

diff --git a/src/mlpack/core/tree/binary_space_tree/typedef.hpp b/src/mlpack/core/tree/binary_space_tree/typedef.hpp
index a787415..b977548 100644
--- a/src/mlpack/core/tree/binary_space_tree/typedef.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/typedef.hpp
@@ -15,7 +15,40 @@ namespace mlpack {
 namespace tree {
 
 /**
- * midpoint split kd-tree
+ * The standard midpoint-split kd-tree.  This is not the original formulation by
+ * Bentley but instead the later formulation by Deng and Moore, which only holds
+ * points in the leaves of the tree.  When recursively splitting nodes, the
+ * KDTree class select the dimension with maximum variance to split on, and
+ * picks the midpoint of the range in that dimension as the value on which to
+ * split nodes.
+ *
+ * For more information, see the following papers.
+ *
+ * @code
+ * @article{bentley1975multidimensional,
+ *   title={Multidimensional binary search trees used for associative searching},
+ *   author={Bentley, J.L.},
+ *   journal={Communications of the ACM},
+ *   volume={18},
+ *   number={9},
+ *   pages={509--517},
+ *   year={1975},
+ *   publisher={ACM}
+ * }
+ *
+ * @inproceedings{deng1995multiresolution,
+ *   title={Multiresolution instance-based learning},
+ *   author={Deng, K. and Moore, A.W.},
+ *   booktitle={Proceedings of the 1995 International Joint Conference on AI
+ *       (IJCAI-95)},
+ *   pages={1233--1239},
+ *   year={1995}
+ * }
+ * @endcode
+ *
+ * This template typedef satisfies the TreeType policy API.
+ *
+ * @see @ref trees, BinarySpaceTree, MeanSplitKDTree
  */
 template<typename MetricType, typename StatisticType, typename MatType>
 using KDTree = BinarySpaceTree<MetricType,
@@ -25,7 +58,14 @@ using KDTree = BinarySpaceTree<MetricType,
                                MidpointSplit>;
 
 /**
- * mean split kd-tree
+ * A mean-split kd-tree.  This is the same as the KDTree, but this particular
+ * implementation will use the mean of the data in the split dimension as the
+ * value on which to split, instead of the midpoint.  This can sometimes give
+ * better performance, but it is not always clear which type of tree is best.
+ *
+ * This template typedef satisfies the TreeType policy API.
+ *
+ * @see @ref trees, BinarySpaceTree, KDTree
  */
 template<typename MetricType, typename StatisticType, typename MatType>
 using MeanSplitKDTree = BinarySpaceTree<MetricType,
@@ -35,7 +75,29 @@ using MeanSplitKDTree = BinarySpaceTree<MetricType,
                                         MeanSplit>;
 
 /**
- * midpoint split ball tree
+ * A midpoint-split ball tree.  This tree holds its points only in the leaves,
+ * similar to the KDTree and MeanSplitKDTree.  However, the bounding shape of
+ * each node is a ball, not a hyper-rectangle.  This can make the ball tree
+ * advantageous in some higher-dimensional situations and for some datasets.
+ * The tree construction algorithm here is the same as Omohundro's 'K-d
+ * construction algorithm', except the splitting value is the midpoint, not the
+ * median.  This can result in trees that better reflect the data, although they
+ * may be unbalanced.
+ *
+ * @code
+ * @techreport{omohundro1989five,
+ *   author={S.M. Omohundro},
+ *   title={Five balltree construction algorithms},
+ *   year={1989},
+ *   institution={University of California, Berkeley International Computer
+ *       Science Institute Technical Reports},
+ *   number={TR-89-063}
+ * }
+ * @endcode
+ *
+ * This template typedef satisfies the TreeType policy API.
+ *
+ * @see @ref trees, BinarySpaceTree, KDTree, MeanSplitBallTree
  */
 template<typename MetricType, typename StatisticType, typename MatType>
 using BallTree = BinarySpaceTree<MetricType,
@@ -45,7 +107,26 @@ using BallTree = BinarySpaceTree<MetricType,
                                  MidpointSplit>;
 
 /**
- * mean split ball tree
+ * A mean-split ball tree.  This tree, like the BallTree, holds its points only
+ * in the leaves.  The tree construction algorithm here is the same as
+ * Omohundro's 'K-dc onstruction algorithm', except the splitting value is the
+ * mean, not the median.  This can result in trees that better reflect the data,
+ * although they may be unbalanced.
+ *
+ * @code
+ * @techreport{omohundro1989five,
+ *   author={S.M. Omohundro},
+ *   title={Five balltree construction algorithms},
+ *   year={1989},
+ *   institution={University of California, Berkeley International Computer
+ *       Science Institute Technical Reports},
+ *   number={TR-89-063}
+ * }
+ * @endcode
+ *
+ * This template typedef satisfies the TreeType policy API.
+ *
+ * @see @ref trees, BinarySpaceTree, BallTree, MeanSplitKDTree
  */
 template<typename MetricType, typename StatisticType, typename MatType>
 using MeanSplitBallTree = BinarySpaceTree<MetricType,
diff --git a/src/mlpack/core/tree/cover_tree/typedef.hpp b/src/mlpack/core/tree/cover_tree/typedef.hpp
index 164af2e..6a3b82a 100644
--- a/src/mlpack/core/tree/cover_tree/typedef.hpp
+++ b/src/mlpack/core/tree/cover_tree/typedef.hpp
@@ -12,6 +12,24 @@
 namespace mlpack {
 namespace tree {
 
+/**
+ * The standard cover tree, as detailed in the original cover tree paper:
+ *
+ * @code
+ * @inproceedings{
+ *   author={Beygelzimer, A. and Kakade, S. and Langford, J.},
+ *   title={Cover trees for nearest neighbor},
+ *   booktitle={Proceedings of the 23rd International Conference on Machine
+ *       Learning (ICML 2006)},
+ *   pages={97--104},
+ *   year={2006}
+ * }
+ * @endcode
+ *
+ * This template typedef satisfies the requirements of the TreeType API.
+ *
+ * @see @ref trees, CoverTree
+ */
 template<typename MetricType, typename StatisticType, typename MatType>
 using StandardCoverTree = CoverTree<MetricType,
                                     StatisticType,
diff --git a/src/mlpack/core/tree/rectangle_tree/typedef.hpp b/src/mlpack/core/tree/rectangle_tree/typedef.hpp
index dd5906e..89924b7 100644
--- a/src/mlpack/core/tree/rectangle_tree/typedef.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/typedef.hpp
@@ -14,7 +14,24 @@ namespace mlpack {
 namespace tree {
 
 /**
- * regular r-tree
+ * An implementation of the R tree that satisfies the TreeType policy API.
+ *
+ * This is the same R-tree structure as proposed by Guttman:
+ *
+ * @code
+ * @inproceedings{guttman1984r,
+ *   title={R-trees: a dynamic index structure for spatial searching},
+ *   author={Guttman, A.},
+ *   booktitle={Proceedings of the 1984 ACM SIGMOD International Conference on
+ *       Management of Data (SIGMOD '84)},
+ *   volume={14},
+ *   number={2},
+ *   year={1984},
+ *   publisher={ACM}
+ * }
+ * @endcode
+ *
+ * @see @ref trees, RStarTree
  */
 template<typename MetricType, typename StatisticType, typename MatType>
 using RTree = RectangleTree<MetricType,
@@ -24,7 +41,24 @@ using RTree = RectangleTree<MetricType,
                             RTreeDescentHeuristic>;
 
 /**
- * r*-tree
+ * The R*-tree, a more recent variant of the R tree.  This template typedef
+ * satisfies the TreeType policy API.
+ *
+ * @code
+ * @inproceedings{beckmann1990r,
+ *   title={The R*-tree: an efficient and robust access method for points and
+ *       rectangles},
+ *   author={Beckmann, N. and Kriegel, H.-P. and Schneider, R. and Seeger, B.},
+ *   booktitle={Proceedings of the 1990 ACM SIGMOD International Conference on
+ *       Management of Data (SIGMOD '90)},
+ *   volume={19},
+ *   number={2},
+ *   year={1990},
+ *   publisher={ACM}
+ * }
+ * @endcode
+ *
+ * @see @ref trees, RTree
  */
 template<typename MetricType, typename StatisticType, typename MatType>
 using RStarTree = RectangleTree<MetricType,



More information about the mlpack-git mailing list