[mlpack-svn] r13333 - in mlpack/trunk/src/mlpack/core/tree: . binary_space_tree cover_tree

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Sat Aug 4 23:00:19 EDT 2012


Author: rcurtin
Date: 2012-08-04 23:00:19 -0400 (Sat, 04 Aug 2012)
New Revision: 13333

Added:
   mlpack/trunk/src/mlpack/core/tree/binary_space_tree/
   mlpack/trunk/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
   mlpack/trunk/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp
   mlpack/trunk/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser.hpp
   mlpack/trunk/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser_impl.hpp
   mlpack/trunk/src/mlpack/core/tree/binary_space_tree/single_tree_traverser_impl.hpp
   mlpack/trunk/src/mlpack/core/tree/cover_tree/
   mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree.hpp
   mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree_impl.hpp
   mlpack/trunk/src/mlpack/core/tree/cover_tree/dual_tree_traverser.hpp
   mlpack/trunk/src/mlpack/core/tree/cover_tree/dual_tree_traverser_impl.hpp
   mlpack/trunk/src/mlpack/core/tree/cover_tree/first_point_is_root.hpp
   mlpack/trunk/src/mlpack/core/tree/cover_tree/single_tree_traverser.hpp
   mlpack/trunk/src/mlpack/core/tree/cover_tree/single_tree_traverser_impl.hpp
Removed:
   mlpack/trunk/src/mlpack/core/tree/binary_space_tree.hpp
   mlpack/trunk/src/mlpack/core/tree/binary_space_tree_impl.hpp
   mlpack/trunk/src/mlpack/core/tree/cover_tree.hpp
   mlpack/trunk/src/mlpack/core/tree/cover_tree_impl.hpp
   mlpack/trunk/src/mlpack/core/tree/first_point_is_root.hpp
   mlpack/trunk/src/mlpack/core/tree/traversers/
Modified:
   mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt
Log:
Restructure trees entirely.  Each tree will have its own SingleTreeTraverser and
DualTreeTraverser, and each tree type should be kept in its own directory (for
organizational purposes).  The previous traversers in traversers/ have been
adapted to either the BinarySpaceTree or CoverTree and moved there.


Modified: mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt	2012-08-04 22:03:50 UTC (rev 13332)
+++ mlpack/trunk/src/mlpack/core/tree/CMakeLists.txt	2012-08-05 03:00:19 UTC (rev 13333)
@@ -5,23 +5,26 @@
 set(SOURCES
   ballbound.hpp
   ballbound_impl.hpp
-  binary_space_tree.hpp
-  binary_space_tree_impl.hpp
+  binary_space_tree/binary_space_tree.hpp
+  binary_space_tree/binary_space_tree_impl.hpp
+  binary_space_tree/dual_tree_traverser.hpp
+  binary_space_tree/dual_tree_traverser_impl.hpp
+  binary_space_tree/single_tree_traverser.hpp
+  binary_space_tree/single_tree_traverser_impl.hpp
   bounds.hpp
-  cover_tree.hpp
-  cover_tree_impl.hpp
-  first_point_is_root.hpp
+  cover_tree/cover_tree.hpp
+  cover_tree/cover_tree_impl.hpp
+  cover_tree/first_point_is_root.hpp
+  cover_tree/single_tree_traverser.hpp
+  cover_tree/single_tree_traverser_impl.hpp
+  cover_tree/dual_tree_traverser.hpp
+  cover_tree/dual_tree_traverser_impl.hpp
   hrectbound.hpp
   hrectbound_impl.hpp
   periodichrectbound.hpp
   periodichrectbound_impl.hpp
   statistic.hpp
   mrkd_statistic.hpp
-  traversers/single_tree_breadth_first_traverser.hpp
-  traversers/single_tree_depth_first_traverser.hpp
-  traversers/dual_tree_depth_first_traverser.hpp
-  traversers/dual_tree_breadth_first_traverser.hpp
-  traversers/dual_cover_tree_traverser.hpp
 )
 
 # add directory name to sources

Copied: mlpack/trunk/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/binary_space_tree.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,399 @@
+/**
+ * @file binary_space_tree.hpp
+ *
+ * Definition of generalized binary space partitioning tree (BinarySpaceTree).
+ */
+#ifndef __MLPACK_CORE_TREE_BINARY_SPACE_TREE_BINARY_SPACE_TREE_HPP
+#define __MLPACK_CORE_TREE_BINARY_SPACE_TREE_BINARY_SPACE_TREE_HPP
+
+#include <mlpack/core.hpp>
+
+#include "../statistic.hpp"
+
+// Bad!
+#include <mlpack/methods/neighbor_search/neighbor_search_rules.hpp>
+
+namespace mlpack {
+namespace tree /** Trees and tree-building procedures. */ {
+
+/**
+ * A binary space partitioning tree, such as a KD-tree or a ball tree.  Once the
+ * bound and type of dataset is defined, the tree will construct itself.  Call
+ * the constructor with the dataset to build the tree on, and the entire tree
+ * will be built.
+ *
+ * This particular tree does not allow growth, so you cannot add or delete nodes
+ * from it.  If you need to add or delete a node, the better procedure is to
+ * rebuild the tree entirely.
+ *
+ * This tree does take one parameter, which is the leaf size to be used.
+ *
+ * @tparam BoundType The bound used for each node.  The valid types of bounds
+ *     and the necessary skeleton interface for this class can be found in
+ *     bounds/.
+ * @tparam StatisticType Extra data contained in the node.  See statistic.hpp
+ *     for the necessary skeleton interface.
+ */
+template<typename BoundType,
+         typename StatisticType = EmptyStatistic,
+         typename MatType = arma::mat>
+class BinarySpaceTree
+{
+ private:
+  //! The left child node.
+  BinarySpaceTree* left;
+  //! The right child node.
+  BinarySpaceTree* right;
+  //! The index of the first point in the dataset contained in this node (and
+  //! its children).
+  size_t begin;
+  //! The number of points of the dataset contained in this node (and its
+  //! children).
+  size_t count;
+  //! The bound object for this node.
+  BoundType bound;
+  //! Any extra data contained in the node.
+  StatisticType stat;
+  //! The leaf size.
+  size_t leafSize;
+  //! The dimension this node split on if it is a parent.
+  size_t splitDimension;
+
+ public:
+  //! So other classes can use TreeType::Mat.
+  typedef MatType Mat;
+
+  //! A single-tree traverser for binary space trees; see
+  //! single_tree_traverser.hpp for implementation.
+  template<typename RuleType>
+  class SingleTreeTraverser;
+
+  //! A dual-tree traverser for binary space trees; see dual_tree_traverser.hpp.
+  template<typename RuleType>
+  class DualTreeTraverser;
+
+  /**
+   * Construct this as the root node of a binary space tree using the given
+   * dataset.  This will modify the ordering of the points in the dataset!
+   *
+   * @param data Dataset to create tree from.  This will be modified!
+   * @param leafSize Size of each leaf in the tree.
+   */
+  BinarySpaceTree(MatType& data, const size_t leafSize = 20);
+
+  /**
+   * Construct this as the root node of a binary space tree using the given
+   * dataset.  This will modify the ordering of points in the dataset!  A
+   * mapping of the old point indices to the new point indices is filled.
+   *
+   * @param data Dataset to create tree from.  This will be modified!
+   * @param oldFromNew Vector which will be filled with the old positions for
+   *     each new point.
+   * @param leafSize Size of each leaf in the tree.
+   */
+  BinarySpaceTree(MatType& data,
+                  std::vector<size_t>& oldFromNew,
+                  const size_t leafSize = 20);
+
+  /**
+   * Construct this as the root node of a binary space tree using the given
+   * dataset.  This will modify the ordering of points in the dataset!  A
+   * mapping of the old point indices to the new point indices is filled, as
+   * well as a mapping of the new point indices to the old point indices.
+   *
+   * @param data Dataset to create tree from.  This will be modified!
+   * @param oldFromNew Vector which will be filled with the old positions for
+   *     each new point.
+   * @param newFromOld Vector which will be filled with the new positions for
+   *     each old point.
+   * @param leafSize Size of each leaf in the tree.
+   */
+  BinarySpaceTree(MatType& data,
+                  std::vector<size_t>& oldFromNew,
+                  std::vector<size_t>& newFromOld,
+                  const size_t leafSize = 20);
+
+  /**
+   * Construct this node on a subset of the given matrix, starting at column
+   * begin and using count points.  The ordering of that subset of points
+   * will be modified!  This is used for recursive tree-building by the other
+   * constructors which don't specify point indices.
+   *
+   * @param data Dataset to create tree from.  This will be modified!
+   * @param begin Index of point to start tree construction with.
+   * @param count Number of points to use to construct tree.
+   * @param leafSize Size of each leaf in the tree.
+   */
+  BinarySpaceTree(MatType& data,
+                  const size_t begin,
+                  const size_t count,
+                  const size_t leafSize = 20);
+
+  /**
+   * Construct this node on a subset of the given matrix, starting at column
+   * begin_in and using count_in points.  The ordering of that subset of points
+   * will be modified!  This is used for recursive tree-building by the other
+   * constructors which don't specify point indices.
+   *
+   * A mapping of the old point indices to the new point indices is filled, but
+   * it is expected that the vector is already allocated with size greater than
+   * or equal to (begin_in + count_in), and if that is not true, invalid memory
+   * reads (and writes) will occur.
+   *
+   * @param data Dataset to create tree from.  This will be modified!
+   * @param begin Index of point to start tree construction with.
+   * @param count Number of points to use to construct tree.
+   * @param oldFromNew Vector which will be filled with the old positions for
+   *     each new point.
+   * @param leafSize Size of each leaf in the tree.
+   */
+  BinarySpaceTree(MatType& data,
+                  const size_t begin,
+                  const size_t count,
+                  std::vector<size_t>& oldFromNew,
+                  const size_t leafSize = 20);
+
+  /**
+   * Construct this node on a subset of the given matrix, starting at column
+   * begin_in and using count_in points.  The ordering of that subset of points
+   * will be modified!  This is used for recursive tree-building by the other
+   * constructors which don't specify point indices.
+   *
+   * A mapping of the old point indices to the new point indices is filled, as
+   * well as a mapping of the new point indices to the old point indices.  It is
+   * expected that the vector is already allocated with size greater than or
+   * equal to (begin_in + count_in), and if that is not true, invalid memory
+   * reads (and writes) will occur.
+   *
+   * @param data Dataset to create tree from.  This will be modified!
+   * @param begin Index of point to start tree construction with.
+   * @param count Number of points to use to construct tree.
+   * @param oldFromNew Vector which will be filled with the old positions for
+   *     each new point.
+   * @param newFromOld Vector which will be filled with the new positions for
+   *     each old point.
+   * @param leafSize Size of each leaf in the tree.
+   */
+  BinarySpaceTree(MatType& data,
+                  const size_t begin,
+                  const size_t count,
+                  std::vector<size_t>& oldFromNew,
+                  std::vector<size_t>& newFromOld,
+                  const size_t leafSize = 20);
+
+  /**
+   * Create an empty tree node.
+   */
+  BinarySpaceTree();
+
+  /**
+   * Deletes this node, deallocating the memory for the children and calling
+   * their destructors in turn.  This will invalidate any pointers or references
+   * to any nodes which are children of this one.
+   */
+  ~BinarySpaceTree();
+
+  /**
+   * Find a node in this tree by its begin and count (const).
+   *
+   * Every node is uniquely identified by these two numbers.
+   * This is useful for communicating position over the network,
+   * when pointers would be invalid.
+   *
+   * @param begin The begin() of the node to find.
+   * @param count The count() of the node to find.
+   * @return The found node, or NULL if not found.
+   */
+  const BinarySpaceTree* FindByBeginCount(size_t begin,
+                                          size_t count) const;
+
+  /**
+   * Find a node in this tree by its begin and count.
+   *
+   * Every node is uniquely identified by these two numbers.
+   * This is useful for communicating position over the network,
+   * when pointers would be invalid.
+   *
+   * @param begin The begin() of the node to find.
+   * @param count The count() of the node to find.
+   * @return The found node, or NULL if not found.
+   */
+  BinarySpaceTree* FindByBeginCount(size_t begin, size_t count);
+
+  //! Return the bound object for this node.
+  const BoundType& Bound() const;
+  //! Return the bound object for this node.
+  BoundType& Bound();
+
+  //! Return the statistic object for this node.
+  const StatisticType& Stat() const;
+  //! Return the statistic object for this node.
+  StatisticType& Stat();
+
+  //! Return whether or not this node is a leaf (true if it has no children).
+  bool IsLeaf() const;
+
+  //! Return the leaf size.
+  size_t LeafSize() const;
+
+  //! Fills the tree to the specified level.
+  size_t ExtendTree(const size_t level);
+
+  //! Gets the left child of this node.
+  BinarySpaceTree* Left() const;
+
+  //! Gets the right child of this node.
+  BinarySpaceTree* Right() const;
+
+  //! Return the number of children in this node.
+  size_t NumChildren() const;
+
+  /**
+   * Return the specified child (0 will be left, 1 will be right).  If the index
+   * is greater than 1, this will return the right child.
+   *
+   * @param child Index of child to return.
+   */
+  BinarySpaceTree& Child(const size_t child) const;
+
+  //! Return the number of points in this node (0 if not a leaf).
+  size_t NumPoints() const;
+
+  /**
+   * Return the index (with reference to the dataset) of a particular point in
+   * this node.  This will happily return invalid indices if the given index is
+   * greater than the number of points in this node (obtained with NumPoints())
+   * -- be careful.
+   *
+   * @param index Index of point for which a dataset index is wanted.
+   */
+  size_t Point(const size_t index) const;
+
+  //! Return the minimum distance to another node.
+  double MinDistance(const BinarySpaceTree* other) const
+  {
+    return bound.MinDistance(other->Bound());
+  }
+
+  //! Return the maximum distance to another node.
+  double MaxDistance(const BinarySpaceTree* other) const
+  {
+    return bound.MaxDistance(other->Bound());
+  }
+
+  //! Return the minimum distance to another point.
+  double MinDistance(const arma::vec& point) const
+  {
+    return bound.MinDistance(point);
+  }
+
+  //! Return the maximum distance to another point.
+  double MaxDistance(const arma::vec& point) const
+  {
+    return bound.MaxDistance(point);
+  }
+
+  /**
+  * Returns the dimension this parent's children are split on.
+  */
+  size_t GetSplitDimension() const;
+
+  /**
+   * Obtains the number of nodes in the tree, starting with this.
+   */
+  size_t TreeSize() const;
+
+  /**
+   * Obtains the number of levels below this node in the tree, starting with
+   * this.
+   */
+  size_t TreeDepth() const;
+
+  /**
+   * Gets the index of the beginning point of this subset.
+   */
+  size_t Begin() const;
+
+  /**
+   * Gets the index one beyond the last index in the subset.
+   */
+  size_t End() const;
+
+  /**
+   * Gets the number of points in this subset.
+   */
+  size_t Count() const;
+
+  //! Returns false: this tree type does not have self children.
+  static bool HasSelfChildren() { return false; }
+
+ private:
+  /**
+   * Private copy constructor, available only to fill (pad) the tree to a
+   * specified level.
+   */
+  BinarySpaceTree(const size_t begin,
+                  const size_t count,
+                  BoundType bound,
+                  StatisticType stat,
+                  const int leafSize = 20) :
+      left(NULL),
+      right(NULL),
+      begin(begin),
+      count(count),
+      bound(bound),
+      stat(stat),
+      leafSize(leafSize) { }
+
+  BinarySpaceTree* CopyMe()
+  {
+    return new BinarySpaceTree(begin, count, bound, stat, leafSize);
+  }
+
+  /**
+   * Splits the current node, assigning its left and right children recursively.
+   *
+   * @param data Dataset which we are using.
+   */
+  void SplitNode(MatType& data);
+
+  /**
+   * Splits the current node, assigning its left and right children recursively.
+   * Also returns a list of the changed indices.
+   *
+   * @param data Dataset which we are using.
+   * @param oldFromNew Vector holding permuted indices.
+   */
+  void SplitNode(MatType& data, std::vector<size_t>& oldFromNew);
+
+  /**
+   * Find the index to split on for this node, given that we are splitting in
+   * the given split dimension on the specified split value.
+   *
+   * @param data Dataset which we are using.
+   * @param splitDim Dimension of dataset to split on.
+   * @param splitVal Value to split on, in the given split dimension.
+   */
+  size_t GetSplitIndex(MatType& data, int splitDim, double splitVal);
+
+  /**
+   * Find the index to split on for this node, given that we are splitting in
+   * the given split dimension on the specified split value.  Also returns a
+   * list of the changed indices.
+   *
+   * @param data Dataset which we are using.
+   * @param splitDim Dimension of dataset to split on.
+   * @param splitVal Value to split on, in the given split dimension.
+   * @param oldFromNew Vector holding permuted indices.
+   */
+  size_t GetSplitIndex(MatType& data, int splitDim, double splitVal,
+      std::vector<size_t>& oldFromNew);
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "binary_space_tree_impl.hpp"
+
+#endif

Copied: mlpack/trunk/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/binary_space_tree_impl.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/binary_space_tree/binary_space_tree_impl.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,662 @@
+/**
+ * @file binary_space_tree_impl.hpp
+ *
+ * Implementation of generalized space partitioning tree.
+ */
+#ifndef __MLPACK_CORE_TREE_BINARY_SPACE_TREE_BINARY_SPACE_TREE_IMPL_HPP
+#define __MLPACK_CORE_TREE_BINARY_SPACE_TREE_BINARY_SPACE_TREE_IMPL_HPP
+
+// In case it wasn't included already for some reason.
+#include "binary_space_tree.hpp"
+
+#include <mlpack/core/util/cli.hpp>
+#include <mlpack/core/util/log.hpp>
+
+namespace mlpack {
+namespace tree {
+
+// Each of these overloads is kept as a separate function to keep the overhead
+// from the two std::vectors out, if possible.
+template<typename BoundType, typename StatisticType, typename MatType>
+BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
+    MatType& data,
+    const size_t leafSize) :
+    left(NULL),
+    right(NULL),
+    begin(0), /* This root node starts at index 0, */
+    count(data.n_cols), /* and spans all of the dataset. */
+    bound(data.n_rows),
+    stat(),
+    leafSize(leafSize)
+{
+  // Do the actual splitting of this node.
+  SplitNode(data);
+
+  // Create the statistic depending on if we are a leaf or not.
+  if (IsLeaf())
+    stat = StatisticType(data, begin, count);
+  else
+    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
+    MatType& data,
+    std::vector<size_t>& oldFromNew,
+    const size_t leafSize) :
+    left(NULL),
+    right(NULL),
+    begin(0),
+    count(data.n_cols),
+    bound(data.n_rows),
+    stat(),
+    leafSize(leafSize)
+{
+  // Initialize oldFromNew correctly.
+  oldFromNew.resize(data.n_cols);
+  for (size_t i = 0; i < data.n_cols; i++)
+    oldFromNew[i] = i; // Fill with unharmed indices.
+
+  // Now do the actual splitting.
+  SplitNode(data, oldFromNew);
+
+  // Create the statistic depending on if we are a leaf or not.
+  if (IsLeaf())
+    stat = StatisticType(data, begin, count);
+  else
+    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
+    MatType& data,
+    std::vector<size_t>& oldFromNew,
+    std::vector<size_t>& newFromOld,
+    const size_t leafSize) :
+    left(NULL),
+    right(NULL),
+    begin(0),
+    count(data.n_cols),
+    bound(data.n_rows),
+    stat(),
+    leafSize(leafSize)
+{
+  // Initialize the oldFromNew vector correctly.
+  oldFromNew.resize(data.n_cols);
+  for (size_t i = 0; i < data.n_cols; i++)
+    oldFromNew[i] = i; // Fill with unharmed indices.
+
+  // Now do the actual splitting.
+  SplitNode(data, oldFromNew);
+
+  // Create the statistic depending on if we are a leaf or not.
+  if (IsLeaf())
+    stat = StatisticType(data, begin, count);
+  else
+    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
+
+  // Map the newFromOld indices correctly.
+  newFromOld.resize(data.n_cols);
+  for (size_t i = 0; i < data.n_cols; i++)
+    newFromOld[oldFromNew[i]] = i;
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
+    MatType& data,
+    const size_t begin,
+    const size_t count,
+    const size_t leafSize) :
+    left(NULL),
+    right(NULL),
+    begin(begin),
+    count(count),
+    bound(data.n_rows),
+    stat(),
+    leafSize(leafSize)
+{
+  // Perform the actual splitting.
+  SplitNode(data);
+
+  // Create the statistic depending on if we are a leaf or not.
+  if (IsLeaf())
+    stat = StatisticType(data, begin, count);
+  else
+    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
+    MatType& data,
+    const size_t begin,
+    const size_t count,
+    std::vector<size_t>& oldFromNew,
+    const size_t leafSize) :
+    left(NULL),
+    right(NULL),
+    begin(begin),
+    count(count),
+    bound(data.n_rows),
+    stat(),
+    leafSize(leafSize)
+{
+  // Hopefully the vector is initialized correctly!  We can't check that
+  // entirely but we can do a minor sanity check.
+  assert(oldFromNew.size() == data.n_cols);
+
+  // Perform the actual splitting.
+  SplitNode(data, oldFromNew);
+
+  // Create the statistic depending on if we are a leaf or not.
+  if (IsLeaf())
+    stat = StatisticType(data, begin, count);
+  else
+    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
+    MatType& data,
+    const size_t begin,
+    const size_t count,
+    std::vector<size_t>& oldFromNew,
+    std::vector<size_t>& newFromOld,
+    const size_t leafSize) :
+    left(NULL),
+    right(NULL),
+    begin(begin),
+    count(count),
+    bound(data.n_rows),
+    stat(),
+    leafSize(leafSize)
+{
+  // Hopefully the vector is initialized correctly!  We can't check that
+  // entirely but we can do a minor sanity check.
+  Log::Assert(oldFromNew.size() == data.n_cols);
+
+  // Perform the actual splitting.
+  SplitNode(data, oldFromNew);
+
+  // Create the statistic depending on if we are a leaf or not.
+  if (IsLeaf())
+    stat = StatisticType(data, begin, count);
+  else
+    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
+
+  // Map the newFromOld indices correctly.
+  newFromOld.resize(data.n_cols);
+  for (size_t i = 0; i < data.n_cols; i++)
+    newFromOld[oldFromNew[i]] = i;
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree() :
+    left(NULL),
+    right(NULL),
+    begin(0),
+    count(0),
+    bound(),
+    stat(),
+    leafSize(20) // Default leaf size is 20.
+{
+  // Nothing to do.
+}
+
+/**
+ * Deletes this node, deallocating the memory for the children and calling their
+ * destructors in turn.  This will invalidate any pointers or references to any
+ * nodes which are children of this one.
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+BinarySpaceTree<BoundType, StatisticType, MatType>::~BinarySpaceTree()
+{
+  if (left)
+    delete left;
+  if (right)
+    delete right;
+}
+
+/**
+ * Find a node in this tree by its begin and count.
+ *
+ * Every node is uniquely identified by these two numbers.
+ * This is useful for communicating position over the network,
+ * when pointers would be invalid.
+ *
+ * @param queryBegin The Begin() of the node to find.
+ * @param queryCount The Count() of the node to find.
+ * @return The found node, or NULL if nothing is found.
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+const BinarySpaceTree<BoundType, StatisticType, MatType>*
+BinarySpaceTree<BoundType, StatisticType, MatType>::FindByBeginCount(
+    size_t queryBegin,
+    size_t queryCount) const
+{
+  Log::Assert(queryBegin >= begin);
+  Log::Assert(queryCount <= count);
+
+  if (begin == queryBegin && count == queryCount)
+    return this;
+  else if (IsLeaf())
+    return NULL;
+  else if (queryBegin < right->Begin())
+    return left->FindByBeginCount(queryBegin, queryCount);
+  else
+    return right->FindByBeginCount(queryBegin, queryCount);
+}
+
+/**
+ * Find a node in this tree by its begin and count (const).
+ *
+ * Every node is uniquely identified by these two numbers.
+ * This is useful for communicating position over the network,
+ * when pointers would be invalid.
+ *
+ * @param queryBegin the Begin() of the node to find
+ * @param queryCount the Count() of the node to find
+ * @return the found node, or NULL
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+BinarySpaceTree<BoundType, StatisticType, MatType>*
+BinarySpaceTree<BoundType, StatisticType, MatType>::FindByBeginCount(
+    const size_t queryBegin,
+    const size_t queryCount)
+{
+  mlpack::Log::Assert(begin >= queryBegin);
+  mlpack::Log::Assert(count <= queryCount);
+
+  if (begin == queryBegin && count == queryCount)
+    return this;
+  else if (IsLeaf())
+    return NULL;
+  else if (queryBegin < left->End())
+    return left->FindByBeginCount(queryBegin, queryCount);
+  else if (right)
+    return right->FindByBeginCount(queryBegin, queryCount);
+  else
+    return NULL;
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+size_t BinarySpaceTree<BoundType, StatisticType, MatType>::ExtendTree(
+    size_t level)
+{
+  --level;
+  // Return the number of nodes duplicated.
+  size_t nodesDuplicated = 0;
+  if (level > 0)
+  {
+    if (!left)
+    {
+      left = CopyMe();
+      ++nodesDuplicated;
+    }
+    nodesDuplicated += left->ExtendTree(level);
+    if (right)
+    {
+      nodesDuplicated += right->ExtendTree(level);
+    }
+  }
+  return nodesDuplicated;
+}
+
+/* TODO: we can likely calculate this earlier, then store the
+ *   result in a private member variable; for now, we can
+ *   just calculate as needed...
+ *
+ *   Also, perhaps we should rewrite these recursive functions
+ *     to avoid exceeding the stack limit
+ */
+
+template<typename BoundType, typename StatisticType, typename MatType>
+size_t BinarySpaceTree<BoundType, StatisticType, MatType>::TreeSize() const
+{
+  // Recursively count the nodes on each side of the tree.  The plus one is
+  // because we have to count this node, too.
+  return 1 + (left ? left->TreeSize() : 0) + (right ? right->TreeSize() : 0);
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+size_t BinarySpaceTree<BoundType, StatisticType, MatType>::TreeDepth() const
+{
+  // Recursively count the depth on each side of the tree.  The plus one is
+  // because we have to count this node, too.
+  return 1 + std::max((left ? left->TreeDepth() : 0),
+                      (right ? right->TreeDepth() : 0));
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+inline const
+    BoundType& BinarySpaceTree<BoundType, StatisticType, MatType>::Bound() const
+{
+  return bound;
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+inline BoundType& BinarySpaceTree<BoundType, StatisticType, MatType>::Bound()
+{
+  return bound;
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+inline const StatisticType&
+    BinarySpaceTree<BoundType, StatisticType, MatType>::Stat() const
+{
+  return stat;
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+inline StatisticType& BinarySpaceTree<BoundType, StatisticType, MatType>::Stat()
+{
+  return stat;
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+inline size_t BinarySpaceTree<BoundType, StatisticType, MatType>::GetSplitDimension() const
+{
+  return splitDimension;
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+inline bool BinarySpaceTree<BoundType, StatisticType, MatType>::IsLeaf() const
+{
+  return !left;
+}
+
+/**
+ * Gets the left branch of the tree.
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+inline BinarySpaceTree<BoundType, StatisticType, MatType>*
+    BinarySpaceTree<BoundType, StatisticType, MatType>::Left() const
+{
+  return left;
+}
+
+/**
+ * Gets the right branch.
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+inline BinarySpaceTree<BoundType, StatisticType, MatType>*
+    BinarySpaceTree<BoundType, StatisticType, MatType>::Right() const
+{
+  return right;
+}
+
+/**
+ * Returns the number of children in this node.
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+inline size_t
+    BinarySpaceTree<BoundType, StatisticType, MatType>::NumChildren() const
+{
+  if (left && right)
+    return 2;
+  if (left)
+    return 1;
+
+  return 0;
+}
+
+/**
+ * Return the specified child.
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+inline BinarySpaceTree<BoundType, StatisticType, MatType>&
+    BinarySpaceTree<BoundType, StatisticType, MatType>::Child(
+    const size_t child) const
+{
+  if (child == 0)
+    return *left;
+  else
+    return *right;
+}
+
+/**
+ * Return the number of points contained in this node.
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+inline size_t
+BinarySpaceTree<BoundType, StatisticType, MatType>::NumPoints() const
+{
+  if (left)
+    return 0;
+
+  return count;
+}
+
+/**
+ * Return the index of a particular point contained in this node.
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+inline size_t
+BinarySpaceTree<BoundType, StatisticType, MatType>::Point(const size_t index)
+    const
+{
+  return (begin + index);
+}
+
+/**
+ * Gets the index of the begin point of this subset.
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+inline size_t BinarySpaceTree<BoundType, StatisticType, MatType>::Begin() const
+{
+  return begin;
+}
+
+/**
+ * Gets the index one beyond the last index in the series.
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+inline size_t BinarySpaceTree<BoundType, StatisticType, MatType>::End() const
+{
+  return begin + count;
+}
+
+/**
+ * Gets the number of points in this subset.
+ */
+template<typename BoundType, typename StatisticType, typename MatType>
+inline size_t BinarySpaceTree<BoundType, StatisticType, MatType>::Count() const
+{
+  return count;
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+void
+    BinarySpaceTree<BoundType, StatisticType, MatType>::SplitNode(MatType& data)
+{
+  // We need to expand the bounds of this node properly.
+  bound |= data.cols(begin, begin + count - 1);
+
+  // Now, check if we need to split at all.
+  if (count <= leafSize)
+    return; // We can't split this.
+
+  // Figure out which dimension to split on.
+  size_t splitDim = data.n_rows; // Indicate invalid by maxDim + 1.
+  double maxWidth = -1;
+
+  // Find the split dimension.
+  for (size_t d = 0; d < data.n_rows; d++)
+  {
+    double width = bound[d].Width();
+
+    if (width > maxWidth)
+    {
+      maxWidth = width;
+      splitDim = d;
+    }
+  }
+  splitDimension = splitDim;
+
+  // Split in the middle of that dimension.
+  double splitVal = bound[splitDim].Mid();
+
+  if (maxWidth == 0) // All these points are the same.  We can't split.
+    return;
+
+  // Perform the actual splitting.  This will order the dataset such that points
+  // with value in dimension split_dim less than or equal to splitVal are on
+  // the left of splitCol, and points with value in dimension splitDim greater
+  // than splitVal are on the right side of splitCol.
+  size_t splitCol = GetSplitIndex(data, splitDim, splitVal);
+
+  // Now that we know the split column, we will recursively split the children
+  // by calling their constructors (which perform this splitting process).
+  left = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, begin,
+      splitCol - begin, leafSize);
+  right = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, splitCol,
+      begin + count - splitCol, leafSize);
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+void BinarySpaceTree<BoundType, StatisticType, MatType>::SplitNode(
+    MatType& data,
+    std::vector<size_t>& oldFromNew)
+{
+  // This should be a single function for Bound.
+  // We need to expand the bounds of this node properly.
+  bound |= data.cols(begin, begin + count - 1);
+
+  // First, check if we need to split at all.
+  if (count <= leafSize)
+    return; // We can't split this.
+
+  // Figure out which dimension to split on.
+  size_t splitDim = data.n_rows; // Indicate invalid by max_dim + 1.
+  double maxWidth = -1;
+
+  // Find the split dimension.
+  for (size_t d = 0; d < data.n_rows; d++)
+  {
+    double width = bound[d].Width();
+
+    if (width > maxWidth)
+    {
+      maxWidth = width;
+      splitDim = d;
+    }
+  }
+  splitDimension = splitDim;
+
+  // Split in the middle of that dimension.
+  double splitVal = bound[splitDim].Mid();
+
+  if (maxWidth == 0) // All these points are the same.  We can't split.
+    return;
+
+  // Perform the actual splitting.  This will order the dataset such that points
+  // with value in dimension split_dim less than or equal to splitVal are on
+  // the left of splitCol, and points with value in dimension splitDim greater
+  // than splitVal are on the right side of splitCol.
+  size_t splitCol = GetSplitIndex(data, splitDim, splitVal, oldFromNew);
+
+  // Now that we know the split column, we will recursively split the children
+  // by calling their constructors (which perform this splitting process).
+  left = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, begin,
+      splitCol - begin, oldFromNew, leafSize);
+  right = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, splitCol,
+      begin + count - splitCol, oldFromNew, leafSize);
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+size_t BinarySpaceTree<BoundType, StatisticType, MatType>::GetSplitIndex(
+    MatType& data,
+    int splitDim,
+    double splitVal)
+{
+  // This method modifies the input dataset.  We loop both from the left and
+  // right sides of the points contained in this node.  The points less than
+  // split_val should be on the left side of the matrix, and the points greater
+  // than split_val should be on the right side of the matrix.
+  size_t left = begin;
+  size_t right = begin + count - 1;
+
+  // First half-iteration of the loop is out here because the termination
+  // condition is in the middle.
+  while ((data(splitDim, left) < splitVal) && (left <= right))
+    left++;
+  while ((data(splitDim, right) >= splitVal) && (left <= right))
+    right--;
+
+  while (left <= right)
+  {
+    // Swap columns.
+    data.swap_cols(left, right);
+
+    // See how many points on the left are correct.  When they are correct,
+    // increase the left counter accordingly.  When we encounter one that isn't
+    // correct, stop.  We will switch it later.
+    while ((data(splitDim, left) < splitVal) && (left <= right))
+      left++;
+
+    // Now see how many points on the right are correct.  When they are correct,
+    // decrease the right counter accordingly.  When we encounter one that isn't
+    // correct, stop.  We will switch it with the wrong point we found in the
+    // previous loop.
+    while ((data(splitDim, right) >= splitVal) && (left <= right))
+      right--;
+  }
+
+  Log::Assert(left == right + 1);
+
+  return left;
+}
+
+template<typename BoundType, typename StatisticType, typename MatType>
+size_t BinarySpaceTree<BoundType, StatisticType, MatType>::GetSplitIndex(
+    MatType& data,
+    int splitDim,
+    double splitVal,
+    std::vector<size_t>& oldFromNew)
+{
+  // This method modifies the input dataset.  We loop both from the left and
+  // right sides of the points contained in this node.  The points less than
+  // split_val should be on the left side of the matrix, and the points greater
+  // than split_val should be on the right side of the matrix.
+  size_t left = begin;
+  size_t right = begin + count - 1;
+
+  // First half-iteration of the loop is out here because the termination
+  // condition is in the middle.
+  while ((data(splitDim, left) < splitVal) && (left <= right))
+    left++;
+  while ((data(splitDim, right) >= splitVal) && (left <= right))
+    right--;
+
+  while (left <= right)
+  {
+    // Swap columns.
+    data.swap_cols(left, right);
+
+    // Update the indices for what we changed.
+    size_t t = oldFromNew[left];
+    oldFromNew[left] = oldFromNew[right];
+    oldFromNew[right] = t;
+
+    // See how many points on the left are correct.  When they are correct,
+    // increase the left counter accordingly.  When we encounter one that isn't
+    // correct, stop.  We will switch it later.
+    while ((data(splitDim, left) < splitVal) && (left <= right))
+      left++;
+
+    // Now see how many points on the right are correct.  When they are correct,
+    // decrease the right counter accordingly.  When we encounter one that isn't
+    // correct, stop.  We will switch it with the wrong point we found in the
+    // previous loop.
+    while ((data(splitDim, right) >= splitVal) && (left <= right))
+      right--;
+  }
+
+  Log::Assert(left == right + 1);
+
+  return left;
+}
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif

Copied: mlpack/trunk/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/traversers/dual_tree_depth_first_traverser.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,58 @@
+/**
+ * @file dual_tree_traverser.hpp
+ * @author Ryan Curtin
+ *
+ * Defines the DualTreeTraverser for the BinarySpaceTree tree type.  This is a
+ * nested class of BinarySpaceTree which traverses two trees in a depth-first
+ * manner with a given set of rules which indicate the branches which can be
+ * pruned and the order in which to recurse.
+ */
+#ifndef __MLPACK_CORE_TREE_BINARY_SPACE_TREE_DUAL_TREE_TRAVERSER_HPP
+#define __MLPACK_CORE_TREE_BINARY_SPACE_TREE_DUAL_TREE_TRAVERSER_HPP
+
+#include <mlpack/core.hpp>
+
+#include "binary_space_tree.hpp"
+
+namespace mlpack {
+namespace tree {
+
+template<typename BoundType, typename StatisticType, typename MatType>
+template<typename RuleType>
+class BinarySpaceTree<BoundType, StatisticType, MatType>::DualTreeTraverser
+{
+ public:
+  /**
+   * Instantiate the dual-tree traverser with the given rule set.
+   */
+  DualTreeTraverser(RuleType& rule);
+
+  /**
+   * Traverse the two trees.  This does not reset the number of prunes.
+   *
+   * @param queryNode The query node to be traversed.
+   * @param referenceNode The reference node to be traversed.
+   */
+  void Traverse(BinarySpaceTree& queryNode, BinarySpaceTree& referenceNode);
+
+  //! Get the number of prunes.
+  size_t NumPrunes() const { return numPrunes; }
+  //! Modify the number of prunes.
+  size_t& NumPrunes() { return numPrunes; }
+
+ private:
+  //! Reference to the rules with which the trees will be traversed.
+  RuleType& rule;
+
+  //! The number of nodes which have been pruned during traversal.
+  size_t numPrunes;
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "dual_tree_traverser_impl.hpp"
+
+#endif // __MLPACK_CORE_TREE_BINARY_SPACE_TREE_DUAL_TREE_TRAVERSER_HPP
+

Copied: mlpack/trunk/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser_impl.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/traversers/dual_tree_depth_first_traverser.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser_impl.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser_impl.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,111 @@
+/**
+ * @file dual_tree_traverser_impl.hpp
+ * @author Ryan Curtin
+ *
+ * Implementation of the DualTreeTraverser
+ * way to perform a dual-tree traversal of two trees.  The trees must be the
+ * same type.
+ */
+#ifndef __MLPACK_CORE_TREE_BINARY_SPACE_TREE_DUAL_TREE_TRAVERSER_IMPL_HPP
+#define __MLPACK_CORE_TREE_BINARY_SPACE_TREE_DUAL_TREE_TRAVERSER_IMPL_HPP
+
+// In case it hasn't been included yet.
+#include "dual_tree_traverser.hpp"
+
+namespace mlpack {
+namespace tree {
+
+template<typename BoundType, typename StatisticType, typename MatType>
+template<typename RuleType>
+BinarySpaceTree<BoundType, StatisticType, MatType>::
+DualTreeTraverser<RuleType>::DualTreeTraverser(RuleType& rule) :
+    rule(rule),
+    numPrunes(0)
+{ /* Nothing to do. */ }
+
+template<typename BoundType, typename StatisticType, typename MatType>
+template<typename RuleType>
+void BinarySpaceTree<BoundType, StatisticType, MatType>::
+DualTreeTraverser<RuleType>::Traverse(
+    BinarySpaceTree<BoundType, StatisticType, MatType>& queryNode,
+    BinarySpaceTree<BoundType, StatisticType, MatType>& referenceNode)
+{
+  // Check if pruning can occur.
+  if (rule.CanPrune(queryNode, referenceNode))
+  {
+    ++numPrunes;
+    return;
+  }
+
+  // If both are leaves, we must evaluate the base case.
+  if (queryNode.IsLeaf() && referenceNode.IsLeaf())
+  {
+    // Loop through each of the points in each node.
+    for (size_t query = queryNode.Begin(); query < queryNode.End(); ++query)
+      for (size_t ref = referenceNode.Begin(); ref < referenceNode.End(); ++ref)
+        rule.BaseCase(query, ref);
+  }
+  else if ((!queryNode.IsLeaf()) && referenceNode.IsLeaf())
+  {
+    // We have to recurse down the query node.
+    if (rule.LeftFirst(referenceNode, queryNode))
+    {
+      Traverse(*queryNode.Left(), referenceNode);
+      Traverse(*queryNode.Right(), referenceNode);
+    }
+    else
+    {
+      Traverse(*queryNode.Right(), referenceNode);
+      Traverse(*queryNode.Left(), referenceNode);
+    }
+  }
+  else if (queryNode.IsLeaf() && (!referenceNode.IsLeaf()))
+  {
+    // We have to recurse down the reference node.
+    if (rule.LeftFirst(queryNode, referenceNode))
+    {
+      Traverse(queryNode, *referenceNode.Left());
+      Traverse(queryNode, *referenceNode.Right());
+    }
+    else
+    {
+      Traverse(queryNode, *referenceNode.Right());
+      Traverse(queryNode, *referenceNode.Left());
+    }
+  }
+  else
+  {
+    // We have to recurse down both.
+    if (rule.LeftFirst(*queryNode.Left(), referenceNode))
+    {
+      Traverse(*queryNode.Left(), *referenceNode.Left());
+      Traverse(*queryNode.Left(), *referenceNode.Right());
+    }
+    else
+    {
+      Traverse(*queryNode.Left(), *referenceNode.Right());
+      Traverse(*queryNode.Left(), *referenceNode.Left());
+    }
+
+    // Now recurse to the right query child.
+    if (rule.LeftFirst(*queryNode.Right(), referenceNode))
+    {
+      Traverse(*queryNode.Right(), *referenceNode.Left());
+      Traverse(*queryNode.Right(), *referenceNode.Right());
+    }
+    else
+    {
+      Traverse(*queryNode.Right(), *referenceNode.Right());
+      Traverse(*queryNode.Right(), *referenceNode.Left());
+    }
+  }
+
+  // Now update any necessary information after recursion.
+  rule.UpdateAfterRecursion(queryNode, referenceNode);
+}
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif // __MLPACK_CORE_TREE_BINARY_SPACE_TREE_DUAL_TREE_TRAVERSER_IMPL_HPP
+

Copied: mlpack/trunk/src/mlpack/core/tree/binary_space_tree/single_tree_traverser_impl.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/traversers/single_tree_depth_first_traverser.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/binary_space_tree/single_tree_traverser_impl.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/binary_space_tree/single_tree_traverser_impl.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,79 @@
+/**
+ * @file single_tree_traverser_impl.hpp
+ * @author Ryan Curtin
+ *
+ * A nested class of BinarySpaceTree which traverses the entire tree with a
+ * given set of rules which indicate the branches which can be pruned and the
+ * order in which to recurse.  This traverser is a depth-first traverser.
+ */
+#ifndef __MLPACK_CORE_TREE_BINARY_SPACE_TREE_SINGLE_TREE_TRAVERSER_IMPL_HPP
+#define __MLPACK_CORE_TREE_BINARY_SPACE_TREE_SINGLE_TREE_TRAVERSER_IMPL_HPP
+
+// In case it hasn't been included yet.
+#include "single_tree_traverser.hpp"
+
+#include <stack>
+
+namespace mlpack {
+namespace tree {
+
+template<typename BoundType, typename StatisticType, typename MatType>
+template<typename RuleType>
+BinarySpaceTree<BoundType, StatisticType, MatType>::
+SingleTreeTraverser<RuleType>::SingleTreeTraverser(RuleType& rule) :
+    rule(rule),
+    numPrunes(0)
+{ /* Nothing to do. */ }
+
+template<typename BoundType, typename StatisticType, typename MatType>
+template<typename RuleType>
+void BinarySpaceTree<BoundType, StatisticType, MatType>::
+SingleTreeTraverser<RuleType>::Traverse(
+    const size_t queryIndex,
+    BinarySpaceTree<BoundType, StatisticType, MatType>& referenceNode)
+{
+  // This is a non-recursive implementation (which should be faster).
+
+  // The stack of points to look at.
+  std::stack<BinarySpaceTree<BoundType, StatisticType, MatType>*> pointStack;
+  pointStack.push(&referenceNode);
+
+  while (!pointStack.empty())
+  {
+    BinarySpaceTree<BoundType, StatisticType, MatType>* node = pointStack.top();
+    pointStack.pop();
+
+    // Check if we can prune this node.
+    if (rule.CanPrune(queryIndex, *node))
+    {
+      ++numPrunes;
+      continue;
+    }
+
+    // If we are a leaf, run the base case as necessary.
+    if (node->IsLeaf())
+    {
+      for (size_t i = node->Begin(); i < node->End(); ++i)
+        rule.BaseCase(queryIndex, i);
+    }
+    else
+    {
+      // Otherwise recurse by distance.
+      if (rule.LeftFirst(queryIndex, *node))
+      {
+        pointStack.push(node->Right());
+        pointStack.push(node->Left());
+      }
+      else
+      {
+        pointStack.push(node->Left());
+        pointStack.push(node->Right());
+      }
+    }
+  }
+}
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif

Deleted: mlpack/trunk/src/mlpack/core/tree/binary_space_tree.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/binary_space_tree.hpp	2012-08-04 22:03:50 UTC (rev 13332)
+++ mlpack/trunk/src/mlpack/core/tree/binary_space_tree.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -1,421 +0,0 @@
-/**
- * @file binary_space_tree.hpp
- *
- * Definition of generalized binary space partitioning tree (BinarySpaceTree).
- */
-#ifndef __MLPACK_CORE_TREE_BINARY_SPACE_TREE_HPP
-#define __MLPACK_CORE_TREE_BINARY_SPACE_TREE_HPP
-
-#include <mlpack/core.hpp>
-
-#include "statistic.hpp"
-
-#include "traversers/single_tree_depth_first_traverser.hpp"
-#include "traversers/dual_tree_depth_first_traverser.hpp"
-
-// Bad!
-#include <mlpack/methods/neighbor_search/neighbor_search_rules.hpp>
-
-namespace mlpack {
-namespace tree /** Trees and tree-building procedures. */ {
-
-/**
- * A binary space partitioning tree, such as a KD-tree or a ball tree.  Once the
- * bound and type of dataset is defined, the tree will construct itself.  Call
- * the constructor with the dataset to build the tree on, and the entire tree
- * will be built.
- *
- * This particular tree does not allow growth, so you cannot add or delete nodes
- * from it.  If you need to add or delete a node, the better procedure is to
- * rebuild the tree entirely.
- *
- * This tree does take one parameter, which is the leaf size to be used.
- *
- * @tparam BoundType The bound used for each node.  The valid types of bounds
- *     and the necessary skeleton interface for this class can be found in
- *     bounds/.
- * @tparam StatisticType Extra data contained in the node.  See statistic.hpp
- *     for the necessary skeleton interface.
- */
-template<typename BoundType,
-         typename StatisticType = EmptyStatistic,
-         typename MatType = arma::mat>
-class BinarySpaceTree
-{
- private:
-  //! The left child node.
-  BinarySpaceTree* left;
-  //! The right child node.
-  BinarySpaceTree* right;
-  //! The index of the first point in the dataset contained in this node (and
-  //! its children).
-  size_t begin;
-  //! The number of points of the dataset contained in this node (and its
-  //! children).
-  size_t count;
-  //! The bound object for this node.
-  BoundType bound;
-  //! Any extra data contained in the node.
-  StatisticType stat;
-  //! The leaf size.
-  size_t leafSize;
-  //! The dimension this node split on if it is a parent.
-  size_t splitDimension;
-
- public:
-  //! So other classes can use TreeType::Mat.
-  typedef MatType Mat;
-
-  //! Our preferred traverser.  We are using a struct here so that we don't need
-  //! to specify the template parameters.
-  template<typename RuleType>
-  struct PreferredTraverser
-  {
-    //! We use a depth-first search by default.
-    typedef SingleTreeDepthFirstTraverser<
-        BinarySpaceTree<BoundType, StatisticType, MatType>,
-        RuleType
-    > Type;
-  };
-
-  template<typename RuleType>
-  struct PreferredDualTraverser
-  {
-    typedef DualTreeDepthFirstTraverser<
-        BinarySpaceTree<BoundType, StatisticType, MatType>,
-        RuleType
-    > Type;
-  };
-
-  template<typename SortPolicy, typename MetricType, typename TreeType>
-  struct PreferredRules
-  {
-    typedef neighbor::NeighborSearchRules<SortPolicy, MetricType, TreeType>
-        Type;
-  };
-
-  /**
-   * Construct this as the root node of a binary space tree using the given
-   * dataset.  This will modify the ordering of the points in the dataset!
-   *
-   * @param data Dataset to create tree from.  This will be modified!
-   * @param leafSize Size of each leaf in the tree.
-   */
-  BinarySpaceTree(MatType& data, const size_t leafSize = 20);
-
-  /**
-   * Construct this as the root node of a binary space tree using the given
-   * dataset.  This will modify the ordering of points in the dataset!  A
-   * mapping of the old point indices to the new point indices is filled.
-   *
-   * @param data Dataset to create tree from.  This will be modified!
-   * @param oldFromNew Vector which will be filled with the old positions for
-   *     each new point.
-   * @param leafSize Size of each leaf in the tree.
-   */
-  BinarySpaceTree(MatType& data,
-                  std::vector<size_t>& oldFromNew,
-                  const size_t leafSize = 20);
-
-  /**
-   * Construct this as the root node of a binary space tree using the given
-   * dataset.  This will modify the ordering of points in the dataset!  A
-   * mapping of the old point indices to the new point indices is filled, as
-   * well as a mapping of the new point indices to the old point indices.
-   *
-   * @param data Dataset to create tree from.  This will be modified!
-   * @param oldFromNew Vector which will be filled with the old positions for
-   *     each new point.
-   * @param newFromOld Vector which will be filled with the new positions for
-   *     each old point.
-   * @param leafSize Size of each leaf in the tree.
-   */
-  BinarySpaceTree(MatType& data,
-                  std::vector<size_t>& oldFromNew,
-                  std::vector<size_t>& newFromOld,
-                  const size_t leafSize = 20);
-
-  /**
-   * Construct this node on a subset of the given matrix, starting at column
-   * begin and using count points.  The ordering of that subset of points
-   * will be modified!  This is used for recursive tree-building by the other
-   * constructors which don't specify point indices.
-   *
-   * @param data Dataset to create tree from.  This will be modified!
-   * @param begin Index of point to start tree construction with.
-   * @param count Number of points to use to construct tree.
-   * @param leafSize Size of each leaf in the tree.
-   */
-  BinarySpaceTree(MatType& data,
-                  const size_t begin,
-                  const size_t count,
-                  const size_t leafSize = 20);
-
-  /**
-   * Construct this node on a subset of the given matrix, starting at column
-   * begin_in and using count_in points.  The ordering of that subset of points
-   * will be modified!  This is used for recursive tree-building by the other
-   * constructors which don't specify point indices.
-   *
-   * A mapping of the old point indices to the new point indices is filled, but
-   * it is expected that the vector is already allocated with size greater than
-   * or equal to (begin_in + count_in), and if that is not true, invalid memory
-   * reads (and writes) will occur.
-   *
-   * @param data Dataset to create tree from.  This will be modified!
-   * @param begin Index of point to start tree construction with.
-   * @param count Number of points to use to construct tree.
-   * @param oldFromNew Vector which will be filled with the old positions for
-   *     each new point.
-   * @param leafSize Size of each leaf in the tree.
-   */
-  BinarySpaceTree(MatType& data,
-                  const size_t begin,
-                  const size_t count,
-                  std::vector<size_t>& oldFromNew,
-                  const size_t leafSize = 20);
-
-  /**
-   * Construct this node on a subset of the given matrix, starting at column
-   * begin_in and using count_in points.  The ordering of that subset of points
-   * will be modified!  This is used for recursive tree-building by the other
-   * constructors which don't specify point indices.
-   *
-   * A mapping of the old point indices to the new point indices is filled, as
-   * well as a mapping of the new point indices to the old point indices.  It is
-   * expected that the vector is already allocated with size greater than or
-   * equal to (begin_in + count_in), and if that is not true, invalid memory
-   * reads (and writes) will occur.
-   *
-   * @param data Dataset to create tree from.  This will be modified!
-   * @param begin Index of point to start tree construction with.
-   * @param count Number of points to use to construct tree.
-   * @param oldFromNew Vector which will be filled with the old positions for
-   *     each new point.
-   * @param newFromOld Vector which will be filled with the new positions for
-   *     each old point.
-   * @param leafSize Size of each leaf in the tree.
-   */
-  BinarySpaceTree(MatType& data,
-                  const size_t begin,
-                  const size_t count,
-                  std::vector<size_t>& oldFromNew,
-                  std::vector<size_t>& newFromOld,
-                  const size_t leafSize = 20);
-
-  /**
-   * Create an empty tree node.
-   */
-  BinarySpaceTree();
-
-  /**
-   * Deletes this node, deallocating the memory for the children and calling
-   * their destructors in turn.  This will invalidate any pointers or references
-   * to any nodes which are children of this one.
-   */
-  ~BinarySpaceTree();
-
-  /**
-   * Find a node in this tree by its begin and count (const).
-   *
-   * Every node is uniquely identified by these two numbers.
-   * This is useful for communicating position over the network,
-   * when pointers would be invalid.
-   *
-   * @param begin The begin() of the node to find.
-   * @param count The count() of the node to find.
-   * @return The found node, or NULL if not found.
-   */
-  const BinarySpaceTree* FindByBeginCount(size_t begin,
-                                          size_t count) const;
-
-  /**
-   * Find a node in this tree by its begin and count.
-   *
-   * Every node is uniquely identified by these two numbers.
-   * This is useful for communicating position over the network,
-   * when pointers would be invalid.
-   *
-   * @param begin The begin() of the node to find.
-   * @param count The count() of the node to find.
-   * @return The found node, or NULL if not found.
-   */
-  BinarySpaceTree* FindByBeginCount(size_t begin, size_t count);
-
-  //! Return the bound object for this node.
-  const BoundType& Bound() const;
-  //! Return the bound object for this node.
-  BoundType& Bound();
-
-  //! Return the statistic object for this node.
-  const StatisticType& Stat() const;
-  //! Return the statistic object for this node.
-  StatisticType& Stat();
-
-  //! Return whether or not this node is a leaf (true if it has no children).
-  bool IsLeaf() const;
-
-  //! Return the leaf size.
-  size_t LeafSize() const;
-
-  //! Fills the tree to the specified level.
-  size_t ExtendTree(const size_t level);
-
-  //! Gets the left child of this node.
-  BinarySpaceTree* Left() const;
-
-  //! Gets the right child of this node.
-  BinarySpaceTree* Right() const;
-
-  //! Return the number of children in this node.
-  size_t NumChildren() const;
-
-  /**
-   * Return the specified child (0 will be left, 1 will be right).  If the index
-   * is greater than 1, this will return the right child.
-   *
-   * @param child Index of child to return.
-   */
-  BinarySpaceTree& Child(const size_t child) const;
-
-  //! Return the number of points in this node (0 if not a leaf).
-  size_t NumPoints() const;
-
-  /**
-   * Return the index (with reference to the dataset) of a particular point in
-   * this node.  This will happily return invalid indices if the given index is
-   * greater than the number of points in this node (obtained with NumPoints())
-   * -- be careful.
-   *
-   * @param index Index of point for which a dataset index is wanted.
-   */
-  size_t Point(const size_t index) const;
-
-  //! Return the minimum distance to another node.
-  double MinDistance(const BinarySpaceTree* other) const
-  {
-    return bound.MinDistance(other->Bound());
-  }
-
-  //! Return the maximum distance to another node.
-  double MaxDistance(const BinarySpaceTree* other) const
-  {
-    return bound.MaxDistance(other->Bound());
-  }
-
-  //! Return the minimum distance to another point.
-  double MinDistance(const arma::vec& point) const
-  {
-    return bound.MinDistance(point);
-  }
-
-  //! Return the maximum distance to another point.
-  double MaxDistance(const arma::vec& point) const
-  {
-    return bound.MaxDistance(point);
-  }
-
-  /**
-  * Returns the dimension this parent's children are split on.
-  */
-  size_t GetSplitDimension() const;
-
-  /**
-   * Obtains the number of nodes in the tree, starting with this.
-   */
-  size_t TreeSize() const;
-
-  /**
-   * Obtains the number of levels below this node in the tree, starting with
-   * this.
-   */
-  size_t TreeDepth() const;
-
-  /**
-   * Gets the index of the beginning point of this subset.
-   */
-  size_t Begin() const;
-
-  /**
-   * Gets the index one beyond the last index in the subset.
-   */
-  size_t End() const;
-
-  /**
-   * Gets the number of points in this subset.
-   */
-  size_t Count() const;
-
-  //! Returns false: this tree type does not have self children.
-  static bool HasSelfChildren() { return false; }
-
- private:
-  /**
-   * Private copy constructor, available only to fill (pad) the tree to a
-   * specified level.
-   */
-  BinarySpaceTree(const size_t begin,
-                  const size_t count,
-                  BoundType bound,
-                  StatisticType stat,
-                  const int leafSize = 20) :
-      left(NULL),
-      right(NULL),
-      begin(begin),
-      count(count),
-      bound(bound),
-      stat(stat),
-      leafSize(leafSize) { }
-
-  BinarySpaceTree* CopyMe()
-  {
-    return new BinarySpaceTree(begin, count, bound, stat, leafSize);
-  }
-
-  /**
-   * Splits the current node, assigning its left and right children recursively.
-   *
-   * @param data Dataset which we are using.
-   */
-  void SplitNode(MatType& data);
-
-  /**
-   * Splits the current node, assigning its left and right children recursively.
-   * Also returns a list of the changed indices.
-   *
-   * @param data Dataset which we are using.
-   * @param oldFromNew Vector holding permuted indices.
-   */
-  void SplitNode(MatType& data, std::vector<size_t>& oldFromNew);
-
-  /**
-   * Find the index to split on for this node, given that we are splitting in
-   * the given split dimension on the specified split value.
-   *
-   * @param data Dataset which we are using.
-   * @param splitDim Dimension of dataset to split on.
-   * @param splitVal Value to split on, in the given split dimension.
-   */
-  size_t GetSplitIndex(MatType& data, int splitDim, double splitVal);
-
-  /**
-   * Find the index to split on for this node, given that we are splitting in
-   * the given split dimension on the specified split value.  Also returns a
-   * list of the changed indices.
-   *
-   * @param data Dataset which we are using.
-   * @param splitDim Dimension of dataset to split on.
-   * @param splitVal Value to split on, in the given split dimension.
-   * @param oldFromNew Vector holding permuted indices.
-   */
-  size_t GetSplitIndex(MatType& data, int splitDim, double splitVal,
-      std::vector<size_t>& oldFromNew);
-};
-
-}; // namespace tree
-}; // namespace mlpack
-
-// Include implementation.
-#include "binary_space_tree_impl.hpp"
-
-#endif

Deleted: mlpack/trunk/src/mlpack/core/tree/binary_space_tree_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/binary_space_tree_impl.hpp	2012-08-04 22:03:50 UTC (rev 13332)
+++ mlpack/trunk/src/mlpack/core/tree/binary_space_tree_impl.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -1,662 +0,0 @@
-/**
- * @file binary_space_tree_impl.hpp
- *
- * Implementation of generalized space partitioning tree.
- */
-#ifndef __MLPACK_CORE_TREE_BINARY_SPACE_TREE_IMPL_HPP
-#define __MLPACK_CORE_TREE_BINARY_SPACE_TREE_IMPL_HPP
-
-// In case it wasn't included already for some reason.
-#include "binary_space_tree.hpp"
-
-#include <mlpack/core/util/cli.hpp>
-#include <mlpack/core/util/log.hpp>
-
-namespace mlpack {
-namespace tree {
-
-// Each of these overloads is kept as a separate function to keep the overhead
-// from the two std::vectors out, if possible.
-template<typename BoundType, typename StatisticType, typename MatType>
-BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
-    MatType& data,
-    const size_t leafSize) :
-    left(NULL),
-    right(NULL),
-    begin(0), /* This root node starts at index 0, */
-    count(data.n_cols), /* and spans all of the dataset. */
-    bound(data.n_rows),
-    stat(),
-    leafSize(leafSize)
-{
-  // Do the actual splitting of this node.
-  SplitNode(data);
-
-  // Create the statistic depending on if we are a leaf or not.
-  if (IsLeaf())
-    stat = StatisticType(data, begin, count);
-  else
-    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
-    MatType& data,
-    std::vector<size_t>& oldFromNew,
-    const size_t leafSize) :
-    left(NULL),
-    right(NULL),
-    begin(0),
-    count(data.n_cols),
-    bound(data.n_rows),
-    stat(),
-    leafSize(leafSize)
-{
-  // Initialize oldFromNew correctly.
-  oldFromNew.resize(data.n_cols);
-  for (size_t i = 0; i < data.n_cols; i++)
-    oldFromNew[i] = i; // Fill with unharmed indices.
-
-  // Now do the actual splitting.
-  SplitNode(data, oldFromNew);
-
-  // Create the statistic depending on if we are a leaf or not.
-  if (IsLeaf())
-    stat = StatisticType(data, begin, count);
-  else
-    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
-    MatType& data,
-    std::vector<size_t>& oldFromNew,
-    std::vector<size_t>& newFromOld,
-    const size_t leafSize) :
-    left(NULL),
-    right(NULL),
-    begin(0),
-    count(data.n_cols),
-    bound(data.n_rows),
-    stat(),
-    leafSize(leafSize)
-{
-  // Initialize the oldFromNew vector correctly.
-  oldFromNew.resize(data.n_cols);
-  for (size_t i = 0; i < data.n_cols; i++)
-    oldFromNew[i] = i; // Fill with unharmed indices.
-
-  // Now do the actual splitting.
-  SplitNode(data, oldFromNew);
-
-  // Create the statistic depending on if we are a leaf or not.
-  if (IsLeaf())
-    stat = StatisticType(data, begin, count);
-  else
-    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
-
-  // Map the newFromOld indices correctly.
-  newFromOld.resize(data.n_cols);
-  for (size_t i = 0; i < data.n_cols; i++)
-    newFromOld[oldFromNew[i]] = i;
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
-    MatType& data,
-    const size_t begin,
-    const size_t count,
-    const size_t leafSize) :
-    left(NULL),
-    right(NULL),
-    begin(begin),
-    count(count),
-    bound(data.n_rows),
-    stat(),
-    leafSize(leafSize)
-{
-  // Perform the actual splitting.
-  SplitNode(data);
-
-  // Create the statistic depending on if we are a leaf or not.
-  if (IsLeaf())
-    stat = StatisticType(data, begin, count);
-  else
-    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
-    MatType& data,
-    const size_t begin,
-    const size_t count,
-    std::vector<size_t>& oldFromNew,
-    const size_t leafSize) :
-    left(NULL),
-    right(NULL),
-    begin(begin),
-    count(count),
-    bound(data.n_rows),
-    stat(),
-    leafSize(leafSize)
-{
-  // Hopefully the vector is initialized correctly!  We can't check that
-  // entirely but we can do a minor sanity check.
-  assert(oldFromNew.size() == data.n_cols);
-
-  // Perform the actual splitting.
-  SplitNode(data, oldFromNew);
-
-  // Create the statistic depending on if we are a leaf or not.
-  if (IsLeaf())
-    stat = StatisticType(data, begin, count);
-  else
-    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree(
-    MatType& data,
-    const size_t begin,
-    const size_t count,
-    std::vector<size_t>& oldFromNew,
-    std::vector<size_t>& newFromOld,
-    const size_t leafSize) :
-    left(NULL),
-    right(NULL),
-    begin(begin),
-    count(count),
-    bound(data.n_rows),
-    stat(),
-    leafSize(leafSize)
-{
-  // Hopefully the vector is initialized correctly!  We can't check that
-  // entirely but we can do a minor sanity check.
-  Log::Assert(oldFromNew.size() == data.n_cols);
-
-  // Perform the actual splitting.
-  SplitNode(data, oldFromNew);
-
-  // Create the statistic depending on if we are a leaf or not.
-  if (IsLeaf())
-    stat = StatisticType(data, begin, count);
-  else
-    stat = StatisticType(data, begin, count, left->Stat(), right->Stat());
-
-  // Map the newFromOld indices correctly.
-  newFromOld.resize(data.n_cols);
-  for (size_t i = 0; i < data.n_cols; i++)
-    newFromOld[oldFromNew[i]] = i;
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-BinarySpaceTree<BoundType, StatisticType, MatType>::BinarySpaceTree() :
-    left(NULL),
-    right(NULL),
-    begin(0),
-    count(0),
-    bound(),
-    stat(),
-    leafSize(20) // Default leaf size is 20.
-{
-  // Nothing to do.
-}
-
-/**
- * Deletes this node, deallocating the memory for the children and calling their
- * destructors in turn.  This will invalidate any pointers or references to any
- * nodes which are children of this one.
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-BinarySpaceTree<BoundType, StatisticType, MatType>::~BinarySpaceTree()
-{
-  if (left)
-    delete left;
-  if (right)
-    delete right;
-}
-
-/**
- * Find a node in this tree by its begin and count.
- *
- * Every node is uniquely identified by these two numbers.
- * This is useful for communicating position over the network,
- * when pointers would be invalid.
- *
- * @param queryBegin The Begin() of the node to find.
- * @param queryCount The Count() of the node to find.
- * @return The found node, or NULL if nothing is found.
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-const BinarySpaceTree<BoundType, StatisticType, MatType>*
-BinarySpaceTree<BoundType, StatisticType, MatType>::FindByBeginCount(
-    size_t queryBegin,
-    size_t queryCount) const
-{
-  Log::Assert(queryBegin >= begin);
-  Log::Assert(queryCount <= count);
-
-  if (begin == queryBegin && count == queryCount)
-    return this;
-  else if (IsLeaf())
-    return NULL;
-  else if (queryBegin < right->Begin())
-    return left->FindByBeginCount(queryBegin, queryCount);
-  else
-    return right->FindByBeginCount(queryBegin, queryCount);
-}
-
-/**
- * Find a node in this tree by its begin and count (const).
- *
- * Every node is uniquely identified by these two numbers.
- * This is useful for communicating position over the network,
- * when pointers would be invalid.
- *
- * @param queryBegin the Begin() of the node to find
- * @param queryCount the Count() of the node to find
- * @return the found node, or NULL
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-BinarySpaceTree<BoundType, StatisticType, MatType>*
-BinarySpaceTree<BoundType, StatisticType, MatType>::FindByBeginCount(
-    const size_t queryBegin,
-    const size_t queryCount)
-{
-  mlpack::Log::Assert(begin >= queryBegin);
-  mlpack::Log::Assert(count <= queryCount);
-
-  if (begin == queryBegin && count == queryCount)
-    return this;
-  else if (IsLeaf())
-    return NULL;
-  else if (queryBegin < left->End())
-    return left->FindByBeginCount(queryBegin, queryCount);
-  else if (right)
-    return right->FindByBeginCount(queryBegin, queryCount);
-  else
-    return NULL;
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-size_t BinarySpaceTree<BoundType, StatisticType, MatType>::ExtendTree(
-    size_t level)
-{
-  --level;
-  // Return the number of nodes duplicated.
-  size_t nodesDuplicated = 0;
-  if (level > 0)
-  {
-    if (!left)
-    {
-      left = CopyMe();
-      ++nodesDuplicated;
-    }
-    nodesDuplicated += left->ExtendTree(level);
-    if (right)
-    {
-      nodesDuplicated += right->ExtendTree(level);
-    }
-  }
-  return nodesDuplicated;
-}
-
-/* TODO: we can likely calculate this earlier, then store the
- *   result in a private member variable; for now, we can
- *   just calculate as needed...
- *
- *   Also, perhaps we should rewrite these recursive functions
- *     to avoid exceeding the stack limit
- */
-
-template<typename BoundType, typename StatisticType, typename MatType>
-size_t BinarySpaceTree<BoundType, StatisticType, MatType>::TreeSize() const
-{
-  // Recursively count the nodes on each side of the tree.  The plus one is
-  // because we have to count this node, too.
-  return 1 + (left ? left->TreeSize() : 0) + (right ? right->TreeSize() : 0);
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-size_t BinarySpaceTree<BoundType, StatisticType, MatType>::TreeDepth() const
-{
-  // Recursively count the depth on each side of the tree.  The plus one is
-  // because we have to count this node, too.
-  return 1 + std::max((left ? left->TreeDepth() : 0),
-                      (right ? right->TreeDepth() : 0));
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-inline const
-    BoundType& BinarySpaceTree<BoundType, StatisticType, MatType>::Bound() const
-{
-  return bound;
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-inline BoundType& BinarySpaceTree<BoundType, StatisticType, MatType>::Bound()
-{
-  return bound;
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-inline const StatisticType&
-    BinarySpaceTree<BoundType, StatisticType, MatType>::Stat() const
-{
-  return stat;
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-inline StatisticType& BinarySpaceTree<BoundType, StatisticType, MatType>::Stat()
-{
-  return stat;
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-inline size_t BinarySpaceTree<BoundType, StatisticType, MatType>::GetSplitDimension() const
-{
-  return splitDimension;
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-inline bool BinarySpaceTree<BoundType, StatisticType, MatType>::IsLeaf() const
-{
-  return !left;
-}
-
-/**
- * Gets the left branch of the tree.
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-inline BinarySpaceTree<BoundType, StatisticType, MatType>*
-    BinarySpaceTree<BoundType, StatisticType, MatType>::Left() const
-{
-  return left;
-}
-
-/**
- * Gets the right branch.
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-inline BinarySpaceTree<BoundType, StatisticType, MatType>*
-    BinarySpaceTree<BoundType, StatisticType, MatType>::Right() const
-{
-  return right;
-}
-
-/**
- * Returns the number of children in this node.
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-inline size_t
-    BinarySpaceTree<BoundType, StatisticType, MatType>::NumChildren() const
-{
-  if (left && right)
-    return 2;
-  if (left)
-    return 1;
-
-  return 0;
-}
-
-/**
- * Return the specified child.
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-inline BinarySpaceTree<BoundType, StatisticType, MatType>&
-    BinarySpaceTree<BoundType, StatisticType, MatType>::Child(
-    const size_t child) const
-{
-  if (child == 0)
-    return *left;
-  else
-    return *right;
-}
-
-/**
- * Return the number of points contained in this node.
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-inline size_t
-BinarySpaceTree<BoundType, StatisticType, MatType>::NumPoints() const
-{
-  if (left)
-    return 0;
-
-  return count;
-}
-
-/**
- * Return the index of a particular point contained in this node.
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-inline size_t
-BinarySpaceTree<BoundType, StatisticType, MatType>::Point(const size_t index)
-    const
-{
-  return (begin + index);
-}
-
-/**
- * Gets the index of the begin point of this subset.
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-inline size_t BinarySpaceTree<BoundType, StatisticType, MatType>::Begin() const
-{
-  return begin;
-}
-
-/**
- * Gets the index one beyond the last index in the series.
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-inline size_t BinarySpaceTree<BoundType, StatisticType, MatType>::End() const
-{
-  return begin + count;
-}
-
-/**
- * Gets the number of points in this subset.
- */
-template<typename BoundType, typename StatisticType, typename MatType>
-inline size_t BinarySpaceTree<BoundType, StatisticType, MatType>::Count() const
-{
-  return count;
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-void
-    BinarySpaceTree<BoundType, StatisticType, MatType>::SplitNode(MatType& data)
-{
-  // We need to expand the bounds of this node properly.
-  bound |= data.cols(begin, begin + count - 1);
-
-  // Now, check if we need to split at all.
-  if (count <= leafSize)
-    return; // We can't split this.
-
-  // Figure out which dimension to split on.
-  size_t splitDim = data.n_rows; // Indicate invalid by maxDim + 1.
-  double maxWidth = -1;
-
-  // Find the split dimension.
-  for (size_t d = 0; d < data.n_rows; d++)
-  {
-    double width = bound[d].Width();
-
-    if (width > maxWidth)
-    {
-      maxWidth = width;
-      splitDim = d;
-    }
-  }
-  splitDimension = splitDim;
-
-  // Split in the middle of that dimension.
-  double splitVal = bound[splitDim].Mid();
-
-  if (maxWidth == 0) // All these points are the same.  We can't split.
-    return;
-
-  // Perform the actual splitting.  This will order the dataset such that points
-  // with value in dimension split_dim less than or equal to splitVal are on
-  // the left of splitCol, and points with value in dimension splitDim greater
-  // than splitVal are on the right side of splitCol.
-  size_t splitCol = GetSplitIndex(data, splitDim, splitVal);
-
-  // Now that we know the split column, we will recursively split the children
-  // by calling their constructors (which perform this splitting process).
-  left = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, begin,
-      splitCol - begin, leafSize);
-  right = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, splitCol,
-      begin + count - splitCol, leafSize);
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-void BinarySpaceTree<BoundType, StatisticType, MatType>::SplitNode(
-    MatType& data,
-    std::vector<size_t>& oldFromNew)
-{
-  // This should be a single function for Bound.
-  // We need to expand the bounds of this node properly.
-  bound |= data.cols(begin, begin + count - 1);
-
-  // First, check if we need to split at all.
-  if (count <= leafSize)
-    return; // We can't split this.
-
-  // Figure out which dimension to split on.
-  size_t splitDim = data.n_rows; // Indicate invalid by max_dim + 1.
-  double maxWidth = -1;
-
-  // Find the split dimension.
-  for (size_t d = 0; d < data.n_rows; d++)
-  {
-    double width = bound[d].Width();
-
-    if (width > maxWidth)
-    {
-      maxWidth = width;
-      splitDim = d;
-    }
-  }
-  splitDimension = splitDim;
-
-  // Split in the middle of that dimension.
-  double splitVal = bound[splitDim].Mid();
-
-  if (maxWidth == 0) // All these points are the same.  We can't split.
-    return;
-
-  // Perform the actual splitting.  This will order the dataset such that points
-  // with value in dimension split_dim less than or equal to splitVal are on
-  // the left of splitCol, and points with value in dimension splitDim greater
-  // than splitVal are on the right side of splitCol.
-  size_t splitCol = GetSplitIndex(data, splitDim, splitVal, oldFromNew);
-
-  // Now that we know the split column, we will recursively split the children
-  // by calling their constructors (which perform this splitting process).
-  left = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, begin,
-      splitCol - begin, oldFromNew, leafSize);
-  right = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, splitCol,
-      begin + count - splitCol, oldFromNew, leafSize);
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-size_t BinarySpaceTree<BoundType, StatisticType, MatType>::GetSplitIndex(
-    MatType& data,
-    int splitDim,
-    double splitVal)
-{
-  // This method modifies the input dataset.  We loop both from the left and
-  // right sides of the points contained in this node.  The points less than
-  // split_val should be on the left side of the matrix, and the points greater
-  // than split_val should be on the right side of the matrix.
-  size_t left = begin;
-  size_t right = begin + count - 1;
-
-  // First half-iteration of the loop is out here because the termination
-  // condition is in the middle.
-  while ((data(splitDim, left) < splitVal) && (left <= right))
-    left++;
-  while ((data(splitDim, right) >= splitVal) && (left <= right))
-    right--;
-
-  while (left <= right)
-  {
-    // Swap columns.
-    data.swap_cols(left, right);
-
-    // See how many points on the left are correct.  When they are correct,
-    // increase the left counter accordingly.  When we encounter one that isn't
-    // correct, stop.  We will switch it later.
-    while ((data(splitDim, left) < splitVal) && (left <= right))
-      left++;
-
-    // Now see how many points on the right are correct.  When they are correct,
-    // decrease the right counter accordingly.  When we encounter one that isn't
-    // correct, stop.  We will switch it with the wrong point we found in the
-    // previous loop.
-    while ((data(splitDim, right) >= splitVal) && (left <= right))
-      right--;
-  }
-
-  Log::Assert(left == right + 1);
-
-  return left;
-}
-
-template<typename BoundType, typename StatisticType, typename MatType>
-size_t BinarySpaceTree<BoundType, StatisticType, MatType>::GetSplitIndex(
-    MatType& data,
-    int splitDim,
-    double splitVal,
-    std::vector<size_t>& oldFromNew)
-{
-  // This method modifies the input dataset.  We loop both from the left and
-  // right sides of the points contained in this node.  The points less than
-  // split_val should be on the left side of the matrix, and the points greater
-  // than split_val should be on the right side of the matrix.
-  size_t left = begin;
-  size_t right = begin + count - 1;
-
-  // First half-iteration of the loop is out here because the termination
-  // condition is in the middle.
-  while ((data(splitDim, left) < splitVal) && (left <= right))
-    left++;
-  while ((data(splitDim, right) >= splitVal) && (left <= right))
-    right--;
-
-  while (left <= right)
-  {
-    // Swap columns.
-    data.swap_cols(left, right);
-
-    // Update the indices for what we changed.
-    size_t t = oldFromNew[left];
-    oldFromNew[left] = oldFromNew[right];
-    oldFromNew[right] = t;
-
-    // See how many points on the left are correct.  When they are correct,
-    // increase the left counter accordingly.  When we encounter one that isn't
-    // correct, stop.  We will switch it later.
-    while ((data(splitDim, left) < splitVal) && (left <= right))
-      left++;
-
-    // Now see how many points on the right are correct.  When they are correct,
-    // decrease the right counter accordingly.  When we encounter one that isn't
-    // correct, stop.  We will switch it with the wrong point we found in the
-    // previous loop.
-    while ((data(splitDim, right) >= splitVal) && (left <= right))
-      right--;
-  }
-
-  Log::Assert(left == right + 1);
-
-  return left;
-}
-
-}; // namespace tree
-}; // namespace mlpack
-
-#endif

Copied: mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/cover_tree.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,335 @@
+/**
+ * @file cover_tree.hpp
+ * @author Ryan Curtin
+ *
+ * Definition of CoverTree, which can be used in place of the BinarySpaceTree.
+ */
+#ifndef __MLPACK_CORE_TREE_COVER_TREE_COVER_TREE_HPP
+#define __MLPACK_CORE_TREE_COVER_TREE_COVER_TREE_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/core/metrics/lmetric.hpp>
+#include "first_point_is_root.hpp"
+#include "../statistic.hpp"
+
+namespace mlpack {
+namespace tree {
+
+/**
+ * A cover tree is a tree specifically designed to speed up nearest-neighbor
+ * computation in high-dimensional spaces.  Each non-leaf node references a
+ * point and has a nonzero number of children, including a "self-child" which
+ * references the same point.  A leaf node represents only one point.
+ *
+ * The tree can be thought of as a hierarchy with the root node at the top level
+ * and the leaf nodes at the bottom level.  Each level in the tree has an
+ * assigned 'scale' i.  The tree follows these three conditions:
+ *
+ * - nesting: the level C_i is a subset of the level C_{i - 1}.
+ * - covering: all node in level C_{i - 1} have at least one node in the
+ *     level C_i with distance less than or equal to EC^i (exactly one of these
+ *     is a parent of the point in level C_{i - 1}.
+ * - separation: all nodes in level C_i have distance greater than EC^i to all
+ *     other nodes in level C_i.
+ *
+ * The value 'EC' refers to the expansion constant, which is a parameter of the
+ * tree.  These three properties make the cover tree very good for fast,
+ * high-dimensional nearest-neighbor search.
+ *
+ * The theoretical structure of the tree contains many 'implicit' nodes which
+ * only have a "self-child" (a child referencing the same point, but at a lower
+ * scale level).  This practical implementation only constructs explicit nodes
+ * -- non-leaf nodes with more than one child.  A leaf node has no children, and
+ * its scale level is INT_MIN.
+ *
+ * For more information on cover trees, see
+ *
+ * @code
+ * @inproceedings{
+ *   author = {Beygelzimer, Alina and Kakade, Sham and Langford, John},
+ *   title = {Cover trees for nearest neighbor},
+ *   booktitle = {Proceedings of the 23rd International Conference on Machine
+ *     Learning},
+ *   series = {ICML '06},
+ *   year = {2006},
+ *   pages = {97--104]
+ * }
+ * @endcode
+ *
+ * For information on runtime bounds of the nearest-neighbor computation using
+ * cover trees, see the following paper, presented at NIPS 2009:
+ *
+ * @code
+ * @inproceedings{
+ *   author = {Ram, P., and Lee, D., and March, W.B., and Gray, A.G.},
+ *   title = {Linear-time Algorithms for Pairwise Statistical Problems},
+ *   booktitle = {Advances in Neural Information Processing Systems 22},
+ *   editor = {Y. Bengio and D. Schuurmans and J. Lafferty and C.K.I. Williams
+ *     and A. Culotta},
+ *   pages = {1527--1535},
+ *   year = {2009}
+ * }
+ * @endcode
+ *
+ * The CoverTree class offers three template parameters; a custom metric type
+ * can be used with MetricType (this class defaults to the L2-squared metric).
+ * The root node's point can be chosen with the RootPointPolicy; by default, the
+ * FirstPointIsRoot policy is used, meaning the first point in the dataset is
+ * used.  The StatisticType policy allows you to define statistics which can be
+ * gathered during the creation of the tree.
+ *
+ * @tparam MetricType Metric type to use during tree construction.
+ * @tparam RootPointPolicy Determines which point to use as the root node.
+ * @tparam StatisticType Statistic to be used during tree creation.
+ */
+template<typename MetricType = metric::LMetric<2>,
+         typename RootPointPolicy = FirstPointIsRoot,
+         typename StatisticType = EmptyStatistic>
+class CoverTree
+{
+ public:
+  typedef arma::mat Mat;
+
+  /**
+   * Create the cover tree with the given dataset and given expansion constant.
+   * The dataset will not be modified during the building procedure (unlike
+   * BinarySpaceTree).
+   *
+   * @param dataset Reference to the dataset to build a tree on.
+   * @param expansionConstant Expansion constant (EC) to use during tree
+   *      building (default 2.0).
+   */
+  CoverTree(const arma::mat& dataset,
+            const double expansionConstant = 2.0,
+            MetricType* metric = NULL);
+
+  /**
+   * Construct a child cover tree node.  This constructor is not meant to be
+   * used externally, but it could be used to insert another node into a tree.
+   * This procedure uses only one vector for the near set, the far set, and the
+   * used set (this is to prevent unnecessary memory allocation in recursive
+   * calls to this constructor).  Therefore, the size of the near set, far set,
+   * and used set must be passed in.  The near set will be entirely used up, and
+   * some of the far set may be used.  The value of usedSetSize will be set to
+   * the number of points used in the construction of this node, and the value
+   * of farSetSize will be modified to reflect the number of points in the far
+   * set _after_ the construction of this node.
+   *
+   * If you are calling this manually, be careful that the given scale is
+   * as small as possible, or you may be creating an implicit node in your tree.
+   *
+   * @param dataset Reference to the dataset to build a tree on.
+   * @param expansionConstant Expansion constant (EC) to use during tree
+   *     building.
+   * @param pointIndex Index of the point this node references.
+   * @param scale Scale of this level in the tree.
+   * @param indices Array of indices, ordered [ nearSet | farSet | usedSet ];
+   *     will be modified to [ farSet | usedSet ].
+   * @param distances Array of distances, ordered the same way as the indices.
+   *     These represent the distances between the point specified by pointIndex
+   *     and each point in the indices array.
+   * @param nearSetSize Size of the near set; if 0, this will be a leaf.
+   * @param farSetSize Size of the far set; may be modified (if this node uses
+   *     any points in the far set).
+   * @param usedSetSize The number of points used will be added to this number.
+   */
+  CoverTree(const arma::mat& dataset,
+            const double expansionConstant,
+            const size_t pointIndex,
+            const int scale,
+            const double parentDistance,
+            arma::Col<size_t>& indices,
+            arma::vec& distances,
+            size_t nearSetSize,
+            size_t& farSetSize,
+            size_t& usedSetSize,
+            MetricType& metric = NULL);
+
+  /**
+   * Delete this cover tree node and its children.
+   */
+  ~CoverTree();
+
+  //! A single-tree cover tree traverser; see single_tree_traverser.hpp for
+  //! implementation.
+  template<typename RuleType>
+  class SingleTreeTraverser;
+
+  //! A dual-tree cover tree traverser; see dual_tree_traverser.hpp.
+  template<typename RuleType>
+  class DualTreeTraverser;
+
+  //! Get a reference to the dataset.
+  const arma::mat& Dataset() const { return dataset; }
+
+  //! Get the index of the point which this node represents.
+  size_t Point() const { return point; }
+  //! For compatibility with other trees; the argument is ignored.
+  size_t Point(const size_t) const { return point; }
+
+  // Fake
+  CoverTree* Left() const { return NULL; }
+  CoverTree* Right() const { return NULL; }
+  size_t Begin() const { return 0; }
+  size_t Count() const { return 0; }
+  size_t End() const { return 0; }
+  bool IsLeaf() const { return (children.size() == 0); }
+  size_t NumPoints() const { return 1; }
+
+  //! Get a particular child node.
+  const CoverTree& Child(const size_t index) const { return *children[index]; }
+  //! Modify a particular child node.
+  CoverTree& Child(const size_t index) { return *children[index]; }
+
+  //! Get the number of children.
+  size_t NumChildren() const { return children.size(); }
+
+  //! Get the children.
+  const std::vector<CoverTree*>& Children() const { return children; }
+  //! Modify the children manually (maybe not a great idea).
+  std::vector<CoverTree*>& Children() { return children; }
+
+  //! Get the scale of this node.
+  int Scale() const { return scale; }
+  //! Modify the scale of this node.  Be careful...
+  int& Scale() { return scale; }
+
+  //! Get the expansion constant.
+  double ExpansionConstant() const { return expansionConstant; }
+  //! Modify the expansion constant; don't do this, you'll break everything.
+  double& ExpansionConstant() { return expansionConstant; }
+
+  //! Get the statistic for this node.
+  const StatisticType& Stat() const { return stat; }
+  //! Modify the statistic for this node.
+  StatisticType& Stat() { return stat; }
+
+  //! Return the minimum distance to another node.
+  double MinDistance(const CoverTree* other) const;
+
+  //! Return the minimum distance to another point.
+  double MinDistance(const arma::vec& other) const;
+
+  //! Return the maximum distance to another node.
+  double MaxDistance(const CoverTree* other) const;
+
+  //! Return the maximum distance to another point.
+  double MaxDistance(const arma::vec& other) const;
+
+  //! Returns true: this tree does have self-children.
+  static bool HasSelfChildren() { return true; }
+
+  //! Get the distance to the parent.
+  double ParentDistance() const { return parentDistance; }
+
+  //! Get the distance to teh furthest descendant.
+  double FurthestDescendantDistance() const
+  { return furthestDescendantDistance; }
+
+ private:
+  //! Reference to the matrix which this tree is built on.
+  const arma::mat& dataset;
+
+  //! Index of the point in the matrix which this node represents.
+  size_t point;
+
+  //! The list of children; the first is the self-child.
+  std::vector<CoverTree*> children;
+
+  //! Scale level of the node.
+  int scale;
+
+  //! The expansion constant used to construct the tree.
+  double expansionConstant;
+
+  //! The instantiated statistic.
+  StatisticType stat;
+
+  //! Distance to the parent.
+  double parentDistance;
+
+  //! Distance to the furthest descendant.
+  double furthestDescendantDistance;
+
+  /**
+   * Fill the vector of distances with the distances between the point specified
+   * by pointIndex and each point in the indices array.  The distances of the
+   * first pointSetSize points in indices are calculated (so, this does not
+   * necessarily need to use all of the points in the arrays).
+   *
+   * @param pointIndex Point to build the distances for.
+   * @param indices List of indices to compute distances for.
+   * @param distances Vector to store calculated distances in.
+   * @param pointSetSize Number of points in arrays to calculate distances for.
+   */
+  void ComputeDistances(const size_t pointIndex,
+                        const arma::Col<size_t>& indices,
+                        arma::vec& distances,
+                        const size_t pointSetSize,
+                        MetricType& metric);
+  /**
+   * Split the given indices and distances into a near and a far set, returning
+   * the number of points in the near set.  The distances must already be
+   * initialized.  This will order the indices and distances such that the
+   * points in the near set make up the first part of the array and the far set
+   * makes up the rest:  [ nearSet | farSet ].
+   *
+   * @param indices List of indices; will be reordered.
+   * @param distances List of distances; will be reordered.
+   * @param bound If the distance is less than or equal to this bound, the point
+   *      is placed into the near set.
+   * @param pointSetSize Size of point set (because we may be sorting a smaller
+   *      list than the indices vector will hold).
+   */
+  size_t SplitNearFar(arma::Col<size_t>& indices,
+                      arma::vec& distances,
+                      const double bound,
+                      const size_t pointSetSize);
+
+  /**
+   * Assuming that the list of indices and distances is sorted as
+   * [ childFarSet | childUsedSet | farSet | usedSet ],
+   * resort the sets so the organization is
+   * [ childFarSet | farSet | childUsedSet | usedSet ].
+   *
+   * The size_t parameters specify the sizes of each set in the array.  Only the
+   * ordering of the indices and distances arrays will be modified (not their
+   * actual contents).
+   *
+   * The size of any of the four sets can be zero and this method will handle
+   * that case accordingly.
+   *
+   * @param indices List of indices to sort.
+   * @param distances List of distances to sort.
+   * @param childFarSetSize Number of points in child far set (childFarSet).
+   * @param childUsedSetSize Number of points in child used set (childUsedSet).
+   * @param farSetSize Number of points in far set (farSet).
+   */
+  size_t SortPointSet(arma::Col<size_t>& indices,
+                      arma::vec& distances,
+                      const size_t childFarSetSize,
+                      const size_t childUsedSetSize,
+                      const size_t farSetSize);
+
+  void MoveToUsedSet(arma::Col<size_t>& indices,
+                     arma::vec& distances,
+                     size_t& nearSetSize,
+                     size_t& farSetSize,
+                     size_t& usedSetSize,
+                     arma::Col<size_t>& childIndices,
+                     const size_t childFarSetSize,
+                     const size_t childUsedSetSize);
+  size_t PruneFarSet(arma::Col<size_t>& indices,
+                     arma::vec& distances,
+                     const double bound,
+                     const size_t nearSetSize,
+                     const size_t pointSetSize);
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "cover_tree_impl.hpp"
+
+#endif

Copied: mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree_impl.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/cover_tree_impl.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree_impl.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree/cover_tree_impl.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,770 @@
+/**
+ * @file cover_tree_impl.hpp
+ * @author Ryan Curtin
+ *
+ * Implementation of CoverTree class.
+ */
+#ifndef __MLPACK_CORE_TREE_COVER_TREE_COVER_TREE_IMPL_HPP
+#define __MLPACK_CORE_TREE_COVER_TREE_COVER_TREE_IMPL_HPP
+
+// In case it hasn't already been included.
+#include "cover_tree.hpp"
+
+namespace mlpack {
+namespace tree {
+
+// Create the cover tree.
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+CoverTree<MetricType, RootPointPolicy, StatisticType>::CoverTree(
+    const arma::mat& dataset,
+    const double expansionConstant,
+    MetricType* metric) :
+    dataset(dataset),
+    point(RootPointPolicy::ChooseRoot(dataset)),
+    expansionConstant(expansionConstant),
+    parentDistance(0),
+    furthestDescendantDistance(0)
+{
+  // If we need to create a metric, do that.  We'll just do it on the heap.
+  bool localMetric = false;
+  if (metric == NULL)
+  {
+    localMetric = true; // So we know we need to free it.
+    metric = new MetricType();
+  }
+
+  // Kick off the building.  Create the indices array and the distances array.
+  arma::Col<size_t> indices = arma::linspace<arma::Col<size_t> >(1,
+      dataset.n_cols - 1, dataset.n_cols - 1);
+  // This is now [1 2 3 4 ... n].  We must be sure that our point does not
+  // occur.
+  if (point != 0)
+    indices[point - 1] = 0; // Put 0 back into the set; remove what was there.
+
+  arma::vec distances(dataset.n_cols - 1);
+
+  // Build the initial distances.
+  ComputeDistances(point, indices, distances, dataset.n_cols - 1, *metric);
+
+  // Now determine the scale factor of the root node.
+  const double maxDistance = max(distances);
+  scale = (int) ceil(log(maxDistance) / log(expansionConstant));
+  const double bound = pow(expansionConstant, scale - 1);
+
+  // Unfortunately, we can't call out to other constructors, so we have to copy
+  // a little bit of code from the other constructor.  First we build the self
+  // child.
+  size_t childNearSetSize = SplitNearFar(indices, distances, bound,
+      dataset.n_cols - 1);
+  size_t childFarSetSize = (dataset.n_cols - 1) - childNearSetSize;
+  size_t usedSetSize = 0;
+  children.push_back(new CoverTree(dataset, expansionConstant, point, scale - 1,
+      0, indices, distances, childNearSetSize, childFarSetSize, usedSetSize,
+      *metric));
+
+  furthestDescendantDistance = children[0]->FurthestDescendantDistance();
+
+  // If we created an implicit node, take its self-child instead (this could
+  // happen multiple times).
+  while (children[children.size() - 1]->NumChildren() == 1)
+  {
+    CoverTree* old = children[children.size() - 1];
+    children.erase(children.begin() + children.size() - 1);
+
+    // Now take its child.
+    children.push_back(&(old->Child(0)));
+
+    // Remove its child (so it doesn't delete it).
+    old->Children().erase(old->Children().begin() + old->Children().size() - 1);
+
+    // Now delete it.
+    delete old;
+  }
+
+  size_t nearSetSize = (dataset.n_cols - 1) - usedSetSize;
+
+  // We have no far set, so the array is organized thusly:
+  // [ near | used ].  No resorting is necessary.
+  // Therefore, go ahead and build the children.
+  while (nearSetSize > 0)
+  {
+    // We want to select the furthest point in the near set as the next child.
+    size_t newPointIndex = nearSetSize - 1;
+
+    // Swap to front if necessary.
+    if (newPointIndex != 0)
+    {
+      const size_t tempIndex = indices[newPointIndex];
+      const double tempDist = distances[newPointIndex];
+
+      indices[newPointIndex] = indices[0];
+      distances[newPointIndex] = distances[0];
+
+      indices[0] = tempIndex;
+      distances[0] = tempDist;
+    }
+
+    if (distances[0] > furthestDescendantDistance)
+      furthestDescendantDistance = distances[0];
+
+    size_t childUsedSetSize = 0;
+
+    // If there's only one point left, we don't need this crap.
+    if (nearSetSize == 1)
+    {
+      size_t childNearSetSize = 0;
+      size_t childFarSetSize = 0;
+      children.push_back(new CoverTree(dataset, expansionConstant,
+          indices[0], scale - 1, distances[0], indices, distances,
+          childNearSetSize, childFarSetSize, usedSetSize, *metric));
+
+      // And we're done.
+      break;
+    }
+
+    // Create the near and far set indices and distance vectors.
+    arma::Col<size_t> childIndices(nearSetSize);
+    childIndices.rows(0, (nearSetSize - 2)) = indices.rows(1, nearSetSize - 1);
+    // Put the current point into the used set, so when we move our indices to
+    // the used set, this will be done for us.
+    childIndices(nearSetSize - 1) = indices[0];
+    arma::vec childDistances(nearSetSize);
+
+    // Build distances for the child.
+    ComputeDistances(indices[0], childIndices, childDistances,
+        nearSetSize - 1, *metric);
+    childDistances(nearSetSize - 1) = 0;
+
+    // Split into near and far sets for this point.
+    childNearSetSize = SplitNearFar(childIndices, childDistances, bound,
+        nearSetSize - 1);
+
+    // Build this child (recursively).
+    childUsedSetSize = 1; // Mark self point as used.
+    childFarSetSize = ((nearSetSize - 1) - childNearSetSize);
+    children.push_back(new CoverTree(dataset, expansionConstant, indices[0],
+        scale - 1, distances[0], childIndices, childDistances, childNearSetSize,
+        childFarSetSize, childUsedSetSize, *metric));
+
+    // If we created an implicit node, take its self-child instead (this could
+    // happen multiple times).
+    while (children[children.size() - 1]->NumChildren() == 1)
+    {
+      CoverTree* old = children[children.size() - 1];
+      children.erase(children.begin() + children.size() - 1);
+
+      // Now take its child.
+      children.push_back(&(old->Child(0)));
+
+      // Remove its child (so it doesn't delete it).
+      old->Children().erase(old->Children().begin() + old->Children().size()
+          - 1);
+
+      // Now delete it.
+      delete old;
+    }
+
+    // Now with the child created, it returns the childIndices and
+    // childDistances vectors in this form:
+    // [ childFar | childUsed ]
+    // For each point in the childUsed set, we must move that point to the used
+    // set in our own vector.
+    size_t farSetSize = 0;
+    MoveToUsedSet(indices, distances, nearSetSize, farSetSize, usedSetSize,
+        childIndices, childFarSetSize, childUsedSetSize);
+  }
+
+  // Calculate furthest descendant.
+  for (size_t i = 0; i < usedSetSize; ++i)
+    if (distances[i] > furthestDescendantDistance)
+      furthestDescendantDistance = distances[i];
+
+  Log::Assert(furthestDescendantDistance <= pow(expansionConstant, scale + 1));
+
+  if (localMetric)
+    delete metric;
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+CoverTree<MetricType, RootPointPolicy, StatisticType>::CoverTree(
+    const arma::mat& dataset,
+    const double expansionConstant,
+    const size_t pointIndex,
+    const int scale,
+    const double parentDistance,
+    arma::Col<size_t>& indices,
+    arma::vec& distances,
+    size_t nearSetSize,
+    size_t& farSetSize,
+    size_t& usedSetSize,
+    MetricType& metric) :
+    dataset(dataset),
+    point(pointIndex),
+    scale(scale),
+    expansionConstant(expansionConstant),
+    parentDistance(parentDistance),
+    furthestDescendantDistance(0)
+{
+  // If the size of the near set is 0, this is a leaf.
+  if (nearSetSize == 0)
+  {
+    this->scale = INT_MIN;
+    return;
+  }
+
+  // Determine the next scale level.  This should be the first level where there
+  // are any points in the far set.  So, if we know the maximum distance in the
+  // distances array, this will be the largest i such that
+  //   maxDistance > pow(ec, i)
+  // and using this for the scale factor should guarantee we are not creating an
+  // implicit node.  If the maximum distance is 0, every point in the near set
+  // will be created as a leaf, and a child to this node.  We also do not need
+  // to change the furthestChildDistance or furthestDescendantDistance.
+  const double maxDistance = max(distances.rows(0,
+      nearSetSize + farSetSize - 1));
+  if (maxDistance == 0)
+  {
+    // Make the self child at the lowest possible level.
+    // This should not modify farSetSize or usedSetSize.
+    size_t tempSize = 0;
+    children.push_back(new CoverTree(dataset, expansionConstant, pointIndex,
+        INT_MIN, 0, indices, distances, 0, tempSize, usedSetSize, metric));
+
+    // Every point in the near set should be a leaf.
+    for (size_t i = 0; i < nearSetSize; ++i)
+    {
+      // farSetSize and usedSetSize will not be modified.
+      children.push_back(new CoverTree(dataset, expansionConstant, indices[i],
+          INT_MIN, 0, indices, distances, 0, tempSize, usedSetSize, metric));
+      usedSetSize++;
+    }
+
+    // Re-sort the dataset.  We have
+    // [ used | far | other used ]
+    // and we want
+    // [ far | all used ].
+    SortPointSet(indices, distances, 0, usedSetSize, farSetSize);
+
+    return;
+  }
+
+  const int nextScale = std::min(scale,
+      (int) ceil(log(maxDistance) / log(expansionConstant))) - 1;
+  const double bound = pow(expansionConstant, nextScale);
+
+  // This needs to be taken out.  It's a sanity check for now.
+  Log::Assert(nextScale < scale);
+
+  // First, make the self child.  We must split the given near set into the near
+  // set and far set for the self child.
+  size_t childNearSetSize =
+      SplitNearFar(indices, distances, bound, nearSetSize);
+
+  // Build the self child (recursively).
+  size_t childFarSetSize = nearSetSize - childNearSetSize;
+  size_t childUsedSetSize = 0;
+  children.push_back(new CoverTree(dataset, expansionConstant, pointIndex,
+      nextScale, 0, indices, distances, childNearSetSize, childFarSetSize,
+      childUsedSetSize, metric));
+
+  // The self-child can't modify the furthestChildDistance away from 0, but it
+  // can modify the furthestDescendantDistance.
+  furthestDescendantDistance = children[0]->FurthestDescendantDistance();
+
+  // If we created an implicit node, take its self-child instead (this could
+  // happen multiple times).
+  while (children[children.size() - 1]->NumChildren() == 1)
+  {
+    CoverTree* old = children[children.size() - 1];
+    children.erase(children.begin() + children.size() - 1);
+
+    // Now take its child.
+    children.push_back(&(old->Child(0)));
+
+    // Remove its child (so it doesn't delete it).
+    old->Children().erase(old->Children().begin() + old->Children().size() - 1);
+
+    // Now delete it.
+    delete old;
+  }
+
+  // Now the arrays, in memory, look like this:
+  // [ childFar | childUsed | far | used ]
+  // but we need to move the used points past our far set:
+  // [ childFar | far | childUsed + used ]
+  // and keeping in mind that childFar = our near set,
+  // [ near | far | childUsed + used ]
+  // is what we are trying to make.
+  SortPointSet(indices, distances, childFarSetSize, childUsedSetSize,
+      farSetSize);
+
+  // Update size of near set and used set.
+  nearSetSize -= childUsedSetSize;
+  usedSetSize += childUsedSetSize;
+
+  // Now for each point in the near set, we need to make children.  To save
+  // computation later, we'll create an array holding the points in the near
+  // set, and then after each run we'll check which of those (if any) were used
+  // and we will remove them.  ...if that's faster.  I think it is.
+  while (nearSetSize > 0)
+  {
+    size_t newPointIndex = nearSetSize - 1;
+
+    // Swap to front if necessary.
+    if (newPointIndex != 0)
+    {
+      const size_t tempIndex = indices[newPointIndex];
+      const double tempDist = distances[newPointIndex];
+
+      indices[newPointIndex] = indices[0];
+      distances[newPointIndex] = distances[0];
+
+      indices[0] = tempIndex;
+      distances[0] = tempDist;
+    }
+
+    // Will this be a new furthest child?
+    if (distances[0] > furthestDescendantDistance)
+      furthestDescendantDistance = distances[0];
+
+    // If there's only one point left, we don't need this crap.
+    if ((nearSetSize == 1) && (farSetSize == 0))
+    {
+      size_t childNearSetSize = 0;
+      children.push_back(new CoverTree(dataset, expansionConstant,
+          indices[0], nextScale, distances[0], indices, distances,
+          childNearSetSize, farSetSize, usedSetSize, metric));
+
+      // Because the far set size is 0, we don't have to do any swapping to
+      // move the point into the used set.
+      ++usedSetSize;
+      --nearSetSize;
+
+      // And we're done.
+      break;
+    }
+
+    // Create the near and far set indices and distance vectors.  We don't fill
+    // in the self-point, yet.
+    arma::Col<size_t> childIndices(nearSetSize + farSetSize);
+    childIndices.rows(0, (nearSetSize + farSetSize - 2)) = indices.rows(1,
+        nearSetSize + farSetSize - 1);
+    arma::vec childDistances(nearSetSize + farSetSize);
+
+    // Build distances for the child.
+    ComputeDistances(indices[0], childIndices, childDistances, nearSetSize
+        + farSetSize - 1, metric);
+
+    // Split into near and far sets for this point.
+    childNearSetSize = SplitNearFar(childIndices, childDistances, bound,
+        nearSetSize + farSetSize - 1);
+    childFarSetSize = PruneFarSet(childIndices, childDistances,
+        expansionConstant * bound, childNearSetSize,
+        (nearSetSize + farSetSize - 1));
+
+    // Now that we know the near and far set sizes, we can put the used point
+    // (the self point) in the correct place; now, when we call
+    // MoveToUsedSet(), it will move the self-point correctly.  The distance
+    // does not matter.
+    childIndices(childNearSetSize + childFarSetSize) = indices[0];
+    childDistances(childNearSetSize + childFarSetSize) = 0;
+
+    // Build this child (recursively).
+    childUsedSetSize = 1; // Mark self point as used.
+    children.push_back(new CoverTree(dataset, expansionConstant, indices[0],
+        nextScale, distances[0], childIndices, childDistances, childNearSetSize,
+        childFarSetSize, childUsedSetSize, metric));
+
+    // If we created an implicit node, take its self-child instead (this could
+    // happen multiple times).
+    while (children[children.size() - 1]->NumChildren() == 1)
+    {
+      CoverTree* old = children[children.size() - 1];
+      children.erase(children.begin() + children.size() - 1);
+
+      // Now take its child.
+      children.push_back(&(old->Child(0)));
+
+      // Remove its child (so it doesn't delete it).
+      old->Children().erase(old->Children().begin() + old->Children().size()
+          - 1);
+
+      // Now delete it.
+      delete old;
+    }
+
+    // Now with the child created, it returns the childIndices and
+    // childDistances vectors in this form:
+    // [ childFar | childUsed ]
+    // For each point in the childUsed set, we must move that point to the used
+    // set in our own vector.
+    MoveToUsedSet(indices, distances, nearSetSize, farSetSize, usedSetSize,
+        childIndices, childFarSetSize, childUsedSetSize);
+  }
+
+  // Calculate furthest descendant.
+  for (size_t i = (nearSetSize + farSetSize); i < (nearSetSize + farSetSize +
+      usedSetSize); ++i)
+    if (distances[i] > furthestDescendantDistance)
+      furthestDescendantDistance = distances[i];
+
+  Log::Assert(furthestDescendantDistance <= pow(expansionConstant, scale + 1));
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+CoverTree<MetricType, RootPointPolicy, StatisticType>::~CoverTree()
+{
+  // Delete each child.
+  for (size_t i = 0; i < children.size(); ++i)
+    delete children[i];
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+double CoverTree<MetricType, RootPointPolicy, StatisticType>::MinDistance(
+    const CoverTree<MetricType, RootPointPolicy, StatisticType>* other) const
+{
+  // Every cover tree node will contain points up to EC^(scale + 1) away.
+  return MetricType::Evaluate(dataset.unsafe_col(point),
+      other->Dataset().unsafe_col(other->Point())) -
+      std::pow(expansionConstant, scale + 1) -
+      std::pow(other->ExpansionConstant(), other->Scale() + 1);
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+double CoverTree<MetricType, RootPointPolicy, StatisticType>::MinDistance(
+    const arma::vec& other) const
+{
+  return MetricType::Evaluate(dataset.unsafe_col(point), other) -
+      std::pow(expansionConstant, scale + 1);
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+double CoverTree<MetricType, RootPointPolicy, StatisticType>::MaxDistance(
+    const CoverTree<MetricType, RootPointPolicy, StatisticType>* other) const
+{
+  return MetricType::Evaluate(dataset.unsafe_col(point),
+      other->Dataset().unsafe_col(other->Point())) +
+      std::pow(expansionConstant, scale + 1) +
+      std::pow(other->ExpansionConstant(), other->Scale() + 1);
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+double CoverTree<MetricType, RootPointPolicy, StatisticType>::MaxDistance(
+    const arma::vec& other) const
+{
+  return MetricType::Evaluate(dataset.unsafe_col(point), other) +
+      std::pow(expansionConstant, scale + 1);
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+size_t CoverTree<MetricType, RootPointPolicy, StatisticType>::SplitNearFar(
+    arma::Col<size_t>& indices,
+    arma::vec& distances,
+    const double bound,
+    const size_t pointSetSize)
+{
+  // Sanity check; there is no guarantee that this condition will not be true.
+  // ...or is there?
+  if (pointSetSize <= 1)
+    return 0;
+
+  // We'll traverse from both left and right.
+  size_t left = 0;
+  size_t right = pointSetSize - 1;
+
+  // A modification of quicksort, with the pivot value set to the bound.
+  // Everything on the left of the pivot will be less than or equal to the
+  // bound; everything on the right will be greater than the bound.
+  while ((distances[left] <= bound) && (left != right))
+    ++left;
+  while ((distances[right] > bound) && (left != right))
+    --right;
+
+  while (left != right)
+  {
+    // Now swap the values and indices.
+    const size_t tempPoint = indices[left];
+    const double tempDist = distances[left];
+
+    indices[left] = indices[right];
+    distances[left] = distances[right];
+
+    indices[right] = tempPoint;
+    distances[right] = tempDist;
+
+    // Traverse the left, seeing how many points are correctly on that side.
+    // When we encounter an incorrect point, stop.  We will switch it later.
+    while ((distances[left] <= bound) && (left != right))
+      ++left;
+
+    // Traverse the right, seeing how many points are correctly on that side.
+    // When we encounter an incorrect point, stop.  We will switch it with the
+    // wrong point from the left side.
+    while ((distances[right] > bound) && (left != right))
+      --right;
+  }
+
+  // The final left value is the index of the first far value.
+  return left;
+}
+
+// Returns the maximum distance between points.
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+void CoverTree<MetricType, RootPointPolicy, StatisticType>::ComputeDistances(
+    const size_t pointIndex,
+    const arma::Col<size_t>& indices,
+    arma::vec& distances,
+    const size_t pointSetSize,
+    MetricType& metric)
+{
+  // For each point, rebuild the distances.  The indices do not need to be
+  // modified.
+  for (size_t i = 0; i < pointSetSize; ++i)
+  {
+    distances[i] = metric.Evaluate(dataset.unsafe_col(pointIndex),
+        dataset.unsafe_col(indices[i]));
+  }
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+size_t CoverTree<MetricType, RootPointPolicy, StatisticType>::SortPointSet(
+    arma::Col<size_t>& indices,
+    arma::vec& distances,
+    const size_t childFarSetSize,
+    const size_t childUsedSetSize,
+    const size_t farSetSize)
+{
+  // We'll use low-level memcpy calls ourselves, just to ensure it's done
+  // quickly and the way we want it to be.  Unfortunately this takes up more
+  // memory than one-element swaps, but there's not a great way around that.
+  const size_t bufferSize = std::min(farSetSize, childUsedSetSize);
+  const size_t bigCopySize = std::max(farSetSize, childUsedSetSize);
+
+  // Sanity check: there is no need to sort if the buffer size is going to be
+  // zero.
+  if (bufferSize == 0)
+    return (childFarSetSize + farSetSize);
+
+  size_t* indicesBuffer = new size_t[bufferSize];
+  double* distancesBuffer = new double[bufferSize];
+
+  // The start of the memory region to copy to the buffer.
+  const size_t bufferFromLocation = ((bufferSize == farSetSize) ?
+      (childFarSetSize + childUsedSetSize) : childFarSetSize);
+  // The start of the memory region to move directly to the new place.
+  const size_t directFromLocation = ((bufferSize == farSetSize) ?
+      childFarSetSize : (childFarSetSize + childUsedSetSize));
+  // The destination to copy the buffer back to.
+  const size_t bufferToLocation = ((bufferSize == farSetSize) ?
+      childFarSetSize : (childFarSetSize + farSetSize));
+  // The destination of the directly moved memory region.
+  const size_t directToLocation = ((bufferSize == farSetSize) ?
+      (childFarSetSize + farSetSize) : childFarSetSize);
+
+  // Copy the smaller piece to the buffer.
+  memcpy(indicesBuffer, indices.memptr() + bufferFromLocation,
+      sizeof(size_t) * bufferSize);
+  memcpy(distancesBuffer, distances.memptr() + bufferFromLocation,
+      sizeof(double) * bufferSize);
+
+  // Now move the other memory.
+  memmove(indices.memptr() + directToLocation,
+      indices.memptr() + directFromLocation, sizeof(size_t) * bigCopySize);
+  memmove(distances.memptr() + directToLocation,
+      distances.memptr() + directFromLocation, sizeof(double) * bigCopySize);
+
+  // Now copy the temporary memory to the right place.
+  memcpy(indices.memptr() + bufferToLocation, indicesBuffer,
+      sizeof(size_t) * bufferSize);
+  memcpy(distances.memptr() + bufferToLocation, distancesBuffer,
+      sizeof(double) * bufferSize);
+
+  delete[] indicesBuffer;
+  delete[] distancesBuffer;
+
+  // This returns the complete size of the far set.
+  return (childFarSetSize + farSetSize);
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+void CoverTree<MetricType, RootPointPolicy, StatisticType>::MoveToUsedSet(
+    arma::Col<size_t>& indices,
+    arma::vec& distances,
+    size_t& nearSetSize,
+    size_t& farSetSize,
+    size_t& usedSetSize,
+    arma::Col<size_t>& childIndices,
+    const size_t childFarSetSize, // childNearSetSize is 0 in this case.
+    const size_t childUsedSetSize)
+{
+  const size_t originalSum = nearSetSize + farSetSize + usedSetSize;
+
+  // Loop across the set.  We will swap points as we need.  It should be noted
+  // that farSetSize and nearSetSize may change with each iteration of this loop
+  // (depending on if we make a swap or not).
+  size_t startChildUsedSet = 0; // Where to start in the child set.
+  for (size_t i = 0; i < nearSetSize; ++i)
+  {
+    // Discover if this point was in the child's used set.
+    for (size_t j = startChildUsedSet; j < childUsedSetSize; ++j)
+    {
+      if (childIndices[childFarSetSize + j] == indices[i])
+      {
+        // We have found a point; a swap is necessary.
+
+        // Since this point is from the near set, to preserve the near set, we
+        // must do a swap.
+        if (farSetSize > 0)
+        {
+          if ((nearSetSize - 1) != i)
+          {
+            // In this case it must be a three-way swap.
+            size_t tempIndex = indices[nearSetSize + farSetSize - 1];
+            double tempDist = distances[nearSetSize + farSetSize - 1];
+
+            size_t tempNearIndex = indices[nearSetSize - 1];
+            double tempNearDist = distances[nearSetSize - 1];
+
+            indices[nearSetSize + farSetSize - 1] = indices[i];
+            distances[nearSetSize + farSetSize - 1] = distances[i];
+
+            indices[nearSetSize - 1] = tempIndex;
+            distances[nearSetSize - 1] = tempDist;
+
+            indices[i] = tempNearIndex;
+            distances[i] = tempNearDist;
+          }
+          else
+          {
+            // We can do a two-way swap.
+            size_t tempIndex = indices[nearSetSize + farSetSize - 1];
+            double tempDist = distances[nearSetSize + farSetSize - 1];
+
+            indices[nearSetSize + farSetSize - 1] = indices[i];
+            distances[nearSetSize + farSetSize - 1] = distances[i];
+
+            indices[i] = tempIndex;
+            distances[i] = tempDist;
+          }
+        }
+        else if ((nearSetSize - 1) != i)
+        {
+          // A two-way swap is possible.
+          size_t tempIndex = indices[nearSetSize + farSetSize - 1];
+          double tempDist = distances[nearSetSize + farSetSize - 1];
+
+          indices[nearSetSize + farSetSize - 1] = indices[i];
+          distances[nearSetSize + farSetSize - 1] = distances[i];
+
+          indices[i] = tempIndex;
+          distances[i] = tempDist;
+        }
+        else
+        {
+          // No swap is necessary.
+        }
+
+        // We don't need to do a complete preservation of the child index set,
+        // but we want to make sure we only loop over points we haven't seen.
+        // So increment the child counter by 1 and move a point if we need.
+        if (j != startChildUsedSet)
+        {
+          childIndices[childFarSetSize + j] = childIndices[childFarSetSize +
+              startChildUsedSet];
+        }
+
+        // Update all counters from the swaps we have done.
+        ++startChildUsedSet;
+        --nearSetSize;
+        --i; // Since we moved a point out of the near set we must step back.
+
+        break; // Break out of this for loop; back to the first one.
+      }
+    }
+  }
+
+  // Now loop over the far set.  This loop is different because we only require
+  // a normal two-way swap instead of the three-way swap to preserve the near
+  // set / far set ordering.
+  for (size_t i = 0; i < farSetSize; ++i)
+  {
+    // Discover if this point was in the child's used set.
+    for (size_t j = startChildUsedSet; j < childUsedSetSize; ++j)
+    {
+      if (childIndices[childFarSetSize + j] == indices[i + nearSetSize])
+      {
+        // We have found a point to swap.
+
+        // Perform the swap.
+        size_t tempIndex = indices[nearSetSize + farSetSize - 1];
+        double tempDist = distances[nearSetSize + farSetSize - 1];
+
+        indices[nearSetSize + farSetSize - 1] = indices[nearSetSize + i];
+        distances[nearSetSize + farSetSize - 1] = distances[nearSetSize + i];
+
+        indices[nearSetSize + i] = tempIndex;
+        distances[nearSetSize + i] = tempDist;
+
+        if (j != startChildUsedSet)
+        {
+          childIndices[childFarSetSize + j] = childIndices[childFarSetSize +
+              startChildUsedSet];
+        }
+
+        // Update all counters from the swaps we have done.
+        ++startChildUsedSet;
+        --farSetSize;
+        --i;
+
+        break; // Break out of this for loop; back to the first one.
+      }
+    }
+  }
+
+  // Update used set size.
+  usedSetSize += childUsedSetSize;
+
+  Log::Assert(originalSum == (nearSetSize + farSetSize + usedSetSize));
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+size_t CoverTree<MetricType, RootPointPolicy, StatisticType>::PruneFarSet(
+    arma::Col<size_t>& indices,
+    arma::vec& distances,
+    const double bound,
+    const size_t nearSetSize,
+    const size_t pointSetSize)
+{
+  // What we are trying to do is remove any points greater than the bound from
+  // the far set.  We don't care what happens to those indices and distances...
+  // so, we don't need to properly swap points -- just drop new ones in place.
+  size_t left = nearSetSize;
+  size_t right = pointSetSize - 1;
+  while ((distances[left] <= bound) && (left != right))
+    ++left;
+  while ((distances[right] > bound) && (left != right))
+    --right;
+
+  while (left != right)
+  {
+    // We don't care what happens to the point which should be on the right.
+    indices[left] = indices[right];
+    distances[left] = distances[right];
+    --right; // Since we aren't changing the right.
+
+    // Advance to next location which needs to switch.
+    while ((distances[left] <= bound) && (left != right))
+      ++left;
+    while ((distances[right] > bound) && (left != right))
+      --right;
+  }
+
+  // The far set size is the left pointer, with the near set size subtracted
+  // from it.
+  return (left - nearSetSize);
+}
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif

Copied: mlpack/trunk/src/mlpack/core/tree/cover_tree/dual_tree_traverser.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/traversers/dual_cover_tree_traverser.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree/dual_tree_traverser.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree/dual_tree_traverser.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,60 @@
+/**
+ * @file dual_tree_traverser.hpp
+ * @author Ryan Curtin
+ *
+ * A dual-tree traverser for the cover tree.
+ */
+#ifndef __MLPACK_CORE_TREE_COVER_TREE_DUAL_TREE_TRAVERSER_HPP
+#define __MLPACK_CORE_TREE_COVER_TREE_DUAL_TREE_TRAVERSER_HPP
+
+#include <mlpack/core.hpp>
+#include <queue>
+
+namespace mlpack {
+namespace tree {
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+template<typename RuleType>
+class CoverTree<MetricType, RootPointPolicy, StatisticType>::DualTreeTraverser
+{
+ public:
+  /**
+   * Initialize the dual tree traverser with the given rule type.
+   */
+  DualTreeTraverser(RuleType& rule);
+
+  /**
+   * Traverse the two specified trees.
+   *
+   * @param queryNode Root of query tree.
+   * @param referenceNode Root of reference tree.
+   */
+  void Traverse(CoverTree& queryNode, CoverTree& referenceNode);
+
+  /**
+   * Helper function for traversal of the two trees.
+   */
+  void Traverse(CoverTree& queryNode,
+                CoverTree& referenceNode,
+                const size_t parent);
+
+  //! Get the number of pruned nodes.
+  size_t NumPrunes() const { return numPrunes; }
+  //! Modify the number of pruned nodes.
+  size_t& NumPrunes() { return numPrunes; }
+
+ private:
+  //! The instantiated rule set for pruning branches.
+  RuleType& rule;
+
+  //! The number of pruned nodes.
+  size_t numPrunes;
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "dual_tree_traverser_impl.hpp"
+
+#endif

Copied: mlpack/trunk/src/mlpack/core/tree/cover_tree/dual_tree_traverser_impl.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/traversers/dual_cover_tree_traverser.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree/dual_tree_traverser_impl.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree/dual_tree_traverser_impl.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,99 @@
+/**
+ * @file dual_tree_traverser_impl.hpp
+ * @author Ryan Curtin
+ *
+ * A dual-tree traverser for the cover tree.
+ */
+#ifndef __MLPACK_CORE_TREE_COVER_TREE_DUAL_TREE_TRAVERSER_IMPL_HPP
+#define __MLPACK_CORE_TREE_COVER_TREE_DUAL_TREE_TRAVERSER_IMPL_HPP
+
+#include <mlpack/core.hpp>
+#include <queue>
+
+namespace mlpack {
+namespace tree {
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+template<typename RuleType>
+CoverTree<MetricType, RootPointPolicy, StatisticType>::
+DualTreeTraverser<RuleType>::DualTreeTraverser(RuleType& rule) :
+    rule(rule),
+    numPrunes(0)
+{ /* Nothing to do. */ }
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+template<typename RuleType>
+void CoverTree<MetricType, RootPointPolicy, StatisticType>::
+DualTreeTraverser<RuleType>::Traverse(
+    CoverTree<MetricType, RootPointPolicy, StatisticType>& queryNode,
+    CoverTree<MetricType, RootPointPolicy, StatisticType>& referenceNode)
+{
+  // Start traversal with an invalid parent index.
+  Traverse(queryNode, referenceNode, size_t() - 1);
+}
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+template<typename RuleType>
+void CoverTree<MetricType, RootPointPolicy, StatisticType>::
+DualTreeTraverser<RuleType>::Traverse(
+  CoverTree<MetricType, RootPointPolicy, StatisticType>& queryNode,
+  CoverTree<MetricType, RootPointPolicy, StatisticType>& referenceNode,
+  const size_t parent)
+{
+  std::queue<CoverTree<MetricType, RootPointPolicy, StatisticType>*>
+      referenceQueue;
+  std::queue<size_t> referenceParents;
+
+  referenceQueue.push(&referenceNode);
+  referenceParents.push(parent);
+
+  while (!referenceQueue.empty())
+  {
+    CoverTree<MetricType, RootPointPolicy, StatisticType>& reference =
+        *referenceQueue.front();
+    referenceQueue.pop();
+
+    size_t refParent = referenceParents.front();
+    referenceParents.pop();
+
+    // Do the base case, if we need to.
+    if (refParent != reference.Point())
+      rule.BaseCase(queryNode.Point(), reference.Point());
+
+    if (((queryNode.Scale() < reference.Scale()) &&
+         (reference.NumChildren() != 0)) ||
+         (queryNode.NumChildren() == 0))
+    {
+      // We must descend the reference node.  Pruning happens here.
+      for (size_t i = 0; i < reference.NumChildren(); ++i)
+      {
+        // Can we prune?
+        if (!rule.CanPrune(queryNode, reference.Child(i)))
+        {
+          referenceQueue.push(&(reference.Child(i)));
+          referenceParents.push(reference.Point());
+        }
+        else
+        {
+          ++numPrunes;
+        }
+      }
+    }
+    else
+    {
+      // We must descend the query node.  No pruning happens here.  For the
+      // self-child, we trick the recursion into thinking that the base case
+      // has already been done (which it has).
+      if (queryNode.NumChildren() >= 1)
+        Traverse(queryNode.Child(0), reference, reference.Point());
+
+      for (size_t i = 1; i < queryNode.NumChildren(); ++i)
+        Traverse(queryNode.Child(i), reference, size_t() - 1);
+    }
+  }
+}
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif

Copied: mlpack/trunk/src/mlpack/core/tree/cover_tree/first_point_is_root.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/first_point_is_root.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree/first_point_is_root.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree/first_point_is_root.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,37 @@
+/**
+ * @file first_point_is_root.hpp
+ * @author Ryan Curtin
+ *
+ * A very simple policy for the cover tree; the first point in the dataset is
+ * chosen as the root of the cover tree.
+ */
+#ifndef __MLPACK_CORE_TREE_FIRST_POINT_IS_ROOT_HPP
+#define __MLPACK_CORE_TREE_FIRST_POINT_IS_ROOT_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace tree {
+
+/**
+ * This class is meant to be used as a choice for the policy class
+ * RootPointPolicy of the CoverTree class.  This policy determines which point
+ * is used for the root node of the cover tree.  This particular implementation
+ * simply chooses the first point in the dataset as the root.  A more complex
+ * implementation might choose, for instance, the point with least maximum
+ * distance to other points (the closest to the "middle").
+ */
+class FirstPointIsRoot
+{
+ public:
+  /**
+   * Return the point to be used as the root point of the cover tree.  This just
+   * returns 0.
+   */
+  static size_t ChooseRoot(const arma::mat& /* dataset */) { return 0; }
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif // __MLPACK_CORE_TREE_FIRST_POINT_IS_ROOT_HPP

Copied: mlpack/trunk/src/mlpack/core/tree/cover_tree/single_tree_traverser.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/traversers/single_tree_breadth_first_traverser.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree/single_tree_traverser.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree/single_tree_traverser.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,57 @@
+/**
+ * @file single_tree_traverser.hpp
+ * @author Ryan Curtin
+ *
+ * Defines the SingleTreeTraverser for the cover tree.  This implements a
+ * single-tree breadth-first recursion with a pruning rule and a base case (two
+ * point) rule.
+ */
+#ifndef __MLPACK_CORE_TREE_COVER_TREE_SINGLE_TREE_TRAVERSER_HPP
+#define __MLPACK_CORE_TREE_COVER_TREE_SINGLE_TREE_TRAVERSER_HPP
+
+#include <mlpack/core.hpp>
+
+#include "cover_tree.hpp"
+
+namespace mlpack {
+namespace tree {
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+template<typename RuleType>
+class CoverTree<MetricType, RootPointPolicy, StatisticType>::SingleTreeTraverser
+{
+ public:
+  /**
+   * Initialize the single tree traverser with the given rule.
+   */
+  SingleTreeTraverser(RuleType& rule);
+
+  /**
+   * Traverse the tree with the given point.
+   *
+   * @param queryIndex The index of the point in the query set which is used as
+   *      the query point.
+   * @param referenceNode The tree node to be traversed.
+   */
+  void Traverse(const size_t queryIndex, CoverTree& referenceNode);
+
+  //! Get the number of prunes so far.
+  size_t NumPrunes() const { return numPrunes; }
+  //! Set the number of prunes (good for a reset to 0).
+  size_t& NumPrunes() { return numPrunes; }
+
+ private:
+  //! Reference to the rules with which the tree will be traversed.
+  RuleType& rule;
+
+  //! The number of nodes which have been pruned during traversal.
+  size_t numPrunes;
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "single_tree_traverser_impl.hpp"
+
+#endif

Copied: mlpack/trunk/src/mlpack/core/tree/cover_tree/single_tree_traverser_impl.hpp (from rev 13324, mlpack/trunk/src/mlpack/core/tree/traversers/single_tree_breadth_first_traverser.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree/single_tree_traverser_impl.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree/single_tree_traverser_impl.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -0,0 +1,83 @@
+/**
+ * @file single_tree_traverser_impl.hpp
+ * @author Ryan Curtin
+ *
+ * Implementation of the single tree traverser for cover trees, which implements
+ * a breadth-first traversal.
+ */
+#ifndef __MLPACK_CORE_TREE_COVER_TREE_SINGLE_TREE_TRAVERSER_IMPL_HPP
+#define __MLPACK_CORE_TREE_COVER_TREE_SINGLE_TREE_TRAVERSER_IMPL_HPP
+
+// In case it hasn't been included yet.
+#include "single_tree_traverser.hpp"
+
+#include <queue>
+
+namespace mlpack {
+namespace tree {
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+template<typename RuleType>
+CoverTree<MetricType, RootPointPolicy, StatisticType>::
+SingleTreeTraverser<RuleType>::SingleTreeTraverser(RuleType& rule) :
+    rule(rule),
+    numPrunes(0)
+{ /* Nothing to do. */ }
+
+template<typename MetricType, typename RootPointPolicy, typename StatisticType>
+template<typename RuleType>
+void CoverTree<MetricType, RootPointPolicy, StatisticType>::
+SingleTreeTraverser<RuleType>::Traverse(
+    const size_t queryIndex,
+    CoverTree<MetricType, RootPointPolicy, StatisticType>& referenceNode)
+{
+  // This is a non-recursive implementation (which should be faster than a
+  // recursive implementation).
+  std::queue<CoverTree<MetricType, RootPointPolicy, StatisticType>*> pointQueue;
+  std::queue<size_t> parentPoints; // For if this tree has self-children.
+
+  pointQueue.push(&referenceNode);
+  parentPoints.push(size_t() - 1); // Invalid value.
+
+  while (!pointQueue.empty())
+  {
+    CoverTree<MetricType, RootPointPolicy, StatisticType>* node =
+        pointQueue.front();
+    pointQueue.pop();
+
+    // Check if we can prune this node.
+    if (rule.CanPrune(queryIndex, *node))
+    {
+      parentPoints.pop(); // Pop the parent point off.
+
+      ++numPrunes;
+      continue;
+    }
+
+    // If this tree type has self-children, we need to make sure we don't run
+    // the base case if the parent already had it run.
+    size_t baseCaseStart = 0;
+    if (parentPoints.front() == node->Point(0))
+      baseCaseStart = 1; // Skip base case we've already evaluated.
+
+    parentPoints.pop();
+
+    // First run the base case for any points this node might hold.
+    for (size_t i = baseCaseStart; i < node->NumPoints(); ++i)
+      rule.BaseCase(queryIndex, node->Point(i));
+
+    // Now push children (and their parent points) into the FIFO.  Maybe it
+    // would be better to push these back in a particular order.
+    const size_t parentPoint = node->Point(0);
+    for (size_t i = 0; i < node->NumChildren(); ++i)
+    {
+      pointQueue.push(&(node->Child(i)));
+      parentPoints.push(parentPoint);
+    }
+  }
+}
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif

Deleted: mlpack/trunk/src/mlpack/core/tree/cover_tree.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree.hpp	2012-08-04 22:03:50 UTC (rev 13332)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -1,358 +0,0 @@
-/**
- * @file cover_tree.hpp
- * @author Ryan Curtin
- *
- * Definition of CoverTree, which can be used in place of the BinarySpaceTree.
- */
-#ifndef __MLPACK_CORE_TREE_COVER_TREE_HPP
-#define __MLPACK_CORE_TREE_COVER_TREE_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/core/metrics/lmetric.hpp>
-#include "first_point_is_root.hpp"
-#include "traversers/single_tree_breadth_first_traverser.hpp"
-#include "traversers/dual_cover_tree_traverser.hpp"
-#include "statistic.hpp"
-#include <mlpack/methods/neighbor_search/neighbor_search_cover_rules.hpp>
-
-namespace mlpack {
-namespace tree {
-
-/**
- * A cover tree is a tree specifically designed to speed up nearest-neighbor
- * computation in high-dimensional spaces.  Each non-leaf node references a
- * point and has a nonzero number of children, including a "self-child" which
- * references the same point.  A leaf node represents only one point.
- *
- * The tree can be thought of as a hierarchy with the root node at the top level
- * and the leaf nodes at the bottom level.  Each level in the tree has an
- * assigned 'scale' i.  The tree follows these three conditions:
- *
- * - nesting: the level C_i is a subset of the level C_{i - 1}.
- * - covering: all node in level C_{i - 1} have at least one node in the
- *     level C_i with distance less than or equal to EC^i (exactly one of these
- *     is a parent of the point in level C_{i - 1}.
- * - separation: all nodes in level C_i have distance greater than EC^i to all
- *     other nodes in level C_i.
- *
- * The value 'EC' refers to the expansion constant, which is a parameter of the
- * tree.  These three properties make the cover tree very good for fast,
- * high-dimensional nearest-neighbor search.
- *
- * The theoretical structure of the tree contains many 'implicit' nodes which
- * only have a "self-child" (a child referencing the same point, but at a lower
- * scale level).  This practical implementation only constructs explicit nodes
- * -- non-leaf nodes with more than one child.  A leaf node has no children, and
- * its scale level is INT_MIN.
- *
- * For more information on cover trees, see
- *
- * @code
- * @inproceedings{
- *   author = {Beygelzimer, Alina and Kakade, Sham and Langford, John},
- *   title = {Cover trees for nearest neighbor},
- *   booktitle = {Proceedings of the 23rd International Conference on Machine
- *     Learning},
- *   series = {ICML '06},
- *   year = {2006},
- *   pages = {97--104]
- * }
- * @endcode
- *
- * For information on runtime bounds of the nearest-neighbor computation using
- * cover trees, see the following paper, presented at NIPS 2009:
- *
- * @code
- * @inproceedings{
- *   author = {Ram, P., and Lee, D., and March, W.B., and Gray, A.G.},
- *   title = {Linear-time Algorithms for Pairwise Statistical Problems},
- *   booktitle = {Advances in Neural Information Processing Systems 22},
- *   editor = {Y. Bengio and D. Schuurmans and J. Lafferty and C.K.I. Williams
- *     and A. Culotta},
- *   pages = {1527--1535},
- *   year = {2009}
- * }
- * @endcode
- *
- * The CoverTree class offers three template parameters; a custom metric type
- * can be used with MetricType (this class defaults to the L2-squared metric).
- * The root node's point can be chosen with the RootPointPolicy; by default, the
- * FirstPointIsRoot policy is used, meaning the first point in the dataset is
- * used.  The StatisticType policy allows you to define statistics which can be
- * gathered during the creation of the tree.
- *
- * @tparam MetricType Metric type to use during tree construction.
- * @tparam RootPointPolicy Determines which point to use as the root node.
- * @tparam StatisticType Statistic to be used during tree creation.
- */
-template<typename MetricType = metric::LMetric<2>,
-         typename RootPointPolicy = FirstPointIsRoot,
-         typename StatisticType = EmptyStatistic>
-class CoverTree
-{
- public:
-  typedef arma::mat Mat;
-
-  /**
-   * Create the cover tree with the given dataset and given expansion constant.
-   * The dataset will not be modified during the building procedure (unlike
-   * BinarySpaceTree).
-   *
-   * @param dataset Reference to the dataset to build a tree on.
-   * @param expansionConstant Expansion constant (EC) to use during tree
-   *      building (default 2.0).
-   */
-  CoverTree(const arma::mat& dataset,
-            const double expansionConstant = 2.0,
-            MetricType* metric = NULL);
-
-  /**
-   * Construct a child cover tree node.  This constructor is not meant to be
-   * used externally, but it could be used to insert another node into a tree.
-   * This procedure uses only one vector for the near set, the far set, and the
-   * used set (this is to prevent unnecessary memory allocation in recursive
-   * calls to this constructor).  Therefore, the size of the near set, far set,
-   * and used set must be passed in.  The near set will be entirely used up, and
-   * some of the far set may be used.  The value of usedSetSize will be set to
-   * the number of points used in the construction of this node, and the value
-   * of farSetSize will be modified to reflect the number of points in the far
-   * set _after_ the construction of this node.
-   *
-   * If you are calling this manually, be careful that the given scale is
-   * as small as possible, or you may be creating an implicit node in your tree.
-   *
-   * @param dataset Reference to the dataset to build a tree on.
-   * @param expansionConstant Expansion constant (EC) to use during tree
-   *     building.
-   * @param pointIndex Index of the point this node references.
-   * @param scale Scale of this level in the tree.
-   * @param indices Array of indices, ordered [ nearSet | farSet | usedSet ];
-   *     will be modified to [ farSet | usedSet ].
-   * @param distances Array of distances, ordered the same way as the indices.
-   *     These represent the distances between the point specified by pointIndex
-   *     and each point in the indices array.
-   * @param nearSetSize Size of the near set; if 0, this will be a leaf.
-   * @param farSetSize Size of the far set; may be modified (if this node uses
-   *     any points in the far set).
-   * @param usedSetSize The number of points used will be added to this number.
-   */
-  CoverTree(const arma::mat& dataset,
-            const double expansionConstant,
-            const size_t pointIndex,
-            const int scale,
-            const double parentDistance,
-            arma::Col<size_t>& indices,
-            arma::vec& distances,
-            size_t nearSetSize,
-            size_t& farSetSize,
-            size_t& usedSetSize,
-            MetricType& metric = NULL);
-
-  /**
-   * Delete this cover tree node and its children.
-   */
-  ~CoverTree();
-
-  //! Define this tree's preferred traverser.
-  template<typename RuleType>
-  struct PreferredTraverser
-  {
-    typedef SingleTreeBreadthFirstTraverser<
-        CoverTree<MetricType, RootPointPolicy, StatisticType>,
-        RuleType
-    > Type;
-  };
-
-  template<typename RuleType>
-  struct PreferredDualTraverser
-  {
-    typedef DualCoverTreeTraverser<
-        CoverTree<MetricType, RootPointPolicy, StatisticType>,
-        RuleType
-    > Type;
-  };
-
-  template<typename SortPolicy, typename Metric2Type, typename TreeType>
-  struct PreferredRules
-  {
-    typedef neighbor::NeighborSearchCoverRules<
-        SortPolicy,
-        Metric2Type,
-        TreeType
-    > Type;
-  };
-
-  //! Get a reference to the dataset.
-  const arma::mat& Dataset() const { return dataset; }
-
-  //! Get the index of the point which this node represents.
-  size_t Point() const { return point; }
-  //! For compatibility with other trees; the argument is ignored.
-  size_t Point(const size_t) const { return point; }
-
-  // Fake
-  CoverTree* Left() const { return NULL; }
-  CoverTree* Right() const { return NULL; }
-  size_t Begin() const { return 0; }
-  size_t Count() const { return 0; }
-  size_t End() const { return 0; }
-  bool IsLeaf() const { return (children.size() == 0); }
-  size_t NumPoints() const { return 1; }
-
-  //! Get a particular child node.
-  const CoverTree& Child(const size_t index) const { return *children[index]; }
-  //! Modify a particular child node.
-  CoverTree& Child(const size_t index) { return *children[index]; }
-
-  //! Get the number of children.
-  size_t NumChildren() const { return children.size(); }
-
-  //! Get the children.
-  const std::vector<CoverTree*>& Children() const { return children; }
-  //! Modify the children manually (maybe not a great idea).
-  std::vector<CoverTree*>& Children() { return children; }
-
-  //! Get the scale of this node.
-  int Scale() const { return scale; }
-  //! Modify the scale of this node.  Be careful...
-  int& Scale() { return scale; }
-
-  //! Get the expansion constant.
-  double ExpansionConstant() const { return expansionConstant; }
-  //! Modify the expansion constant; don't do this, you'll break everything.
-  double& ExpansionConstant() { return expansionConstant; }
-
-  //! Get the statistic for this node.
-  const StatisticType& Stat() const { return stat; }
-  //! Modify the statistic for this node.
-  StatisticType& Stat() { return stat; }
-
-  //! Return the minimum distance to another node.
-  double MinDistance(const CoverTree* other) const;
-
-  //! Return the minimum distance to another point.
-  double MinDistance(const arma::vec& other) const;
-
-  //! Return the maximum distance to another node.
-  double MaxDistance(const CoverTree* other) const;
-
-  //! Return the maximum distance to another point.
-  double MaxDistance(const arma::vec& other) const;
-
-  //! Returns true: this tree does have self-children.
-  static bool HasSelfChildren() { return true; }
-
-  //! Get the distance to the parent.
-  double ParentDistance() const { return parentDistance; }
-
-  //! Get the distance to teh furthest descendant.
-  double FurthestDescendantDistance() const
-  { return furthestDescendantDistance; }
-
- private:
-  //! Reference to the matrix which this tree is built on.
-  const arma::mat& dataset;
-
-  //! Index of the point in the matrix which this node represents.
-  size_t point;
-
-  //! The list of children; the first is the self-child.
-  std::vector<CoverTree*> children;
-
-  //! Scale level of the node.
-  int scale;
-
-  //! The expansion constant used to construct the tree.
-  double expansionConstant;
-
-  //! The instantiated statistic.
-  StatisticType stat;
-
-  //! Distance to the parent.
-  double parentDistance;
-
-  //! Distance to the furthest descendant.
-  double furthestDescendantDistance;
-
-  /**
-   * Fill the vector of distances with the distances between the point specified
-   * by pointIndex and each point in the indices array.  The distances of the
-   * first pointSetSize points in indices are calculated (so, this does not
-   * necessarily need to use all of the points in the arrays).
-   *
-   * @param pointIndex Point to build the distances for.
-   * @param indices List of indices to compute distances for.
-   * @param distances Vector to store calculated distances in.
-   * @param pointSetSize Number of points in arrays to calculate distances for.
-   */
-  void ComputeDistances(const size_t pointIndex,
-                        const arma::Col<size_t>& indices,
-                        arma::vec& distances,
-                        const size_t pointSetSize,
-                        MetricType& metric);
-  /**
-   * Split the given indices and distances into a near and a far set, returning
-   * the number of points in the near set.  The distances must already be
-   * initialized.  This will order the indices and distances such that the
-   * points in the near set make up the first part of the array and the far set
-   * makes up the rest:  [ nearSet | farSet ].
-   *
-   * @param indices List of indices; will be reordered.
-   * @param distances List of distances; will be reordered.
-   * @param bound If the distance is less than or equal to this bound, the point
-   *      is placed into the near set.
-   * @param pointSetSize Size of point set (because we may be sorting a smaller
-   *      list than the indices vector will hold).
-   */
-  size_t SplitNearFar(arma::Col<size_t>& indices,
-                      arma::vec& distances,
-                      const double bound,
-                      const size_t pointSetSize);
-
-  /**
-   * Assuming that the list of indices and distances is sorted as
-   * [ childFarSet | childUsedSet | farSet | usedSet ],
-   * resort the sets so the organization is
-   * [ childFarSet | farSet | childUsedSet | usedSet ].
-   *
-   * The size_t parameters specify the sizes of each set in the array.  Only the
-   * ordering of the indices and distances arrays will be modified (not their
-   * actual contents).
-   *
-   * The size of any of the four sets can be zero and this method will handle
-   * that case accordingly.
-   *
-   * @param indices List of indices to sort.
-   * @param distances List of distances to sort.
-   * @param childFarSetSize Number of points in child far set (childFarSet).
-   * @param childUsedSetSize Number of points in child used set (childUsedSet).
-   * @param farSetSize Number of points in far set (farSet).
-   */
-  size_t SortPointSet(arma::Col<size_t>& indices,
-                      arma::vec& distances,
-                      const size_t childFarSetSize,
-                      const size_t childUsedSetSize,
-                      const size_t farSetSize);
-
-  void MoveToUsedSet(arma::Col<size_t>& indices,
-                     arma::vec& distances,
-                     size_t& nearSetSize,
-                     size_t& farSetSize,
-                     size_t& usedSetSize,
-                     arma::Col<size_t>& childIndices,
-                     const size_t childFarSetSize,
-                     const size_t childUsedSetSize);
-  size_t PruneFarSet(arma::Col<size_t>& indices,
-                     arma::vec& distances,
-                     const double bound,
-                     const size_t nearSetSize,
-                     const size_t pointSetSize);
-};
-
-}; // namespace tree
-}; // namespace mlpack
-
-// Include implementation.
-#include "cover_tree_impl.hpp"
-
-#endif

Deleted: mlpack/trunk/src/mlpack/core/tree/cover_tree_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/cover_tree_impl.hpp	2012-08-04 22:03:50 UTC (rev 13332)
+++ mlpack/trunk/src/mlpack/core/tree/cover_tree_impl.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -1,770 +0,0 @@
-/**
- * @file cover_tree_impl.hpp
- * @author Ryan Curtin
- *
- * Implementation of CoverTree class.
- */
-#ifndef __MLPACK_CORE_TREE_COVER_TREE_IMPL_HPP
-#define __MLPACK_CORE_TREE_COVER_TREE_IMPL_HPP
-
-// In case it hasn't already been included.
-#include "cover_tree.hpp"
-
-namespace mlpack {
-namespace tree {
-
-// Create the cover tree.
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-CoverTree<MetricType, RootPointPolicy, StatisticType>::CoverTree(
-    const arma::mat& dataset,
-    const double expansionConstant,
-    MetricType* metric) :
-    dataset(dataset),
-    point(RootPointPolicy::ChooseRoot(dataset)),
-    expansionConstant(expansionConstant),
-    parentDistance(0),
-    furthestDescendantDistance(0)
-{
-  // If we need to create a metric, do that.  We'll just do it on the heap.
-  bool localMetric = false;
-  if (metric == NULL)
-  {
-    localMetric = true; // So we know we need to free it.
-    metric = new MetricType();
-  }
-
-  // Kick off the building.  Create the indices array and the distances array.
-  arma::Col<size_t> indices = arma::linspace<arma::Col<size_t> >(1,
-      dataset.n_cols - 1, dataset.n_cols - 1);
-  // This is now [1 2 3 4 ... n].  We must be sure that our point does not
-  // occur.
-  if (point != 0)
-    indices[point - 1] = 0; // Put 0 back into the set; remove what was there.
-
-  arma::vec distances(dataset.n_cols - 1);
-
-  // Build the initial distances.
-  ComputeDistances(point, indices, distances, dataset.n_cols - 1, *metric);
-
-  // Now determine the scale factor of the root node.
-  const double maxDistance = max(distances);
-  scale = (int) ceil(log(maxDistance) / log(expansionConstant));
-  const double bound = pow(expansionConstant, scale - 1);
-
-  // Unfortunately, we can't call out to other constructors, so we have to copy
-  // a little bit of code from the other constructor.  First we build the self
-  // child.
-  size_t childNearSetSize = SplitNearFar(indices, distances, bound,
-      dataset.n_cols - 1);
-  size_t childFarSetSize = (dataset.n_cols - 1) - childNearSetSize;
-  size_t usedSetSize = 0;
-  children.push_back(new CoverTree(dataset, expansionConstant, point, scale - 1,
-      0, indices, distances, childNearSetSize, childFarSetSize, usedSetSize,
-      *metric));
-
-  furthestDescendantDistance = children[0]->FurthestDescendantDistance();
-
-  // If we created an implicit node, take its self-child instead (this could
-  // happen multiple times).
-  while (children[children.size() - 1]->NumChildren() == 1)
-  {
-    CoverTree* old = children[children.size() - 1];
-    children.erase(children.begin() + children.size() - 1);
-
-    // Now take its child.
-    children.push_back(&(old->Child(0)));
-
-    // Remove its child (so it doesn't delete it).
-    old->Children().erase(old->Children().begin() + old->Children().size() - 1);
-
-    // Now delete it.
-    delete old;
-  }
-
-  size_t nearSetSize = (dataset.n_cols - 1) - usedSetSize;
-
-  // We have no far set, so the array is organized thusly:
-  // [ near | used ].  No resorting is necessary.
-  // Therefore, go ahead and build the children.
-  while (nearSetSize > 0)
-  {
-    // We want to select the furthest point in the near set as the next child.
-    size_t newPointIndex = nearSetSize - 1;
-
-    // Swap to front if necessary.
-    if (newPointIndex != 0)
-    {
-      const size_t tempIndex = indices[newPointIndex];
-      const double tempDist = distances[newPointIndex];
-
-      indices[newPointIndex] = indices[0];
-      distances[newPointIndex] = distances[0];
-
-      indices[0] = tempIndex;
-      distances[0] = tempDist;
-    }
-
-    if (distances[0] > furthestDescendantDistance)
-      furthestDescendantDistance = distances[0];
-
-    size_t childUsedSetSize = 0;
-
-    // If there's only one point left, we don't need this crap.
-    if (nearSetSize == 1)
-    {
-      size_t childNearSetSize = 0;
-      size_t childFarSetSize = 0;
-      children.push_back(new CoverTree(dataset, expansionConstant,
-          indices[0], scale - 1, distances[0], indices, distances,
-          childNearSetSize, childFarSetSize, usedSetSize, *metric));
-
-      // And we're done.
-      break;
-    }
-
-    // Create the near and far set indices and distance vectors.
-    arma::Col<size_t> childIndices(nearSetSize);
-    childIndices.rows(0, (nearSetSize - 2)) = indices.rows(1, nearSetSize - 1);
-    // Put the current point into the used set, so when we move our indices to
-    // the used set, this will be done for us.
-    childIndices(nearSetSize - 1) = indices[0];
-    arma::vec childDistances(nearSetSize);
-
-    // Build distances for the child.
-    ComputeDistances(indices[0], childIndices, childDistances,
-        nearSetSize - 1, *metric);
-    childDistances(nearSetSize - 1) = 0;
-
-    // Split into near and far sets for this point.
-    childNearSetSize = SplitNearFar(childIndices, childDistances, bound,
-        nearSetSize - 1);
-
-    // Build this child (recursively).
-    childUsedSetSize = 1; // Mark self point as used.
-    childFarSetSize = ((nearSetSize - 1) - childNearSetSize);
-    children.push_back(new CoverTree(dataset, expansionConstant, indices[0],
-        scale - 1, distances[0], childIndices, childDistances, childNearSetSize,
-        childFarSetSize, childUsedSetSize, *metric));
-
-    // If we created an implicit node, take its self-child instead (this could
-    // happen multiple times).
-    while (children[children.size() - 1]->NumChildren() == 1)
-    {
-      CoverTree* old = children[children.size() - 1];
-      children.erase(children.begin() + children.size() - 1);
-
-      // Now take its child.
-      children.push_back(&(old->Child(0)));
-
-      // Remove its child (so it doesn't delete it).
-      old->Children().erase(old->Children().begin() + old->Children().size()
-          - 1);
-
-      // Now delete it.
-      delete old;
-    }
-
-    // Now with the child created, it returns the childIndices and
-    // childDistances vectors in this form:
-    // [ childFar | childUsed ]
-    // For each point in the childUsed set, we must move that point to the used
-    // set in our own vector.
-    size_t farSetSize = 0;
-    MoveToUsedSet(indices, distances, nearSetSize, farSetSize, usedSetSize,
-        childIndices, childFarSetSize, childUsedSetSize);
-  }
-
-  // Calculate furthest descendant.
-  for (size_t i = 0; i < usedSetSize; ++i)
-    if (distances[i] > furthestDescendantDistance)
-      furthestDescendantDistance = distances[i];
-
-  Log::Assert(furthestDescendantDistance <= pow(expansionConstant, scale + 1));
-
-  if (localMetric)
-    delete metric;
-}
-
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-CoverTree<MetricType, RootPointPolicy, StatisticType>::CoverTree(
-    const arma::mat& dataset,
-    const double expansionConstant,
-    const size_t pointIndex,
-    const int scale,
-    const double parentDistance,
-    arma::Col<size_t>& indices,
-    arma::vec& distances,
-    size_t nearSetSize,
-    size_t& farSetSize,
-    size_t& usedSetSize,
-    MetricType& metric) :
-    dataset(dataset),
-    point(pointIndex),
-    scale(scale),
-    expansionConstant(expansionConstant),
-    parentDistance(parentDistance),
-    furthestDescendantDistance(0)
-{
-  // If the size of the near set is 0, this is a leaf.
-  if (nearSetSize == 0)
-  {
-    this->scale = INT_MIN;
-    return;
-  }
-
-  // Determine the next scale level.  This should be the first level where there
-  // are any points in the far set.  So, if we know the maximum distance in the
-  // distances array, this will be the largest i such that
-  //   maxDistance > pow(ec, i)
-  // and using this for the scale factor should guarantee we are not creating an
-  // implicit node.  If the maximum distance is 0, every point in the near set
-  // will be created as a leaf, and a child to this node.  We also do not need
-  // to change the furthestChildDistance or furthestDescendantDistance.
-  const double maxDistance = max(distances.rows(0,
-      nearSetSize + farSetSize - 1));
-  if (maxDistance == 0)
-  {
-    // Make the self child at the lowest possible level.
-    // This should not modify farSetSize or usedSetSize.
-    size_t tempSize = 0;
-    children.push_back(new CoverTree(dataset, expansionConstant, pointIndex,
-        INT_MIN, 0, indices, distances, 0, tempSize, usedSetSize, metric));
-
-    // Every point in the near set should be a leaf.
-    for (size_t i = 0; i < nearSetSize; ++i)
-    {
-      // farSetSize and usedSetSize will not be modified.
-      children.push_back(new CoverTree(dataset, expansionConstant, indices[i],
-          INT_MIN, 0, indices, distances, 0, tempSize, usedSetSize, metric));
-      usedSetSize++;
-    }
-
-    // Re-sort the dataset.  We have
-    // [ used | far | other used ]
-    // and we want
-    // [ far | all used ].
-    SortPointSet(indices, distances, 0, usedSetSize, farSetSize);
-
-    return;
-  }
-
-  const int nextScale = std::min(scale,
-      (int) ceil(log(maxDistance) / log(expansionConstant))) - 1;
-  const double bound = pow(expansionConstant, nextScale);
-
-  // This needs to be taken out.  It's a sanity check for now.
-  Log::Assert(nextScale < scale);
-
-  // First, make the self child.  We must split the given near set into the near
-  // set and far set for the self child.
-  size_t childNearSetSize =
-      SplitNearFar(indices, distances, bound, nearSetSize);
-
-  // Build the self child (recursively).
-  size_t childFarSetSize = nearSetSize - childNearSetSize;
-  size_t childUsedSetSize = 0;
-  children.push_back(new CoverTree(dataset, expansionConstant, pointIndex,
-      nextScale, 0, indices, distances, childNearSetSize, childFarSetSize,
-      childUsedSetSize, metric));
-
-  // The self-child can't modify the furthestChildDistance away from 0, but it
-  // can modify the furthestDescendantDistance.
-  furthestDescendantDistance = children[0]->FurthestDescendantDistance();
-
-  // If we created an implicit node, take its self-child instead (this could
-  // happen multiple times).
-  while (children[children.size() - 1]->NumChildren() == 1)
-  {
-    CoverTree* old = children[children.size() - 1];
-    children.erase(children.begin() + children.size() - 1);
-
-    // Now take its child.
-    children.push_back(&(old->Child(0)));
-
-    // Remove its child (so it doesn't delete it).
-    old->Children().erase(old->Children().begin() + old->Children().size() - 1);
-
-    // Now delete it.
-    delete old;
-  }
-
-  // Now the arrays, in memory, look like this:
-  // [ childFar | childUsed | far | used ]
-  // but we need to move the used points past our far set:
-  // [ childFar | far | childUsed + used ]
-  // and keeping in mind that childFar = our near set,
-  // [ near | far | childUsed + used ]
-  // is what we are trying to make.
-  SortPointSet(indices, distances, childFarSetSize, childUsedSetSize,
-      farSetSize);
-
-  // Update size of near set and used set.
-  nearSetSize -= childUsedSetSize;
-  usedSetSize += childUsedSetSize;
-
-  // Now for each point in the near set, we need to make children.  To save
-  // computation later, we'll create an array holding the points in the near
-  // set, and then after each run we'll check which of those (if any) were used
-  // and we will remove them.  ...if that's faster.  I think it is.
-  while (nearSetSize > 0)
-  {
-    size_t newPointIndex = nearSetSize - 1;
-
-    // Swap to front if necessary.
-    if (newPointIndex != 0)
-    {
-      const size_t tempIndex = indices[newPointIndex];
-      const double tempDist = distances[newPointIndex];
-
-      indices[newPointIndex] = indices[0];
-      distances[newPointIndex] = distances[0];
-
-      indices[0] = tempIndex;
-      distances[0] = tempDist;
-    }
-
-    // Will this be a new furthest child?
-    if (distances[0] > furthestDescendantDistance)
-      furthestDescendantDistance = distances[0];
-
-    // If there's only one point left, we don't need this crap.
-    if ((nearSetSize == 1) && (farSetSize == 0))
-    {
-      size_t childNearSetSize = 0;
-      children.push_back(new CoverTree(dataset, expansionConstant,
-          indices[0], nextScale, distances[0], indices, distances,
-          childNearSetSize, farSetSize, usedSetSize, metric));
-
-      // Because the far set size is 0, we don't have to do any swapping to
-      // move the point into the used set.
-      ++usedSetSize;
-      --nearSetSize;
-
-      // And we're done.
-      break;
-    }
-
-    // Create the near and far set indices and distance vectors.  We don't fill
-    // in the self-point, yet.
-    arma::Col<size_t> childIndices(nearSetSize + farSetSize);
-    childIndices.rows(0, (nearSetSize + farSetSize - 2)) = indices.rows(1,
-        nearSetSize + farSetSize - 1);
-    arma::vec childDistances(nearSetSize + farSetSize);
-
-    // Build distances for the child.
-    ComputeDistances(indices[0], childIndices, childDistances, nearSetSize
-        + farSetSize - 1, metric);
-
-    // Split into near and far sets for this point.
-    childNearSetSize = SplitNearFar(childIndices, childDistances, bound,
-        nearSetSize + farSetSize - 1);
-    childFarSetSize = PruneFarSet(childIndices, childDistances,
-        expansionConstant * bound, childNearSetSize,
-        (nearSetSize + farSetSize - 1));
-
-    // Now that we know the near and far set sizes, we can put the used point
-    // (the self point) in the correct place; now, when we call
-    // MoveToUsedSet(), it will move the self-point correctly.  The distance
-    // does not matter.
-    childIndices(childNearSetSize + childFarSetSize) = indices[0];
-    childDistances(childNearSetSize + childFarSetSize) = 0;
-
-    // Build this child (recursively).
-    childUsedSetSize = 1; // Mark self point as used.
-    children.push_back(new CoverTree(dataset, expansionConstant, indices[0],
-        nextScale, distances[0], childIndices, childDistances, childNearSetSize,
-        childFarSetSize, childUsedSetSize, metric));
-
-    // If we created an implicit node, take its self-child instead (this could
-    // happen multiple times).
-    while (children[children.size() - 1]->NumChildren() == 1)
-    {
-      CoverTree* old = children[children.size() - 1];
-      children.erase(children.begin() + children.size() - 1);
-
-      // Now take its child.
-      children.push_back(&(old->Child(0)));
-
-      // Remove its child (so it doesn't delete it).
-      old->Children().erase(old->Children().begin() + old->Children().size()
-          - 1);
-
-      // Now delete it.
-      delete old;
-    }
-
-    // Now with the child created, it returns the childIndices and
-    // childDistances vectors in this form:
-    // [ childFar | childUsed ]
-    // For each point in the childUsed set, we must move that point to the used
-    // set in our own vector.
-    MoveToUsedSet(indices, distances, nearSetSize, farSetSize, usedSetSize,
-        childIndices, childFarSetSize, childUsedSetSize);
-  }
-
-  // Calculate furthest descendant.
-  for (size_t i = (nearSetSize + farSetSize); i < (nearSetSize + farSetSize +
-      usedSetSize); ++i)
-    if (distances[i] > furthestDescendantDistance)
-      furthestDescendantDistance = distances[i];
-
-  Log::Assert(furthestDescendantDistance <= pow(expansionConstant, scale + 1));
-}
-
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-CoverTree<MetricType, RootPointPolicy, StatisticType>::~CoverTree()
-{
-  // Delete each child.
-  for (size_t i = 0; i < children.size(); ++i)
-    delete children[i];
-}
-
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-double CoverTree<MetricType, RootPointPolicy, StatisticType>::MinDistance(
-    const CoverTree<MetricType, RootPointPolicy, StatisticType>* other) const
-{
-  // Every cover tree node will contain points up to EC^(scale + 1) away.
-  return MetricType::Evaluate(dataset.unsafe_col(point),
-      other->Dataset().unsafe_col(other->Point())) -
-      std::pow(expansionConstant, scale + 1) -
-      std::pow(other->ExpansionConstant(), other->Scale() + 1);
-}
-
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-double CoverTree<MetricType, RootPointPolicy, StatisticType>::MinDistance(
-    const arma::vec& other) const
-{
-  return MetricType::Evaluate(dataset.unsafe_col(point), other) -
-      std::pow(expansionConstant, scale + 1);
-}
-
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-double CoverTree<MetricType, RootPointPolicy, StatisticType>::MaxDistance(
-    const CoverTree<MetricType, RootPointPolicy, StatisticType>* other) const
-{
-  return MetricType::Evaluate(dataset.unsafe_col(point),
-      other->Dataset().unsafe_col(other->Point())) +
-      std::pow(expansionConstant, scale + 1) +
-      std::pow(other->ExpansionConstant(), other->Scale() + 1);
-}
-
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-double CoverTree<MetricType, RootPointPolicy, StatisticType>::MaxDistance(
-    const arma::vec& other) const
-{
-  return MetricType::Evaluate(dataset.unsafe_col(point), other) +
-      std::pow(expansionConstant, scale + 1);
-}
-
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-size_t CoverTree<MetricType, RootPointPolicy, StatisticType>::SplitNearFar(
-    arma::Col<size_t>& indices,
-    arma::vec& distances,
-    const double bound,
-    const size_t pointSetSize)
-{
-  // Sanity check; there is no guarantee that this condition will not be true.
-  // ...or is there?
-  if (pointSetSize <= 1)
-    return 0;
-
-  // We'll traverse from both left and right.
-  size_t left = 0;
-  size_t right = pointSetSize - 1;
-
-  // A modification of quicksort, with the pivot value set to the bound.
-  // Everything on the left of the pivot will be less than or equal to the
-  // bound; everything on the right will be greater than the bound.
-  while ((distances[left] <= bound) && (left != right))
-    ++left;
-  while ((distances[right] > bound) && (left != right))
-    --right;
-
-  while (left != right)
-  {
-    // Now swap the values and indices.
-    const size_t tempPoint = indices[left];
-    const double tempDist = distances[left];
-
-    indices[left] = indices[right];
-    distances[left] = distances[right];
-
-    indices[right] = tempPoint;
-    distances[right] = tempDist;
-
-    // Traverse the left, seeing how many points are correctly on that side.
-    // When we encounter an incorrect point, stop.  We will switch it later.
-    while ((distances[left] <= bound) && (left != right))
-      ++left;
-
-    // Traverse the right, seeing how many points are correctly on that side.
-    // When we encounter an incorrect point, stop.  We will switch it with the
-    // wrong point from the left side.
-    while ((distances[right] > bound) && (left != right))
-      --right;
-  }
-
-  // The final left value is the index of the first far value.
-  return left;
-}
-
-// Returns the maximum distance between points.
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-void CoverTree<MetricType, RootPointPolicy, StatisticType>::ComputeDistances(
-    const size_t pointIndex,
-    const arma::Col<size_t>& indices,
-    arma::vec& distances,
-    const size_t pointSetSize,
-    MetricType& metric)
-{
-  // For each point, rebuild the distances.  The indices do not need to be
-  // modified.
-  for (size_t i = 0; i < pointSetSize; ++i)
-  {
-    distances[i] = metric.Evaluate(dataset.unsafe_col(pointIndex),
-        dataset.unsafe_col(indices[i]));
-  }
-}
-
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-size_t CoverTree<MetricType, RootPointPolicy, StatisticType>::SortPointSet(
-    arma::Col<size_t>& indices,
-    arma::vec& distances,
-    const size_t childFarSetSize,
-    const size_t childUsedSetSize,
-    const size_t farSetSize)
-{
-  // We'll use low-level memcpy calls ourselves, just to ensure it's done
-  // quickly and the way we want it to be.  Unfortunately this takes up more
-  // memory than one-element swaps, but there's not a great way around that.
-  const size_t bufferSize = std::min(farSetSize, childUsedSetSize);
-  const size_t bigCopySize = std::max(farSetSize, childUsedSetSize);
-
-  // Sanity check: there is no need to sort if the buffer size is going to be
-  // zero.
-  if (bufferSize == 0)
-    return (childFarSetSize + farSetSize);
-
-  size_t* indicesBuffer = new size_t[bufferSize];
-  double* distancesBuffer = new double[bufferSize];
-
-  // The start of the memory region to copy to the buffer.
-  const size_t bufferFromLocation = ((bufferSize == farSetSize) ?
-      (childFarSetSize + childUsedSetSize) : childFarSetSize);
-  // The start of the memory region to move directly to the new place.
-  const size_t directFromLocation = ((bufferSize == farSetSize) ?
-      childFarSetSize : (childFarSetSize + childUsedSetSize));
-  // The destination to copy the buffer back to.
-  const size_t bufferToLocation = ((bufferSize == farSetSize) ?
-      childFarSetSize : (childFarSetSize + farSetSize));
-  // The destination of the directly moved memory region.
-  const size_t directToLocation = ((bufferSize == farSetSize) ?
-      (childFarSetSize + farSetSize) : childFarSetSize);
-
-  // Copy the smaller piece to the buffer.
-  memcpy(indicesBuffer, indices.memptr() + bufferFromLocation,
-      sizeof(size_t) * bufferSize);
-  memcpy(distancesBuffer, distances.memptr() + bufferFromLocation,
-      sizeof(double) * bufferSize);
-
-  // Now move the other memory.
-  memmove(indices.memptr() + directToLocation,
-      indices.memptr() + directFromLocation, sizeof(size_t) * bigCopySize);
-  memmove(distances.memptr() + directToLocation,
-      distances.memptr() + directFromLocation, sizeof(double) * bigCopySize);
-
-  // Now copy the temporary memory to the right place.
-  memcpy(indices.memptr() + bufferToLocation, indicesBuffer,
-      sizeof(size_t) * bufferSize);
-  memcpy(distances.memptr() + bufferToLocation, distancesBuffer,
-      sizeof(double) * bufferSize);
-
-  delete[] indicesBuffer;
-  delete[] distancesBuffer;
-
-  // This returns the complete size of the far set.
-  return (childFarSetSize + farSetSize);
-}
-
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-void CoverTree<MetricType, RootPointPolicy, StatisticType>::MoveToUsedSet(
-    arma::Col<size_t>& indices,
-    arma::vec& distances,
-    size_t& nearSetSize,
-    size_t& farSetSize,
-    size_t& usedSetSize,
-    arma::Col<size_t>& childIndices,
-    const size_t childFarSetSize, // childNearSetSize is 0 in this case.
-    const size_t childUsedSetSize)
-{
-  const size_t originalSum = nearSetSize + farSetSize + usedSetSize;
-
-  // Loop across the set.  We will swap points as we need.  It should be noted
-  // that farSetSize and nearSetSize may change with each iteration of this loop
-  // (depending on if we make a swap or not).
-  size_t startChildUsedSet = 0; // Where to start in the child set.
-  for (size_t i = 0; i < nearSetSize; ++i)
-  {
-    // Discover if this point was in the child's used set.
-    for (size_t j = startChildUsedSet; j < childUsedSetSize; ++j)
-    {
-      if (childIndices[childFarSetSize + j] == indices[i])
-      {
-        // We have found a point; a swap is necessary.
-
-        // Since this point is from the near set, to preserve the near set, we
-        // must do a swap.
-        if (farSetSize > 0)
-        {
-          if ((nearSetSize - 1) != i)
-          {
-            // In this case it must be a three-way swap.
-            size_t tempIndex = indices[nearSetSize + farSetSize - 1];
-            double tempDist = distances[nearSetSize + farSetSize - 1];
-
-            size_t tempNearIndex = indices[nearSetSize - 1];
-            double tempNearDist = distances[nearSetSize - 1];
-
-            indices[nearSetSize + farSetSize - 1] = indices[i];
-            distances[nearSetSize + farSetSize - 1] = distances[i];
-
-            indices[nearSetSize - 1] = tempIndex;
-            distances[nearSetSize - 1] = tempDist;
-
-            indices[i] = tempNearIndex;
-            distances[i] = tempNearDist;
-          }
-          else
-          {
-            // We can do a two-way swap.
-            size_t tempIndex = indices[nearSetSize + farSetSize - 1];
-            double tempDist = distances[nearSetSize + farSetSize - 1];
-
-            indices[nearSetSize + farSetSize - 1] = indices[i];
-            distances[nearSetSize + farSetSize - 1] = distances[i];
-
-            indices[i] = tempIndex;
-            distances[i] = tempDist;
-          }
-        }
-        else if ((nearSetSize - 1) != i)
-        {
-          // A two-way swap is possible.
-          size_t tempIndex = indices[nearSetSize + farSetSize - 1];
-          double tempDist = distances[nearSetSize + farSetSize - 1];
-
-          indices[nearSetSize + farSetSize - 1] = indices[i];
-          distances[nearSetSize + farSetSize - 1] = distances[i];
-
-          indices[i] = tempIndex;
-          distances[i] = tempDist;
-        }
-        else
-        {
-          // No swap is necessary.
-        }
-
-        // We don't need to do a complete preservation of the child index set,
-        // but we want to make sure we only loop over points we haven't seen.
-        // So increment the child counter by 1 and move a point if we need.
-        if (j != startChildUsedSet)
-        {
-          childIndices[childFarSetSize + j] = childIndices[childFarSetSize +
-              startChildUsedSet];
-        }
-
-        // Update all counters from the swaps we have done.
-        ++startChildUsedSet;
-        --nearSetSize;
-        --i; // Since we moved a point out of the near set we must step back.
-
-        break; // Break out of this for loop; back to the first one.
-      }
-    }
-  }
-
-  // Now loop over the far set.  This loop is different because we only require
-  // a normal two-way swap instead of the three-way swap to preserve the near
-  // set / far set ordering.
-  for (size_t i = 0; i < farSetSize; ++i)
-  {
-    // Discover if this point was in the child's used set.
-    for (size_t j = startChildUsedSet; j < childUsedSetSize; ++j)
-    {
-      if (childIndices[childFarSetSize + j] == indices[i + nearSetSize])
-      {
-        // We have found a point to swap.
-
-        // Perform the swap.
-        size_t tempIndex = indices[nearSetSize + farSetSize - 1];
-        double tempDist = distances[nearSetSize + farSetSize - 1];
-
-        indices[nearSetSize + farSetSize - 1] = indices[nearSetSize + i];
-        distances[nearSetSize + farSetSize - 1] = distances[nearSetSize + i];
-
-        indices[nearSetSize + i] = tempIndex;
-        distances[nearSetSize + i] = tempDist;
-
-        if (j != startChildUsedSet)
-        {
-          childIndices[childFarSetSize + j] = childIndices[childFarSetSize +
-              startChildUsedSet];
-        }
-
-        // Update all counters from the swaps we have done.
-        ++startChildUsedSet;
-        --farSetSize;
-        --i;
-
-        break; // Break out of this for loop; back to the first one.
-      }
-    }
-  }
-
-  // Update used set size.
-  usedSetSize += childUsedSetSize;
-
-  Log::Assert(originalSum == (nearSetSize + farSetSize + usedSetSize));
-}
-
-template<typename MetricType, typename RootPointPolicy, typename StatisticType>
-size_t CoverTree<MetricType, RootPointPolicy, StatisticType>::PruneFarSet(
-    arma::Col<size_t>& indices,
-    arma::vec& distances,
-    const double bound,
-    const size_t nearSetSize,
-    const size_t pointSetSize)
-{
-  // What we are trying to do is remove any points greater than the bound from
-  // the far set.  We don't care what happens to those indices and distances...
-  // so, we don't need to properly swap points -- just drop new ones in place.
-  size_t left = nearSetSize;
-  size_t right = pointSetSize - 1;
-  while ((distances[left] <= bound) && (left != right))
-    ++left;
-  while ((distances[right] > bound) && (left != right))
-    --right;
-
-  while (left != right)
-  {
-    // We don't care what happens to the point which should be on the right.
-    indices[left] = indices[right];
-    distances[left] = distances[right];
-    --right; // Since we aren't changing the right.
-
-    // Advance to next location which needs to switch.
-    while ((distances[left] <= bound) && (left != right))
-      ++left;
-    while ((distances[right] > bound) && (left != right))
-      --right;
-  }
-
-  // The far set size is the left pointer, with the near set size subtracted
-  // from it.
-  return (left - nearSetSize);
-}
-
-}; // namespace tree
-}; // namespace mlpack
-
-#endif

Deleted: mlpack/trunk/src/mlpack/core/tree/first_point_is_root.hpp
===================================================================
--- mlpack/trunk/src/mlpack/core/tree/first_point_is_root.hpp	2012-08-04 22:03:50 UTC (rev 13332)
+++ mlpack/trunk/src/mlpack/core/tree/first_point_is_root.hpp	2012-08-05 03:00:19 UTC (rev 13333)
@@ -1,37 +0,0 @@
-/**
- * @file first_point_is_root.hpp
- * @author Ryan Curtin
- *
- * A very simple policy for the cover tree; the first point in the dataset is
- * chosen as the root of the cover tree.
- */
-#ifndef __MLPACK_CORE_TREE_FIRST_POINT_IS_ROOT_HPP
-#define __MLPACK_CORE_TREE_FIRST_POINT_IS_ROOT_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace tree {
-
-/**
- * This class is meant to be used as a choice for the policy class
- * RootPointPolicy of the CoverTree class.  This policy determines which point
- * is used for the root node of the cover tree.  This particular implementation
- * simply chooses the first point in the dataset as the root.  A more complex
- * implementation might choose, for instance, the point with least maximum
- * distance to other points (the closest to the "middle").
- */
-class FirstPointIsRoot
-{
- public:
-  /**
-   * Return the point to be used as the root point of the cover tree.  This just
-   * returns 0.
-   */
-  static size_t ChooseRoot(const arma::mat& /* dataset */) { return 0; }
-};
-
-}; // namespace tree
-}; // namespace mlpack
-
-#endif // __MLPACK_CORE_TREE_FIRST_POINT_IS_ROOT_HPP




More information about the mlpack-svn mailing list