[mlpack-svn] r15964 - mlpack/trunk/src/mlpack/methods/emst

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Oct 8 15:56:26 EDT 2013


Author: rcurtin
Date: Tue Oct  8 15:56:26 2013
New Revision: 15964

Log:
Move DTBStat into a separate file.


Added:
   mlpack/trunk/src/mlpack/methods/emst/dtb_stat.hpp
      - copied, changed from r15963, /mlpack/trunk/src/mlpack/methods/emst/dtb.hpp
Modified:
   mlpack/trunk/src/mlpack/methods/emst/CMakeLists.txt
   mlpack/trunk/src/mlpack/methods/emst/dtb.hpp

Modified: mlpack/trunk/src/mlpack/methods/emst/CMakeLists.txt
==============================================================================
--- mlpack/trunk/src/mlpack/methods/emst/CMakeLists.txt	(original)
+++ mlpack/trunk/src/mlpack/methods/emst/CMakeLists.txt	Tue Oct  8 15:56:26 2013
@@ -1,14 +1,15 @@
 # Define the files we need to compile
 # Anything not in this list will not be compiled into MLPACK.
 set(SOURCES
-   # union_find
-   union_find.hpp
-   # dtb
-   dtb.hpp
-   dtb_impl.hpp
-   dtb_rules.hpp
-   dtb_rules_impl.hpp
-   edge_pair.hpp
+  # union_find
+  union_find.hpp
+  # dtb
+  dtb.hpp
+  dtb_impl.hpp
+  dtb_rules.hpp
+  dtb_rules_impl.hpp
+  dtb_stat.hpp
+  edge_pair.hpp
 )
 
 # Add directory name to sources.

Modified: mlpack/trunk/src/mlpack/methods/emst/dtb.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/emst/dtb.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/emst/dtb.hpp	Tue Oct  8 15:56:26 2013
@@ -20,6 +20,7 @@
 #ifndef __MLPACK_METHODS_EMST_DTB_HPP
 #define __MLPACK_METHODS_EMST_DTB_HPP
 
+#include "dtb_stat.hpp"
 #include "edge_pair.hpp"
 
 #include <mlpack/core.hpp>
@@ -31,69 +32,6 @@
 namespace emst /** Euclidean Minimum Spanning Trees. */ {
 
 /**
- * A statistic for use with MLPACK trees, which stores the upper bound on
- * distance to nearest neighbors and the component which this node belongs to.
- */
-class DTBStat
-{
- private:
-  //! Upper bound on the distance to the nearest neighbor of any point in this
-  //! node.
-  double maxNeighborDistance;
-
-  //! Lower bound on the distance to the nearest neighbor of any point in this
-  //! node.
-  double minNeighborDistance;
-
-  //! Total bound for pruning.
-  double bound;
-
-  //! The index of the component that all points in this node belong to.  This
-  //! is the same index returned by UnionFind for all points in this node.  If
-  //! points in this node are in different components, this value will be
-  //! negative.
-  int componentMembership;
-
- public:
-  /**
-   * A generic initializer.  Sets the maximum neighbor distance to its default,
-   * and the component membership to -1 (no component).
-   */
-  DTBStat();
-
-  /**
-   * This is called when a node is finished initializing.  We set the maximum
-   * neighbor distance to its default, and if possible, we set the component
-   * membership of the node (if it has only one point and no children).
-   *
-   * @param node Node that has been finished.
-   */
-  template<typename TreeType>
-  DTBStat(const TreeType& node);
-
-  //! Get the maximum neighbor distance.
-  double MaxNeighborDistance() const { return maxNeighborDistance; }
-  //! Modify the maximum neighbor distance.
-  double& MaxNeighborDistance() { return maxNeighborDistance; }
-
-  //! Get the minimum neighbor distance.
-  double MinNeighborDistance() const { return minNeighborDistance; }
-  //! Modify the minimum neighbor distance.
-  double& MinNeighborDistance() { return minNeighborDistance; }
-
-  //! Get the total bound for pruning.
-  double Bound() const { return bound; }
-  //! Modify the total bound for pruning.
-  double& Bound() { return bound; }
-
-  //! Get the component membership of this node.
-  int ComponentMembership() const { return componentMembership; }
-  //! Modify the component membership of this node.
-  int& ComponentMembership() { return componentMembership; }
-
-}; // class DTBStat
-
-/**
  * Performs the MST calculation using the Dual-Tree Boruvka algorithm, using any
  * type of tree.
  *

Copied: mlpack/trunk/src/mlpack/methods/emst/dtb_stat.hpp (from r15963, /mlpack/trunk/src/mlpack/methods/emst/dtb.hpp)
==============================================================================
--- /mlpack/trunk/src/mlpack/methods/emst/dtb.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/emst/dtb_stat.hpp	Tue Oct  8 15:56:26 2013
@@ -2,33 +2,15 @@
  * @file dtb.hpp
  * @author Bill March (march at gatech.edu)
  *
- * Contains an implementation of the DualTreeBoruvka algorithm for finding a
- * Euclidean Minimum Spanning Tree using the kd-tree data structure.
- *
- * @code
- * @inproceedings{
- *   author = {March, W.B., Ram, P., and Gray, A.G.},
- *   title = {{Fast Euclidean Minimum Spanning Tree: Algorithm, Analysis,
- *      Applications.}},
- *   booktitle = {Proceedings of the 16th ACM SIGKDD International Conference
- *      on Knowledge Discovery and Data Mining}
- *   series = {KDD 2010},
- *   year = {2010}
- * }
- * @endcode
+ * DTBStat is the StatisticType used by trees when performing EMST.
  */
-#ifndef __MLPACK_METHODS_EMST_DTB_HPP
-#define __MLPACK_METHODS_EMST_DTB_HPP
-
-#include "edge_pair.hpp"
+#ifndef __MLPACK_METHODS_EMST_DTB_STAT_HPP
+#define __MLPACK_METHODS_EMST_DTB_STAT_HPP
 
 #include <mlpack/core.hpp>
-#include <mlpack/core/metrics/lmetric.hpp>
-
-#include <mlpack/core/tree/binary_space_tree.hpp>
 
 namespace mlpack {
-namespace emst /** Euclidean Minimum Spanning Trees. */ {
+namespace emst {
 
 /**
  * A statistic for use with MLPACK trees, which stores the upper bound on
@@ -93,176 +75,7 @@
 
 }; // class DTBStat
 
-/**
- * Performs the MST calculation using the Dual-Tree Boruvka algorithm, using any
- * type of tree.
- *
- * For more information on the algorithm, see the following citation:
- *
- * @code
- * @inproceedings{
- *   author = {March, W.B., Ram, P., and Gray, A.G.},
- *   title = {{Fast Euclidean Minimum Spanning Tree: Algorithm, Analysis,
- *      Applications.}},
- *   booktitle = {Proceedings of the 16th ACM SIGKDD International Conference
- *      on Knowledge Discovery and Data Mining}
- *   series = {KDD 2010},
- *   year = {2010}
- * }
- * @endcode
- *
- * General usage of this class might be like this:
- *
- * @code
- * extern arma::mat data; // We want to find the MST of this dataset.
- * DualTreeBoruvka<> dtb(data); // Create the tree with default options.
- *
- * // Find the MST.
- * arma::mat mstResults;
- * dtb.ComputeMST(mstResults);
- * @endcode
- *
- * More advanced usage of the class can use different types of trees, pass in an
- * already-built tree, or compute the MST using the O(n^2) naive algorithm.
- *
- * @tparam MetricType The metric to use.  IMPORTANT: this hasn't really been
- * tested with anything other than the L2 metric, so user beware. Note that the
- * tree type needs to compute bounds using the same metric as the type
- * specified here.
- * @tparam TreeType Type of tree to use.  Should use DTBStat as a statistic.
- */
-template<
-  typename MetricType = metric::EuclideanDistance,
-  typename TreeType = tree::BinarySpaceTree<bound::HRectBound<2>, DTBStat>
->
-class DualTreeBoruvka
-{
- private:
-  //! Copy of the data (if necessary).
-  typename TreeType::Mat dataCopy;
-  //! Reference to the data (this is what should be used for accessing data).
-  typename TreeType::Mat& data;
-
-  //! Pointer to the root of the tree.
-  TreeType* tree;
-  //! Indicates whether or not we "own" the tree.
-  bool ownTree;
-
-  //! Indicates whether or not O(n^2) naive mode will be used.
-  bool naive;
-
-  //! Edges.
-  std::vector<EdgePair> edges; // We must use vector with non-numerical types.
-
-  //! Connections.
-  UnionFind connections;
-
-  //! Permutations of points during tree building.
-  std::vector<size_t> oldFromNew;
-  //! List of edge nodes.
-  arma::Col<size_t> neighborsInComponent;
-  //! List of edge nodes.
-  arma::Col<size_t> neighborsOutComponent;
-  //! List of edge distances.
-  arma::vec neighborsDistances;
-
-  //! Total distance of the tree.
-  double totalDist;
-
-  //! The instantiated metric.
-  MetricType metric;
-
-  //! For sorting the edge list after the computation.
-  struct SortEdgesHelper
-  {
-    bool operator()(const EdgePair& pairA, const EdgePair& pairB)
-    {
-      return (pairA.Distance() < pairB.Distance());
-    }
-  } SortFun;
-
- public:
-  /**
-   * Create the tree from the given dataset.  This copies the dataset to an
-   * internal copy, because tree-building modifies the dataset.
-   *
-   * @param data Dataset to build a tree for.
-   * @param naive Whether the computation should be done in O(n^2) naive mode.
-   * @param leafSize The leaf size to be used during tree construction.
-   */
-  DualTreeBoruvka(const typename TreeType::Mat& dataset,
-                  const bool naive = false,
-                  const size_t leafSize = 1,
-                  const MetricType metric = MetricType());
-
-  /**
-   * Create the DualTreeBoruvka object with an already initialized tree.  This
-   * will not copy the dataset, and can save a little processing power.  Naive
-   * mode is not available as an option for this constructor; instead, to run
-   * naive computation, construct a tree with all the points in one leaf (i.e.
-   * leafSize = number of points).
-   *
-   * @note
-   * Because tree-building (at least with BinarySpaceTree) modifies the ordering
-   * of a matrix, be sure you pass the modified matrix to this object!  In
-   * addition, mapping the points of the matrix back to their original indices
-   * is not done when this constructor is used.
-   * @endnote
-   *
-   * @param tree Pre-built tree.
-   * @param dataset Dataset corresponding to the pre-built tree.
-   */
-  DualTreeBoruvka(TreeType* tree, const typename TreeType::Mat& dataset,
-                  const MetricType metric = MetricType());
-
-  /**
-   * Delete the tree, if it was created inside the object.
-   */
-  ~DualTreeBoruvka();
-
-  /**
-   * Iteratively find the nearest neighbor of each component until the MST is
-   * complete.  The results will be a 3xN matrix (with N equal to the number of
-   * edges in the minimum spanning tree).  The first row will contain the lesser
-   * index of the edge; the second row will contain the greater index of the
-   * edge; and the third row will contain the distance between the two edges.
-   *
-   * @param results Matrix which results will be stored in.
-   */
-  void ComputeMST(arma::mat& results);
-
- private:
-  /**
-   * Adds a single edge to the edge list
-   */
-  void AddEdge(const size_t e1, const size_t e2, const double distance);
-
-  /**
-   * Adds all the edges found in one iteration to the list of neighbors.
-   */
-  void AddAllEdges();
-
-  /**
-   * Unpermute the edge list and output it to results.
-   */
-  void EmitResults(arma::mat& results);
-
-  /**
-   * This function resets the values in the nodes of the tree nearest neighbor
-   * distance, and checks for fully connected nodes.
-   */
-  void CleanupHelper(TreeType* tree);
-
-  /**
-   * The values stored in the tree must be reset on each iteration.
-   */
-  void Cleanup();
-
-}; // class DualTreeBoruvka
-
 }; // namespace emst
 }; // namespace mlpack
 
-#include "dtb_impl.hpp"
-
-#endif // __MLPACK_METHODS_EMST_DTB_HPP
+#endif // __MLPACK_METHODS_EMST_DTB_STAT_HPP



More information about the mlpack-svn mailing list