[mlpack-svn] r15964 - mlpack/trunk/src/mlpack/methods/emst
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Oct 8 15:56:26 EDT 2013
Author: rcurtin
Date: Tue Oct 8 15:56:26 2013
New Revision: 15964
Log:
Move DTBStat into a separate file.
Added:
mlpack/trunk/src/mlpack/methods/emst/dtb_stat.hpp
- copied, changed from r15963, /mlpack/trunk/src/mlpack/methods/emst/dtb.hpp
Modified:
mlpack/trunk/src/mlpack/methods/emst/CMakeLists.txt
mlpack/trunk/src/mlpack/methods/emst/dtb.hpp
Modified: mlpack/trunk/src/mlpack/methods/emst/CMakeLists.txt
==============================================================================
--- mlpack/trunk/src/mlpack/methods/emst/CMakeLists.txt (original)
+++ mlpack/trunk/src/mlpack/methods/emst/CMakeLists.txt Tue Oct 8 15:56:26 2013
@@ -1,14 +1,15 @@
# Define the files we need to compile
# Anything not in this list will not be compiled into MLPACK.
set(SOURCES
- # union_find
- union_find.hpp
- # dtb
- dtb.hpp
- dtb_impl.hpp
- dtb_rules.hpp
- dtb_rules_impl.hpp
- edge_pair.hpp
+ # union_find
+ union_find.hpp
+ # dtb
+ dtb.hpp
+ dtb_impl.hpp
+ dtb_rules.hpp
+ dtb_rules_impl.hpp
+ dtb_stat.hpp
+ edge_pair.hpp
)
# Add directory name to sources.
Modified: mlpack/trunk/src/mlpack/methods/emst/dtb.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/emst/dtb.hpp (original)
+++ mlpack/trunk/src/mlpack/methods/emst/dtb.hpp Tue Oct 8 15:56:26 2013
@@ -20,6 +20,7 @@
#ifndef __MLPACK_METHODS_EMST_DTB_HPP
#define __MLPACK_METHODS_EMST_DTB_HPP
+#include "dtb_stat.hpp"
#include "edge_pair.hpp"
#include <mlpack/core.hpp>
@@ -31,69 +32,6 @@
namespace emst /** Euclidean Minimum Spanning Trees. */ {
/**
- * A statistic for use with MLPACK trees, which stores the upper bound on
- * distance to nearest neighbors and the component which this node belongs to.
- */
-class DTBStat
-{
- private:
- //! Upper bound on the distance to the nearest neighbor of any point in this
- //! node.
- double maxNeighborDistance;
-
- //! Lower bound on the distance to the nearest neighbor of any point in this
- //! node.
- double minNeighborDistance;
-
- //! Total bound for pruning.
- double bound;
-
- //! The index of the component that all points in this node belong to. This
- //! is the same index returned by UnionFind for all points in this node. If
- //! points in this node are in different components, this value will be
- //! negative.
- int componentMembership;
-
- public:
- /**
- * A generic initializer. Sets the maximum neighbor distance to its default,
- * and the component membership to -1 (no component).
- */
- DTBStat();
-
- /**
- * This is called when a node is finished initializing. We set the maximum
- * neighbor distance to its default, and if possible, we set the component
- * membership of the node (if it has only one point and no children).
- *
- * @param node Node that has been finished.
- */
- template<typename TreeType>
- DTBStat(const TreeType& node);
-
- //! Get the maximum neighbor distance.
- double MaxNeighborDistance() const { return maxNeighborDistance; }
- //! Modify the maximum neighbor distance.
- double& MaxNeighborDistance() { return maxNeighborDistance; }
-
- //! Get the minimum neighbor distance.
- double MinNeighborDistance() const { return minNeighborDistance; }
- //! Modify the minimum neighbor distance.
- double& MinNeighborDistance() { return minNeighborDistance; }
-
- //! Get the total bound for pruning.
- double Bound() const { return bound; }
- //! Modify the total bound for pruning.
- double& Bound() { return bound; }
-
- //! Get the component membership of this node.
- int ComponentMembership() const { return componentMembership; }
- //! Modify the component membership of this node.
- int& ComponentMembership() { return componentMembership; }
-
-}; // class DTBStat
-
-/**
* Performs the MST calculation using the Dual-Tree Boruvka algorithm, using any
* type of tree.
*
Copied: mlpack/trunk/src/mlpack/methods/emst/dtb_stat.hpp (from r15963, /mlpack/trunk/src/mlpack/methods/emst/dtb.hpp)
==============================================================================
--- /mlpack/trunk/src/mlpack/methods/emst/dtb.hpp (original)
+++ mlpack/trunk/src/mlpack/methods/emst/dtb_stat.hpp Tue Oct 8 15:56:26 2013
@@ -2,33 +2,15 @@
* @file dtb.hpp
* @author Bill March (march at gatech.edu)
*
- * Contains an implementation of the DualTreeBoruvka algorithm for finding a
- * Euclidean Minimum Spanning Tree using the kd-tree data structure.
- *
- * @code
- * @inproceedings{
- * author = {March, W.B., Ram, P., and Gray, A.G.},
- * title = {{Fast Euclidean Minimum Spanning Tree: Algorithm, Analysis,
- * Applications.}},
- * booktitle = {Proceedings of the 16th ACM SIGKDD International Conference
- * on Knowledge Discovery and Data Mining}
- * series = {KDD 2010},
- * year = {2010}
- * }
- * @endcode
+ * DTBStat is the StatisticType used by trees when performing EMST.
*/
-#ifndef __MLPACK_METHODS_EMST_DTB_HPP
-#define __MLPACK_METHODS_EMST_DTB_HPP
-
-#include "edge_pair.hpp"
+#ifndef __MLPACK_METHODS_EMST_DTB_STAT_HPP
+#define __MLPACK_METHODS_EMST_DTB_STAT_HPP
#include <mlpack/core.hpp>
-#include <mlpack/core/metrics/lmetric.hpp>
-
-#include <mlpack/core/tree/binary_space_tree.hpp>
namespace mlpack {
-namespace emst /** Euclidean Minimum Spanning Trees. */ {
+namespace emst {
/**
* A statistic for use with MLPACK trees, which stores the upper bound on
@@ -93,176 +75,7 @@
}; // class DTBStat
-/**
- * Performs the MST calculation using the Dual-Tree Boruvka algorithm, using any
- * type of tree.
- *
- * For more information on the algorithm, see the following citation:
- *
- * @code
- * @inproceedings{
- * author = {March, W.B., Ram, P., and Gray, A.G.},
- * title = {{Fast Euclidean Minimum Spanning Tree: Algorithm, Analysis,
- * Applications.}},
- * booktitle = {Proceedings of the 16th ACM SIGKDD International Conference
- * on Knowledge Discovery and Data Mining}
- * series = {KDD 2010},
- * year = {2010}
- * }
- * @endcode
- *
- * General usage of this class might be like this:
- *
- * @code
- * extern arma::mat data; // We want to find the MST of this dataset.
- * DualTreeBoruvka<> dtb(data); // Create the tree with default options.
- *
- * // Find the MST.
- * arma::mat mstResults;
- * dtb.ComputeMST(mstResults);
- * @endcode
- *
- * More advanced usage of the class can use different types of trees, pass in an
- * already-built tree, or compute the MST using the O(n^2) naive algorithm.
- *
- * @tparam MetricType The metric to use. IMPORTANT: this hasn't really been
- * tested with anything other than the L2 metric, so user beware. Note that the
- * tree type needs to compute bounds using the same metric as the type
- * specified here.
- * @tparam TreeType Type of tree to use. Should use DTBStat as a statistic.
- */
-template<
- typename MetricType = metric::EuclideanDistance,
- typename TreeType = tree::BinarySpaceTree<bound::HRectBound<2>, DTBStat>
->
-class DualTreeBoruvka
-{
- private:
- //! Copy of the data (if necessary).
- typename TreeType::Mat dataCopy;
- //! Reference to the data (this is what should be used for accessing data).
- typename TreeType::Mat& data;
-
- //! Pointer to the root of the tree.
- TreeType* tree;
- //! Indicates whether or not we "own" the tree.
- bool ownTree;
-
- //! Indicates whether or not O(n^2) naive mode will be used.
- bool naive;
-
- //! Edges.
- std::vector<EdgePair> edges; // We must use vector with non-numerical types.
-
- //! Connections.
- UnionFind connections;
-
- //! Permutations of points during tree building.
- std::vector<size_t> oldFromNew;
- //! List of edge nodes.
- arma::Col<size_t> neighborsInComponent;
- //! List of edge nodes.
- arma::Col<size_t> neighborsOutComponent;
- //! List of edge distances.
- arma::vec neighborsDistances;
-
- //! Total distance of the tree.
- double totalDist;
-
- //! The instantiated metric.
- MetricType metric;
-
- //! For sorting the edge list after the computation.
- struct SortEdgesHelper
- {
- bool operator()(const EdgePair& pairA, const EdgePair& pairB)
- {
- return (pairA.Distance() < pairB.Distance());
- }
- } SortFun;
-
- public:
- /**
- * Create the tree from the given dataset. This copies the dataset to an
- * internal copy, because tree-building modifies the dataset.
- *
- * @param data Dataset to build a tree for.
- * @param naive Whether the computation should be done in O(n^2) naive mode.
- * @param leafSize The leaf size to be used during tree construction.
- */
- DualTreeBoruvka(const typename TreeType::Mat& dataset,
- const bool naive = false,
- const size_t leafSize = 1,
- const MetricType metric = MetricType());
-
- /**
- * Create the DualTreeBoruvka object with an already initialized tree. This
- * will not copy the dataset, and can save a little processing power. Naive
- * mode is not available as an option for this constructor; instead, to run
- * naive computation, construct a tree with all the points in one leaf (i.e.
- * leafSize = number of points).
- *
- * @note
- * Because tree-building (at least with BinarySpaceTree) modifies the ordering
- * of a matrix, be sure you pass the modified matrix to this object! In
- * addition, mapping the points of the matrix back to their original indices
- * is not done when this constructor is used.
- * @endnote
- *
- * @param tree Pre-built tree.
- * @param dataset Dataset corresponding to the pre-built tree.
- */
- DualTreeBoruvka(TreeType* tree, const typename TreeType::Mat& dataset,
- const MetricType metric = MetricType());
-
- /**
- * Delete the tree, if it was created inside the object.
- */
- ~DualTreeBoruvka();
-
- /**
- * Iteratively find the nearest neighbor of each component until the MST is
- * complete. The results will be a 3xN matrix (with N equal to the number of
- * edges in the minimum spanning tree). The first row will contain the lesser
- * index of the edge; the second row will contain the greater index of the
- * edge; and the third row will contain the distance between the two edges.
- *
- * @param results Matrix which results will be stored in.
- */
- void ComputeMST(arma::mat& results);
-
- private:
- /**
- * Adds a single edge to the edge list
- */
- void AddEdge(const size_t e1, const size_t e2, const double distance);
-
- /**
- * Adds all the edges found in one iteration to the list of neighbors.
- */
- void AddAllEdges();
-
- /**
- * Unpermute the edge list and output it to results.
- */
- void EmitResults(arma::mat& results);
-
- /**
- * This function resets the values in the nodes of the tree nearest neighbor
- * distance, and checks for fully connected nodes.
- */
- void CleanupHelper(TreeType* tree);
-
- /**
- * The values stored in the tree must be reset on each iteration.
- */
- void Cleanup();
-
-}; // class DualTreeBoruvka
-
}; // namespace emst
}; // namespace mlpack
-#include "dtb_impl.hpp"
-
-#endif // __MLPACK_METHODS_EMST_DTB_HPP
+#endif // __MLPACK_METHODS_EMST_DTB_STAT_HPP
More information about the mlpack-svn
mailing list