[mlpack-svn] r16768 - mlpack/trunk/src/mlpack/methods/rann

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Jul 7 07:46:05 EDT 2014


Author: rcurtin
Date: Mon Jul  7 07:46:05 2014
New Revision: 16768

Log:
Split RAQueryStat into its own class.


Added:
   mlpack/trunk/src/mlpack/methods/rann/ra_query_stat.hpp
      - copied, changed from r16763, /mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp
Modified:
   mlpack/trunk/src/mlpack/methods/rann/CMakeLists.txt
   mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp

Modified: mlpack/trunk/src/mlpack/methods/rann/CMakeLists.txt
==============================================================================
--- mlpack/trunk/src/mlpack/methods/rann/CMakeLists.txt	(original)
+++ mlpack/trunk/src/mlpack/methods/rann/CMakeLists.txt	Mon Jul  7 07:46:05 2014
@@ -2,16 +2,18 @@
 # Anything not in this list will not be compiled into the output library
 # Do not include test programs here
 set(SOURCES
-
   # rank-approximate search files
   ra_search.hpp
   ra_search_impl.hpp
 
-  # The rank-approximate search rules
+  # rank-approximate search rules
   ra_search_rules.hpp
   ra_search_rules_impl.hpp
 
-  # The typedefs
+  # query statistic
+  ra_query_stat.hpp
+
+  # typedefs
   ra_typedef.hpp
 )
 
@@ -33,4 +35,4 @@
   mlpack
 )
 
-install(TARGETS allkrann RUNTIME DESTINATION bin)
\ No newline at end of file
+install(TARGETS allkrann RUNTIME DESTINATION bin)

Copied: mlpack/trunk/src/mlpack/methods/rann/ra_query_stat.hpp (from r16763, /mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp)
==============================================================================
--- /mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/rann/ra_query_stat.hpp	Mon Jul  7 07:46:05 2014
@@ -1,22 +1,12 @@
 /**
- * @file ra_search.hpp
+ * @file ra_query_stat.hpp
  * @author Parikshit Ram
  *
- * Defines the RASearch class, which performs an abstract rank-approximate
- * nearest/farthest neighbor query on two datasets.
- *
- * The details of this method can be found in the following paper:
- *
- * @inproceedings{ram2009rank,
- *   title={{Rank-Approximate Nearest Neighbor Search: Retaining Meaning and
- *       Speed in High Dimensions}},
- *   author={{Ram, P. and Lee, D. and Ouyang, H. and Gray, A. G.}},
- *   booktitle={{Advances of Neural Information Processing Systems}},
- *   year={2009}
- * }
+ * Defines the RAQueryStat class, which is the statistic used for
+ * rank-approximate nearest neighbor search (RASearch).
  */
-#ifndef __MLPACK_METHODS_RANN_RA_SEARCH_HPP
-#define __MLPACK_METHODS_RANN_RA_SEARCH_HPP
+#ifndef __MLPACK_METHODS_RANN_RA_QUERY_STAT_HPP
+#define __MLPACK_METHODS_RANN_RA_QUERY_STAT_HPP
 
 #include <mlpack/core.hpp>
 
@@ -26,9 +16,7 @@
 #include <mlpack/methods/neighbor_search/sort_policies/nearest_neighbor_sort.hpp>
 
 namespace mlpack {
-namespace neighbor /** Neighbor-search routines.  These include
-                    * all-nearest-neighbors and all-furthest-neighbors
-                    * searches. */ {
+namespace neighbor {
 
 /**
  * Extra data for each node in the tree.  For neighbor searches, each node only
@@ -41,13 +29,6 @@
 template<typename SortPolicy>
 class RAQueryStat
 {
- private:
-  //! The bound on the node's neighbor distances.
-  double bound;
-
-  //! The minimum number of samples made by any query in this node.
-  size_t numSamplesMade;
-
  public:
   /**
    * Initialize the statistic with the worst possible distance according to our
@@ -74,274 +55,13 @@
   //! Modify the number of samples made.
   size_t& NumSamplesMade() { return numSamplesMade; }
 
-};
-
-/**
- * The RASearch class: This class provides a generic manner to perform
- * rank-approximate search via random-sampling. If the 'naive' option is chosen,
- * this rank-approximate search will be done by randomly sampled from the whole
- * set. If the 'naive' option is not chosen, the sampling is done in a
- * stratified manner in the tree as mentioned in the algorithms in Figure 2 of
- * the following paper:
- *
- * @inproceedings{ram2009rank,
- *   title={{Rank-Approximate Nearest Neighbor Search: Retaining Meaning and
- *       Speed in High Dimensions}},
- *   author={{Ram, P. and Lee, D. and Ouyang, H. and Gray, A. G.}},
- *   booktitle={{Advances of Neural Information Processing Systems}},
- *   year={2009}
- * }
- *
- * @tparam SortPolicy The sort policy for distances; see NearestNeighborSort.
- * @tparam MetricType The metric to use for computation.
- * @tparam TreeType The tree type to use.
- */
-template<typename SortPolicy = NearestNeighborSort,
-         typename MetricType = mlpack::metric::SquaredEuclideanDistance,
-         typename TreeType = tree::BinarySpaceTree<bound::HRectBound<2, false>,
-                                                   RAQueryStat<SortPolicy> > >
-class RASearch
-{
- public:
-  /**
-   * Initialize the RASearch object, passing both a query and reference dataset.
-   * Optionally, perform the computation in naive mode or single-tree mode, and
-   * set the leaf size used for tree-building.  An initialized distance metric
-   * can be given, for cases where the metric has internal data (i.e. the
-   * distance::MahalanobisDistance class).
-   *
-   * This method will copy the matrices to internal copies, which are rearranged
-   * during tree-building.  You can avoid this extra copy by pre-constructing
-   * the trees and passing them using a diferent constructor.
-   *
-   * @param referenceSet Set of reference points.
-   * @param querySet Set of query points.
-   * @param naive If true, the rank-approximate search will be performed by
-   *      directly sampling the whole set instead of using the stratified
-   *      sampling on the tree.
-   * @param singleMode If true, single-tree search will be used (as opposed to
-   *      dual-tree search).
-   * @param leafSize Leaf size for tree construction (ignored if tree is given).
-   * @param metric An optional instance of the MetricType class.
-   */
-  RASearch(const typename TreeType::Mat& referenceSet,
-           const typename TreeType::Mat& querySet,
-           const bool naive = false,
-           const bool singleMode = false,
-           const size_t leafSize = 20,
-           const MetricType metric = MetricType());
-
-  /**
-   * Initialize the RASearch object, passing only one dataset, which is
-   * used as both the query and the reference dataset.  Optionally, perform the
-   * computation in naive mode or single-tree mode, and set the leaf size used
-   * for tree-building.  An initialized distance metric can be given, for cases
-   * where the metric has internal data (i.e. the distance::MahalanobisDistance
-   * class).
-   *
-   * If naive mode is being used and a pre-built tree is given, it may not work:
-   * naive mode operates by building a one-node tree (the root node holds all
-   * the points).  If that condition is not satisfied with the pre-built tree,
-   * then naive mode will not work.
-   *
-   * @param referenceSet Set of reference points.
-   * @param naive If true, the rank-approximate search will be performed
-   *      by directly sampling the whole set instead of using the stratified
-   *      sampling on the tree.
-   * @param singleMode If true, single-tree search will be used (as opposed to
-   *      dual-tree search).
-   * @param leafSize Leaf size for tree construction (ignored if tree is given).
-   * @param metric An optional instance of the MetricType class.
-   */
-  RASearch(const typename TreeType::Mat& referenceSet,
-           const bool naive = false,
-           const bool singleMode = false,
-           const size_t leafSize = 20,
-           const MetricType metric = MetricType());
-
-  /**
-   * Initialize the RASearch object with the given datasets and
-   * pre-constructed trees.  It is assumed that the points in referenceSet and
-   * querySet correspond to the points in referenceTree and queryTree,
-   * respectively.  Optionally, choose to use single-tree mode.  Naive mode is
-   * not available as an option for this constructor; instead, to run naive
-   * computation, construct a tree with all of the points in one leaf (i.e.
-   * leafSize = number of points).  Additionally, an instantiated distance
-   * metric can be given, for cases where the distance metric holds data.
-   *
-   * There is no copying of the data matrices in this constructor (because
-   * tree-building is not necessary), so this is the constructor to use when
-   * copies absolutely must be avoided.
-   *
-   * @note
-   * Because tree-building (at least with BinarySpaceTree) modifies the ordering
-   * of a matrix, be sure you pass the modified matrix to this object!  In
-   * addition, mapping the points of the matrix back to their original indices
-   * is not done when this constructor is used.
-   * @endnote
-   *
-   * @param referenceTree Pre-built tree for reference points.
-   * @param queryTree Pre-built tree for query points.
-   * @param referenceSet Set of reference points corresponding to referenceTree.
-   * @param querySet Set of query points corresponding to queryTree.
-   * @param singleMode Whether single-tree computation should be used (as
-   *      opposed to dual-tree computation).
-   * @param metric Instantiated distance metric.
-   */
-  RASearch(TreeType* referenceTree,
-           TreeType* queryTree,
-           const typename TreeType::Mat& referenceSet,
-           const typename TreeType::Mat& querySet,
-           const bool singleMode = false,
-           const MetricType metric = MetricType());
-
-  /**
-   * Initialize the RASearch object with the given reference dataset and
-   * pre-constructed tree.  It is assumed that the points in referenceSet
-   * correspond to the points in referenceTree.  Optionally, choose to use
-   * single-tree mode.  Naive mode is not available as an option for this
-   * constructor; instead, to run naive computation, construct a tree with all
-   * the points in one leaf (i.e. leafSize = number of points).  Additionally,
-   * an instantiated distance metric can be given, for the case where the
-   * distance metric holds data.
-   *
-   * There is no copying of the data matrices in this constructor (because
-   * tree-building is not necessary), so this is the constructor to use when
-   * copies absolutely must be avoided.
-   *
-   * @note
-   * Because tree-building (at least with BinarySpaceTree) modifies the ordering
-   * of a matrix, be sure you pass the modified matrix to this object!  In
-   * addition, mapping the points of the matrix back to their original indices
-   * is not done when this constructor is used.
-   * @endnote
-   *
-   * @param referenceTree Pre-built tree for reference points.
-   * @param referenceSet Set of reference points corresponding to referenceTree.
-   * @param singleMode Whether single-tree computation should be used (as
-   *      opposed to dual-tree computation).
-   * @param metric Instantiated distance metric.
-   */
-  RASearch(TreeType* referenceTree,
-           const typename TreeType::Mat& referenceSet,
-           const bool singleMode = false,
-           const MetricType metric = MetricType());
-
-  /**
-   * Delete the RASearch object. The tree is the only member we are
-   * responsible for deleting.  The others will take care of themselves.
-   */
-  ~RASearch();
-
-  /**
-   * Compute the rank approximate nearest neighbors and store the output in the
-   * given matrices. The matrices will be set to the size of n columns by k
-   * rows, where n is the number of points in the query dataset and k is the
-   * number of neighbors being searched for.
-   *
-   * Note that tau, the rank-approximation parameter, specifies that we are
-   * looking for k neighbors with probability alpha of being in the top tau
-   * percent of nearest neighbors.  So, as an example, if our dataset has 1000
-   * points, and we want 5 nearest neighbors with 95% probability of being in
-   * the top 5% of nearest neighbors (or, the top 50 nearest neighbors), we set
-   * k = 5, tau = 5, and alpha = 0.95.
-   *
-   * The method will fail (and issue a failure message) if the value of tau is
-   * too low: tau must be set such that the number of points in the
-   * corresponding percentile of the data is greater than k.  Thus, if we choose
-   * tau = 0.1 with a dataset of 1000 points and k = 5, then we are attempting
-   * to choose 5 nearest neighbors out of the closest 1 point -- this is
-   * invalid.
-   *
-   * @param k Number of neighbors to search for.
-   * @param resultingNeighbors Matrix storing lists of neighbors for each query
-   *     point.
-   * @param distances Matrix storing distances of neighbors for each query
-   *     point.
-   * @param tau The rank-approximation in percentile of the data. The default
-   *     value is 5%.
-   * @param alpha The desired success probability. The default value is 0.95.
-   * @param sampleAtLeaves Sample at leaves for faster but less accurate
-   *      computation. This defaults to 'false'.
-   * @param firstLeafExact Traverse to the first leaf without approximation.
-   *     This can ensure that the query definitely finds its (near) duplicate
-   *     if there exists one.  This defaults to 'false' for now.
-   * @param singleSampleLimit The limit on the largest node that can be
-   *     approximated by sampling. This defaults to 20.
-   */
-  void Search(const size_t k,
-              arma::Mat<size_t>& resultingNeighbors,
-              arma::mat& distances,
-              const double tau = 5,
-              const double alpha = 0.95,
-              const bool sampleAtLeaves = false,
-              const bool firstLeafExact = false,
-              const size_t singleSampleLimit = 20);
-
-  /**
-   * This function recursively resets the RAQueryStat of the queryTree to set
-   * 'bound' to WorstDistance and the 'numSamplesMade' to 0. This allows a user
-   * to perform multiple searches on the same pair of trees, possibly with
-   * different levels of approximation without requiring to build a new pair of
-   * trees for every new (approximate) search.
-   */
-  void ResetQueryTree();
-
-  // Returns a string representation of this object.
-  std::string ToString() const;
-
  private:
-  //! Copy of reference dataset (if we need it, because tree building modifies
-  //! it).
-  arma::mat referenceCopy;
-  //! Copy of query dataset (if we need it, because tree building modifies it).
-  arma::mat queryCopy;
-
-  //! Reference dataset.
-  const arma::mat& referenceSet;
-  //! Query dataset (may not be given).
-  const arma::mat& querySet;
-
-  //! Pointer to the root of the reference tree.
-  TreeType* referenceTree;
-  //! Pointer to the root of the query tree (might not exist).
-  TreeType* queryTree;
-
-  //! Indicates if we should free the reference tree at deletion time.
-  bool ownReferenceTree;
-  //! Indicates if we should free the query tree at deletion time.
-  bool ownQueryTree;
-
-  //! Indicates if naive random sampling on the set is being used.
-  bool naive;
-  //! Indicates if single-tree search is being used (opposed to dual-tree).
-  bool singleMode;
-
-  //! Instantiation of kernel.
-  MetricType metric;
-
-  //! Permutations of reference points during tree building.
-  std::vector<size_t> oldFromNewReferences;
-  //! Permutations of query points during tree building.
-  std::vector<size_t> oldFromNewQueries;
-
-  //! Total number of pruned nodes during the neighbor search.
-  size_t numberOfPrunes;
-
-  /**
-   * @param treeNode The node of the tree whose RAQueryStat is reset
-   *     and whose children are to be explored recursively.
-   */
-  void ResetRAQueryStat(TreeType* treeNode);
-}; // class RASearch
-
-}; // namespace neighbor
-}; // namespace mlpack
+  //! The bound on the node's neighbor distances.
+  double bound;
 
-// Include implementation.
-#include "ra_search_impl.hpp"
+  //! The minimum number of samples made by any query in this node.
+  size_t numSamplesMade;
 
-// Include convenient typedefs.
-#include "ra_typedef.hpp"
+};
 
 #endif

Modified: mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp	Mon Jul  7 07:46:05 2014
@@ -25,56 +25,7 @@
 #include <mlpack/core/metrics/lmetric.hpp>
 #include <mlpack/methods/neighbor_search/sort_policies/nearest_neighbor_sort.hpp>
 
-namespace mlpack {
-namespace neighbor /** Neighbor-search routines.  These include
-                    * all-nearest-neighbors and all-furthest-neighbors
-                    * searches. */ {
-
-/**
- * Extra data for each node in the tree.  For neighbor searches, each node only
- * needs to store a bound on neighbor distances.
- *
- * Every query is required to make a minimum number of samples to guarantee the
- * desired approximation error. The 'numSamplesMade' keeps track of the minimum
- * number of samples made by all queries in the node in question.
- */
-template<typename SortPolicy>
-class RAQueryStat
-{
- private:
-  //! The bound on the node's neighbor distances.
-  double bound;
-
-  //! The minimum number of samples made by any query in this node.
-  size_t numSamplesMade;
-
- public:
-  /**
-   * Initialize the statistic with the worst possible distance according to our
-   * sorting policy.
-   */
-  RAQueryStat() : bound(SortPolicy::WorstDistance()), numSamplesMade(0) { }
-
-  /**
-   * Initialization for a node.
-   */
-  template<typename TreeType>
-  RAQueryStat(const TreeType& /* node */) :
-    bound(SortPolicy::WorstDistance()),
-    numSamplesMade(0)
-  { }
-
-  //! Get the bound.
-  double Bound() const { return bound; }
-  //! Modify the bound.
-  double& Bound() { return bound; }
-
-  //! Get the number of samples made.
-  size_t NumSamplesMade() const { return numSamplesMade; }
-  //! Modify the number of samples made.
-  size_t& NumSamplesMade() { return numSamplesMade; }
-
-};
+#include "ra_query_stat.hpp"
 
 /**
  * The RASearch class: This class provides a generic manner to perform



More information about the mlpack-svn mailing list