[mlpack-svn] r14140 - mlpack/trunk/src/mlpack/methods/rann

Fri Jan 18 17:32:44 EST 2013

Author: rcurtin
Date: 2013-01-18 17:32:44 -0500 (Fri, 18 Jan 2013)
New Revision: 14140

Modified:
   mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp
   mlpack/trunk/src/mlpack/methods/rann/ra_search_impl.hpp
Log:
Simple cleanups of RANN code.


Modified: mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp
===================================================================

--- mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp	2013-01-18 22:26:33 UTC (rev 14139)
+++ mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp	2013-01-18 22:32:44 UTC (rev 14140)
@@ -4,23 +4,21 @@
  *
  * Defines the RASearch class, which performs an abstract
  * rank-approximate nearest/farthest neighbor query on two datasets.
- * 
+ *
  * The details of this method can be found in the following paper:
  *
  * @inproceedings{ram2009rank,
- *    title={{Rank-Approximate Nearest Neighbor Search: Retaining Meaning and Speed in High Dimensions}},
- *    author={{Ram, P. and Lee, D. and Ouyang, H. and Gray, A. G.}},
- *    booktitle={{Advances of Neural Information Processing Systems}},
- *    year={2009}
+ *   title={{Rank-Approximate Nearest Neighbor Search: Retaining Meaning and
+ *       Speed in High Dimensions}},
+ *   author={{Ram, P. and Lee, D. and Ouyang, H. and Gray, A. G.}},
+ *   booktitle={{Advances of Neural Information Processing Systems}},
+ *   year={2009}
  * }
- * 
  */
 #ifndef __MLPACK_METHODS_NEIGHBOR_SEARCH_RA_SEARCH_HPP
 #define __MLPACK_METHODS_NEIGHBOR_SEARCH_RA_SEARCH_HPP
 
 #include <mlpack/core.hpp>
-#include <vector>
-#include <string>
 
 #include <mlpack/core/tree/binary_space_tree.hpp>
 
@@ -35,10 +33,10 @@
 /**
  * Extra data for each node in the tree.  For neighbor searches, each node only
  * needs to store a bound on neighbor distances.
- * 
- * Every query is required to make a minimum number of samples to guarantee
- * the desired approximation error. The 'numSamplesMade' keeps track of 
- * the minimum number of samples made by all queries in the node in question. 
+ *
+ * Every query is required to make a minimum number of samples to guarantee the
+ * desired approximation error. The 'numSamplesMade' keeps track of the minimum
+ * number of samples made by all queries in the node in question.
  */
 template<typename SortPolicy>
 class RAQueryStat
@@ -47,13 +45,13 @@
   //! The bound on the node's neighbor distances.
   double bound;
 
-  //! The minimum number of samples made by any query in this node
+  //! The minimum number of samples made by any query in this node.
   size_t numSamplesMade;
 
  public:
   /**
-   * Initialize the statistic with the worst possible distance according to
-   * our sorting policy.
+   * Initialize the statistic with the worst possible distance according to our
+   * sorting policy.
    */
   RAQueryStat() : bound(SortPolicy::WorstDistance()), numSamplesMade(0) { }
 
@@ -61,11 +59,11 @@
    * Initialization for a leaf, required by the StatisticType policy.
    */
   template<typename MatType>
-  RAQueryStat(const MatType& /* dataset */, 
-              const size_t /* begin */, 
-              const size_t /* count */) : 
-    bound(SortPolicy::WorstDistance()), 
-    numSamplesMade(0) 
+  RAQueryStat(const MatType& /* dataset */,
+              const size_t /* begin */,
+              const size_t /* count */) :
+    bound(SortPolicy::WorstDistance()),
+    numSamplesMade(0)
   { }
 
   /**
@@ -77,29 +75,37 @@
               const size_t /* count */,
               const RAQueryStat& /* leftStat */,
               const RAQueryStat& /* rightStat */) :
-    bound(SortPolicy::WorstDistance()), 
-    numSamplesMade(0) 
+    bound(SortPolicy::WorstDistance()),
+    numSamplesMade(0)
   { }
-  
+
   //! Get the bound.
   double Bound() const { return bound; }
   //! Modify the bound.
   double& Bound() { return bound; }
 
-  //! Get the number of samples made
+  //! Get the number of samples made.
   size_t NumSamplesMade() const { return numSamplesMade; }
-  //! Modify the number of samples made
+  //! Modify the number of samples made.
   size_t& NumSamplesMade() { return numSamplesMade; }
 };
 
 /**
- * The RASearch class: This class provides a generic manner to perform 
- *   rank-approximate search via random-sampling. If the 'naive' option 
- *   is chosen, this rank-approximate search will be done by randomly 
- *   sampled from the whole set. If the 'naive' option is not chosen, 
- *   the sampling is done in a stratified manner in the tree as 
- *   mentioned in the algorithms in Figure 2 of the aforementioned paper. 
+ * The RASearch class: This class provides a generic manner to perform
+ * rank-approximate search via random-sampling. If the 'naive' option is chosen,
+ * this rank-approximate search will be done by randomly sampled from the whole
+ * set. If the 'naive' option is not chosen, the sampling is done in a
+ * stratified manner in the tree as mentioned in the algorithms in Figure 2 of
+ * the following paper:
  *
+ * @inproceedings{ram2009rank,
+ *   title={{Rank-Approximate Nearest Neighbor Search: Retaining Meaning and
+ *       Speed in High Dimensions}},
+ *   author={{Ram, P. and Lee, D. and Ouyang, H. and Gray, A. G.}},
+ *   booktitle={{Advances of Neural Information Processing Systems}},
+ *   year={2009}
+ * }
+ *
  * @tparam SortPolicy The sort policy for distances; see NearestNeighborSort.
  * @tparam MetricType The metric to use for computation.
  * @tparam TreeType The tree type to use.
@@ -112,22 +118,21 @@
 {
  public:
   /**
-   * Initialize the RASearch object, passing both a query and reference
-   * dataset.  Optionally, perform the computation in naive mode or single-tree
-   * mode, and set the leaf size used for tree-building.  An initialized
-   * distance metric can be given, for cases where the metric has internal data
-   * (i.e. the distance::MahalanobisDistance class).
+   * Initialize the RASearch object, passing both a query and reference dataset.
+   * Optionally, perform the computation in naive mode or single-tree mode, and
+   * set the leaf size used for tree-building.  An initialized distance metric
+   * can be given, for cases where the metric has internal data (i.e. the
+   * distance::MahalanobisDistance class).
    *
-   * This method will copy the matrices to internal copies, 
-   * which are rearranged during tree-building.  You can avoid this 
-   * extra copy by pre-constructing the trees and passing them 
-   * using a diferent constructor.
+   * This method will copy the matrices to internal copies, which are rearranged
+   * during tree-building.  You can avoid this extra copy by pre-constructing
+   * the trees and passing them using a diferent constructor.
    *
    * @param referenceSet Set of reference points.
    * @param querySet Set of query points.
-   * @param naive If true, the rank-approximate search will be performed 
-   *      by directly sampling the whole set instead of using the stratified 
-   *      sampling on the tree. 
+   * @param naive If true, the rank-approximate search will be performed by
+   *      directly sampling the whole set instead of using the stratified
+   *      sampling on the tree.
    * @param singleMode If true, single-tree search will be used (as opposed to
    *      dual-tree search).
    * @param leafSize Leaf size for tree construction (ignored if tree is given).
@@ -154,9 +159,9 @@
    * then naive mode will not work.
    *
    * @param referenceSet Set of reference points.
-   * @param naive If true, the rank-approximate search will be performed 
-   *      by directly sampling the whole set instead of using the stratified 
-   *      sampling on the tree. 
+   * @param naive If true, the rank-approximate search will be performed
+   *      by directly sampling the whole set instead of using the stratified
+   *      sampling on the tree.
    * @param singleMode If true, single-tree search will be used (as opposed to
    *      dual-tree search).
    * @param leafSize Leaf size for tree construction (ignored if tree is given).
@@ -236,7 +241,6 @@
            const bool singleMode = false,
            const MetricType metric = MetricType());
 
-
   /**
    * Delete the RASearch object. The tree is the only member we are
    * responsible for deleting.  The others will take care of themselves.
@@ -244,26 +248,25 @@
   ~RASearch();
 
   /**
-   * Compute the rank approximate nearest neighbors and store the 
-   * output in the given matrices. The matrices will be set to the size 
-   * of n columns by k rows, where n is the number of points in the 
-   * query dataset and k is the number of neighbors being searched for.
+   * Compute the rank approximate nearest neighbors and store the output in the
+   * given matrices. The matrices will be set to the size of n columns by k
+   * rows, where n is the number of points in the query dataset and k is the
+   * number of neighbors being searched for.
    *
    * @param k Number of neighbors to search for.
    * @param resultingNeighbors Matrix storing lists of neighbors for each query
    *     point.
    * @param distances Matrix storing distances of neighbors for each query
    *     point.
-   * @param tau The rank-approximation in percentile of the data. The 
-   *     default value is 0.1%. 
+   * @param tau The rank-approximation in percentile of the data. The default
+   *     value is 0.1%.
    * @param alpha The desired success probability. The default value is 0.95.
-   * @param sampleAtLeaves The flag to trigger sampling at leaves for 
-   *     faster yet less accurate computation. This defaults to 'false'.
-   * @param firstLeafExact The flag to trigger the traversal of the algorithm 
-   *     to the first leaf without approximation. This can ensure that the 
-   *     query definitely finds its (near) duplicate if there exists one.
-   *     This defaults to 'false' for now.
-   * @param singleSampleLimit The limit on the largest node that can be 
+   * @param sampleAtLeaves Sample at leaves for faster but less accurate
+   *      computation. This defaults to 'false'.
+   * @param firstLeafExact Traverse to the first leaf without approximation.
+   *     This can ensure that the query definitely finds its (near) duplicate
+   *     if there exists one.  This defaults to 'false' for now.
+   * @param singleSampleLimit The limit on the largest node that can be
    *     approximated by sampling. This defaults to 20.
    */
   void Search(const size_t k,
@@ -277,15 +280,13 @@
 
   /**
    * This function recursively resets the RAQueryStat of the queryTree to set
-   * 'bound' to WorstDistance and the 'numSamplesMade' to 0. This allows 
-   * a user to perform multiple searches on the same pair of trees, 
-   * possibly with different levels of approximation without requiring 
-   * to build a new pair of trees for every new (approximate) search.
+   * 'bound' to WorstDistance and the 'numSamplesMade' to 0. This allows a user
+   * to perform multiple searches on the same pair of trees, possibly with
+   * different levels of approximation without requiring to build a new pair of
+   * trees for every new (approximate) search.
    */
   void ResetQueryTree();
 
-
-
  private:
   //! Copy of reference dataset (if we need it, because tree building modifies
   //! it).
@@ -325,12 +326,10 @@
   size_t numberOfPrunes;
 
   /**
-   *
-   * @param treeNode The node of the tree whose RAQueryStat is reset 
+   * @param treeNode The node of the tree whose RAQueryStat is reset
    *     and whose children are to be explored recursively.
    */
   void ResetRAQueryStat(TreeType* treeNode);
-
 }; // class RASearch
 
 }; // namespace neighbor

Modified: mlpack/trunk/src/mlpack/methods/rann/ra_search_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/rann/ra_search_impl.hpp	2013-01-18 22:26:33 UTC (rev 14139)
+++ mlpack/trunk/src/mlpack/methods/rann/ra_search_impl.hpp	2013-01-18 22:32:44 UTC (rev 14140)
@@ -2,7 +2,7 @@
  * @file ra_search_impl.hpp
  * @author Parikshit Ram
  *
- * Implementation of RASearch class to perform rank-approximate 
+ * Implementation of RASearch class to perform rank-approximate
  * all-nearest-neighbors on two specified data sets.
  */
 #ifndef __MLPACK_METHODS_NEIGHBOR_SEARCH_RA_SEARCH_IMPL_HPP
@@ -12,7 +12,8 @@
 
 #include "ra_search_rules.hpp"
 
-using namespace mlpack::neighbor;
+namespace mlpack {
+namespace neighbor {
 
 // Construct the object.
 template<typename SortPolicy, typename MetricType, typename TreeType>
@@ -23,20 +24,20 @@
          const bool singleMode,
          const size_t leafSize,
          const MetricType metric) :
-  referenceCopy(referenceSet),
-  queryCopy(querySet),
-  referenceSet(referenceCopy),
-  querySet(queryCopy),
-  referenceTree(NULL),
-  queryTree(NULL),
-  ownReferenceTree(true), // False if a tree was passed.
-  ownQueryTree(true), // False if a tree was passed.
-  naive(naive),
-  singleMode(!naive && singleMode), // No single mode if naive.
-  metric(metric),
-  numberOfPrunes(0)
+    referenceCopy(referenceSet),
+    queryCopy(querySet),
+    referenceSet(referenceCopy),
+    querySet(queryCopy),
+    referenceTree(NULL),
+    queryTree(NULL),
+    ownReferenceTree(true), // False if a tree was passed.
+    ownQueryTree(true), // False if a tree was passed.
+    naive(naive),
+    singleMode(!naive && singleMode), // No single mode if naive.
+    metric(metric),
+    numberOfPrunes(0)
 {
-  // We'll time tree building, but only if we are building trees.
+  // We'll time tree building.
   Timer::Start("tree_building");
 
   // Construct as a naive object if we need to.
@@ -46,7 +47,7 @@
   queryTree = new TreeType(queryCopy, oldFromNewQueries,
       (naive ? querySet.n_cols : leafSize));
 
-  // Stop the timer we started above (if we need to).
+  // Stop the timer we started above.
   Timer::Stop("tree_building");
 }
 
@@ -58,19 +59,19 @@
          const bool singleMode,
          const size_t leafSize,
          const MetricType metric) :
-  referenceCopy(referenceSet),
-  referenceSet(referenceCopy),
-  querySet(referenceCopy),
-  referenceTree(NULL),
-  queryTree(NULL),
-  ownReferenceTree(true),
-  ownQueryTree(false), // Since it will be the same as referenceTree.
-  naive(naive),
-  singleMode(!naive && singleMode), // No single mode if naive.
-  metric(metric),
-  numberOfPrunes(0)
+    referenceCopy(referenceSet),
+    referenceSet(referenceCopy),
+    querySet(referenceCopy),
+    referenceTree(NULL),
+    queryTree(NULL),
+    ownReferenceTree(true),
+    ownQueryTree(false), // Since it will be the same as referenceTree.
+    naive(naive),
+    singleMode(!naive && singleMode), // No single mode if naive.
+    metric(metric),
+    numberOfPrunes(0)
 {
-  // We'll time tree building, but only if we are building trees.
+  // We'll time tree building.
   Timer::Start("tree_building");
 
   // Construct as a naive object if we need to.
@@ -90,16 +91,16 @@
          const typename TreeType::Mat& querySet,
          const bool singleMode,
          const MetricType metric) :
-  referenceSet(referenceSet),
-  querySet(querySet),
-  referenceTree(referenceTree),
-  queryTree(queryTree),
-  ownReferenceTree(false),
-  ownQueryTree(false),
-  naive(false),
-  singleMode(singleMode),
-  metric(metric),
-  numberOfPrunes(0)
+    referenceSet(referenceSet),
+    querySet(querySet),
+    referenceTree(referenceTree),
+    queryTree(queryTree),
+    ownReferenceTree(false),
+    ownQueryTree(false),
+    naive(false),
+    singleMode(singleMode),
+    metric(metric),
+    numberOfPrunes(0)
 // Nothing else to initialize.
 {  }
 
@@ -153,6 +154,7 @@
        const size_t singleSampleLimit)
 {
   Timer::Start("computing_neighbors");
+
   // If we have built the trees ourselves, then we will have to map all the
   // indices back to their original indices when this computation is finished.
   // To avoid an extra copy, we will store the neighbors and distances in a
@@ -160,7 +162,7 @@
   arma::Mat<size_t>* neighborPtr = &resultingNeighbors;
   arma::mat* distancePtr = &distances;
 
-  if (!naive) // if naive, no re-mapping required since points are not mapped
+  if (!naive) // If naive, no re-mapping required since points are not mapped.
   {
     if (ownQueryTree || (ownReferenceTree && !queryTree))
       distancePtr = new arma::mat; // Query indices need to be mapped.
@@ -179,16 +181,16 @@
   {
     // Create the helper object for the tree traversal.
     typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
-    RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr, 
+    RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
                    metric, tau, alpha, naive, sampleAtLeaves, firstLeafExact,
                    singleSampleLimit);
 
     if (!referenceTree->IsLeaf())
     {
-      Log::Info << "Running single-tree..." << std::endl;
+      Log::Info << "Performing single-tree traversal..." << std::endl;
 
       // Create the traverser.
-      typename TreeType::template SingleTreeTraverser<RuleType> 
+      typename TreeType::template SingleTreeTraverser<RuleType>
         traverser(rules);
 
       // Now have it traverse for each point.
@@ -197,28 +199,28 @@
 
       numPrunes = traverser.NumPrunes();
     }
-    else 
+    else
     {
       assert(naive);
       Log::Info << "Naive sampling already done!" << std::endl;
     }
 
-    Log::Info << "ST: NDC: " << rules.NumDistComputations()
-      / querySet.n_cols << std::endl;
+    Log::Info << "Single-tree traversal done; number of distance calculations: "
+        << (rules.NumDistComputations() / querySet.n_cols) << std::endl;
   }
   else // Dual-tree recursion.
   {
-    Log::Info << "Running dual-tree ..." << std::endl;
+    Log::Info << "Performing dual-tree traversal..." << std::endl;
 
     typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
-    RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr, 
+    RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
                    metric, tau, alpha, sampleAtLeaves, firstLeafExact,
                    singleSampleLimit);
 
     typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);
 
-    Log::Info << "DT: QS Pre-search: " << queryTree->Stat().NumSamplesMade() <<
-      std::endl;
+    Log::Info << "Dual-tree traversal; query statistic pre-search: "
+        << queryTree->Stat().NumSamplesMade() << std::endl;
 
     if (queryTree)
       traverser.Traverse(*queryTree, *referenceTree);
@@ -227,19 +229,18 @@
 
     numPrunes = traverser.NumPrunes();
 
-    Log::Info << "DT: NDC: " << rules.NumDistComputations() 
-      / querySet.n_cols << std::endl;
+    Log::Info << "Dual-tree traversal done; number of distance calculations: "
+        << (rules.NumDistComputations() / querySet.n_cols) << std::endl;
   }
 
   Timer::Stop("computing_neighbors");
   Log::Info << "Pruned " << numPrunes << " nodes." << std::endl;
 
-
   // Now, do we need to do mapping of indices?
   if ((!ownReferenceTree && !ownQueryTree) || naive)
   {
-    // No mapping needed if we do not own the trees or if 
-    // we are doing naive sampling.  We are done.
+    // No mapping needed if we do not own the trees or if we are doing naive
+    // sampling.  We are done.
     return;
   }
   else if (ownReferenceTree && ownQueryTree) // Map references and queries.
@@ -324,7 +325,6 @@
   }
 } // Search
 
-
 template<typename SortPolicy, typename MetricType, typename TreeType>
 void RASearch<SortPolicy, MetricType, TreeType>::
 ResetQueryTree()
@@ -333,14 +333,11 @@
   {
     if (queryTree)
       ResetRAQueryStat(queryTree);
-    else 
+    else
       ResetRAQueryStat(referenceTree);
   }
-
-  return;
 }
 
-
 template<typename SortPolicy, typename MetricType, typename TreeType>
 void RASearch<SortPolicy, MetricType, TreeType>::
 ResetRAQueryStat(TreeType* treeNode)
@@ -348,11 +345,11 @@
   treeNode->Stat().Bound() = SortPolicy::WorstDistance();
   treeNode->Stat().NumSamplesMade() = 0;
 
-  if (!treeNode->IsLeaf())
-    for (size_t i = 0; i < treeNode->NumChildren(); i++)
-      ResetRAQueryStat(&treeNode->Child(i));
-
-  return;
+  for (size_t i = 0; i < treeNode->NumChildren(); i++)
+    ResetRAQueryStat(&treeNode->Child(i));
 } // ResetRAQueryStat
 
+}; // namespace neighbor
+}; // namespace mlpack
+
 #endif