[mlpack-svn] r14140 - mlpack/trunk/src/mlpack/methods/rann
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Jan 18 17:32:44 EST 2013
Author: rcurtin
Date: 2013-01-18 17:32:44 -0500 (Fri, 18 Jan 2013)
New Revision: 14140
Modified:
mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp
mlpack/trunk/src/mlpack/methods/rann/ra_search_impl.hpp
Log:
Simple cleanups of RANN code.
Modified: mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp 2013-01-18 22:26:33 UTC (rev 14139)
+++ mlpack/trunk/src/mlpack/methods/rann/ra_search.hpp 2013-01-18 22:32:44 UTC (rev 14140)
@@ -4,23 +4,21 @@
*
* Defines the RASearch class, which performs an abstract
* rank-approximate nearest/farthest neighbor query on two datasets.
- *
+ *
* The details of this method can be found in the following paper:
*
* @inproceedings{ram2009rank,
- * title={{Rank-Approximate Nearest Neighbor Search: Retaining Meaning and Speed in High Dimensions}},
- * author={{Ram, P. and Lee, D. and Ouyang, H. and Gray, A. G.}},
- * booktitle={{Advances of Neural Information Processing Systems}},
- * year={2009}
+ * title={{Rank-Approximate Nearest Neighbor Search: Retaining Meaning and
+ * Speed in High Dimensions}},
+ * author={{Ram, P. and Lee, D. and Ouyang, H. and Gray, A. G.}},
+ * booktitle={{Advances of Neural Information Processing Systems}},
+ * year={2009}
* }
- *
*/
#ifndef __MLPACK_METHODS_NEIGHBOR_SEARCH_RA_SEARCH_HPP
#define __MLPACK_METHODS_NEIGHBOR_SEARCH_RA_SEARCH_HPP
#include <mlpack/core.hpp>
-#include <vector>
-#include <string>
#include <mlpack/core/tree/binary_space_tree.hpp>
@@ -35,10 +33,10 @@
/**
* Extra data for each node in the tree. For neighbor searches, each node only
* needs to store a bound on neighbor distances.
- *
- * Every query is required to make a minimum number of samples to guarantee
- * the desired approximation error. The 'numSamplesMade' keeps track of
- * the minimum number of samples made by all queries in the node in question.
+ *
+ * Every query is required to make a minimum number of samples to guarantee the
+ * desired approximation error. The 'numSamplesMade' keeps track of the minimum
+ * number of samples made by all queries in the node in question.
*/
template<typename SortPolicy>
class RAQueryStat
@@ -47,13 +45,13 @@
//! The bound on the node's neighbor distances.
double bound;
- //! The minimum number of samples made by any query in this node
+ //! The minimum number of samples made by any query in this node.
size_t numSamplesMade;
public:
/**
- * Initialize the statistic with the worst possible distance according to
- * our sorting policy.
+ * Initialize the statistic with the worst possible distance according to our
+ * sorting policy.
*/
RAQueryStat() : bound(SortPolicy::WorstDistance()), numSamplesMade(0) { }
@@ -61,11 +59,11 @@
* Initialization for a leaf, required by the StatisticType policy.
*/
template<typename MatType>
- RAQueryStat(const MatType& /* dataset */,
- const size_t /* begin */,
- const size_t /* count */) :
- bound(SortPolicy::WorstDistance()),
- numSamplesMade(0)
+ RAQueryStat(const MatType& /* dataset */,
+ const size_t /* begin */,
+ const size_t /* count */) :
+ bound(SortPolicy::WorstDistance()),
+ numSamplesMade(0)
{ }
/**
@@ -77,29 +75,37 @@
const size_t /* count */,
const RAQueryStat& /* leftStat */,
const RAQueryStat& /* rightStat */) :
- bound(SortPolicy::WorstDistance()),
- numSamplesMade(0)
+ bound(SortPolicy::WorstDistance()),
+ numSamplesMade(0)
{ }
-
+
//! Get the bound.
double Bound() const { return bound; }
//! Modify the bound.
double& Bound() { return bound; }
- //! Get the number of samples made
+ //! Get the number of samples made.
size_t NumSamplesMade() const { return numSamplesMade; }
- //! Modify the number of samples made
+ //! Modify the number of samples made.
size_t& NumSamplesMade() { return numSamplesMade; }
};
/**
- * The RASearch class: This class provides a generic manner to perform
- * rank-approximate search via random-sampling. If the 'naive' option
- * is chosen, this rank-approximate search will be done by randomly
- * sampled from the whole set. If the 'naive' option is not chosen,
- * the sampling is done in a stratified manner in the tree as
- * mentioned in the algorithms in Figure 2 of the aforementioned paper.
+ * The RASearch class: This class provides a generic manner to perform
+ * rank-approximate search via random-sampling. If the 'naive' option is chosen,
+ * this rank-approximate search will be done by randomly sampled from the whole
+ * set. If the 'naive' option is not chosen, the sampling is done in a
+ * stratified manner in the tree as mentioned in the algorithms in Figure 2 of
+ * the following paper:
*
+ * @inproceedings{ram2009rank,
+ * title={{Rank-Approximate Nearest Neighbor Search: Retaining Meaning and
+ * Speed in High Dimensions}},
+ * author={{Ram, P. and Lee, D. and Ouyang, H. and Gray, A. G.}},
+ * booktitle={{Advances of Neural Information Processing Systems}},
+ * year={2009}
+ * }
+ *
* @tparam SortPolicy The sort policy for distances; see NearestNeighborSort.
* @tparam MetricType The metric to use for computation.
* @tparam TreeType The tree type to use.
@@ -112,22 +118,21 @@
{
public:
/**
- * Initialize the RASearch object, passing both a query and reference
- * dataset. Optionally, perform the computation in naive mode or single-tree
- * mode, and set the leaf size used for tree-building. An initialized
- * distance metric can be given, for cases where the metric has internal data
- * (i.e. the distance::MahalanobisDistance class).
+ * Initialize the RASearch object, passing both a query and reference dataset.
+ * Optionally, perform the computation in naive mode or single-tree mode, and
+ * set the leaf size used for tree-building. An initialized distance metric
+ * can be given, for cases where the metric has internal data (i.e. the
+ * distance::MahalanobisDistance class).
*
- * This method will copy the matrices to internal copies,
- * which are rearranged during tree-building. You can avoid this
- * extra copy by pre-constructing the trees and passing them
- * using a diferent constructor.
+ * This method will copy the matrices to internal copies, which are rearranged
+ * during tree-building. You can avoid this extra copy by pre-constructing
+ * the trees and passing them using a diferent constructor.
*
* @param referenceSet Set of reference points.
* @param querySet Set of query points.
- * @param naive If true, the rank-approximate search will be performed
- * by directly sampling the whole set instead of using the stratified
- * sampling on the tree.
+ * @param naive If true, the rank-approximate search will be performed by
+ * directly sampling the whole set instead of using the stratified
+ * sampling on the tree.
* @param singleMode If true, single-tree search will be used (as opposed to
* dual-tree search).
* @param leafSize Leaf size for tree construction (ignored if tree is given).
@@ -154,9 +159,9 @@
* then naive mode will not work.
*
* @param referenceSet Set of reference points.
- * @param naive If true, the rank-approximate search will be performed
- * by directly sampling the whole set instead of using the stratified
- * sampling on the tree.
+ * @param naive If true, the rank-approximate search will be performed
+ * by directly sampling the whole set instead of using the stratified
+ * sampling on the tree.
* @param singleMode If true, single-tree search will be used (as opposed to
* dual-tree search).
* @param leafSize Leaf size for tree construction (ignored if tree is given).
@@ -236,7 +241,6 @@
const bool singleMode = false,
const MetricType metric = MetricType());
-
/**
* Delete the RASearch object. The tree is the only member we are
* responsible for deleting. The others will take care of themselves.
@@ -244,26 +248,25 @@
~RASearch();
/**
- * Compute the rank approximate nearest neighbors and store the
- * output in the given matrices. The matrices will be set to the size
- * of n columns by k rows, where n is the number of points in the
- * query dataset and k is the number of neighbors being searched for.
+ * Compute the rank approximate nearest neighbors and store the output in the
+ * given matrices. The matrices will be set to the size of n columns by k
+ * rows, where n is the number of points in the query dataset and k is the
+ * number of neighbors being searched for.
*
* @param k Number of neighbors to search for.
* @param resultingNeighbors Matrix storing lists of neighbors for each query
* point.
* @param distances Matrix storing distances of neighbors for each query
* point.
- * @param tau The rank-approximation in percentile of the data. The
- * default value is 0.1%.
+ * @param tau The rank-approximation in percentile of the data. The default
+ * value is 0.1%.
* @param alpha The desired success probability. The default value is 0.95.
- * @param sampleAtLeaves The flag to trigger sampling at leaves for
- * faster yet less accurate computation. This defaults to 'false'.
- * @param firstLeafExact The flag to trigger the traversal of the algorithm
- * to the first leaf without approximation. This can ensure that the
- * query definitely finds its (near) duplicate if there exists one.
- * This defaults to 'false' for now.
- * @param singleSampleLimit The limit on the largest node that can be
+ * @param sampleAtLeaves Sample at leaves for faster but less accurate
+ * computation. This defaults to 'false'.
+ * @param firstLeafExact Traverse to the first leaf without approximation.
+ * This can ensure that the query definitely finds its (near) duplicate
+ * if there exists one. This defaults to 'false' for now.
+ * @param singleSampleLimit The limit on the largest node that can be
* approximated by sampling. This defaults to 20.
*/
void Search(const size_t k,
@@ -277,15 +280,13 @@
/**
* This function recursively resets the RAQueryStat of the queryTree to set
- * 'bound' to WorstDistance and the 'numSamplesMade' to 0. This allows
- * a user to perform multiple searches on the same pair of trees,
- * possibly with different levels of approximation without requiring
- * to build a new pair of trees for every new (approximate) search.
+ * 'bound' to WorstDistance and the 'numSamplesMade' to 0. This allows a user
+ * to perform multiple searches on the same pair of trees, possibly with
+ * different levels of approximation without requiring to build a new pair of
+ * trees for every new (approximate) search.
*/
void ResetQueryTree();
-
-
private:
//! Copy of reference dataset (if we need it, because tree building modifies
//! it).
@@ -325,12 +326,10 @@
size_t numberOfPrunes;
/**
- *
- * @param treeNode The node of the tree whose RAQueryStat is reset
+ * @param treeNode The node of the tree whose RAQueryStat is reset
* and whose children are to be explored recursively.
*/
void ResetRAQueryStat(TreeType* treeNode);
-
}; // class RASearch
}; // namespace neighbor
Modified: mlpack/trunk/src/mlpack/methods/rann/ra_search_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/rann/ra_search_impl.hpp 2013-01-18 22:26:33 UTC (rev 14139)
+++ mlpack/trunk/src/mlpack/methods/rann/ra_search_impl.hpp 2013-01-18 22:32:44 UTC (rev 14140)
@@ -2,7 +2,7 @@
* @file ra_search_impl.hpp
* @author Parikshit Ram
*
- * Implementation of RASearch class to perform rank-approximate
+ * Implementation of RASearch class to perform rank-approximate
* all-nearest-neighbors on two specified data sets.
*/
#ifndef __MLPACK_METHODS_NEIGHBOR_SEARCH_RA_SEARCH_IMPL_HPP
@@ -12,7 +12,8 @@
#include "ra_search_rules.hpp"
-using namespace mlpack::neighbor;
+namespace mlpack {
+namespace neighbor {
// Construct the object.
template<typename SortPolicy, typename MetricType, typename TreeType>
@@ -23,20 +24,20 @@
const bool singleMode,
const size_t leafSize,
const MetricType metric) :
- referenceCopy(referenceSet),
- queryCopy(querySet),
- referenceSet(referenceCopy),
- querySet(queryCopy),
- referenceTree(NULL),
- queryTree(NULL),
- ownReferenceTree(true), // False if a tree was passed.
- ownQueryTree(true), // False if a tree was passed.
- naive(naive),
- singleMode(!naive && singleMode), // No single mode if naive.
- metric(metric),
- numberOfPrunes(0)
+ referenceCopy(referenceSet),
+ queryCopy(querySet),
+ referenceSet(referenceCopy),
+ querySet(queryCopy),
+ referenceTree(NULL),
+ queryTree(NULL),
+ ownReferenceTree(true), // False if a tree was passed.
+ ownQueryTree(true), // False if a tree was passed.
+ naive(naive),
+ singleMode(!naive && singleMode), // No single mode if naive.
+ metric(metric),
+ numberOfPrunes(0)
{
- // We'll time tree building, but only if we are building trees.
+ // We'll time tree building.
Timer::Start("tree_building");
// Construct as a naive object if we need to.
@@ -46,7 +47,7 @@
queryTree = new TreeType(queryCopy, oldFromNewQueries,
(naive ? querySet.n_cols : leafSize));
- // Stop the timer we started above (if we need to).
+ // Stop the timer we started above.
Timer::Stop("tree_building");
}
@@ -58,19 +59,19 @@
const bool singleMode,
const size_t leafSize,
const MetricType metric) :
- referenceCopy(referenceSet),
- referenceSet(referenceCopy),
- querySet(referenceCopy),
- referenceTree(NULL),
- queryTree(NULL),
- ownReferenceTree(true),
- ownQueryTree(false), // Since it will be the same as referenceTree.
- naive(naive),
- singleMode(!naive && singleMode), // No single mode if naive.
- metric(metric),
- numberOfPrunes(0)
+ referenceCopy(referenceSet),
+ referenceSet(referenceCopy),
+ querySet(referenceCopy),
+ referenceTree(NULL),
+ queryTree(NULL),
+ ownReferenceTree(true),
+ ownQueryTree(false), // Since it will be the same as referenceTree.
+ naive(naive),
+ singleMode(!naive && singleMode), // No single mode if naive.
+ metric(metric),
+ numberOfPrunes(0)
{
- // We'll time tree building, but only if we are building trees.
+ // We'll time tree building.
Timer::Start("tree_building");
// Construct as a naive object if we need to.
@@ -90,16 +91,16 @@
const typename TreeType::Mat& querySet,
const bool singleMode,
const MetricType metric) :
- referenceSet(referenceSet),
- querySet(querySet),
- referenceTree(referenceTree),
- queryTree(queryTree),
- ownReferenceTree(false),
- ownQueryTree(false),
- naive(false),
- singleMode(singleMode),
- metric(metric),
- numberOfPrunes(0)
+ referenceSet(referenceSet),
+ querySet(querySet),
+ referenceTree(referenceTree),
+ queryTree(queryTree),
+ ownReferenceTree(false),
+ ownQueryTree(false),
+ naive(false),
+ singleMode(singleMode),
+ metric(metric),
+ numberOfPrunes(0)
// Nothing else to initialize.
{ }
@@ -153,6 +154,7 @@
const size_t singleSampleLimit)
{
Timer::Start("computing_neighbors");
+
// If we have built the trees ourselves, then we will have to map all the
// indices back to their original indices when this computation is finished.
// To avoid an extra copy, we will store the neighbors and distances in a
@@ -160,7 +162,7 @@
arma::Mat<size_t>* neighborPtr = &resultingNeighbors;
arma::mat* distancePtr = &distances;
- if (!naive) // if naive, no re-mapping required since points are not mapped
+ if (!naive) // If naive, no re-mapping required since points are not mapped.
{
if (ownQueryTree || (ownReferenceTree && !queryTree))
distancePtr = new arma::mat; // Query indices need to be mapped.
@@ -179,16 +181,16 @@
{
// Create the helper object for the tree traversal.
typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
- RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
+ RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
metric, tau, alpha, naive, sampleAtLeaves, firstLeafExact,
singleSampleLimit);
if (!referenceTree->IsLeaf())
{
- Log::Info << "Running single-tree..." << std::endl;
+ Log::Info << "Performing single-tree traversal..." << std::endl;
// Create the traverser.
- typename TreeType::template SingleTreeTraverser<RuleType>
+ typename TreeType::template SingleTreeTraverser<RuleType>
traverser(rules);
// Now have it traverse for each point.
@@ -197,28 +199,28 @@
numPrunes = traverser.NumPrunes();
}
- else
+ else
{
assert(naive);
Log::Info << "Naive sampling already done!" << std::endl;
}
- Log::Info << "ST: NDC: " << rules.NumDistComputations()
- / querySet.n_cols << std::endl;
+ Log::Info << "Single-tree traversal done; number of distance calculations: "
+ << (rules.NumDistComputations() / querySet.n_cols) << std::endl;
}
else // Dual-tree recursion.
{
- Log::Info << "Running dual-tree ..." << std::endl;
+ Log::Info << "Performing dual-tree traversal..." << std::endl;
typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
- RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
+ RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
metric, tau, alpha, sampleAtLeaves, firstLeafExact,
singleSampleLimit);
typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);
- Log::Info << "DT: QS Pre-search: " << queryTree->Stat().NumSamplesMade() <<
- std::endl;
+ Log::Info << "Dual-tree traversal; query statistic pre-search: "
+ << queryTree->Stat().NumSamplesMade() << std::endl;
if (queryTree)
traverser.Traverse(*queryTree, *referenceTree);
@@ -227,19 +229,18 @@
numPrunes = traverser.NumPrunes();
- Log::Info << "DT: NDC: " << rules.NumDistComputations()
- / querySet.n_cols << std::endl;
+ Log::Info << "Dual-tree traversal done; number of distance calculations: "
+ << (rules.NumDistComputations() / querySet.n_cols) << std::endl;
}
Timer::Stop("computing_neighbors");
Log::Info << "Pruned " << numPrunes << " nodes." << std::endl;
-
// Now, do we need to do mapping of indices?
if ((!ownReferenceTree && !ownQueryTree) || naive)
{
- // No mapping needed if we do not own the trees or if
- // we are doing naive sampling. We are done.
+ // No mapping needed if we do not own the trees or if we are doing naive
+ // sampling. We are done.
return;
}
else if (ownReferenceTree && ownQueryTree) // Map references and queries.
@@ -324,7 +325,6 @@
}
} // Search
-
template<typename SortPolicy, typename MetricType, typename TreeType>
void RASearch<SortPolicy, MetricType, TreeType>::
ResetQueryTree()
@@ -333,14 +333,11 @@
{
if (queryTree)
ResetRAQueryStat(queryTree);
- else
+ else
ResetRAQueryStat(referenceTree);
}
-
- return;
}
-
template<typename SortPolicy, typename MetricType, typename TreeType>
void RASearch<SortPolicy, MetricType, TreeType>::
ResetRAQueryStat(TreeType* treeNode)
@@ -348,11 +345,11 @@
treeNode->Stat().Bound() = SortPolicy::WorstDistance();
treeNode->Stat().NumSamplesMade() = 0;
- if (!treeNode->IsLeaf())
- for (size_t i = 0; i < treeNode->NumChildren(); i++)
- ResetRAQueryStat(&treeNode->Child(i));
-
- return;
+ for (size_t i = 0; i < treeNode->NumChildren(); i++)
+ ResetRAQueryStat(&treeNode->Child(i));
} // ResetRAQueryStat
+}; // namespace neighbor
+}; // namespace mlpack
+
#endif
More information about the mlpack-svn
mailing list