[mlpack-svn] r15042 - mlpack/trunk/src/mlpack/methods/range_search
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed May 8 22:22:41 EDT 2013
Author: rcurtin
Date: 2013-05-08 22:22:41 -0400 (Wed, 08 May 2013)
New Revision: 15042
Modified:
mlpack/trunk/src/mlpack/methods/range_search/CMakeLists.txt
mlpack/trunk/src/mlpack/methods/range_search/range_search.hpp
mlpack/trunk/src/mlpack/methods/range_search/range_search_impl.hpp
mlpack/trunk/src/mlpack/methods/range_search/range_search_rules.hpp
mlpack/trunk/src/mlpack/methods/range_search/range_search_rules_impl.hpp
Log:
Revamp RangeSearch as per #244. Now this works with cover trees too!
Modified: mlpack/trunk/src/mlpack/methods/range_search/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/methods/range_search/CMakeLists.txt 2013-05-09 01:50:34 UTC (rev 15041)
+++ mlpack/trunk/src/mlpack/methods/range_search/CMakeLists.txt 2013-05-09 02:22:41 UTC (rev 15042)
@@ -3,6 +3,8 @@
set(SOURCES
range_search.hpp
range_search_impl.hpp
+ range_search_rules.hpp
+ range_search_rules_impl.hpp
)
# Add directory name to sources.
Modified: mlpack/trunk/src/mlpack/methods/range_search/range_search.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/range_search/range_search.hpp 2013-05-09 01:50:34 UTC (rev 15041)
+++ mlpack/trunk/src/mlpack/methods/range_search/range_search.hpp 2013-05-09 02:22:41 UTC (rev 15042)
@@ -18,9 +18,12 @@
namespace range /** Range-search routines. */ {
/**
- * The RangeSearch class is a template class for performing range searches.
+ * The RangeSearch class is a template class for performing range searches. It
+ * is implemented in the style of a generalized tree-independent dual-tree
+ * algorithm; for more details on the actual algorithm, see the RangeSearchRules
+ * class.
*/
-template<typename MetricType = mlpack::metric::SquaredEuclideanDistance,
+template<typename MetricType = mlpack::metric::EuclideanDistance,
typename TreeType = tree::BinarySpaceTree<bound::HRectBound<2>,
tree::EmptyStatistic> >
class RangeSearch
@@ -182,57 +185,6 @@
std::vector<std::vector<double> >& distances);
private:
- /**
- * Compute the base case, when both referenceNode and queryNode are leaves
- * containing points.
- *
- * @param referenceNode Reference node (must be a leaf).
- * @param queryNode Query node (must be a leaf).
- * @param range Range of distances to search for.
- * @param neighbors Object holding list of neighbors.
- * @param distances Object holding list of distances.
- */
- void ComputeBaseCase(const TreeType* referenceNode,
- const TreeType* queryNode,
- const math::Range& range,
- std::vector<std::vector<size_t> >& neighbors,
- std::vector<std::vector<double> >& distances) const;
-
- /**
- * Perform the dual-tree recursion, which will recurse until the base case is
- * necessary.
- *
- * @param referenceNode Reference node.
- * @param queryNode Query node.
- * @param range Range of distances to search for.
- * @param neighbors Object holding list of neighbors.
- * @param distances Object holding list of distances.
- */
- void DualTreeRecursion(const TreeType* referenceNode,
- const TreeType* queryNode,
- const math::Range& range,
- std::vector<std::vector<size_t> >& neighbors,
- std::vector<std::vector<double> >& distances);
-
- /**
- * Perform the single-tree recursion, which will recurse down the reference
- * tree to get the results for a single point.
- *
- * @param referenceNode Reference node.
- * @param queryPoint Point to query for.
- * @param queryIndex Index of query node.
- * @param range Range of distances to search for.
- * @param neighbors Object holding list of neighbors.
- * @param distances Object holding list of distances.
- */
- template<typename VecType>
- void SingleTreeRecursion(const TreeType* referenceNode,
- const VecType& queryPoint,
- const size_t queryIndex,
- const math::Range& range,
- std::vector<size_t>& neighbors,
- std::vector<double>& distances);
-
//! Copy of reference matrix; used when a tree is built internally.
typename TreeType::Mat referenceCopy;
//! Copy of query matrix; used when a tree is built internally.
@@ -267,7 +219,7 @@
MetricType metric;
//! The number of pruned nodes during computation.
- size_t numberOfPrunes;
+ size_t numPrunes;
};
}; // namespace range
Modified: mlpack/trunk/src/mlpack/methods/range_search/range_search_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/range_search/range_search_impl.hpp 2013-05-09 01:50:34 UTC (rev 15041)
+++ mlpack/trunk/src/mlpack/methods/range_search/range_search_impl.hpp 2013-05-09 02:22:41 UTC (rev 15042)
@@ -10,6 +10,9 @@
// Just in case it hasn't been included.
#include "range_search.hpp"
+// The rules for traversal.
+#include "range_search_rules.hpp"
+
namespace mlpack {
namespace range {
@@ -30,7 +33,7 @@
naive(naive),
singleMode(!naive && singleMode), // Naive overrides single mode.
metric(metric),
- numberOfPrunes(0)
+ numPrunes(0)
{
// Build the trees.
Timer::Start("range_search/tree_building");
@@ -61,7 +64,7 @@
naive(naive),
singleMode(!naive && singleMode), // Naive overrides single mode.
metric(metric),
- numberOfPrunes(0)
+ numPrunes(0)
{
// Build the trees.
Timer::Start("range_search/tree_building");
@@ -90,7 +93,7 @@
naive(false),
singleMode(singleMode),
metric(metric),
- numberOfPrunes(0)
+ numPrunes(0)
{
// Nothing else to initialize.
}
@@ -110,7 +113,7 @@
naive(false),
singleMode(singleMode),
metric(metric),
- numberOfPrunes(0)
+ numPrunes(0)
{
// Nothing else to initialize.
}
@@ -133,7 +136,7 @@
Timer::Start("range_search/computing_neighbors");
// Set size of prunes to 0.
- numberOfPrunes = 0;
+ numPrunes = 0;
// If we have built the trees ourselves, then we will have to map all the
// indices back to their original indices when this computation is finished.
@@ -153,39 +156,39 @@
distancePtr->clear();
distancePtr->resize(querySet.n_cols);
- if (naive)
+ // Create the helper object for the traversal.
+ typedef RangeSearchRules<MetricType, TreeType> RuleType;
+ RuleType rules(referenceSet, querySet, range, *neighborPtr, *distancePtr,
+ metric);
+
+ if (singleMode)
{
- // Run the base case.
- if (!queryTree)
- ComputeBaseCase(referenceTree, referenceTree, range, *neighborPtr,
- *distancePtr);
- else
- ComputeBaseCase(referenceTree, queryTree, range, *neighborPtr,
- *distancePtr);
+ // Create the traverser.
+ typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);
+
+ // Now have it traverse for each point.
+ for (size_t i = 0; i < querySet.n_cols; ++i)
+ traverser.Traverse(i, *referenceTree);
+
+ numPrunes = traverser.NumPrunes();
}
- else if (singleMode)
+ else // Dual-tree recursion.
{
- // Loop over each of the query points.
- for (size_t i = 0; i < querySet.n_cols; i++)
- {
- SingleTreeRecursion(referenceTree, querySet.col(i), i, range,
- (*neighborPtr)[i], (*distancePtr)[i]);
- }
- }
- else
- {
- if (!queryTree) // References are the same as queries.
- DualTreeRecursion(referenceTree, referenceTree, range, *neighborPtr,
- *distancePtr);
+ // Create the traverser.
+ typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);
+
+ if (queryTree)
+ traverser.Traverse(*queryTree, *referenceTree);
else
- DualTreeRecursion(referenceTree, queryTree, range, *neighborPtr,
- *distancePtr);
+ traverser.Traverse(*referenceTree, *referenceTree);
+
+ numPrunes = traverser.NumPrunes();
}
Timer::Stop("range_search/computing_neighbors");
// Output number of prunes.
- Log::Info << "Number of pruned nodes during computation: " << numberOfPrunes
+ Log::Info << "Number of pruned nodes during computation: " << numPrunes
<< "." << std::endl;
// Map points back to original indices, if necessary.
@@ -287,168 +290,6 @@
}
}
-template<typename MetricType, typename TreeType>
-void RangeSearch<MetricType, TreeType>::ComputeBaseCase(
- const TreeType* referenceNode,
- const TreeType* queryNode,
- const math::Range& range,
- std::vector<std::vector<size_t> >& neighbors,
- std::vector<std::vector<double> >& distances) const
-{
- // node->Begin() is the index of the first point in the node,
- // node->End() is one past the last index.
- for (size_t queryIndex = queryNode->Begin(); queryIndex < queryNode->End();
- queryIndex++)
- {
- double minDistance =
- referenceNode->Bound().MinDistance(querySet.col(queryIndex));
- double maxDistance =
- referenceNode->Bound().MaxDistance(querySet.col(queryIndex));
-
- // Now see if any points could fall into the range.
- if (range.Contains(math::Range(minDistance, maxDistance)))
- {
- // Loop through the reference points and see which fall into the range.
- for (size_t referenceIndex = referenceNode->Begin();
- referenceIndex < referenceNode->End(); referenceIndex++)
- {
- // We can't add points that are ourselves.
- if (referenceNode != queryNode || referenceIndex != queryIndex)
- {
- double distance = metric.Evaluate(querySet.col(queryIndex),
- referenceSet.col(referenceIndex));
-
- // If this lies in the range, add it.
- if (range.Contains(distance))
- {
- neighbors[queryIndex].push_back(referenceIndex);
- distances[queryIndex].push_back(distance);
- }
- }
- }
- }
- }
-}
-
-template<typename MetricType, typename TreeType>
-void RangeSearch<MetricType, TreeType>::DualTreeRecursion(
- const TreeType* referenceNode,
- const TreeType* queryNode,
- const math::Range& range,
- std::vector<std::vector<size_t> >& neighbors,
- std::vector<std::vector<double> >& distances)
-{
- // See if we can prune this node.
- math::Range distance =
- referenceNode->Bound().RangeDistance(queryNode->Bound());
-
- if (!range.Contains(distance))
- {
- numberOfPrunes++; // Don't recurse. These nodes can't contain anything.
- return;
- }
-
- // If both nodes are leaves, then we compute the base case.
- if (referenceNode->IsLeaf() && queryNode->IsLeaf())
- {
- ComputeBaseCase(referenceNode, queryNode, range, neighbors, distances);
- }
- else if (referenceNode->IsLeaf())
- {
- // We must descend down the query node to get a leaf.
- DualTreeRecursion(referenceNode, queryNode->Left(), range, neighbors,
- distances);
- DualTreeRecursion(referenceNode, queryNode->Right(), range, neighbors,
- distances);
- }
- else if (queryNode->IsLeaf())
- {
- // We must descend down the reference node to get a leaf.
- DualTreeRecursion(referenceNode->Left(), queryNode, range, neighbors,
- distances);
- DualTreeRecursion(referenceNode->Right(), queryNode, range, neighbors,
- distances);
- }
- else
- {
- // First descend the left reference node.
- DualTreeRecursion(referenceNode->Left(), queryNode->Left(), range,
- neighbors, distances);
- DualTreeRecursion(referenceNode->Left(), queryNode->Right(), range,
- neighbors, distances);
-
- // Now descend the right reference node.
- DualTreeRecursion(referenceNode->Right(), queryNode->Left(), range,
- neighbors, distances);
- DualTreeRecursion(referenceNode->Right(), queryNode->Right(), range,
- neighbors, distances);
- }
-}
-
-template<typename MetricType, typename TreeType>
-template<typename VecType>
-void RangeSearch<MetricType, TreeType>::SingleTreeRecursion(
- const TreeType* referenceNode,
- const VecType& queryPoint,
- const size_t queryIndex,
- const math::Range& range,
- std::vector<size_t>& neighbors,
- std::vector<double>& distances)
-{
- // See if we need to recurse or if we can perform base-case computations.
- if (referenceNode->IsLeaf())
- {
- // Base case: reference node is a leaf.
- for (size_t referenceIndex = referenceNode->Begin(); referenceIndex !=
- referenceNode->End(); referenceIndex++)
- {
- // Don't add this point if it is the same as the query point.
- if (!queryTree && !(referenceIndex == queryIndex))
- {
- double distance = metric.Evaluate(queryPoint,
- referenceSet.col(referenceIndex));
-
- // See if the point is in the range we are looking for.
- if (range.Contains(distance))
- {
- neighbors.push_back(referenceIndex);
- distances.push_back(distance);
- }
- }
- }
- }
- else
- {
- // Recurse down the tree.
- math::Range distanceLeft =
- referenceNode->Left()->Bound().RangeDistance(queryPoint);
- math::Range distanceRight =
- referenceNode->Right()->Bound().RangeDistance(queryPoint);
-
- if (range.Contains(distanceLeft))
- {
- // The left may have points we want to recurse to.
- SingleTreeRecursion(referenceNode->Left(), queryPoint, queryIndex,
- range, neighbors, distances);
- }
- else
- {
- numberOfPrunes++;
- }
-
- if (range.Contains(distanceRight))
- {
- // The right may have points we want to recurse to.
- SingleTreeRecursion(referenceNode->Right(), queryPoint, queryIndex,
- range, neighbors, distances);
- }
- else
- {
- numberOfPrunes++;
- }
- }
-}
-
}; // namespace range
}; // namespace mlpack
Modified: mlpack/trunk/src/mlpack/methods/range_search/range_search_rules.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/range_search/range_search_rules.hpp 2013-05-09 01:50:34 UTC (rev 15041)
+++ mlpack/trunk/src/mlpack/methods/range_search/range_search_rules.hpp 2013-05-09 02:22:41 UTC (rev 15042)
@@ -8,17 +8,29 @@
#define __MLPACK_METHODS_RANGE_SEARCH_RANGE_SEARCH_RULES_HPP
namespace mlpack {
-namespace neighbor {
+namespace range {
+
template<typename MetricType, typename TreeType>
class RangeSearchRules
{
public:
+ /**
+ * Construct the RangeSearchRules object. This is usually done from within
+ * the RangeSearch class at search time.
+ *
+ * @param referenceSet Set of reference data.
+ * @param querySet Set of query data.
+ * @param range Range to search for.
+ * @param neighbors Vector to store resulting neighbors in.
+ * @param distances Vector to store resulting distances in.
+ * @param metric Instantiated metric.
+ */
RangeSearchRules(const arma::mat& referenceSet,
const arma::mat& querySet,
+ const math::Range& range,
std::vector<std::vector<size_t> >& neighbors,
std::vector<std::vector<double> >& distances,
- math::Range& range,
MetricType& metric);
/**
@@ -66,7 +78,7 @@
*/
double Rescore(const size_t queryIndex,
TreeType& referenceNode,
- const double oldScore);
+ const double oldScore) const;
/**
* Get the score for recursion order. A low score indicates priority for
@@ -105,7 +117,7 @@
*/
double Rescore(TreeType& queryNode,
TreeType& referenceNode,
- const double oldScore);
+ const double oldScore) const;
private:
//! The reference set.
@@ -114,24 +126,27 @@
//! The query set.
const arma::mat& querySet;
+ //! The range of distances for which we are searching.
+ const math::Range& range;
+
//! The vector the resultant neighbor indices should be stored in.
std::vector<std::vector<size_t> >& neighbors;
//! The vector the resultant neighbor distances should be stored in.
std::vector<std::vector<double> >& distances;
- //! The range of distances for which we are searching.
- math::Range& range;
-
//! The instantiated metric.
MetricType& metric;
//! Add all the points in the given node to the results for the given query
- //! point.
- void AddResult(const size_t queryIndex, TreeType& referenceNode);
+ //! point. If the base case has already been calculated, we make sure to not
+ //! add that to the results twice.
+ void AddResult(const size_t queryIndex,
+ TreeType& referenceNode,
+ const bool hasBaseCase);
};
-}; // namespace neighbor
+}; // namespace range
}; // namespace mlpack
// Include implementation.
Modified: mlpack/trunk/src/mlpack/methods/range_search/range_search_rules_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/range_search/range_search_rules_impl.hpp 2013-05-09 01:50:34 UTC (rev 15041)
+++ mlpack/trunk/src/mlpack/methods/range_search/range_search_rules_impl.hpp 2013-05-09 02:22:41 UTC (rev 15042)
@@ -11,20 +11,21 @@
#include "range_search_rules.hpp"
namespace mlpack {
-namespace neighbor {
+namespace range {
template<typename MetricType, typename TreeType>
-RangeSearchRules::RangeSearchRules(const arma::mat& referenceSet,
- const arma::mat& querySet,
- std::vector<std::vector<size_t> >& neighbors,
- std::vector<std::vector<double> >& distances,
- math::Range& range,
- MetricType& metric) :
+RangeSearchRules<MetricType, TreeType>::RangeSearchRules(
+ const arma::mat& referenceSet,
+ const arma::mat& querySet,
+ const math::Range& range,
+ std::vector<std::vector<size_t> >& neighbors,
+ std::vector<std::vector<double> >& distances,
+ MetricType& metric) :
referenceSet(referenceSet),
querySet(querySet),
+ range(range),
neighbors(neighbors),
distances(distances),
- range(range),
metric(metric)
{
// Nothing to do.
@@ -33,8 +34,9 @@
//! The base case. Evaluate the distance between the two points and add to the
//! results if necessary.
template<typename MetricType, typename TreeType>
-double RangeSearchRules::BaseCase(const size_t queryIndex,
- const size_t referenceIndex)
+double RangeSearchRules<MetricType, TreeType>::BaseCase(
+ const size_t queryIndex,
+ const size_t referenceIndex)
{
// If the datasets are the same, don't return the point as in its own range.
if ((&referenceSet == &querySet) && (queryIndex == referenceIndex))
@@ -54,8 +56,8 @@
//! Single-tree scoring function.
template<typename MetricType, typename TreeType>
-double RangeSearchRules::Score(const size_t queryIndex,
- TreeType& referenceNode)
+double RangeSearchRules<MetricType, TreeType>::Score(const size_t queryIndex,
+ TreeType& referenceNode)
{
const math::Range distances =
referenceNode.RangeDistance(querySet.unsafe_col(queryIndex));
@@ -68,7 +70,7 @@
// results.
if ((distances.Lo() >= range.Lo()) && (distances.Hi() <= range.Hi()))
{
- AddResult(queryIndex, referenceNode);
+ AddResult(queryIndex, referenceNode, false);
return DBL_MAX; // We don't need to go any deeper.
}
@@ -79,9 +81,10 @@
//! Single-tree scoring function.
template<typename MetricType, typename TreeType>
-double RangeSearchRules::Score(const size_t queryIndex,
- TreeType& referenceNode,
- const double baseCaseResult)
+double RangeSearchRules<MetricType, TreeType>::Score(
+ const size_t queryIndex,
+ TreeType& referenceNode,
+ const double baseCaseResult)
{
const math::Range distances = referenceNode.RangeDistance(
querySet.unsafe_col(queryIndex), baseCaseResult);
@@ -94,7 +97,7 @@
// results.
if ((distances.Lo() >= range.Lo()) && (distances.Hi() <= range.Hi()))
{
- AddResult(queryIndex, referenceNode);
+ AddResult(queryIndex, referenceNode, true);
return DBL_MAX; // We don't need to go any deeper.
}
@@ -105,15 +108,122 @@
//! Single-tree rescoring function.
template<typename MetricType, typename TreeType>
-double RangeSearchRules<MetricType, TreeType>::Rescore(const size_t queryIndex,
- TreeType& referenceNode,
- const double oldScore)
+double RangeSearchRules<MetricType, TreeType>::Rescore(
+ const size_t /* queryIndex */,
+ TreeType& /* referenceNode */,
+ const double oldScore) const
{
// If it wasn't pruned before, it isn't pruned now.
return oldScore;
}
-}; // namespace neighbor
+//! Dual-tree scoring function.
+template<typename MetricType, typename TreeType>
+double RangeSearchRules<MetricType, TreeType>::Score(TreeType& queryNode,
+ TreeType& referenceNode)
+{
+ const math::Range distances = referenceNode.RangeDistance(&queryNode);
+
+ // If the ranges do not overlap, prune this node.
+ if (!distances.Contains(range))
+ return DBL_MAX;
+
+ // In this case, all of the points in the reference node will be part of all
+ // the results for each point in the query node.
+ if ((distances.Lo() >= range.Lo()) && (distances.Hi() <= range.Hi()))
+ {
+ for (size_t i = 0; i < queryNode.NumDescendants(); ++i)
+ AddResult(queryNode.Descendant(i), referenceNode, false);
+ return DBL_MAX; // We don't need to go any deeper.
+ }
+
+ // Otherwise the score doesn't matter. Recursion order is irrelevant in range
+ // search.
+ return 0.0;
+}
+
+//! Dual-tree scoring function.
+template<typename MetricType, typename TreeType>
+double RangeSearchRules<MetricType, TreeType>::Score(
+ TreeType& queryNode,
+ TreeType& referenceNode,
+ const double baseCaseResult)
+{
+ const math::Range distances = referenceNode.RangeDistance(&queryNode,
+ baseCaseResult);
+
+ // If the ranges do not overlap, prune this node.
+ if (!distances.Contains(range))
+ return DBL_MAX;
+
+ // In this case, all of the points in the reference node will be part of all
+ // the results for each point in the query node.
+ if ((distances.Lo() >= range.Lo()) && (distances.Hi() <= range.Hi()))
+ {
+ AddResult(queryNode.Descendant(0), referenceNode, true);
+ // We have not calculated the base case for any descendants other than the
+ // first point.
+ for (size_t i = 1; i < queryNode.NumDescendants(); ++i)
+ AddResult(queryNode.Descendant(i), referenceNode, false);
+ return DBL_MAX; // We don't need to go any deeper.
+ }
+
+ // Otherwise the score doesn't matter. Recursion order is irrelevant in range
+ // search.
+ return 0.0;
+}
+
+//! Dual-tree rescoring function.
+template<typename MetricType, typename TreeType>
+double RangeSearchRules<MetricType, TreeType>::Rescore(
+ TreeType& /* queryNode */,
+ TreeType& /* referenceNode */,
+ const double oldScore) const
+{
+ // If it wasn't pruned before, it isn't pruned now.
+ return oldScore;
+}
+
+//! Add all the points in the given node to the results for the given query
+//! point.
+template<typename MetricType, typename TreeType>
+void RangeSearchRules<MetricType, TreeType>::AddResult(const size_t queryIndex,
+ TreeType& referenceNode,
+ const bool hasBaseCase)
+{
+ // Some types of trees calculate the base case evaluation before Score() is
+ // called, so if the base case has already been calculated, then we must avoid
+ // adding that point to the results again.
+ size_t baseCaseMod = 0;
+ if (tree::TreeTraits<TreeType>::FirstPointIsCentroid && hasBaseCase)
+ {
+ baseCaseMod = 1;
+ }
+
+ // Resize distances and neighbors vectors appropriately. We have to use
+ // reserve() and not resize(), because we don't know if we will encounter the
+ // case where the datasets and points are the same (and we skip in that case).
+ const size_t oldSize = neighbors[queryIndex].size();
+ neighbors[queryIndex].reserve(oldSize + referenceNode.NumDescendants() -
+ baseCaseMod);
+ distances[queryIndex].reserve(oldSize + referenceNode.NumDescendants() -
+ baseCaseMod);
+
+ for (size_t i = baseCaseMod; i < referenceNode.NumDescendants(); ++i)
+ {
+ if ((&referenceSet == &querySet) &&
+ (queryIndex == referenceNode.Descendant(i)))
+ continue;
+
+ const double distance = metric.Evaluate(querySet.unsafe_col(queryIndex),
+ referenceNode.Dataset().unsafe_col(referenceNode.Descendant(i)));
+
+ neighbors[queryIndex].push_back(referenceNode.Descendant(i));
+ distances[queryIndex].push_back(distance);
+ }
+}
+
+}; // namespace range
}; // namespace mlpack
#endif
More information about the mlpack-svn mailing list