[mlpack-svn] r10813 - mlpack/trunk/src/mlpack/methods/neighbor_search
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Dec 14 17:59:31 EST 2011
Author: rcurtin
Date: 2011-12-14 17:59:30 -0500 (Wed, 14 Dec 2011)
New Revision: 10813
Modified:
mlpack/trunk/src/mlpack/methods/neighbor_search/allkfn_main.cpp
mlpack/trunk/src/mlpack/methods/neighbor_search/allknn_main.cpp
mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search.hpp
mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
Log:
Redo API for NeighborSearch for the last time and go over documentation.
Modified: mlpack/trunk/src/mlpack/methods/neighbor_search/allkfn_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/neighbor_search/allkfn_main.cpp 2011-12-14 22:58:45 UTC (rev 10812)
+++ mlpack/trunk/src/mlpack/methods/neighbor_search/allkfn_main.cpp 2011-12-14 22:59:30 UTC (rev 10813)
@@ -40,16 +40,17 @@
// Define our input parameters that this program will take.
PARAM_STRING_REQ("reference_file", "File containing the reference dataset.",
- "R");
-PARAM_STRING("query_file", "File containing query points (optional).", "Q", "");
-PARAM_STRING_REQ("distances_file", "File to output distances into.", "D");
-PARAM_STRING_REQ("neighbors_file", "File to output neighbors into.", "N");
+ "r");
+PARAM_INT_REQ("k", "Number of furthest neighbors to find.", "k");
+PARAM_STRING_REQ("distances_file", "File to output distances into.", "d");
+PARAM_STRING_REQ("neighbors_file", "File to output neighbors into.", "n");
-PARAM_INT("leaf_size", "Leaf size for tree building.", "L", 20);
+PARAM_STRING("query_file", "File containing query points (optional).", "q", "");
+
+PARAM_INT("leaf_size", "Leaf size for tree building.", "l", 20);
PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "N");
PARAM_FLAG("single_mode", "If true, single-tree search is used (as opposed to "
- "dual-tree search.", "S");
-PARAM_INT_REQ("k", "Number of furthest neighbors to find.", "");
+ "dual-tree search.", "s");
int main(int argc, char *argv[])
{
@@ -98,6 +99,9 @@
Log::Warn << "--single_mode ignored because --naive is present." << endl;
}
+ if (naive)
+ leafSize = referenceData.n_cols;
+
arma::Mat<size_t> neighbors;
arma::mat distances;
@@ -111,7 +115,7 @@
Timer::Start("reference_tree_building");
BinarySpaceTree<bound::HRectBound<2>, QueryStat<FurthestNeighborSort> >
- refTree(referenceData, oldFromNewRefs);
+ refTree(referenceData, oldFromNewRefs, leafSize);
Timer::Stop("reference_tree_building");
@@ -129,23 +133,26 @@
Log::Info << "Building query tree..." << endl;
+ if (naive && leafSize < queryData.n_cols)
+ leafSize = queryData.n_cols;
+
// Build trees by hand, so we can save memory: if we pass a tree to
// NeighborSearch, it does not copy the matrix.
Timer::Start("query_tree_building");
BinarySpaceTree<bound::HRectBound<2>, QueryStat<FurthestNeighborSort> >
- queryTree(queryData, oldFromNewQueries);
+ queryTree(queryData, oldFromNewQueries, leafSize);
Timer::Stop("query_tree_building");
- allkfn = new AllkFN(referenceData, queryData, naive, singleMode, 20,
- &refTree, &queryTree);
+ allkfn = new AllkFN(&refTree, &queryTree, referenceData, queryData,
+ singleMode);
Log::Info << "Tree built." << endl;
}
else
{
- allkfn = new AllkFN(referenceData, naive, singleMode, 20, &refTree);
+ allkfn = new AllkFN(&refTree, referenceData, singleMode);
Log::Info << "Trees built." << endl;
}
Modified: mlpack/trunk/src/mlpack/methods/neighbor_search/allknn_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/neighbor_search/allknn_main.cpp 2011-12-14 22:58:45 UTC (rev 10812)
+++ mlpack/trunk/src/mlpack/methods/neighbor_search/allknn_main.cpp 2011-12-14 22:59:30 UTC (rev 10813)
@@ -40,16 +40,18 @@
// Define our input parameters that this program will take.
PARAM_STRING_REQ("reference_file", "File containing the reference dataset.",
- "R");
-PARAM_STRING("query_file", "File containing query points (optional).", "Q", "");
-PARAM_STRING_REQ("distances_file", "File to output distances into.", "D");
-PARAM_STRING_REQ("neighbors_file", "File to output neighbors into.", "N");
+ "r");
+PARAM_STRING_REQ("distances_file", "File to output distances into.", "d");
+PARAM_STRING_REQ("neighbors_file", "File to output neighbors into.", "n");
-PARAM_INT("leaf_size", "Leaf size for tree building.", "L", 20);
-PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "");
+PARAM_INT_REQ("k", "Number of furthest neighbors to find.", "k");
+
+PARAM_STRING("query_file", "File containing query points (optional).", "q", "");
+
+PARAM_INT("leaf_size", "Leaf size for tree building.", "l", 20);
+PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "N");
PARAM_FLAG("single_mode", "If true, single-tree search is used (as opposed to "
- "dual-tree search.", "S");
-PARAM_INT_REQ("k", "Number of furthest neighbors to find.", "");
+ "dual-tree search.", "s");
int main(int argc, char *argv[])
{
@@ -98,6 +100,9 @@
Log::Warn << "--single_mode ignored because --naive is present." << endl;
}
+ if (naive)
+ leafSize = referenceData.n_cols;
+
arma::Mat<size_t> neighbors;
arma::mat distances;
@@ -127,6 +132,9 @@
if (!data::Load(queryFile.c_str(), queryData))
Log::Fatal << "Query file " << queryFile << " not found" << endl;
+ if (naive && leafSize < queryData.n_cols)
+ leafSize = queryData.n_cols;
+
Log::Info << "Query data loaded from " << queryFile << endl;
Log::Info << "Building query tree..." << endl;
@@ -140,14 +148,14 @@
Timer::Stop("tree_building");
- allknn = new AllkNN(referenceData, queryData, naive, singleMode, 20,
- &refTree, &queryTree);
+ allknn = new AllkNN(&refTree, &queryTree, referenceData, queryData,
+ singleMode);
Log::Info << "Tree built." << endl;
}
else
{
- allknn = new AllkNN(referenceData, naive, singleMode, 20, &refTree);
+ allknn = new AllkNN(&refTree, referenceData, singleMode);
Log::Info << "Trees built." << endl;
}
Modified: mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search.hpp 2011-12-14 22:58:45 UTC (rev 10812)
+++ mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search.hpp 2011-12-14 22:59:30 UTC (rev 10813)
@@ -91,17 +91,14 @@
public:
/**
* Initialize the NeighborSearch object, passing both a query and reference
- * dataset. Optionally, already-built trees can be passed, for the case where
- * a special tree-building procedure is needed. If referenceTree is given, it
- * is assumed that the points in referenceTree correspond to the points in
- * referenceSet. The same is true for queryTree and querySet. An initialized
+ * dataset. Optionally, perform the computation in naive mode or single-tree
+ * mode, and set the leaf size used for tree-building. An initialized
* distance metric can be given, for cases where the metric has internal data
* (i.e. the distance::MahalanobisDistance class).
*
- * If naive mode is being used and a pre-built tree is given, it may not work:
- * naive mode operates by building a one-node tree (the root node holds all
- * the points). If that condition is not satisfied with the pre-built tree,
- * then naive mode will not work.
+ * This method will copy the matrices to internal copies, which are rearranged
+ * during tree-building. You can avoid this extra copy by pre-constructing
+ * the trees and passing them using a diferent constructor.
*
* @param referenceSet Set of reference points.
* @param querySet Set of query points.
@@ -110,28 +107,21 @@
* @param singleMode If true, single-tree search will be used (as opposed to
* dual-tree search).
* @param leafSize Leaf size for tree construction (ignored if tree is given).
- * @param referenceTree Optionally pass a pre-built tree for the reference
- * set.
- * @param queryTree Optionally pass a pre-built tree for the query set.
* @param metric An optional instance of the MetricType class.
*/
- NeighborSearch(const arma::mat& referenceSet,
- const arma::mat& querySet,
+ NeighborSearch(const typename TreeType::Mat& referenceSet,
+ const typename TreeType::Mat& querySet,
const bool naive = false,
const bool singleMode = false,
const size_t leafSize = 20,
- TreeType* referenceTree = NULL,
- TreeType* queryTree = NULL,
const MetricType metric = MetricType());
/**
* Initialize the NeighborSearch object, passing only one dataset, which is
- * used as both the query and the reference dataset. Optionally, an
- * already-built tree can be passed, for the case where a special
- * tree-building procedure is needed. If referenceTree is given, it is
- * assumed that the points in referenceTree correspond to the points in
- * referenceSet. An initialized distance metric can be given, for cases where
- * the metric has internal data (i.e. the distance::MahalanobisDistance
+ * used as both the query and the reference dataset. Optionally, perform the
+ * computation in naive mode or single-tree mode, and set the leaf size used
+ * for tree-building. An initialized distance metric can be given, for cases
+ * where the metric has internal data (i.e. the distance::MahalanobisDistance
* class).
*
* If naive mode is being used and a pre-built tree is given, it may not work:
@@ -145,18 +135,84 @@
* @param singleMode If true, single-tree search will be used (as opposed to
* dual-tree search).
* @param leafSize Leaf size for tree construction (ignored if tree is given).
- * @param referenceTree Optionally pass a pre-built tree for the reference
- * set.
* @param metric An optional instance of the MetricType class.
*/
- NeighborSearch(const arma::mat& referenceSet,
+ NeighborSearch(const typename TreeType::Mat& referenceSet,
const bool naive = false,
const bool singleMode = false,
const size_t leafSize = 20,
- TreeType* referenceTree = NULL,
const MetricType metric = MetricType());
/**
+ * Initialize the NeighborSearch object with the given datasets and
+ * pre-constructed trees. It is assumed that the points in referenceSet and
+ * querySet correspond to the points in referenceTree and queryTree,
+ * respectively. Optionally, choose to use single-tree mode. Naive mode is
+ * not available as an option for this constructor; instead, to run naive
+ * computation, construct a tree with all of the points in one leaf (i.e.
+ * leafSize = number of points). Additionally, an instantiated distance
+ * metric can be given, for cases where the distance metric holds data.
+ *
+ * There is no copying of the data matrices in this constructor (because
+ * tree-building is not necessary), so this is the constructor to use when
+ * copies absolutely must be avoided.
+ *
+ * @note
+ * Because tree-building (at least with BinarySpaceTree) modifies the ordering
+ * of a matrix, be sure you pass the modified matrix to this object! In
+ * addition, mapping the points of the matrix back to their original indices
+ * is not done when this constructor is used.
+ * @endnote
+ *
+ * @param referenceTree Pre-built tree for reference points.
+ * @param queryTree Pre-built tree for query points.
+ * @param referenceSet Set of reference points corresponding to referenceTree.
+ * @param querySet Set of query points corresponding to queryTree.
+ * @param singleMode Whether single-tree computation should be used (as
+ * opposed to dual-tree computation).
+ * @param metric Instantiated distance metric.
+ */
+ NeighborSearch(TreeType* referenceTree,
+ TreeType* queryTree,
+ const typename TreeType::Mat& referenceSet,
+ const typename TreeType::Mat& querySet,
+ const bool singleMode = false,
+ const MetricType metric = MetricType());
+
+ /**
+ * Initialize the NeighborSearch object with the given reference dataset and
+ * pre-constructed tree. It is assumed that the points in referenceSet
+ * correspond to the points in referenceTree. Optionally, choose to use
+ * single-tree mode. Naive mode is not available as an option for this
+ * constructor; instead, to run naive computation, construct a tree with all
+ * the points in one leaf (i.e. leafSize = number of points). Additionally,
+ * an instantiated distance metric can be given, for the case where the
+ * distance metric holds data.
+ *
+ * There is no copying of the data matrices in this constructor (because
+ * tree-building is not necessary), so this is the constructor to use when
+ * copies absolutely must be avoided.
+ *
+ * @note
+ * Because tree-building (at least with BinarySpaceTree) modifies the ordering
+ * of a matrix, be sure you pass the modified matrix to this object! In
+ * addition, mapping the points of the matrix back to their original indices
+ * is not done when this constructor is used.
+ * @endnote
+ *
+ * @param referenceTree Pre-built tree for reference points.
+ * @param referenceSet Set of reference points corresponding to referenceTree.
+ * @param singleMode Whether single-tree computation should be used (as
+ * opposed to dual-tree computation).
+ * @param metric Instantiated distance metric.
+ */
+ NeighborSearch(TreeType* referenceTree,
+ const typename TreeType::Mat& referenceSet,
+ const bool singleMode = false,
+ const MetricType metric = MetricType());
+
+
+ /**
* Delete the NeighborSearch object. The tree is the only member we are
* responsible for deleting. The others will take care of themselves.
*/
Modified: mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp 2011-12-14 22:58:45 UTC (rev 10812)
+++ mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp 2011-12-14 22:59:30 UTC (rev 10813)
@@ -15,24 +15,22 @@
// Construct the object.
template<typename SortPolicy, typename MetricType, typename TreeType>
NeighborSearch<SortPolicy, MetricType, TreeType>::
-NeighborSearch(const arma::mat& referenceSet,
- const arma::mat& querySet,
+NeighborSearch(const typename TreeType::Mat& referenceSet,
+ const typename TreeType::Mat& querySet,
const bool naive,
const bool singleMode,
const size_t leafSize,
- TreeType* referenceTree,
- TreeType* queryTree,
const MetricType metric) :
- referenceCopy(referenceTree ? arma::mat() : referenceSet),
- queryCopy(queryTree ? arma::mat() : querySet),
- referenceSet(referenceTree ? referenceSet : referenceCopy),
- querySet(queryTree ? querySet : queryCopy),
+ referenceCopy(referenceSet),
+ queryCopy(querySet),
+ referenceSet(referenceCopy),
+ querySet(queryCopy),
naive(naive),
singleMode(!naive && singleMode), // No single mode if naive.
- referenceTree(referenceTree),
- queryTree(queryTree),
- ownReferenceTree(!referenceTree), // False if a tree was passed.
- ownQueryTree(!queryTree), // False if a tree was passed.
+ referenceTree(NULL),
+ queryTree(NULL),
+ ownReferenceTree(true), // False if a tree was passed.
+ ownQueryTree(true), // False if a tree was passed.
metric(metric),
numberOfPrunes(0)
{
@@ -43,26 +41,12 @@
if (!referenceTree || !queryTree)
Timer::Start("tree_building");
- if (!referenceTree)
- {
- // Construct as a naive object if we need to.
- if (naive)
- this->referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
- referenceSet.n_cols /* everything in one leaf */);
- else
- this->referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
- leafSize);
- }
+ // Construct as a naive object if we need to.
+ referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
+ (naive ? referenceCopy.n_cols : leafSize));
- if (!queryTree)
- {
- // Construct as a naive object if we need to.
- if (naive)
- this->queryTree = new TreeType(queryCopy, oldFromNewQueries,
- querySet.n_cols);
- else
- this->queryTree = new TreeType(queryCopy, oldFromNewQueries, leafSize);
- }
+ queryTree = new TreeType(queryCopy, oldFromNewQueries,
+ (naive ? querySet.n_cols : leafSize));
// Stop the timer we started above (if we need to).
if (!referenceTree || !queryTree)
@@ -72,42 +56,78 @@
// Construct the object.
template<typename SortPolicy, typename MetricType, typename TreeType>
NeighborSearch<SortPolicy, MetricType, TreeType>::
-NeighborSearch(const arma::mat& referenceSet,
+NeighborSearch(const typename TreeType::Mat& referenceSet,
const bool naive,
const bool singleMode,
const size_t leafSize,
- TreeType* referenceTree,
const MetricType metric) :
- referenceCopy(referenceTree ? arma::mat() : referenceSet),
- referenceSet(referenceTree ? referenceSet : referenceCopy),
- querySet(referenceTree ? referenceSet : referenceCopy),
+ referenceCopy(referenceSet),
+ referenceSet(referenceCopy),
+ querySet(referenceCopy),
naive(naive),
singleMode(!naive && singleMode), // No single mode if naive.
- referenceTree(referenceTree),
+ referenceTree(NULL),
queryTree(NULL),
- ownReferenceTree(!referenceTree),
+ ownReferenceTree(true),
ownQueryTree(false), // Since it will be the same as referenceTree.
metric(metric),
numberOfPrunes(0)
{
// We'll time tree building, but only if we are building trees.
- if (!referenceTree)
- {
- Timer::Start("tree_building");
+ Timer::Start("tree_building");
- // Construct as a naive object if we need to.
- if (naive)
- this->referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
- referenceSet.n_cols /* everything in one leaf */);
- else
- this->referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
- leafSize);
+ // Construct as a naive object if we need to.
+ referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
+ (naive ? referenceSet.n_cols : leafSize));
- // Stop the timer we started above.
- Timer::Stop("tree_building");
- }
+ // Stop the timer we started above.
+ Timer::Stop("tree_building");
}
+// Construct the object.
+template<typename SortPolicy, typename MetricType, typename TreeType>
+NeighborSearch<SortPolicy, MetricType, TreeType>::NeighborSearch(
+ TreeType* referenceTree,
+ TreeType* queryTree,
+ const typename TreeType::Mat& referenceSet,
+ const typename TreeType::Mat& querySet,
+ const bool singleMode,
+ const MetricType metric) :
+ referenceSet(referenceSet),
+ querySet(querySet),
+ referenceTree(referenceTree),
+ queryTree(queryTree),
+ ownReferenceTree(false),
+ ownQueryTree(false),
+ naive(false),
+ singleMode(singleMode),
+ metric(metric),
+ numberOfPrunes(0)
+{
+ // Nothing else to initialize.
+}
+
+// Construct the object.
+template<typename SortPolicy, typename MetricType, typename TreeType>
+NeighborSearch<SortPolicy, MetricType, TreeType>::NeighborSearch(
+ TreeType* referenceTree,
+ const typename TreeType::Mat& referenceSet,
+ const bool singleMode,
+ const MetricType metric) :
+ referenceSet(referenceSet),
+ querySet(referenceSet),
+ referenceTree(referenceTree),
+ queryTree(NULL),
+ ownReferenceTree(false),
+ ownQueryTree(false),
+ naive(false),
+ singleMode(singleMode),
+ metric(metric),
+ numberOfPrunes(0)
+{
+ // Nothing else to initialize.
+}
+
/**
* The tree is the only member we may be responsible for deleting. The others
* will take care of themselves.
More information about the mlpack-svn
mailing list