[mlpack-svn] r10813 - mlpack/trunk/src/mlpack/methods/neighbor_search

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Dec 14 17:59:31 EST 2011


Author: rcurtin
Date: 2011-12-14 17:59:30 -0500 (Wed, 14 Dec 2011)
New Revision: 10813

Modified:
   mlpack/trunk/src/mlpack/methods/neighbor_search/allkfn_main.cpp
   mlpack/trunk/src/mlpack/methods/neighbor_search/allknn_main.cpp
   mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search.hpp
   mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
Log:
Redo API for NeighborSearch for the last time and go over documentation.


Modified: mlpack/trunk/src/mlpack/methods/neighbor_search/allkfn_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/neighbor_search/allkfn_main.cpp	2011-12-14 22:58:45 UTC (rev 10812)
+++ mlpack/trunk/src/mlpack/methods/neighbor_search/allkfn_main.cpp	2011-12-14 22:59:30 UTC (rev 10813)
@@ -40,16 +40,17 @@
 
 // Define our input parameters that this program will take.
 PARAM_STRING_REQ("reference_file", "File containing the reference dataset.",
-    "R");
-PARAM_STRING("query_file", "File containing query points (optional).", "Q", "");
-PARAM_STRING_REQ("distances_file", "File to output distances into.", "D");
-PARAM_STRING_REQ("neighbors_file", "File to output neighbors into.", "N");
+    "r");
+PARAM_INT_REQ("k", "Number of furthest neighbors to find.", "k");
+PARAM_STRING_REQ("distances_file", "File to output distances into.", "d");
+PARAM_STRING_REQ("neighbors_file", "File to output neighbors into.", "n");
 
-PARAM_INT("leaf_size", "Leaf size for tree building.", "L", 20);
+PARAM_STRING("query_file", "File containing query points (optional).", "q", "");
+
+PARAM_INT("leaf_size", "Leaf size for tree building.", "l", 20);
 PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "N");
 PARAM_FLAG("single_mode", "If true, single-tree search is used (as opposed to "
-    "dual-tree search.", "S");
-PARAM_INT_REQ("k", "Number of furthest neighbors to find.", "");
+    "dual-tree search.", "s");
 
 int main(int argc, char *argv[])
 {
@@ -98,6 +99,9 @@
     Log::Warn << "--single_mode ignored because --naive is present." << endl;
   }
 
+  if (naive)
+    leafSize = referenceData.n_cols;
+
   arma::Mat<size_t> neighbors;
   arma::mat distances;
 
@@ -111,7 +115,7 @@
   Timer::Start("reference_tree_building");
 
   BinarySpaceTree<bound::HRectBound<2>, QueryStat<FurthestNeighborSort> >
-      refTree(referenceData, oldFromNewRefs);
+      refTree(referenceData, oldFromNewRefs, leafSize);
 
   Timer::Stop("reference_tree_building");
 
@@ -129,23 +133,26 @@
 
     Log::Info << "Building query tree..." << endl;
 
+    if (naive && leafSize < queryData.n_cols)
+      leafSize = queryData.n_cols;
+
     // Build trees by hand, so we can save memory: if we pass a tree to
     // NeighborSearch, it does not copy the matrix.
     Timer::Start("query_tree_building");
 
     BinarySpaceTree<bound::HRectBound<2>, QueryStat<FurthestNeighborSort> >
-        queryTree(queryData, oldFromNewQueries);
+        queryTree(queryData, oldFromNewQueries, leafSize);
 
     Timer::Stop("query_tree_building");
 
-    allkfn = new AllkFN(referenceData, queryData, naive, singleMode, 20,
-        &refTree, &queryTree);
+    allkfn = new AllkFN(&refTree, &queryTree, referenceData, queryData,
+        singleMode);
 
     Log::Info << "Tree built." << endl;
   }
   else
   {
-    allkfn = new AllkFN(referenceData, naive, singleMode, 20, &refTree);
+    allkfn = new AllkFN(&refTree, referenceData, singleMode);
 
     Log::Info << "Trees built." << endl;
   }

Modified: mlpack/trunk/src/mlpack/methods/neighbor_search/allknn_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/neighbor_search/allknn_main.cpp	2011-12-14 22:58:45 UTC (rev 10812)
+++ mlpack/trunk/src/mlpack/methods/neighbor_search/allknn_main.cpp	2011-12-14 22:59:30 UTC (rev 10813)
@@ -40,16 +40,18 @@
 
 // Define our input parameters that this program will take.
 PARAM_STRING_REQ("reference_file", "File containing the reference dataset.",
-    "R");
-PARAM_STRING("query_file", "File containing query points (optional).", "Q", "");
-PARAM_STRING_REQ("distances_file", "File to output distances into.", "D");
-PARAM_STRING_REQ("neighbors_file", "File to output neighbors into.", "N");
+    "r");
+PARAM_STRING_REQ("distances_file", "File to output distances into.", "d");
+PARAM_STRING_REQ("neighbors_file", "File to output neighbors into.", "n");
 
-PARAM_INT("leaf_size", "Leaf size for tree building.", "L", 20);
-PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "");
+PARAM_INT_REQ("k", "Number of furthest neighbors to find.", "k");
+
+PARAM_STRING("query_file", "File containing query points (optional).", "q", "");
+
+PARAM_INT("leaf_size", "Leaf size for tree building.", "l", 20);
+PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "N");
 PARAM_FLAG("single_mode", "If true, single-tree search is used (as opposed to "
-    "dual-tree search.", "S");
-PARAM_INT_REQ("k", "Number of furthest neighbors to find.", "");
+    "dual-tree search.", "s");
 
 int main(int argc, char *argv[])
 {
@@ -98,6 +100,9 @@
     Log::Warn << "--single_mode ignored because --naive is present." << endl;
   }
 
+  if (naive)
+    leafSize = referenceData.n_cols;
+
   arma::Mat<size_t> neighbors;
   arma::mat distances;
 
@@ -127,6 +132,9 @@
     if (!data::Load(queryFile.c_str(), queryData))
       Log::Fatal << "Query file " << queryFile << " not found" << endl;
 
+    if (naive && leafSize < queryData.n_cols)
+      leafSize = queryData.n_cols;
+
     Log::Info << "Query data loaded from " << queryFile << endl;
 
     Log::Info << "Building query tree..." << endl;
@@ -140,14 +148,14 @@
 
     Timer::Stop("tree_building");
 
-    allknn = new AllkNN(referenceData, queryData, naive, singleMode, 20,
-        &refTree, &queryTree);
+    allknn = new AllkNN(&refTree, &queryTree, referenceData, queryData,
+        singleMode);
 
     Log::Info << "Tree built." << endl;
   }
   else
   {
-    allknn = new AllkNN(referenceData, naive, singleMode, 20, &refTree);
+    allknn = new AllkNN(&refTree, referenceData, singleMode);
 
     Log::Info << "Trees built." << endl;
   }

Modified: mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search.hpp	2011-12-14 22:58:45 UTC (rev 10812)
+++ mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search.hpp	2011-12-14 22:59:30 UTC (rev 10813)
@@ -91,17 +91,14 @@
  public:
   /**
    * Initialize the NeighborSearch object, passing both a query and reference
-   * dataset.  Optionally, already-built trees can be passed, for the case where
-   * a special tree-building procedure is needed.  If referenceTree is given, it
-   * is assumed that the points in referenceTree correspond to the points in
-   * referenceSet.  The same is true for queryTree and querySet.  An initialized
+   * dataset.  Optionally, perform the computation in naive mode or single-tree
+   * mode, and set the leaf size used for tree-building.  An initialized
    * distance metric can be given, for cases where the metric has internal data
    * (i.e. the distance::MahalanobisDistance class).
    *
-   * If naive mode is being used and a pre-built tree is given, it may not work:
-   * naive mode operates by building a one-node tree (the root node holds all
-   * the points).  If that condition is not satisfied with the pre-built tree,
-   * then naive mode will not work.
+   * This method will copy the matrices to internal copies, which are rearranged
+   * during tree-building.  You can avoid this extra copy by pre-constructing
+   * the trees and passing them using a diferent constructor.
    *
    * @param referenceSet Set of reference points.
    * @param querySet Set of query points.
@@ -110,28 +107,21 @@
    * @param singleMode If true, single-tree search will be used (as opposed to
    *      dual-tree search).
    * @param leafSize Leaf size for tree construction (ignored if tree is given).
-   * @param referenceTree Optionally pass a pre-built tree for the reference
-   *      set.
-   * @param queryTree Optionally pass a pre-built tree for the query set.
    * @param metric An optional instance of the MetricType class.
    */
-  NeighborSearch(const arma::mat& referenceSet,
-                 const arma::mat& querySet,
+  NeighborSearch(const typename TreeType::Mat& referenceSet,
+                 const typename TreeType::Mat& querySet,
                  const bool naive = false,
                  const bool singleMode = false,
                  const size_t leafSize = 20,
-                 TreeType* referenceTree = NULL,
-                 TreeType* queryTree = NULL,
                  const MetricType metric = MetricType());
 
   /**
    * Initialize the NeighborSearch object, passing only one dataset, which is
-   * used as both the query and the reference dataset.  Optionally, an
-   * already-built tree can be passed, for the case where a special
-   * tree-building procedure is needed.  If referenceTree is given, it is
-   * assumed that the points in referenceTree correspond to the points in
-   * referenceSet.  An initialized distance metric can be given, for cases where
-   * the metric has internal data (i.e. the distance::MahalanobisDistance
+   * used as both the query and the reference dataset.  Optionally, perform the
+   * computation in naive mode or single-tree mode, and set the leaf size used
+   * for tree-building.  An initialized distance metric can be given, for cases
+   * where the metric has internal data (i.e. the distance::MahalanobisDistance
    * class).
    *
    * If naive mode is being used and a pre-built tree is given, it may not work:
@@ -145,18 +135,84 @@
    * @param singleMode If true, single-tree search will be used (as opposed to
    *      dual-tree search).
    * @param leafSize Leaf size for tree construction (ignored if tree is given).
-   * @param referenceTree Optionally pass a pre-built tree for the reference
-   *      set.
    * @param metric An optional instance of the MetricType class.
    */
-  NeighborSearch(const arma::mat& referenceSet,
+  NeighborSearch(const typename TreeType::Mat& referenceSet,
                  const bool naive = false,
                  const bool singleMode = false,
                  const size_t leafSize = 20,
-                 TreeType* referenceTree = NULL,
                  const MetricType metric = MetricType());
 
   /**
+   * Initialize the NeighborSearch object with the given datasets and
+   * pre-constructed trees.  It is assumed that the points in referenceSet and
+   * querySet correspond to the points in referenceTree and queryTree,
+   * respectively.  Optionally, choose to use single-tree mode.  Naive mode is
+   * not available as an option for this constructor; instead, to run naive
+   * computation, construct a tree with all of the points in one leaf (i.e.
+   * leafSize = number of points).  Additionally, an instantiated distance
+   * metric can be given, for cases where the distance metric holds data.
+   *
+   * There is no copying of the data matrices in this constructor (because
+   * tree-building is not necessary), so this is the constructor to use when
+   * copies absolutely must be avoided.
+   *
+   * @note
+   * Because tree-building (at least with BinarySpaceTree) modifies the ordering
+   * of a matrix, be sure you pass the modified matrix to this object!  In
+   * addition, mapping the points of the matrix back to their original indices
+   * is not done when this constructor is used.
+   * @endnote
+   *
+   * @param referenceTree Pre-built tree for reference points.
+   * @param queryTree Pre-built tree for query points.
+   * @param referenceSet Set of reference points corresponding to referenceTree.
+   * @param querySet Set of query points corresponding to queryTree.
+   * @param singleMode Whether single-tree computation should be used (as
+   *      opposed to dual-tree computation).
+   * @param metric Instantiated distance metric.
+   */
+  NeighborSearch(TreeType* referenceTree,
+                 TreeType* queryTree,
+                 const typename TreeType::Mat& referenceSet,
+                 const typename TreeType::Mat& querySet,
+                 const bool singleMode = false,
+                 const MetricType metric = MetricType());
+
+  /**
+   * Initialize the NeighborSearch object with the given reference dataset and
+   * pre-constructed tree.  It is assumed that the points in referenceSet
+   * correspond to the points in referenceTree.  Optionally, choose to use
+   * single-tree mode.  Naive mode is not available as an option for this
+   * constructor; instead, to run naive computation, construct a tree with all
+   * the points in one leaf (i.e. leafSize = number of points).  Additionally,
+   * an instantiated distance metric can be given, for the case where the
+   * distance metric holds data.
+   *
+   * There is no copying of the data matrices in this constructor (because
+   * tree-building is not necessary), so this is the constructor to use when
+   * copies absolutely must be avoided.
+   *
+   * @note
+   * Because tree-building (at least with BinarySpaceTree) modifies the ordering
+   * of a matrix, be sure you pass the modified matrix to this object!  In
+   * addition, mapping the points of the matrix back to their original indices
+   * is not done when this constructor is used.
+   * @endnote
+   *
+   * @param referenceTree Pre-built tree for reference points.
+   * @param referenceSet Set of reference points corresponding to referenceTree.
+   * @param singleMode Whether single-tree computation should be used (as
+   *      opposed to dual-tree computation).
+   * @param metric Instantiated distance metric.
+   */
+  NeighborSearch(TreeType* referenceTree,
+                 const typename TreeType::Mat& referenceSet,
+                 const bool singleMode = false,
+                 const MetricType metric = MetricType());
+
+
+  /**
    * Delete the NeighborSearch object. The tree is the only member we are
    * responsible for deleting.  The others will take care of themselves.
    */

Modified: mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp	2011-12-14 22:58:45 UTC (rev 10812)
+++ mlpack/trunk/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp	2011-12-14 22:59:30 UTC (rev 10813)
@@ -15,24 +15,22 @@
 // Construct the object.
 template<typename SortPolicy, typename MetricType, typename TreeType>
 NeighborSearch<SortPolicy, MetricType, TreeType>::
-NeighborSearch(const arma::mat& referenceSet,
-               const arma::mat& querySet,
+NeighborSearch(const typename TreeType::Mat& referenceSet,
+               const typename TreeType::Mat& querySet,
                const bool naive,
                const bool singleMode,
                const size_t leafSize,
-               TreeType* referenceTree,
-               TreeType* queryTree,
                const MetricType metric) :
-    referenceCopy(referenceTree ? arma::mat() : referenceSet),
-    queryCopy(queryTree ? arma::mat() : querySet),
-    referenceSet(referenceTree ? referenceSet : referenceCopy),
-    querySet(queryTree ? querySet : queryCopy),
+    referenceCopy(referenceSet),
+    queryCopy(querySet),
+    referenceSet(referenceCopy),
+    querySet(queryCopy),
     naive(naive),
     singleMode(!naive && singleMode), // No single mode if naive.
-    referenceTree(referenceTree),
-    queryTree(queryTree),
-    ownReferenceTree(!referenceTree), // False if a tree was passed.
-    ownQueryTree(!queryTree), // False if a tree was passed.
+    referenceTree(NULL),
+    queryTree(NULL),
+    ownReferenceTree(true), // False if a tree was passed.
+    ownQueryTree(true), // False if a tree was passed.
     metric(metric),
     numberOfPrunes(0)
 {
@@ -43,26 +41,12 @@
   if (!referenceTree || !queryTree)
     Timer::Start("tree_building");
 
-  if (!referenceTree)
-  {
-    // Construct as a naive object if we need to.
-    if (naive)
-      this->referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
-          referenceSet.n_cols /* everything in one leaf */);
-    else
-      this->referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
-          leafSize);
-  }
+  // Construct as a naive object if we need to.
+  referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
+      (naive ? referenceCopy.n_cols : leafSize));
 
-  if (!queryTree)
-  {
-    // Construct as a naive object if we need to.
-    if (naive)
-      this->queryTree = new TreeType(queryCopy, oldFromNewQueries,
-          querySet.n_cols);
-    else
-      this->queryTree = new TreeType(queryCopy, oldFromNewQueries, leafSize);
-  }
+  queryTree = new TreeType(queryCopy, oldFromNewQueries,
+      (naive ? querySet.n_cols : leafSize));
 
   // Stop the timer we started above (if we need to).
   if (!referenceTree || !queryTree)
@@ -72,42 +56,78 @@
 // Construct the object.
 template<typename SortPolicy, typename MetricType, typename TreeType>
 NeighborSearch<SortPolicy, MetricType, TreeType>::
-NeighborSearch(const arma::mat& referenceSet,
+NeighborSearch(const typename TreeType::Mat& referenceSet,
                const bool naive,
                const bool singleMode,
                const size_t leafSize,
-               TreeType* referenceTree,
                const MetricType metric) :
-    referenceCopy(referenceTree ? arma::mat() : referenceSet),
-    referenceSet(referenceTree ? referenceSet : referenceCopy),
-    querySet(referenceTree ? referenceSet : referenceCopy),
+    referenceCopy(referenceSet),
+    referenceSet(referenceCopy),
+    querySet(referenceCopy),
     naive(naive),
     singleMode(!naive && singleMode), // No single mode if naive.
-    referenceTree(referenceTree),
+    referenceTree(NULL),
     queryTree(NULL),
-    ownReferenceTree(!referenceTree),
+    ownReferenceTree(true),
     ownQueryTree(false), // Since it will be the same as referenceTree.
     metric(metric),
     numberOfPrunes(0)
 {
   // We'll time tree building, but only if we are building trees.
-  if (!referenceTree)
-  {
-    Timer::Start("tree_building");
+  Timer::Start("tree_building");
 
-    // Construct as a naive object if we need to.
-    if (naive)
-      this->referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
-          referenceSet.n_cols /* everything in one leaf */);
-    else
-      this->referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
-          leafSize);
+  // Construct as a naive object if we need to.
+  referenceTree = new TreeType(referenceCopy, oldFromNewReferences,
+      (naive ? referenceSet.n_cols : leafSize));
 
-    // Stop the timer we started above.
-    Timer::Stop("tree_building");
-  }
+  // Stop the timer we started above.
+  Timer::Stop("tree_building");
 }
 
+// Construct the object.
+template<typename SortPolicy, typename MetricType, typename TreeType>
+NeighborSearch<SortPolicy, MetricType, TreeType>::NeighborSearch(
+    TreeType* referenceTree,
+    TreeType* queryTree,
+    const typename TreeType::Mat& referenceSet,
+    const typename TreeType::Mat& querySet,
+    const bool singleMode,
+    const MetricType metric) :
+    referenceSet(referenceSet),
+    querySet(querySet),
+    referenceTree(referenceTree),
+    queryTree(queryTree),
+    ownReferenceTree(false),
+    ownQueryTree(false),
+    naive(false),
+    singleMode(singleMode),
+    metric(metric),
+    numberOfPrunes(0)
+{
+  // Nothing else to initialize.
+}
+
+// Construct the object.
+template<typename SortPolicy, typename MetricType, typename TreeType>
+NeighborSearch<SortPolicy, MetricType, TreeType>::NeighborSearch(
+    TreeType* referenceTree,
+    const typename TreeType::Mat& referenceSet,
+    const bool singleMode,
+    const MetricType metric) :
+    referenceSet(referenceSet),
+    querySet(referenceSet),
+    referenceTree(referenceTree),
+    queryTree(NULL),
+    ownReferenceTree(false),
+    ownQueryTree(false),
+    naive(false),
+    singleMode(singleMode),
+    metric(metric),
+    numberOfPrunes(0)
+{
+  // Nothing else to initialize.
+}
+
 /**
  * The tree is the only member we may be responsible for deleting.  The others
  * will take care of themselves.




More information about the mlpack-svn mailing list