[mlpack-svn] r11605 - mlpack/trunk/src/mlpack/methods/range_search
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Feb 27 15:14:23 EST 2012
Author: rcurtin
Date: 2012-02-27 15:14:22 -0500 (Mon, 27 Feb 2012)
New Revision: 11605
Added:
mlpack/trunk/src/mlpack/methods/range_search/range_search_main.cpp
Removed:
mlpack/trunk/src/mlpack/methods/range_search/range_main.cpp
Modified:
mlpack/trunk/src/mlpack/methods/range_search/CMakeLists.txt
Log:
Move to range_search_main.cpp and clean up executable.
Modified: mlpack/trunk/src/mlpack/methods/range_search/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/methods/range_search/CMakeLists.txt 2012-02-27 19:52:19 UTC (rev 11604)
+++ mlpack/trunk/src/mlpack/methods/range_search/CMakeLists.txt 2012-02-27 20:14:22 UTC (rev 11605)
@@ -17,7 +17,7 @@
set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
add_executable(range_search
- range_main.cpp
+ range_search_main.cpp
)
target_link_libraries(range_search
mlpack
Deleted: mlpack/trunk/src/mlpack/methods/range_search/range_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/range_search/range_main.cpp 2012-02-27 19:52:19 UTC (rev 11604)
+++ mlpack/trunk/src/mlpack/methods/range_search/range_main.cpp 2012-02-27 20:14:22 UTC (rev 11605)
@@ -1,223 +0,0 @@
-/**
- * @file allknn_main.cpp
- * @author Ryan Curtin
- *
- * Implementation of the AllkNN executable. Allows some number of standard
- * options.
- */
-#include <mlpack/core.hpp>
-#include <mlpack/core/metrics/lmetric.hpp>
-
-#include <string>
-#include <fstream>
-#include <iostream>
-
-#include "range_search.hpp"
-
-using namespace std;
-using namespace mlpack;
-using namespace mlpack::range;
-using namespace mlpack::tree;
-
-// Information about the program itself.
-PROGRAM_INFO("Range Search",
- "This program will calculate the all nearest-neighbors of a set of "
- "points constrained by a range. You may specify a separate set of reference points and query "
- "points, or just a reference set which will be used as both the reference "
- "and query set."
- "\n\n"
- "For example, the following will calculate nearest neighbors within 5 units of each"
- "point in 'input.csv' and store the distances in 'distances.csv' and the "
- "neighbors in the file 'neighbors.csv':"
- "\n\n"
- "$ allknn --min=0 --max=5 --reference_file=input.csv --distances_file=distances.csv\n"
- " --neighbors_file=neighbors.csv"
- "\n\n"
- "The output files are organized such that row i and column j in the "
- "neighbors output file corresponds to the index of the point in the "
- "reference set which is the i'th nearest neighbor from the point in the "
- "query set with index j. Row i and column j in the distances output file "
- "corresponds to the distance between those two points.");
-
-// Define our input parameters that this program will take.
-PARAM_STRING_REQ("reference_file", "File containing the reference dataset.",
- "r");
-PARAM_STRING_REQ("distances_file", "File to output distances into.", "d");
-PARAM_STRING_REQ("neighbors_file", "File to output neighbors into.", "n");
-
-PARAM_DOUBLE_REQ("max", "Furthest neighbors to find.", "A");
-PARAM_DOUBLE("min", "Closest neighbors to find.", "I", 0.0);
-
-PARAM_STRING("query_file", "File containing query points (optional).", "q", "");
-
-PARAM_INT("leaf_size", "Leaf size for tree building.", "l", 20);
-PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "N");
-PARAM_FLAG("single_mode", "If true, single-tree search is used (as opposed to "
- "dual-tree search.", "s");
-
-typedef RangeSearch<metric::SquaredEuclideanDistance,
- BinarySpaceTree<bound::HRectBound<2>, EmptyStatistic> > AllInRange;
-
-int main(int argc, char *argv[])
-{
- // Give CLI the command line parameters the user passed in.
- CLI::ParseCommandLine(argc, argv);
-
- // Get all the parameters.
- string referenceFile = CLI::GetParam<string>("reference_file");
-
- string distancesFile = CLI::GetParam<string>("distances_file");
- string neighborsFile = CLI::GetParam<string>("neighbors_file");
-
- int lsInt = CLI::GetParam<int>("leaf_size");
-
- double max = CLI::GetParam<int>("max");
- double min = CLI::GetParam<int>("min");
-
- bool naive = CLI::HasParam("naive");
- bool singleMode = CLI::HasParam("single_mode");
-
- arma::mat referenceData;
- arma::mat queryData; // So it doesn't go out of scope.
- if (!data::Load(referenceFile.c_str(), referenceData))
- Log::Fatal << "Reference file " << referenceFile << "not found." << endl;
-
- Log::Info << "Loaded reference data from '" << referenceFile << "'." << endl;
-
- // Sanity check on range value: max must be greater than min.
- if (max <= min)
- {
- Log::Fatal << "Invalid [min,max]: " << max << "; must be greater than " << min;
- }
-
- // Sanity check on leaf size.
- if (lsInt < 0)
- {
- Log::Fatal << "Invalid leaf size: " << lsInt << ". Must be greater "
- "than or equal to 0." << endl;
- }
- size_t leafSize = lsInt;
-
- // Naive mode overrides single mode.
- if (singleMode && naive)
- {
- Log::Warn << "--single_mode ignored because --naive is present." << endl;
- }
-
- if (naive)
- leafSize = referenceData.n_cols;
-
- std::vector<std::vector<size_t> > neighbors;
- std::vector<std::vector<double> > distances;
-
- // Because we may construct it differently, we need a pointer.
- AllInRange* rangeSearch = NULL;
-
- // Mappings for when we build the tree.
- std::vector<size_t> oldFromNewRefs;
-
- // Build trees by hand, so we can save memory: if we pass a tree to
- // NeighborSearch, it does not copy the matrix.
- Log::Info << "Building reference tree..." << endl;
- Timer::Start("tree_building");
-
- BinarySpaceTree<bound::HRectBound<2>, tree::EmptyStatistic>
- refTree(referenceData, oldFromNewRefs, leafSize);
- BinarySpaceTree<bound::HRectBound<2>, tree::EmptyStatistic>*
- queryTree = NULL; // Empty for now.
-
- Timer::Stop("tree_building");
-
- std::vector<size_t> oldFromNewQueries;
-
- if (CLI::GetParam<string>("query_file") != "")
- {
- string queryFile = CLI::GetParam<string>("query_file");
-
- if (!data::Load(queryFile.c_str(), queryData))
- Log::Fatal << "Query file " << queryFile << " not found" << endl;
-
- if (naive && leafSize < queryData.n_cols)
- leafSize = queryData.n_cols;
-
- Log::Info << "Loaded query data from '" << queryFile << "'." << endl;
-
- Log::Info << "Building query tree..." << endl;
-
- // Build trees by hand, so we can save memory: if we pass a tree to
- // NeighborSearch, it does not copy the matrix.
- Timer::Start("tree_building");
-
- queryTree = new BinarySpaceTree<bound::HRectBound<2>,
- tree::EmptyStatistic >(queryData, oldFromNewQueries,
- leafSize);
-
- Timer::Stop("tree_building");
-
- rangeSearch = new AllInRange(&refTree, queryTree, referenceData,
- queryData, singleMode);
-
- Log::Info << "Tree built." << endl;
- }
- else
- {
- rangeSearch = new AllInRange(&refTree, referenceData, singleMode);
-
- Log::Info << "Trees built." << endl;
- }
-
- Log::Info << "Computing neighbors within [" << min << ", " << max << "]." << endl;
-
- math::Range r = math::Range(min,max);
- rangeSearch->Search(r, neighbors, distances);
-
- Log::Info << "Neighbors computed." << endl;
-
- // We have to map back to the original indices from before the tree
- // construction.
- Log::Info << "Re-mapping indices..." << endl;
-
-// arma::mat distancesOut(distances.n_rows, distances.n_cols);
-// arma::Mat<size_t> neighborsOut(neighbors.n_rows, neighbors.n_cols);
-
- /*
- // Do the actual remapping.
- if (CLI::GetParam<string>("query_file") != "")
- {
- for (size_t i = 0; i < distances.n_cols; ++i)
- {
- // Map distances (copy a column).
- distancesOut.col(oldFromNewQueries[i]) = distances.col(i);
-
- // Map indices of neighbors.
- for (size_t j = 0; j < distances.n_rows; ++j)
- {
- neighborsOut(j, oldFromNewQueries[i]) = oldFromNewRefs[neighbors(j, i)];
- }
- }
- }
- else
- {
- for (size_t i = 0; i < distances.n_cols; ++i)
- {
- // Map distances (copy a column).
- distancesOut.col(oldFromNewRefs[i]) = distances.col(i);
-
- // Map indices of neighbors.
- for (size_t j = 0; j < distances.n_rows; ++j)
- {
- neighborsOut(j, oldFromNewRefs[i]) = oldFromNewRefs[neighbors(j, i)];
- }
- }
- }
-
- // Clean up.
- if (queryTree)
- delete queryTree;
-
- // Save output.
- data::Save(distancesFile, distances);
- data::Save(neighborsFile, neighbors);
-*/
- delete rangeSearch;
-}
Copied: mlpack/trunk/src/mlpack/methods/range_search/range_search_main.cpp (from rev 11463, mlpack/trunk/src/mlpack/methods/range_search/range_main.cpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/range_search/range_search_main.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/range_search/range_search_main.cpp 2012-02-27 20:14:22 UTC (rev 11605)
@@ -0,0 +1,286 @@
+/**
+ * @file range_search_main.cpp
+ * @author Ryan Curtin
+ * @author Matthew Amidon
+ *
+ * Implementation of the RangeSearch executable. Allows some number of standard
+ * options.
+ */
+#include <mlpack/core.hpp>
+#include <mlpack/core/metrics/lmetric.hpp>
+
+#include "range_search.hpp"
+
+using namespace std;
+using namespace mlpack;
+using namespace mlpack::range;
+using namespace mlpack::tree;
+
+// Information about the program itself.
+PROGRAM_INFO("Range Search",
+ "This program implements range search with a Euclidean distance metric. "
+ "For a given query point, a given range, and a given set of reference "
+ "points, the program will return all of the reference points with distance "
+ "to the query point in the given range. This is performed for an entire "
+ "set of query points. You may specify a separate set of reference and query"
+ " points, or only a reference set -- which is then used as both the "
+ "reference and query set. The given range is taken to be inclusive (that "
+ "is, points with a distance exactly equal to the minimum and maximum of the"
+ " range are included in the results)."
+ "\n\n"
+ "For example, the following will calculate the points within the range [2, "
+ "5] of each point in 'input.csv' and store the distances in 'distances.csv'"
+ " and the neighbors in 'neighbors.csv':"
+ "\n\n"
+ "$ range_search --min=2 --max=5 --reference_file=input.csv\n"
+ " --distances_file=distances.csv --neighbors_file=neighbors.csv"
+ "\n\n"
+ "The output files are organized such that line i corresponds to the points "
+ "found for query point i. Because sometimes 0 points may be found in the "
+ "given range, lines of the output files may be empty. The points are not "
+ "ordered in any specific manner."
+ "\n\n"
+ "Because the number of points returned for each query point may differ, the"
+ " resultant CSV-like files may not be loadable by many programs. However, "
+ "at this time a better way to store this non-square result is not known. "
+ "As a result, any output files will be written as CSVs in this manner, "
+ "regardless of the given extension.");
+
+// Define our input parameters that this program will take.
+PARAM_STRING_REQ("reference_file", "File containing the reference dataset.",
+ "r");
+PARAM_STRING_REQ("distances_file", "File to output distances into.", "d");
+PARAM_STRING_REQ("neighbors_file", "File to output neighbors into.", "n");
+
+PARAM_DOUBLE_REQ("max", "Upper bound in range.", "M");
+PARAM_DOUBLE("min", "Lower bound in range.", "m", 0.0);
+
+PARAM_STRING("query_file", "File containing query points (optional).", "q", "");
+
+PARAM_INT("leaf_size", "Leaf size for tree building.", "l", 20);
+PARAM_FLAG("naive", "If true, O(n^2) naive mode is used for computation.", "N");
+PARAM_FLAG("single_mode", "If true, single-tree search is used (as opposed to "
+ "dual-tree search.", "s");
+
+typedef RangeSearch<metric::SquaredEuclideanDistance,
+ BinarySpaceTree<bound::HRectBound<2>, EmptyStatistic> > RSType;
+
+int main(int argc, char *argv[])
+{
+ // Give CLI the command line parameters the user passed in.
+ CLI::ParseCommandLine(argc, argv);
+
+ // Get all the parameters.
+ string referenceFile = CLI::GetParam<string>("reference_file");
+
+ string distancesFile = CLI::GetParam<string>("distances_file");
+ string neighborsFile = CLI::GetParam<string>("neighbors_file");
+
+ int lsInt = CLI::GetParam<int>("leaf_size");
+
+ double max = CLI::GetParam<double>("max");
+ double min = CLI::GetParam<double>("min");
+
+ bool naive = CLI::HasParam("naive");
+ bool singleMode = CLI::HasParam("single_mode");
+
+ arma::mat referenceData;
+ arma::mat queryData; // So it doesn't go out of scope.
+ if (!data::Load(referenceFile.c_str(), referenceData))
+ Log::Fatal << "Reference file " << referenceFile << "not found." << endl;
+
+ Log::Info << "Loaded reference data from '" << referenceFile << "'." << endl;
+
+ // Sanity check on range value: max must be greater than min.
+ if (max <= min)
+ {
+ Log::Fatal << "Invalid range: maximum (" << max << ") must be greater than "
+ << "minimum (" << min << ")." << endl;
+ }
+
+ // Sanity check on leaf size.
+ if (lsInt < 0)
+ {
+ Log::Fatal << "Invalid leaf size: " << lsInt << ". Must be greater "
+ "than or equal to 0." << endl;
+ }
+ size_t leafSize = lsInt;
+
+ // Naive mode overrides single mode.
+ if (singleMode && naive)
+ {
+ Log::Warn << "--single_mode ignored because --naive is present." << endl;
+ }
+
+ if (naive)
+ leafSize = referenceData.n_cols;
+
+ vector<vector<size_t> > neighbors;
+ vector<vector<double> > distances;
+
+ // Because we may construct it differently, we need a pointer.
+ RSType* rangeSearch = NULL;
+
+ // Mappings for when we build the tree.
+ vector<size_t> oldFromNewRefs;
+
+ // Build trees by hand, so we can save memory: if we pass a tree to
+ // NeighborSearch, it does not copy the matrix.
+ Log::Info << "Building reference tree..." << endl;
+ Timer::Start("tree_building");
+
+ BinarySpaceTree<bound::HRectBound<2>, tree::EmptyStatistic>
+ refTree(referenceData, oldFromNewRefs, leafSize);
+ BinarySpaceTree<bound::HRectBound<2>, tree::EmptyStatistic>*
+ queryTree = NULL; // Empty for now.
+
+ Timer::Stop("tree_building");
+
+ std::vector<size_t> oldFromNewQueries;
+
+ if (CLI::GetParam<string>("query_file") != "")
+ {
+ string queryFile = CLI::GetParam<string>("query_file");
+
+ if (!data::Load(queryFile.c_str(), queryData))
+ Log::Fatal << "Query file " << queryFile << " not found" << endl;
+
+ if (naive && leafSize < queryData.n_cols)
+ leafSize = queryData.n_cols;
+
+ Log::Info << "Loaded query data from '" << queryFile << "'." << endl;
+
+ Log::Info << "Building query tree..." << endl;
+
+ // Build trees by hand, so we can save memory: if we pass a tree to
+ // NeighborSearch, it does not copy the matrix.
+ Timer::Start("tree_building");
+
+ queryTree = new BinarySpaceTree<bound::HRectBound<2>,
+ tree::EmptyStatistic >(queryData, oldFromNewQueries,
+ leafSize);
+
+ Timer::Stop("tree_building");
+
+ rangeSearch = new RSType(&refTree, queryTree, referenceData, queryData,
+ singleMode);
+
+ Log::Info << "Tree built." << endl;
+ }
+ else
+ {
+ rangeSearch = new RSType(&refTree, referenceData, singleMode);
+
+ Log::Info << "Trees built." << endl;
+ }
+
+ Log::Info << "Computing neighbors within range [" << min << ", " << max
+ << "]." << endl;
+
+ math::Range r = math::Range(min, max);
+ rangeSearch->Search(r, neighbors, distances);
+
+ Log::Info << "Neighbors computed." << endl;
+
+ // We have to map back to the original indices from before the tree
+ // construction.
+ Log::Info << "Re-mapping indices..." << endl;
+
+ vector<vector<double> > distancesOut;
+ distancesOut.resize(distances.size());
+ vector<vector<size_t> > neighborsOut;
+ neighborsOut.resize(neighbors.size());
+
+ // Do the actual remapping.
+ if (CLI::GetParam<string>("query_file") != "")
+ {
+ for (size_t i = 0; i < distances.size(); ++i)
+ {
+ // Map distances (copy a column).
+ distancesOut[oldFromNewQueries[i]] = distances[i];
+
+ // Map indices of neighbors.
+ neighborsOut[oldFromNewQueries[i]].resize(neighbors[i].size());
+ for (size_t j = 0; j < distances[i].size(); ++j)
+ {
+ neighborsOut[oldFromNewQueries[i]][j] = oldFromNewRefs[neighbors[i][j]];
+ }
+ }
+ }
+ else
+ {
+ for (size_t i = 0; i < distances.size(); ++i)
+ {
+ // Map distances (copy a column).
+ distancesOut[oldFromNewRefs[i]] = distances[i];
+
+ // Map indices of neighbors.
+ neighborsOut[oldFromNewRefs[i]].resize(neighbors[i].size());
+ for (size_t j = 0; j < distances[i].size(); ++j)
+ {
+ neighborsOut[oldFromNewRefs[i]][j] = oldFromNewRefs[neighbors[i][j]];
+ }
+ }
+ }
+
+ // Clean up.
+ if (queryTree)
+ delete queryTree;
+
+ // Save output. We have to do this by hand.
+ fstream distancesStr(distancesFile.c_str(), fstream::out);
+ if (!distancesStr.is_open())
+ {
+ Log::Warn << "Cannot open file '" << distancesFile << "' to save output "
+ << "distances to!" << endl;
+ }
+ else
+ {
+ // Loop over each point.
+ for (size_t i = 0; i < distancesOut.size(); ++i)
+ {
+ // Store the distances of each point. We may have 0 points to store, so
+ // we must account for that possibility.
+ for (size_t j = 0; j + 1 < distancesOut[i].size(); ++j)
+ {
+ distancesStr << distancesOut[i][j] << ", ";
+ }
+
+ if (distancesOut[i].size() > 0)
+ distancesStr << distancesOut[i][distancesOut[i].size() - 1];
+
+ distancesStr << endl;
+ }
+
+ distancesStr.close();
+ }
+
+ fstream neighborsStr(neighborsFile.c_str(), fstream::out);
+ if (!neighborsStr.is_open())
+ {
+ Log::Warn << "Cannot open file '" << neighborsFile << "' to save output "
+ << "neighbor indices to!" << endl;
+ }
+ else
+ {
+ // Loop over each point.
+ for (size_t i = 0; i < neighborsOut.size(); ++i)
+ {
+ // Store the neighbors of each point. We may have 0 points to store, so
+ // we must account for that possibility.
+ for (size_t j = 0; j + 1 < neighborsOut[i].size(); ++j)
+ {
+ neighborsStr << neighborsOut[i][j] << ", ";
+ }
+
+ if (neighborsOut[i].size() > 0)
+ neighborsStr << neighborsOut[i][neighborsOut[i].size() - 1];
+
+ neighborsStr << endl;
+ }
+
+ neighborsStr.close();
+ }
+
+ delete rangeSearch;
+}
More information about the mlpack-svn
mailing list