[mlpack-git] master: Changes BaseCase to include the loop over candidate set. Places parallelization tests into #ifdef block (b02e2f3)
gitdub at mlpack.org
gitdub at mlpack.org
Wed Jul 6 11:56:44 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/34cf8d94f79c9a72ff4199676033b060cd039fcd...425324bf7fb7c86c85d10a909d8a59d4f69b7164
>---------------------------------------------------------------
commit b02e2f341aa800b3397bf5651f99af9b88645d88
Author: Yannis Mentekidis <mentekid at gmail.com>
Date: Wed Jul 6 16:56:44 2016 +0100
Changes BaseCase to include the loop over candidate set. Places parallelization tests into #ifdef block
>---------------------------------------------------------------
b02e2f341aa800b3397bf5651f99af9b88645d88
CMakeLists.txt | 4 +-
src/mlpack/methods/lsh/lsh_search_impl.hpp | 104 ++--------------
src/mlpack/tests/lsh_test.cpp | 189 ++++++++++++-----------------
3 files changed, 87 insertions(+), 210 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2a3b451..df949ea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,10 +24,10 @@ option(ARMA_EXTRA_DEBUG "Compile with extra Armadillo debugging symbols." OFF)
option(MATLAB_BINDINGS "Compile MATLAB bindings if MATLAB is found." OFF)
option(TEST_VERBOSE "Run test cases with verbose output." OFF)
option(BUILD_TESTS "Build tests." ON)
-option(BUILD_CLI_EXECUTABLES "Build command-line executables" ON)
+option(BUILD_CLI_EXECUTABLES "Build command-line executables." ON)
option(BUILD_SHARED_LIBS
"Compile shared libraries (if OFF, static libraries are compiled)." ON)
-option(HAS_OPENMP "Use OpenMP for parallel execution, if available" ON)
+option(HAS_OPENMP "Use OpenMP for parallel execution, if available." ON)
enable_testing()
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index 8d3cdb1..75a1201 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -217,13 +217,14 @@ void LSHSearch<SortPolicy>::Train(const arma::mat& referenceSet,
double shs = (double) secondHashSize; // Convenience cast.
if (unmodVector[j] >= 0.0)
{
- secondHashVectors(i, j) = size_t(fmod(unmodVector[j], shs));
+ const size_t key = size_t(fmod(unmodVector[j], shs));
+ secondHashVectors(i, j) = key;
}
else
{
const double mod = fmod(-unmodVector[j], shs);
- secondHashVectors(i, j) = (mod < 1.0) ? 0 : secondHashSize -
- size_t(mod);
+ const size_t key = (mod < 1.0) ? 0 : secondHashSize - size_t(mod);
+ secondHashVectors(i, j) = key;
}
}
}
@@ -305,74 +306,6 @@ void LSHSearch<SortPolicy>::InsertNeighbor(arma::mat& distances,
neighbors(pos, queryIndex) = neighbor;
}
-/*
-// Base case where the query set is the reference set. (So, we can't return
-// ourselves as the nearest neighbor.)
-template<typename SortPolicy>
-inline force_inline
-void LSHSearch<SortPolicy>::BaseCase(const size_t queryIndex,
- const size_t referenceIndex,
- arma::Mat<size_t>& neighbors,
- arma::mat& distances) const
-{
- // If the points are the same, we can't continue.
- if (queryIndex == referenceIndex)
- return;
-
- const double distance = metric::EuclideanDistance::Evaluate(
- referenceSet->unsafe_col(queryIndex),
- referenceSet->unsafe_col(referenceIndex));
-
- // If this distance is better than any of the current candidates, the
- // SortDistance() function will give us the position to insert it into.
- arma::vec queryDist = distances.unsafe_col(queryIndex);
- arma::Col<size_t> queryIndices = neighbors.unsafe_col(queryIndex);
- size_t insertPosition = SortPolicy::SortDistance(queryDist, queryIndices,
- distance);
-
- // SortDistance() returns (size_t() - 1) if we shouldn't add it.
- if (insertPosition != (size_t() - 1))
- {
- #pragma omp critical
- {
- InsertNeighbor(distances, neighbors, queryIndex, insertPosition,
- referenceIndex, distance);
- }
- }
-}
-
-// Base case for bichromatic search.
-template<typename SortPolicy>
-inline force_inline
-void LSHSearch<SortPolicy>::BaseCase(const size_t queryIndex,
- const size_t referenceIndex,
- const arma::mat& querySet,
- arma::Mat<size_t>& neighbors,
- arma::mat& distances) const
-{
- const double distance = metric::EuclideanDistance::Evaluate(
- querySet.unsafe_col(queryIndex),
- referenceSet->unsafe_col(referenceIndex));
-
- // If this distance is better than any of the current candidates, the
- // SortDistance() function will give us the position to insert it into.
- arma::vec queryDist = distances.unsafe_col(queryIndex);
- arma::Col<size_t> queryIndices = neighbors.unsafe_col(queryIndex);
- size_t insertPosition = SortPolicy::SortDistance(queryDist, queryIndices,
- distance);
-
- // SortDistance() returns (size_t() - 1) if we shouldn't add it.
- if (insertPosition != (size_t() - 1))
- {
- #pragma omp critical
- {
- InsertNeighbor(distances, neighbors, queryIndex, insertPosition,
- referenceIndex, distance);
- }
- }
-}
-*/
-
// Base case where the query set is the reference set. (So, we can't return
// ourselves as the nearest neighbor.)
template<typename SortPolicy>
@@ -382,6 +315,7 @@ void LSHSearch<SortPolicy>::BaseCase(const size_t queryIndex,
arma::Mat<size_t>& neighbors,
arma::mat& distances) const
{
+
for (size_t j = 0; j < referenceIndices.n_elem; ++j)
{
const size_t referenceIndex = referenceIndices[j];
@@ -432,8 +366,11 @@ void LSHSearch<SortPolicy>::BaseCase(const size_t queryIndex,
// SortDistance() returns (size_t() - 1) if we shouldn't add it.
if (insertPosition != (size_t() - 1))
+ #pragma omp critical
InsertNeighbor(distances, neighbors, queryIndex, insertPosition,
referenceIndex, distance);
+
+ #pragma omp flush(distances, neighbors)
}
}
template<typename SortPolicy>
@@ -907,7 +844,7 @@ void LSHSearch<SortPolicy>::Search(const arma::mat& querySet,
// Parallelization to process more than one query at a time.
// use as many threads possible but not more than allowed number
- size_t numThreadsUsed = std::min( (arma::uword) maxThreads, querySet.n_cols );
+ size_t numThreadsUsed = maxThreads;
#pragma omp parallel for \
num_threads ( numThreadsUsed )\
shared(avgIndicesReturned, resultingNeighbors, distances) \
@@ -931,16 +868,6 @@ void LSHSearch<SortPolicy>::Search(const arma::mat& querySet,
// Sequentially go through all the candidates and save the best 'k'
// candidates.
- /*
- numTheadsUsed = std::min( (arma::uword) maxThreads, refIndices.n_elem);
- #pragma omp parallel for\
- num_threads( numThreadsUsed )\
- shared(refIndices, resultingNeighbors, distances, querySet)\
- schedule(dynamic)
- for (size_t j = 0; j < refIndices.n_elem; j++)
- BaseCase(i, (size_t) refIndices[j], querySet, resultingNeighbors,
- distances);
- */
BaseCase(i, refIndices, querySet, resultingNeighbors, distances);
}
@@ -989,7 +916,7 @@ Search(const size_t k,
// Parallelization to process more than one query at a time.
// use as many threads possible but not more than allowed number
- size_t numThreadsUsed = std::min( (arma::uword) maxThreads, referenceSet->n_cols );
+ size_t numThreadsUsed = maxThreads;
#pragma omp parallel for \
num_threads ( numThreadsUsed )\
shared(avgIndicesReturned, resultingNeighbors, distances) \
@@ -1012,18 +939,7 @@ Search(const size_t k,
// Sequentially go through all the candidates and save the best 'k'
// candidates.
-
- /*
- numTheadsUsed = std::min( (arma::uword) maxThreads, refIndices.n_elem);
- #pragma omp parallel for\
- num_threads( numThreadsUsed )\
- shared(refIndices, resultingNeighbors, distances)\
- schedule(dynamic)
- for (size_t j = 0; j < refIndices.n_elem; j++)
- BaseCase(i, (size_t) refIndices[j], resultingNeighbors, distances);
- */
BaseCase(i, refIndices, resultingNeighbors, distances);
-
}
Timer::Stop("computing_neighbors");
diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index 8aac238..179b3b1 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -471,7 +471,6 @@ BOOST_AUTO_TEST_CASE(DeterministicNoMerge)
}
}
-
/**
* Test: Create an LSHSearch object and use an increasing number of probes to
* search for points. Require that recall for the same object doesn't decrease
@@ -612,119 +611,6 @@ BOOST_AUTO_TEST_CASE(MultiprobeDeterministicTest)
neighbors.col(0) >= N / 4 && neighbors.col(0) < N / 2));
}
-
-/**
- * Test: This test verifies that parallel query processing returns correct
- * results for the bichromatic search.
- */
-BOOST_AUTO_TEST_CASE(ParallelBichromatic)
-{
- // kNN and LSH parameters (use LSH default parameters).
- const int k = 4;
- const int numTables = 16;
- const int numProj = 3;
-
- // Read iris training and testing data as reference and query sets.
- const string trainSet = "iris_train.csv";
- const string testSet = "iris_test.csv";
- arma::mat rdata;
- arma::mat qdata;
- data::Load(trainSet, rdata, true);
- data::Load(testSet, qdata, true);
-
- // Where to store neighbors and distances
- arma::Mat<size_t> sequentialNeighbors;
- arma::Mat<size_t> parallelNeighbors;
- arma::mat distances;
-
- // Construct an LSH object. By default, it uses the maximum number of threads
- LSHSearch<> lshTest(rdata, numProj, numTables); //default parameters
- lshTest.Search(qdata, k, parallelNeighbors, distances);
-
- // Now perform same search but with 1 thread
- lshTest.MaxThreads(1);
- lshTest.Search(qdata, k, sequentialNeighbors, distances);
-
- // Require both have same results
- double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
- BOOST_REQUIRE_EQUAL(recall, 1);
-}
-
-/**
- * Test: This test verifies that parallel query processing returns correct
- * results for the monochromatic search.
- */
-BOOST_AUTO_TEST_CASE(ParallelMonochromatic)
-{
- // kNN and LSH parameters.
- const int k = 4;
- const int numTables = 16;
- const int numProj = 3;
-
- // Read iris training data as reference and query set.
- const string trainSet = "iris_train.csv";
- arma::mat rdata;
- data::Load(trainSet, rdata, true);
-
- // Where to store neighbors and distances
- arma::Mat<size_t> sequentialNeighbors;
- arma::Mat<size_t> parallelNeighbors;
- arma::mat distances;
-
- // Construct an LSH object, using maximum number of available threads.
- LSHSearch<> lshTest(rdata, numProj, numTables);
- lshTest.Search(k, parallelNeighbors, distances);
-
- // Now perform same search but with 1 thread.
- lshTest.MaxThreads(1);
- lshTest.Search(k, sequentialNeighbors, distances);
-
- // Require both have same results.
- double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
- BOOST_REQUIRE_EQUAL(recall, 1);
-}
-
-/**
- * Test: This test verifies that processing a query in parallel returns the same
- * results with processing it sequentially.
- * Requires OMP_NESTED environment variable to be set to TRUE. To set it,
- * execute:
- * ```
- * export OMP_NESTED=TRUE
- * ```
- */
-BOOST_AUTO_TEST_CASE(ParallelSingleQuery)
-{
- // kNN and LSH parameters.
- const int k = 4;
- const int numTables = 16;
- const int numProj = 3;
-
- // Read iris training data as reference and query set.
- const string trainSet = "iris_train.csv";
- arma::mat rdata;
- data::Load(trainSet, rdata, true);
-
- arma::mat qdata = rdata.col(0); // Only 1 query.
-
- // Where to store neighbors and distances.
- arma::Mat<size_t> sequentialNeighbors;
- arma::Mat<size_t> parallelNeighbors;
- arma::mat distances;
-
- // Construct an LSH object. By default, maximum number of threads are used.
- LSHSearch<> lshTest(rdata, numProj, numTables);
- lshTest.Search(qdata, k, parallelNeighbors, distances);
-
- // Now perform same search but with 1 thread.
- lshTest.MaxThreads(1);
- lshTest.Search(qdata, k, sequentialNeighbors, distances);
-
- // Require both have same results.
- double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
- BOOST_REQUIRE_EQUAL(recall, 1);
-}
-
BOOST_AUTO_TEST_CASE(LSHTrainTest)
{
// This is a not very good test that simply checks that the re-trained LSH
@@ -866,4 +752,79 @@ BOOST_AUTO_TEST_CASE(EmptyConstructorTest)
BOOST_REQUIRE_EQUAL(distances.n_rows, 3);
}
+// These two tests are only compiled if the user has specified OpenMP to be
+// used.
+#ifdef HAS_OPENMP
+/**
+ * Test: This test verifies that parallel query processing returns correct
+ * results for the bichromatic search.
+ */
+BOOST_AUTO_TEST_CASE(ParallelBichromatic)
+{
+ // kNN and LSH parameters (use LSH default parameters).
+ const int k = 4;
+ const int numTables = 16;
+ const int numProj = 3;
+
+ // Read iris training and testing data as reference and query sets.
+ const string trainSet = "iris_train.csv";
+ const string testSet = "iris_test.csv";
+ arma::mat rdata;
+ arma::mat qdata;
+ data::Load(trainSet, rdata, true);
+ data::Load(testSet, qdata, true);
+
+ // Where to store neighbors and distances
+ arma::Mat<size_t> sequentialNeighbors;
+ arma::Mat<size_t> parallelNeighbors;
+ arma::mat distances;
+
+ // Construct an LSH object. By default, it uses the maximum number of threads
+ LSHSearch<> lshTest(rdata, numProj, numTables); //default parameters
+ lshTest.Search(qdata, k, parallelNeighbors, distances);
+
+ // Now perform same search but with 1 thread
+ lshTest.MaxThreads(1);
+ lshTest.Search(qdata, k, sequentialNeighbors, distances);
+
+ // Require both have same results
+ double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
+ BOOST_REQUIRE_EQUAL(recall, 1);
+}
+
+/**
+ * Test: This test verifies that parallel query processing returns correct
+ * results for the monochromatic search.
+ */
+BOOST_AUTO_TEST_CASE(ParallelMonochromatic)
+{
+ // kNN and LSH parameters.
+ const int k = 4;
+ const int numTables = 16;
+ const int numProj = 3;
+
+ // Read iris training data as reference and query set.
+ const string trainSet = "iris_train.csv";
+ arma::mat rdata;
+ data::Load(trainSet, rdata, true);
+
+ // Where to store neighbors and distances
+ arma::Mat<size_t> sequentialNeighbors;
+ arma::Mat<size_t> parallelNeighbors;
+ arma::mat distances;
+
+ // Construct an LSH object, using maximum number of available threads.
+ LSHSearch<> lshTest(rdata, numProj, numTables);
+ lshTest.Search(k, parallelNeighbors, distances);
+
+ // Now perform same search but with 1 thread.
+ lshTest.MaxThreads(1);
+ lshTest.Search(k, sequentialNeighbors, distances);
+
+ // Require both have same results.
+ double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
+ BOOST_REQUIRE_EQUAL(recall, 1);
+}
+#endif
+
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-git
mailing list