[mlpack-git] master: Changes BaseCase to include the loop over candidate set. Places parallelization tests into #ifdef block (b02e2f3)

gitdub at mlpack.org gitdub at mlpack.org
Wed Jul 6 11:56:44 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/34cf8d94f79c9a72ff4199676033b060cd039fcd...425324bf7fb7c86c85d10a909d8a59d4f69b7164

>---------------------------------------------------------------

commit b02e2f341aa800b3397bf5651f99af9b88645d88
Author: Yannis Mentekidis <mentekid at gmail.com>
Date:   Wed Jul 6 16:56:44 2016 +0100

    Changes BaseCase to include the loop over candidate set. Places parallelization tests into #ifdef block


>---------------------------------------------------------------

b02e2f341aa800b3397bf5651f99af9b88645d88
 CMakeLists.txt                             |   4 +-
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 104 ++--------------
 src/mlpack/tests/lsh_test.cpp              | 189 ++++++++++++-----------------
 3 files changed, 87 insertions(+), 210 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2a3b451..df949ea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,10 +24,10 @@ option(ARMA_EXTRA_DEBUG "Compile with extra Armadillo debugging symbols." OFF)
 option(MATLAB_BINDINGS "Compile MATLAB bindings if MATLAB is found." OFF)
 option(TEST_VERBOSE "Run test cases with verbose output." OFF)
 option(BUILD_TESTS "Build tests." ON)
-option(BUILD_CLI_EXECUTABLES "Build command-line executables" ON)
+option(BUILD_CLI_EXECUTABLES "Build command-line executables." ON)
 option(BUILD_SHARED_LIBS
     "Compile shared libraries (if OFF, static libraries are compiled)." ON)
-option(HAS_OPENMP "Use OpenMP for parallel execution, if available" ON)
+option(HAS_OPENMP "Use OpenMP for parallel execution, if available." ON)
 
 enable_testing()
 
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index 8d3cdb1..75a1201 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -217,13 +217,14 @@ void LSHSearch<SortPolicy>::Train(const arma::mat& referenceSet,
       double shs = (double) secondHashSize; // Convenience cast.
       if (unmodVector[j] >= 0.0)
       {
-        secondHashVectors(i, j) = size_t(fmod(unmodVector[j], shs));
+        const size_t key = size_t(fmod(unmodVector[j], shs));
+        secondHashVectors(i, j) = key;
       }
       else
       {
         const double mod = fmod(-unmodVector[j], shs);
-        secondHashVectors(i, j) = (mod < 1.0) ? 0 : secondHashSize -
-            size_t(mod);
+        const size_t key = (mod < 1.0) ? 0 : secondHashSize - size_t(mod);
+        secondHashVectors(i, j) = key;
       }
     }
   }
@@ -305,74 +306,6 @@ void LSHSearch<SortPolicy>::InsertNeighbor(arma::mat& distances,
   neighbors(pos, queryIndex) = neighbor;
 }
 
-/*
-// Base case where the query set is the reference set.  (So, we can't return
-// ourselves as the nearest neighbor.)
-template<typename SortPolicy>
-inline force_inline
-void LSHSearch<SortPolicy>::BaseCase(const size_t queryIndex,
-                                     const size_t referenceIndex,
-                                     arma::Mat<size_t>& neighbors,
-                                     arma::mat& distances) const
-{
-  // If the points are the same, we can't continue.
-  if (queryIndex == referenceIndex)
-    return;
-
-  const double distance = metric::EuclideanDistance::Evaluate(
-      referenceSet->unsafe_col(queryIndex),
-      referenceSet->unsafe_col(referenceIndex));
-
-  // If this distance is better than any of the current candidates, the
-  // SortDistance() function will give us the position to insert it into.
-  arma::vec queryDist = distances.unsafe_col(queryIndex);
-  arma::Col<size_t> queryIndices = neighbors.unsafe_col(queryIndex);
-  size_t insertPosition = SortPolicy::SortDistance(queryDist, queryIndices,
-      distance);
-
-  // SortDistance() returns (size_t() - 1) if we shouldn't add it.
-  if (insertPosition != (size_t() - 1))
-  {
-    #pragma omp critical
-    {
-      InsertNeighbor(distances, neighbors, queryIndex, insertPosition,
-          referenceIndex, distance);
-    }
-  }
-}
-
-// Base case for bichromatic search.
-template<typename SortPolicy>
-inline force_inline
-void LSHSearch<SortPolicy>::BaseCase(const size_t queryIndex,
-                                     const size_t referenceIndex,
-                                     const arma::mat& querySet,
-                                     arma::Mat<size_t>& neighbors,
-                                     arma::mat& distances) const
-{
-  const double distance = metric::EuclideanDistance::Evaluate(
-      querySet.unsafe_col(queryIndex),
-      referenceSet->unsafe_col(referenceIndex));
-
-  // If this distance is better than any of the current candidates, the
-  // SortDistance() function will give us the position to insert it into.
-  arma::vec queryDist = distances.unsafe_col(queryIndex);
-  arma::Col<size_t> queryIndices = neighbors.unsafe_col(queryIndex);
-  size_t insertPosition = SortPolicy::SortDistance(queryDist, queryIndices,
-      distance);
-
-  // SortDistance() returns (size_t() - 1) if we shouldn't add it.
-  if (insertPosition != (size_t() - 1))
-  {  
-    #pragma omp critical
-    {
-      InsertNeighbor(distances, neighbors, queryIndex, insertPosition,
-          referenceIndex, distance);
-    }
-  }
-}
-*/
-
 // Base case where the query set is the reference set.  (So, we can't return
 // ourselves as the nearest neighbor.)
 template<typename SortPolicy>
@@ -382,6 +315,7 @@ void LSHSearch<SortPolicy>::BaseCase(const size_t queryIndex,
                                      arma::Mat<size_t>& neighbors,
                                      arma::mat& distances) const
 {
+
   for (size_t j = 0; j < referenceIndices.n_elem; ++j)
   {
     const size_t referenceIndex = referenceIndices[j];
@@ -432,8 +366,11 @@ void LSHSearch<SortPolicy>::BaseCase(const size_t queryIndex,
 
     // SortDistance() returns (size_t() - 1) if we shouldn't add it.
     if (insertPosition != (size_t() - 1))
+      #pragma omp critical
       InsertNeighbor(distances, neighbors, queryIndex, insertPosition,
           referenceIndex, distance);
+
+      #pragma omp flush(distances, neighbors)
   }
 }
 template<typename SortPolicy>
@@ -907,7 +844,7 @@ void LSHSearch<SortPolicy>::Search(const arma::mat& querySet,
 
   // Parallelization to process more than one query at a time.
   // use as many threads possible but not more than allowed number
-  size_t numThreadsUsed = std::min( (arma::uword) maxThreads, querySet.n_cols );
+  size_t numThreadsUsed = maxThreads;
   #pragma omp parallel for \
     num_threads ( numThreadsUsed )\
     shared(avgIndicesReturned, resultingNeighbors, distances) \
@@ -931,16 +868,6 @@ void LSHSearch<SortPolicy>::Search(const arma::mat& querySet,
 
     // Sequentially go through all the candidates and save the best 'k'
     // candidates.
-    /*
-    numTheadsUsed = std::min( (arma::uword) maxThreads, refIndices.n_elem);
-    #pragma omp parallel for\
-    num_threads( numThreadsUsed )\
-    shared(refIndices, resultingNeighbors, distances, querySet)\
-    schedule(dynamic)
-    for (size_t j = 0; j < refIndices.n_elem; j++)
-      BaseCase(i, (size_t) refIndices[j], querySet, resultingNeighbors,
-          distances);
-    */
     BaseCase(i, refIndices, querySet, resultingNeighbors, distances);
   }
 
@@ -989,7 +916,7 @@ Search(const size_t k,
 
   // Parallelization to process more than one query at a time.
   // use as many threads possible but not more than allowed number
-  size_t numThreadsUsed = std::min( (arma::uword) maxThreads, referenceSet->n_cols );
+  size_t numThreadsUsed = maxThreads;
   #pragma omp parallel for \
     num_threads ( numThreadsUsed )\
     shared(avgIndicesReturned, resultingNeighbors, distances) \
@@ -1012,18 +939,7 @@ Search(const size_t k,
 
     // Sequentially go through all the candidates and save the best 'k'
     // candidates.
-
-    /*
-    numTheadsUsed = std::min( (arma::uword) maxThreads, refIndices.n_elem);
-    #pragma omp parallel for\
-    num_threads( numThreadsUsed )\
-    shared(refIndices, resultingNeighbors, distances)\
-    schedule(dynamic)
-    for (size_t j = 0; j < refIndices.n_elem; j++)
-      BaseCase(i, (size_t) refIndices[j], resultingNeighbors, distances);
-    */
     BaseCase(i, refIndices, resultingNeighbors, distances);
-
   }
 
   Timer::Stop("computing_neighbors");
diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index 8aac238..179b3b1 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -471,7 +471,6 @@ BOOST_AUTO_TEST_CASE(DeterministicNoMerge)
   }
 }
 
-
 /**
  * Test: Create an LSHSearch object and use an increasing number of probes to
  * search for points. Require that recall for the same object doesn't decrease
@@ -612,119 +611,6 @@ BOOST_AUTO_TEST_CASE(MultiprobeDeterministicTest)
       neighbors.col(0) >= N / 4 && neighbors.col(0) < N / 2));
 }
 
-
-/**
- * Test: This test verifies that parallel query processing returns correct
- * results for the bichromatic search.
- */
-BOOST_AUTO_TEST_CASE(ParallelBichromatic)
-{
-  // kNN and LSH parameters (use LSH default parameters).
-  const int k = 4;
-  const int numTables = 16;
-  const int numProj = 3;
-
-  // Read iris training and testing data as reference and query sets.
-  const string trainSet = "iris_train.csv";
-  const string testSet = "iris_test.csv";
-  arma::mat rdata;
-  arma::mat qdata;
-  data::Load(trainSet, rdata, true);
-  data::Load(testSet, qdata, true);
-
-  // Where to store neighbors and distances
-  arma::Mat<size_t> sequentialNeighbors;
-  arma::Mat<size_t> parallelNeighbors;
-  arma::mat distances;
-
-  // Construct an LSH object. By default, it uses the maximum number of threads
-  LSHSearch<> lshTest(rdata, numProj, numTables); //default parameters
-  lshTest.Search(qdata, k, parallelNeighbors, distances);
-
-  // Now perform same search but with 1 thread
-  lshTest.MaxThreads(1);
-  lshTest.Search(qdata, k, sequentialNeighbors, distances);
-
-  // Require both have same results
-  double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
-  BOOST_REQUIRE_EQUAL(recall, 1);
-}
-
-/**
- * Test: This test verifies that parallel query processing returns correct
- * results for the monochromatic search.
- */
-BOOST_AUTO_TEST_CASE(ParallelMonochromatic)
-{
-  // kNN and LSH parameters.
-  const int k = 4;
-  const int numTables = 16;
-  const int numProj = 3;
-
-  // Read iris training data as reference and query set.
-  const string trainSet = "iris_train.csv";
-  arma::mat rdata;
-  data::Load(trainSet, rdata, true);
-
-  // Where to store neighbors and distances
-  arma::Mat<size_t> sequentialNeighbors;
-  arma::Mat<size_t> parallelNeighbors;
-  arma::mat distances;
-
-  // Construct an LSH object, using maximum number of available threads.
-  LSHSearch<> lshTest(rdata, numProj, numTables);
-  lshTest.Search(k, parallelNeighbors, distances);
-
-  // Now perform same search but with 1 thread.
-  lshTest.MaxThreads(1);
-  lshTest.Search(k, sequentialNeighbors, distances);
-
-  // Require both have same results.
-  double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
-  BOOST_REQUIRE_EQUAL(recall, 1);
-}
-
-/**
- * Test: This test verifies that processing a query in parallel returns the same
- * results with processing it sequentially. 
- * Requires OMP_NESTED environment variable to be set to TRUE. To set it,
- * execute:
- * ```
- * export OMP_NESTED=TRUE
- * ```
- */
-BOOST_AUTO_TEST_CASE(ParallelSingleQuery)
-{
-  // kNN and LSH parameters.
-  const int k = 4;
-  const int numTables = 16;
-  const int numProj = 3;
-
-  // Read iris training data as reference and query set.
-  const string trainSet = "iris_train.csv";
-  arma::mat rdata;
-  data::Load(trainSet, rdata, true);
-
-  arma::mat qdata = rdata.col(0); // Only 1 query.
-
-  // Where to store neighbors and distances.
-  arma::Mat<size_t> sequentialNeighbors;
-  arma::Mat<size_t> parallelNeighbors;
-  arma::mat distances;
-
-  // Construct an LSH object. By default, maximum number of threads are used.
-  LSHSearch<> lshTest(rdata, numProj, numTables);
-  lshTest.Search(qdata, k, parallelNeighbors, distances);
-
-  // Now perform same search but with 1 thread.
-  lshTest.MaxThreads(1);
-  lshTest.Search(qdata, k, sequentialNeighbors, distances);
-
-  // Require both have same results.
-  double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
-  BOOST_REQUIRE_EQUAL(recall, 1);
-}
-
 BOOST_AUTO_TEST_CASE(LSHTrainTest)
 {
   // This is a not very good test that simply checks that the re-trained LSH
@@ -866,4 +752,79 @@ BOOST_AUTO_TEST_CASE(EmptyConstructorTest)
   BOOST_REQUIRE_EQUAL(distances.n_rows, 3);
 }
 
+// These two tests are only compiled if the user has specified OpenMP to be
+// used.
+#ifdef HAS_OPENMP
+/**
+ * Test: This test verifies that parallel query processing returns correct
+ * results for the bichromatic search.
+ */
+BOOST_AUTO_TEST_CASE(ParallelBichromatic)
+{
+  // kNN and LSH parameters (use LSH default parameters).
+  const int k = 4;
+  const int numTables = 16;
+  const int numProj = 3;
+
+  // Read iris training and testing data as reference and query sets.
+  const string trainSet = "iris_train.csv";
+  const string testSet = "iris_test.csv";
+  arma::mat rdata;
+  arma::mat qdata;
+  data::Load(trainSet, rdata, true);
+  data::Load(testSet, qdata, true);
+
+  // Where to store neighbors and distances
+  arma::Mat<size_t> sequentialNeighbors;
+  arma::Mat<size_t> parallelNeighbors;
+  arma::mat distances;
+
+  // Construct an LSH object. By default, it uses the maximum number of threads
+  LSHSearch<> lshTest(rdata, numProj, numTables); //default parameters
+  lshTest.Search(qdata, k, parallelNeighbors, distances);
+
+  // Now perform same search but with 1 thread
+  lshTest.MaxThreads(1);
+  lshTest.Search(qdata, k, sequentialNeighbors, distances);
+
+  // Require both have same results
+  double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
+  BOOST_REQUIRE_EQUAL(recall, 1);
+}
+
+/**
+ * Test: This test verifies that parallel query processing returns correct
+ * results for the monochromatic search.
+ */
+BOOST_AUTO_TEST_CASE(ParallelMonochromatic)
+{
+  // kNN and LSH parameters.
+  const int k = 4;
+  const int numTables = 16;
+  const int numProj = 3;
+
+  // Read iris training data as reference and query set.
+  const string trainSet = "iris_train.csv";
+  arma::mat rdata;
+  data::Load(trainSet, rdata, true);
+
+  // Where to store neighbors and distances
+  arma::Mat<size_t> sequentialNeighbors;
+  arma::Mat<size_t> parallelNeighbors;
+  arma::mat distances;
+
+  // Construct an LSH object, using maximum number of available threads.
+  LSHSearch<> lshTest(rdata, numProj, numTables);
+  lshTest.Search(k, parallelNeighbors, distances);
+
+  // Now perform same search but with 1 thread.
+  lshTest.MaxThreads(1);
+  lshTest.Search(k, sequentialNeighbors, distances);
+
+  // Require both have same results.
+  double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
+  BOOST_REQUIRE_EQUAL(recall, 1);
+}
+#endif
+
 BOOST_AUTO_TEST_SUITE_END();




More information about the mlpack-git mailing list