[mlpack-git] master: Adds parallelization to bichromatic search (5db5423)

gitdub at mlpack.org gitdub at mlpack.org
Fri Jul 8 14:36:43 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/34cf8d94f79c9a72ff4199676033b060cd039fcd...425324bf7fb7c86c85d10a909d8a59d4f69b7164

>---------------------------------------------------------------

commit 5db5423ae584814d66b4d3da84d4031a205d1389
Author: Yannis Mentekidis <mentekid at gmail.com>
Date:   Sun Jun 19 14:56:09 2016 +0300

    Adds parallelization to bichromatic search


>---------------------------------------------------------------

5db5423ae584814d66b4d3da84d4031a205d1389
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index 8ac794c..571e548 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -489,9 +489,26 @@ void LSHSearch<SortPolicy>::Search(const arma::mat& querySet,
 
   Timer::Start("computing_neighbors");
 
-  // Go through every query point sequentially.
+  // Parallelization allows us to process more than one query at a time. To
+  // control workload and thread access, we use numThreadsUsed and maxThreads to
+  // make sure we only use as many threads as the user specified.
+  #pragma omp parallel for \
+    if (numThreadsUsed <= maxThreads) \
+    num_threads (maxThreads-numThreadsUsed)\
+    shared(avgIndicesReturned, resultingNeighbors, distances) \
+    schedule(dynamic)
+
+  // Go through every query point.
   for (size_t i = 0; i < querySet.n_cols; i++)
   {
+    // Master thread updates the number of threads used
+    if (i == 0 && omp_get_thread_num() == 0)
+    {
+      numThreadsUsed+=omp_get_num_threads();
+      Log::Info 
+        << "Using "<< numThreadsUsed << " threads to process queries." << endl;
+    }
+
     // Hash every query into every hash table and eventually into the
     // 'secondHashTable' to obtain the neighbor candidates.
     arma::uvec refIndices;
@@ -499,6 +516,8 @@ void LSHSearch<SortPolicy>::Search(const arma::mat& querySet,
 
     // An informative book-keeping for the number of neighbor candidates
     // returned on average.
+    // Make atomic to avoid race conditions when multiple threads are running
+    #pragma omp atomic
     avgIndicesReturned += refIndices.n_elem;
 
     // Sequentially go through all the candidates and save the best 'k'
@@ -507,6 +526,8 @@ void LSHSearch<SortPolicy>::Search(const arma::mat& querySet,
       BaseCase(i, (size_t) refIndices[j], querySet, resultingNeighbors,
           distances);
   }
+  // parallel region over, reset number of threads to 1
+  numThreadsUsed = omp_get_num_threads();
 
   Timer::Stop("computing_neighbors");
 
@@ -549,7 +570,7 @@ Search(const size_t k,
     // Master thread updates the number of threads used
     if (i == 0 && omp_get_thread_num() == 0)
     {
-      numThreadsUsed+=omp_get_num_threads(); //
+      numThreadsUsed+=omp_get_num_threads();
       Log::Info 
         << "Using "<< numThreadsUsed << " threads to process queries." << endl;
     }




More information about the mlpack-git mailing list