[mlpack-git] master: Adds parallelism option to CMakeFiles, removes most omp.h dependence (a60ff91)
gitdub at mlpack.org
gitdub at mlpack.org
Mon Jun 27 05:39:29 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/34cf8d94f79c9a72ff4199676033b060cd039fcd...425324bf7fb7c86c85d10a909d8a59d4f69b7164
>---------------------------------------------------------------
commit a60ff919012d6c76f5ed5155558f87b8c07b7e1b
Author: Yannis Mentekidis <mentekid at gmail.com>
Date: Mon Jun 27 10:39:29 2016 +0100
Adds parallelism option to CMakeFiles, removes most omp.h dependence
>---------------------------------------------------------------
a60ff919012d6c76f5ed5155558f87b8c07b7e1b
CMakeLists.txt | 12 +++-
src/mlpack/core.hpp | 5 ++
src/mlpack/methods/lsh/lsh_search.hpp | 2 +-
src/mlpack/methods/lsh/lsh_search_impl.hpp | 97 +++++++++++++++++++-----------
src/mlpack/tests/lsh_test.cpp | 4 +-
5 files changed, 79 insertions(+), 41 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b5ce3bc..855212e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,15 +18,16 @@ endif()
# First, define all the compilation options.
# We default to debugging mode for developers.
-option(DEBUG "Compile with debugging information" ON)
-option(PROFILE "Compile with profiling information" ON)
+option(DEBUG "Compile with debugging information." ON)
+option(PROFILE "Compile with profiling information." ON)
option(ARMA_EXTRA_DEBUG "Compile with extra Armadillo debugging symbols." OFF)
option(MATLAB_BINDINGS "Compile MATLAB bindings if MATLAB is found." OFF)
option(TEST_VERBOSE "Run test cases with verbose output." OFF)
option(BUILD_TESTS "Build tests." ON)
option(BUILD_CLI_EXECUTABLES "Build command-line executables" ON)
option(BUILD_SHARED_LIBS
- "Compile shared libraries (if OFF, static libraries are compiled)" ON)
+ "Compile shared libraries (if OFF, static libraries are compiled)." ON)
+option(HAS_OPENMP "Use OpenMP for parallel execution." OFF)
enable_testing()
@@ -117,6 +118,11 @@ if(ARMA_EXTRA_DEBUG)
add_definitions(-DARMA_EXTRA_DEBUG)
endif()
+# If the user has an OpenMP-enabled compiler, turn OpenMP on
+if (HAS_OPENMP)
+ add_definitions(-DHAS_OPENMP)
+endif()
+
# Now, find the libraries we need to compile against. Several variables can be
# set to manually specify the directory in which each of these libraries
# resides.
diff --git a/src/mlpack/core.hpp b/src/mlpack/core.hpp
index 6f5c181..a3a6b04 100644
--- a/src/mlpack/core.hpp
+++ b/src/mlpack/core.hpp
@@ -232,6 +232,11 @@
#include <mlpack/core/kernels/spherical_kernel.hpp>
#include <mlpack/core/kernels/triangular_kernel.hpp>
+// Use OpenMP if compiled with -DHAS_OPENMP.
+#ifdef HAS_OPENMP
+ #include <omp.h>
+#endif
+
// Use Armadillo's C++ version detection.
#ifdef ARMA_USE_CXX11
#define MLPACK_USE_CX11
diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index e1767cb..799ee21 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -270,7 +270,7 @@ class LSHSearch
template<typename VecType>
void ReturnIndicesFromTable(const VecType& queryPoint,
arma::uvec& referenceIndices,
- size_t numTablesToSearch) const;
+ size_t numTablesToSearch);
/**
* This is a helper function that computes the distance of the query to the
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index d780de3..a81fa55 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -14,6 +14,16 @@ using std::cout; using std::endl; //TODO: remove
namespace mlpack {
namespace neighbor {
+// Simple small function to set threads to 1 if OpenMP is not used
+inline size_t DefineMaxThreads()
+{
+ #ifdef _OPENMP
+ return omp_get_max_threads();
+ #else
+ return 1;
+ #endif
+}
+
// Construct the object with random tables
template<typename SortPolicy>
LSHSearch<SortPolicy>::
@@ -31,7 +41,7 @@ LSHSearch(const arma::mat& referenceSet,
secondHashSize(secondHashSize),
bucketSize(bucketSize),
distanceEvaluations(0),
- maxThreads(omp_get_max_threads()),
+ maxThreads(DefineMaxThreads()),
numThreadsUsed(1)
{
// Pass work to training function.
@@ -344,7 +354,7 @@ template<typename VecType>
void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
const VecType& queryPoint,
arma::uvec& referenceIndices,
- size_t numTablesToSearch) const
+ size_t numTablesToSearch)
{
// Decide on the number of tables to look into.
if (numTablesToSearch == 0) // If no user input is given, search all.
@@ -406,18 +416,37 @@ void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
arma::Col<size_t> refPointsConsidered;
refPointsConsidered.zeros(referenceSet->n_cols);
- for (size_t i = 0; i < hashVec.n_elem; ++i)
+ // Define the number of threads used to process this.
+ size_t numThreadsUsed = std::min(maxThreads, numTablesToSearch);
+
+ // Parallelization: By default nested parallelism is off, so this won't be
+ // parallel. The user might turn nested parallelism on if (for example) they
+ // have a query-by-query processing scheme and so processing more than one
+ // query at the same time doesn't make sense for them.
+
+ #pragma omp parallel for \
+ num_threads (numThreadsUsed) \
+ shared (hashVec, refPointsConsidered) \
+ schedule(dynamic)
+ for (size_t i = 0; i < numTablesToSearch; ++i)
{
+
const size_t hashInd = (size_t) hashVec[i];
const size_t tableRow = bucketRowInHashTable[hashInd];
// Pick the indices in the bucket corresponding to 'hashInd'.
if (tableRow != secondHashSize)
+ {
for (size_t j = 0; j < bucketContentSize[tableRow]; j++)
+ {
+ #pragma omp atomic
refPointsConsidered[secondHashTable[tableRow](j)]++;
+ }
+ }
}
// Only keep reference points found in at least one bucket.
+ // TODO: maybe write parallel implementation of this?
referenceIndices = arma::find(refPointsConsidered > 0);
return;
}
@@ -431,6 +460,19 @@ void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
// Retrieve candidates.
size_t start = 0;
+
+ // Define the number of threads used to process this.
+ size_t numThreadsUsed = std::min(maxThreads, numTablesToSearch);
+
+ // Parallelization: By default nested parallelism is off, so this won't be
+ // parallel. The user might turn nested parallelism on if (for example) they
+ // have a query-by-query processing scheme and so processing more than one
+ // query at the same time doesn't make sense for them.
+
+ #pragma omp parallel for \
+ num_threads (numThreadsUsed) \
+ shared (hashVec, refPointsConsideredSmall, start) \
+ schedule(dynamic)
for (size_t i = 0; i < numTablesToSearch; ++i) // For all tables
{
const size_t hashInd = (size_t) hashVec[i]; // Find the query's bucket.
@@ -438,11 +480,19 @@ void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
// Store all secondHashTable points in the candidates set.
if (tableRow != secondHashSize)
+ {
for (size_t j = 0; j < bucketContentSize[tableRow]; ++j)
- refPointsConsideredSmall(start++) = secondHashTable[tableRow][j];
+ {
+ #pragma omp critical
+ {
+ refPointsConsideredSmall(start++) = secondHashTable[tableRow][j];
+ }
+ }
+ }
}
// Only keep unique candidates.
+ // TODO: again main bottleneck is here. Parallelize?
referenceIndices = arma::unique(refPointsConsideredSmall);
return;
}
@@ -489,25 +539,16 @@ void LSHSearch<SortPolicy>::Search(const arma::mat& querySet,
Timer::Start("computing_neighbors");
- // Parallelization allows us to process more than one query at a time. To
- // control workload and thread access, we use numThreadsUsed and maxThreads to
- // make sure we only use as many threads as the user specified.
+ // Parallelization to process more than one query at a time.
+ // use as many threads possible but not more than allowed number
+ size_t numThreadsUsed = std::min( (arma::uword) maxThreads, querySet.n_cols );
#pragma omp parallel for \
- if (numThreadsUsed <= maxThreads) \
- num_threads (maxThreads-numThreadsUsed)\
+ num_threads ( numThreadsUsed )\
shared(avgIndicesReturned, resultingNeighbors, distances) \
schedule(dynamic)
-
// Go through every query point.
for (size_t i = 0; i < querySet.n_cols; i++)
{
- // Master thread updates the number of threads used
- if (i == 0 && omp_get_thread_num() == 0)
- {
- numThreadsUsed+=omp_get_num_threads();
- Log::Info
- << "Using "<< numThreadsUsed << " threads to process queries." << endl;
- }
// Hash every query into every hash table and eventually into the
// 'secondHashTable' to obtain the neighbor candidates.
@@ -526,8 +567,6 @@ void LSHSearch<SortPolicy>::Search(const arma::mat& querySet,
BaseCase(i, (size_t) refIndices[j], querySet, resultingNeighbors,
distances);
}
- // parallel region over, reset number of threads to 1
- numThreadsUsed = omp_get_num_threads();
Timer::Stop("computing_neighbors");
@@ -556,24 +595,16 @@ Search(const size_t k,
Timer::Start("computing_neighbors");
- // Parallelization allows us to process more than one query at a time. To
- // control workload and thread access, we use numThreadsUsed and maxThreads to
- // make sure we only use as many threads as the user specified.
+ // Parallelization to process more than one query at a time.
+ // use as many threads possible but not more than allowed number
+ size_t numThreadsUsed = std::min( (arma::uword) maxThreads, referenceSet->n_cols );
#pragma omp parallel for \
- if (numThreadsUsed <= maxThreads) \
- num_threads (maxThreads-numThreadsUsed)\
+ num_threads ( numThreadsUsed )\
shared(avgIndicesReturned, resultingNeighbors, distances) \
schedule(dynamic)
// Go through every query point.
for (size_t i = 0; i < referenceSet->n_cols; i++)
{
- // Master thread updates the number of threads used
- if (i == 0 && omp_get_thread_num() == 0)
- {
- numThreadsUsed+=omp_get_num_threads();
- Log::Info
- << "Using "<< numThreadsUsed << " threads to process queries." << endl;
- }
// Hash every query into every hash table and eventually into the
// 'secondHashTable' to obtain the neighbor candidates.
arma::uvec refIndices;
@@ -592,10 +623,6 @@ Search(const size_t k,
}
- // parallel region over, reset number of threads to 1
- numThreadsUsed = omp_get_num_threads();
-
-
Timer::Stop("computing_neighbors");
distanceEvaluations += avgIndicesReturned;
diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index 52ef1c9..ec2e394 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -499,7 +499,7 @@ BOOST_AUTO_TEST_CASE(ParallelBichromatic)
lshTest.Search(qdata, k, sequentialNeighbors, distances);
// Require both have same results
- double recall = ComputeRecall(sequentialNeighbors, parallelNeighbors);
+ double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
BOOST_REQUIRE_EQUAL(recall, 1);
}
@@ -533,7 +533,7 @@ BOOST_AUTO_TEST_CASE(ParallelMonochromatic)
lshTest.Search(k, sequentialNeighbors, distances);
// Require both have same results
- double recall = ComputeRecall(sequentialNeighbors, parallelNeighbors);
+ double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors);
BOOST_REQUIRE_EQUAL(recall, 1);
}
More information about the mlpack-git
mailing list