[mlpack-git] master: Makes Perturbation functions members of LSHSearch (2aa839c)
gitdub at mlpack.org
gitdub at mlpack.org
Tue Jun 28 05:52:27 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eaa7182ebed8cce3fd6191dc1f8170546ea297da...812048c7c6bee0b6c8d936677f23bbb5930c6cfc
>---------------------------------------------------------------
commit 2aa839cac0501b3812c40c798f54f49e5902e9ed
Author: Yannis Mentekidis <mentekid at gmail.com>
Date: Tue Jun 28 10:52:27 2016 +0100
Makes Perturbation functions members of LSHSearch
>---------------------------------------------------------------
2aa839cac0501b3812c40c798f54f49e5902e9ed
src/mlpack/methods/lsh/lsh_search.hpp | 38 +++++-
src/mlpack/methods/lsh/lsh_search_impl.hpp | 183 ++++-------------------------
2 files changed, 56 insertions(+), 165 deletions(-)
diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index dce275d..c621710 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -360,9 +360,41 @@ class LSHSearch
* bin.
*/
void GetAdditionalProbingBins(const arma::vec& queryCode,
- const arma::vec& queryCodeNotFloored,
- const size_t T,
- arma::mat& additionalProbingBins) const;
+ const arma::vec& queryCodeNotFloored,
+ const size_t T,
+ arma::mat& additionalProbingBins) const;
+
+ /**
+ * Returns the score of a perturbation vector generated by perturbation set A.
+ * The score of a pertubation set (vector) is the sum of scores of the
+ * participating actions.
+ */
+ double PerturbationScore(const std::vector<bool>& A,
+ const arma::vec& scores) const;
+ /**
+ * Inline function used by GetAdditionalProbingBins. The vector shift operation
+ * replaces the largest element of a vector A with (largest element) + 1.
+ * Returns true if resulting vector is valid, otherwise false.
+ */
+ bool PerturbationShift(std::vector<bool>& A) const;
+
+ /**
+ * Inline function used by GetAdditionalProbingBins. The vector expansion
+ * operation adds the element [1 + (largest_element)] to a vector A, where
+ * largest_element is the largest element of A. Returns true if resulting vector
+ * is valid, otherwise false.
+ */
+
+ bool PerturbationExpand(std::vector<bool>& A) const;
+ /**
+ * Return true if perturbation set A is valid. A perturbation set is invalid if
+ * it contains two (or more) actions for the same dimension or dimensions that
+ * are larger than the queryCode's dimensions.
+ * @param A - a perturbation set
+ */
+ bool PerturbationValid(const std::vector<bool>& A) const;
+
+
//! Reference dataset.
const arma::mat* referenceSet;
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index 546f802..d91aebe 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -333,80 +333,11 @@ void LSHSearch<SortPolicy>::BaseCase(const size_t queryIndex,
referenceIndex, distance);
}
-/*
-//Returns the score of a perturbation vector generated by perturbation set A.
-//The score of a pertubation set (vector) is the sum of scores of the
-//participating actions.
-inline double perturbationScore(const std::vector<size_t>& A,
- const arma::vec& scores)
-{
- double score = 0.0;
- for (size_t i = 0; i < A.size(); ++i)
- score+=scores[A[i]];
- return score;
-}
-
-// Inline function used by GetAdditionalProbingBins. The vector shift operation
-// replaces the largest element of a vector A with (largest element) + 1.
-inline void perturbationShift(std::vector<size_t>& A)
-{
- size_t max_pos = 0;
- size_t max = A[0];
- for (size_t i = 1; i < A.size(); ++i)
- {
- if (A[i] > max)
- {
- max = A[i];
- max_pos = i;
- }
- }
- A[max_pos]++;
-}
-
-// Inline function used by GetAdditionalProbingBins. The vector expansion
-// operation adds the element [1 + (largest_element)] to a vector A, where
-// largest_element is the largest element of A.
-inline void perturbationExpand(std::vector<size_t>& A)
-{
- size_t max = A[0];
- for (size_t i = 1; i < A.size(); ++i)
- if (A[i] > max)
- max = A[i];
- A.push_back(max + 1);
-}
-
-// Return true if perturbation set A is valid. A perturbation set is invalid if
-// it contains two (or more) actions for the same dimension or dimensions that
-// are larger than the queryCode's dimensions.
-inline bool perturbationValid(const std::vector<size_t>& A,
- const size_t numProj)
-{
- // Stack allocation and initialization to 0 (bool check[numProj] = {0}) made
- // some compilers complain, and std::vector might even be compressed (depends
- // on implementation) so this saves some space.
- std::vector<bool> check(numProj);
-
- for (size_t i = 0; i < A.size(); ++i)
- {
- // Check that we only use valid dimensions. If not, vector is not valid.
- if (A[i] >= 2 * numProj)
- return false;
-
- // Check that we only see each dimension once. If not, vector is not valid.
- if (check[A[i] % numProj ] == 0)
- check[A[i] % numProj ] = 1;
- else
- return false;
- }
- return true;
-}
-*/
-
-// Returns the score of a perturbation vector generated by perturbation set A.
-// The score of a pertubation set (vector) is the sum of scores of the
-// participating actions.
-inline double perturbationScore(const std::vector<bool>& A,
- const arma::vec& scores)
+template<typename SortPolicy>
+inline force_inline
+double LSHSearch<SortPolicy>::PerturbationScore(
+ const std::vector<bool>& A,
+ const arma::vec& scores) const
{
double score = 0.0;
for (size_t i = 0; i < A.size(); ++i)
@@ -415,10 +346,9 @@ inline double perturbationScore(const std::vector<bool>& A,
return score;
}
-// Inline function used by GetAdditionalProbingBins. The vector shift operation
-// replaces the largest element of a vector A with (largest element) + 1.
-// Returns true if resulting vector is valid, otherwise false.
-inline bool perturbationShift(std::vector<bool>& A)
+template<typename SortPolicy>
+inline force_inline
+bool LSHSearch<SortPolicy>::PerturbationShift(std::vector<bool>& A) const
{
size_t maxPos = 0;
for (size_t i = 0; i < A.size(); ++i)
@@ -434,11 +364,9 @@ inline bool perturbationShift(std::vector<bool>& A)
return false; // invalid
}
-// Inline function used by GetAdditionalProbingBins. The vector expansion
-// operation adds the element [1 + (largest_element)] to a vector A, where
-// largest_element is the largest element of A. Returns true if resulting vector
-// is valid, otherwise false.
-inline bool perturbationExpand(std::vector<bool>& A)
+template<typename SortPolicy>
+inline force_inline
+bool LSHSearch<SortPolicy>::PerturbationExpand(std::vector<bool>& A) const
{
// Find the last '1' in A
size_t maxPos = 0;
@@ -452,14 +380,12 @@ inline bool perturbationExpand(std::vector<bool>& A)
return true;
}
return false;
-
}
-// Return true if perturbation set A is valid. A perturbation set is invalid if
-// it contains two (or more) actions for the same dimension or dimensions that
-// are larger than the queryCode's dimensions.
-inline bool perturbationValid(const std::vector<bool>& A,
- const size_t numProj)
+template<typename SortPolicy>
+inline force_inline
+bool LSHSearch<SortPolicy>::PerturbationValid(
+ const std::vector<bool>& A) const
{
// Use check to mark dimensions we have seen before in A. If a dimension is
// seen twice (or more), A is not a valid perturbation.
@@ -609,73 +535,6 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
// Transform perturbation set to perturbation vector by setting the
// dimensions specified by the set to queryCode+action (action is {-1, 1}).
- /*
- std::vector<size_t> Ao;
- Ao.push_back(0); // initial perturbation holds smallest score (0 if sorted)
-
- std::vector< std::vector<size_t> > perturbationSets;
- perturbationSets.push_back(Ao); // storage of perturbation sets
-
- std::priority_queue<
- std::pair<double, size_t>, // contents: pairs of (score, index)
- std::vector< // container: vector of pairs
- std::pair<double, size_t>
- >,
- std::greater< std::pair<double, size_t> > // comparator of pairs(compare scores)
- > minHeap; // our minheap
-
- // Start by adding the lowest scoring set to the minheap
- // std::pair<double, size_t> pair0( perturbationScore(Ao, scores), 0 );
- // minHeap.push(pair0);
- minHeap.push( std::make_pair(perturbationScore(Ao, scores), 0) );
-
- // loop invariable: after pvec iterations, additionalProbingBins contains pvec
- // valid codes of the lowest-scoring bins (bins most likely to contain
- // neighbors of the query).
- for (size_t pvec = 0; pvec < T; ++pvec)
- {
- std::vector<size_t> Ai;
- do
- {
- // get the perturbation set corresponding to the minimum score
- Ai = perturbationSets[ minHeap.top().second ];
- minHeap.pop(); // .top() returns, .pop() removes
-
- // modify Ai (shift)
- std::vector<size_t> As = Ai;
- perturbationShift(As);
- if ( perturbationValid(As, numProj) )
- {
- perturbationSets.push_back(As); // add shifted set to sets
- minHeap.push(
- std::make_pair(
- perturbationScore(As, scores),
- perturbationSets.size() - 1)
- );
- }
-
- // modify Ai (expand)
- std::vector<size_t> Ae = Ai;
- perturbationExpand(Ae);
- if ( perturbationValid(Ae, numProj) )
- {
- perturbationSets.push_back(Ae); // add expanded set to sets
- minHeap.push(
- std::make_pair(
- perturbationScore(Ae, scores),
- perturbationSets.size() - 1)
- );
- }
-
- }while (! perturbationValid(Ai, numProj) );//Discard invalid perturbations
-
- // add perturbation vector to probing sequence if valid
- for (size_t i = 0; i < Ai.size(); ++i)
- additionalProbingBins(positions(Ai[i]), pvec) += actions(Ai[i]);
-
- }
- */
-
// Perturbation sets (A) mark with 1 the (score, action, dimension) positions
// included in a given perturbation vector. Other spaces are 0.
std::vector<bool> Ao(2 * numProj);
@@ -693,7 +552,7 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
> minHeap; // our minheap
// Start by adding the lowest scoring set to the minheap.
- minHeap.push( std::make_pair(perturbationScore(Ao, scores), 0) );
+ minHeap.push( std::make_pair(PerturbationScore(Ao, scores), 0) );
// Loop invariable: after pvec iterations, additionalProbingBins contains pvec
// valid codes of the lowest-scoring bins (bins most likely to contain
@@ -709,31 +568,31 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
// Shift operation on Ai (replace max with max+1).
std::vector<bool> As = Ai;
- if ( perturbationShift(As) && perturbationValid(As, numProj) )
+ if ( PerturbationShift(As) && PerturbationValid(As) )
// Don't add invalid sets.
{
perturbationSets.push_back(As); // add shifted set to sets
minHeap.push(
std::make_pair(
- perturbationScore(As, scores),
+ PerturbationScore(As, scores),
perturbationSets.size() - 1)
);
}
// Expand operation on Ai (add max+1 to set).
std::vector<bool> Ae = Ai;
- if ( perturbationExpand(Ae) && perturbationValid(Ae, numProj) )
+ if ( PerturbationExpand(Ae) && PerturbationValid(Ae) )
// Don't add invalid sets.
{
perturbationSets.push_back(Ae); // add expanded set to sets
minHeap.push(
std::make_pair(
- perturbationScore(Ae, scores),
+ PerturbationScore(Ae, scores),
perturbationSets.size() - 1)
);
}
- }while (! perturbationValid(Ai, numProj) );//Discard invalid perturbations
+ }while (! PerturbationValid(Ai) );//Discard invalid perturbations
// Found valid perturbation set Ai. Construct perturbation vector from set.
for (size_t pos = 0; pos < Ai.size(); ++pos)
More information about the mlpack-git
mailing list