[mlpack-git] master: Makes Perturbation functions members of LSHSearch (2aa839c)

gitdub at mlpack.org gitdub at mlpack.org
Tue Jun 28 05:52:27 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eaa7182ebed8cce3fd6191dc1f8170546ea297da...812048c7c6bee0b6c8d936677f23bbb5930c6cfc

>---------------------------------------------------------------

commit 2aa839cac0501b3812c40c798f54f49e5902e9ed
Author: Yannis Mentekidis <mentekid at gmail.com>
Date:   Tue Jun 28 10:52:27 2016 +0100

    Makes Perturbation functions members of LSHSearch


>---------------------------------------------------------------

2aa839cac0501b3812c40c798f54f49e5902e9ed
 src/mlpack/methods/lsh/lsh_search.hpp      |  38 +++++-
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 183 ++++-------------------------
 2 files changed, 56 insertions(+), 165 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index dce275d..c621710 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -360,9 +360,41 @@ class LSHSearch
    * bin.
   */
   void GetAdditionalProbingBins(const arma::vec& queryCode,
-                            const arma::vec& queryCodeNotFloored,
-                            const size_t T,
-                            arma::mat& additionalProbingBins) const;
+                                const arma::vec& queryCodeNotFloored,
+                                const size_t T,
+                                arma::mat& additionalProbingBins) const;
+
+  /**
+   * Returns the score of a perturbation vector generated by perturbation set A.
+   * The score of a pertubation set (vector) is the sum of scores of the
+   * participating actions.
+  */
+  double PerturbationScore(const std::vector<bool>& A,
+                                  const arma::vec& scores) const;
+  /**
+   * Inline function used by GetAdditionalProbingBins. The vector shift operation
+   * replaces the largest element of a vector A with (largest element) + 1.
+   * Returns true if resulting vector is valid, otherwise false.
+  */
+  bool PerturbationShift(std::vector<bool>& A) const;
+
+  /**
+   * Inline function used by GetAdditionalProbingBins. The vector expansion
+   * operation adds the element [1 + (largest_element)] to a vector A, where
+   * largest_element is the largest element of A. Returns true if resulting vector
+   * is valid, otherwise false.
+  */
+
+  bool PerturbationExpand(std::vector<bool>& A) const;
+  /**
+   * Return true if perturbation set A is valid. A perturbation set is invalid if
+   * it contains two (or more) actions for the same dimension or dimensions that
+   * are larger than the queryCode's dimensions.
+   * @param A - a perturbation set
+  */
+  bool PerturbationValid(const std::vector<bool>& A) const;
+
+
 
   //! Reference dataset.
   const arma::mat* referenceSet;
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index 546f802..d91aebe 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -333,80 +333,11 @@ void LSHSearch<SortPolicy>::BaseCase(const size_t queryIndex,
         referenceIndex, distance);
 }
 
-/*
-//Returns the score of a perturbation vector generated by perturbation set A.
-//The score of a pertubation set (vector) is the sum of scores of the
-//participating actions.
-inline double perturbationScore(const std::vector<size_t>& A,
-                                const arma::vec& scores)
-{
-  double score = 0.0;
-  for (size_t i = 0; i < A.size(); ++i)
-    score+=scores[A[i]];
-  return score;
-}
-
-// Inline function used by GetAdditionalProbingBins. The vector shift operation
-// replaces the largest element of a vector A with (largest element) + 1.
-inline void perturbationShift(std::vector<size_t>& A)
-{
-  size_t max_pos = 0;
-  size_t max = A[0];
-  for (size_t i = 1; i < A.size(); ++i)
-  {
-    if (A[i] > max)
-    {
-      max = A[i];
-      max_pos = i;
-    }
-  }
-  A[max_pos]++;
-}
-
-// Inline function used by GetAdditionalProbingBins. The vector expansion
-// operation adds the element [1 + (largest_element)] to a vector A, where
-// largest_element is the largest element of A.
-inline void perturbationExpand(std::vector<size_t>& A)
-{
-  size_t max = A[0];
-  for (size_t i = 1; i < A.size(); ++i)
-    if (A[i] > max)
-      max = A[i];
-  A.push_back(max + 1);
-}
-
-// Return true if perturbation set A is valid. A perturbation set is invalid if
-// it contains two (or more) actions for the same dimension or dimensions that
-// are larger than the queryCode's dimensions.
-inline bool perturbationValid(const std::vector<size_t>& A,
-                              const size_t numProj)
-{
-  // Stack allocation and initialization to 0 (bool check[numProj] = {0}) made
-  // some compilers complain, and std::vector might even be compressed (depends
-  // on implementation) so this saves some space.
-  std::vector<bool> check(numProj);
-
-  for (size_t i = 0; i < A.size(); ++i)
-  {
-    // Check that we only use valid dimensions. If not, vector is not valid.
-    if (A[i] >= 2 * numProj)
-      return false;
-
-    // Check that we only see each dimension once. If not, vector is not valid.
-    if (check[A[i] % numProj ] == 0)
-      check[A[i] % numProj ] = 1;
-    else
-      return false;
-  }
-  return true;
-}
-*/
-
-// Returns the score of a perturbation vector generated by perturbation set A.
-// The score of a pertubation set (vector) is the sum of scores of the
-// participating actions.
-inline double perturbationScore(const std::vector<bool>& A,
-                                const arma::vec& scores)
+template<typename SortPolicy>
+inline force_inline
+double LSHSearch<SortPolicy>::PerturbationScore(
+    const std::vector<bool>& A,
+    const arma::vec& scores) const
 {
   double score = 0.0;
   for (size_t i = 0; i < A.size(); ++i)
@@ -415,10 +346,9 @@ inline double perturbationScore(const std::vector<bool>& A,
   return score;
 }
 
-// Inline function used by GetAdditionalProbingBins. The vector shift operation
-// replaces the largest element of a vector A with (largest element) + 1.
-// Returns true if resulting vector is valid, otherwise false.
-inline bool perturbationShift(std::vector<bool>& A)
+template<typename SortPolicy>
+inline force_inline
+bool LSHSearch<SortPolicy>::PerturbationShift(std::vector<bool>& A) const
 {
   size_t maxPos = 0;
   for (size_t i = 0; i < A.size(); ++i)
@@ -434,11 +364,9 @@ inline bool perturbationShift(std::vector<bool>& A)
   return false; // invalid
 }
 
-// Inline function used by GetAdditionalProbingBins. The vector expansion
-// operation adds the element [1 + (largest_element)] to a vector A, where
-// largest_element is the largest element of A. Returns true if resulting vector
-// is valid, otherwise false.
-inline bool perturbationExpand(std::vector<bool>& A)
+template<typename SortPolicy>
+inline force_inline
+bool LSHSearch<SortPolicy>::PerturbationExpand(std::vector<bool>& A) const
 {
   // Find the last '1' in A
   size_t maxPos = 0;
@@ -452,14 +380,12 @@ inline bool perturbationExpand(std::vector<bool>& A)
     return true;
   }
   return false;
-
 }
 
-// Return true if perturbation set A is valid. A perturbation set is invalid if
-// it contains two (or more) actions for the same dimension or dimensions that
-// are larger than the queryCode's dimensions.
-inline bool perturbationValid(const std::vector<bool>& A,
-                              const size_t numProj)
+template<typename SortPolicy>
+inline force_inline
+bool LSHSearch<SortPolicy>::PerturbationValid(
+    const std::vector<bool>& A) const
 {
   // Use check to mark dimensions we have seen before in A. If a dimension is
   // seen twice (or more), A is not a valid perturbation.
@@ -609,73 +535,6 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
   // Transform perturbation set to perturbation vector by setting the
   // dimensions specified by the set to queryCode+action (action is {-1, 1}).
 
-  /*
-  std::vector<size_t> Ao;
-  Ao.push_back(0); // initial perturbation holds smallest score (0 if sorted)
-
-  std::vector< std::vector<size_t> > perturbationSets;
-  perturbationSets.push_back(Ao); // storage of perturbation sets
-
-  std::priority_queue<
-    std::pair<double, size_t>,        // contents: pairs of (score, index)
-    std::vector<                      // container: vector of pairs
-      std::pair<double, size_t>
-      >,
-    std::greater< std::pair<double, size_t> > // comparator of pairs(compare scores)
-  > minHeap; // our minheap
-
-  // Start by adding the lowest scoring set to the minheap
-  // std::pair<double, size_t> pair0( perturbationScore(Ao, scores), 0 );
-  // minHeap.push(pair0);
-  minHeap.push( std::make_pair(perturbationScore(Ao, scores), 0) );
-
-  // loop invariable: after pvec iterations, additionalProbingBins contains pvec
-  // valid codes of the lowest-scoring bins (bins most likely to contain
-  // neighbors of the query).
-  for (size_t pvec = 0; pvec < T; ++pvec)
-  {
-    std::vector<size_t> Ai;
-    do
-    {
-      // get the perturbation set corresponding to the minimum score
-      Ai = perturbationSets[ minHeap.top().second ];
-      minHeap.pop(); // .top() returns, .pop() removes
-
-      // modify Ai (shift)
-      std::vector<size_t> As = Ai;
-      perturbationShift(As);
-      if ( perturbationValid(As, numProj) )
-      {
-        perturbationSets.push_back(As); // add shifted set to sets
-        minHeap.push( 
-            std::make_pair(
-              perturbationScore(As, scores), 
-              perturbationSets.size() - 1)
-            );
-      }
-
-      // modify Ai (expand)
-      std::vector<size_t> Ae = Ai;
-      perturbationExpand(Ae);
-      if ( perturbationValid(Ae, numProj) )
-      {
-        perturbationSets.push_back(Ae); // add expanded set to sets
-        minHeap.push(
-            std::make_pair(
-              perturbationScore(Ae, scores),
-              perturbationSets.size() - 1)
-            );
-      }
-
-    }while (! perturbationValid(Ai, numProj)  );//Discard invalid perturbations
-
-    // add perturbation vector to probing sequence if valid
-    for (size_t i = 0; i < Ai.size(); ++i)
-      additionalProbingBins(positions(Ai[i]), pvec) += actions(Ai[i]);
-
-  }
-  */
-
   // Perturbation sets (A) mark with 1 the (score, action, dimension) positions
   // included in a given perturbation vector. Other spaces are 0.
   std::vector<bool> Ao(2 * numProj);
@@ -693,7 +552,7 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
   > minHeap; // our minheap
 
   // Start by adding the lowest scoring set to the minheap.
-  minHeap.push( std::make_pair(perturbationScore(Ao, scores), 0) );
+  minHeap.push( std::make_pair(PerturbationScore(Ao, scores), 0) );
 
   // Loop invariable: after pvec iterations, additionalProbingBins contains pvec
   // valid codes of the lowest-scoring bins (bins most likely to contain
@@ -709,31 +568,31 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
 
       // Shift operation on Ai (replace max with max+1).
       std::vector<bool> As = Ai;
-      if ( perturbationShift(As) && perturbationValid(As, numProj) )
+      if ( PerturbationShift(As) && PerturbationValid(As) )
         // Don't add invalid sets.
       {
         perturbationSets.push_back(As); // add shifted set to sets
         minHeap.push( 
             std::make_pair(
-              perturbationScore(As, scores), 
+              PerturbationScore(As, scores), 
               perturbationSets.size() - 1)
             );
       }
 
       // Expand operation on Ai (add max+1 to set).
       std::vector<bool> Ae = Ai;
-      if ( perturbationExpand(Ae) && perturbationValid(Ae, numProj) ) 
+      if ( PerturbationExpand(Ae) && PerturbationValid(Ae)  ) 
         // Don't add invalid sets.
       {
         perturbationSets.push_back(Ae); // add expanded set to sets
         minHeap.push(
             std::make_pair(
-              perturbationScore(Ae, scores),
+              PerturbationScore(Ae, scores),
               perturbationSets.size() - 1)
             );
       }
 
-    }while (! perturbationValid(Ai, numProj)  );//Discard invalid perturbations
+    }while (! PerturbationValid(Ai)  );//Discard invalid perturbations
 
     // Found valid perturbation set Ai. Construct perturbation vector from set.
     for (size_t pos = 0; pos < Ai.size(); ++pos)




More information about the mlpack-git mailing list