[mlpack-git] master: Fixes minor typo that caused multiprobe test to not happen (00377a8)

gitdub at mlpack.org gitdub at mlpack.org
Thu Jun 30 15:11:42 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eaa7182ebed8cce3fd6191dc1f8170546ea297da...812048c7c6bee0b6c8d936677f23bbb5930c6cfc

>---------------------------------------------------------------

commit 00377a8605a0338275a948dcaabc2257762f3d1e
Author: Yannis Mentekidis <mentekid at gmail.com>
Date:   Mon Jun 13 14:32:17 2016 +0300

    Fixes minor typo that caused multiprobe test to not happen


>---------------------------------------------------------------

00377a8605a0338275a948dcaabc2257762f3d1e
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 75 +++++++++++++++++++++++++++++-
 src/mlpack/tests/lsh_test.cpp              |  2 +-
 2 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index dabf4b0..6b061e2 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -474,6 +474,80 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
   positions.rows(numProj, 2 * numProj - 1) =
     arma::linspace< arma::Col<size_t> >(0, numProj - 1, numProj);
 
+  // optimization: no need to create heap for 1 or 2 codes
+  if (T == 1)
+  {
+    // special case: 1 code only
+    // simply find location of minimum score, generate 1 perturbation vector,
+    // and add its code to additionalProbingBins column 0
+
+    // find location and value of smallest element of scores vector
+    double minscore = scores[0];
+    size_t minloc = 0;
+    for (size_t s = 0; s < 2 * numProj; ++s)
+    {
+      if (minscore > scores[s])
+      {
+        minscore = scores[s];
+        minloc = s;
+      }
+    }
+    
+    // add or subtract 1 to dimension corresponding to minimum score
+    additionalProbingBins(positions[minloc], 0) += actions[minloc];
+    
+    // done - only 1 code was needed
+    return;
+  }
+
+  if (T == 2)
+  {
+    // special case: 2 codes only
+    // The second perturbation vector still can't comprise of more than one
+    // change in the bin codes. This is because of the way perturbation vectors
+    // are generated: First we create the one with the smallest score (Ao) and
+    // then we either add 1 extra dimension to it (Ae) or shift it by one (As).
+    // Since As contains the second smallest score, and Ae contains both the
+    // smallest and the second smallest, it's obvious that score(Ae) >
+    // score(As). Therefore the second perturbation vector is ALWAYS the vector
+    // containing only the second-lowest scoring perturbation.
+    
+
+    // First, find location of minimum score, generate 1 perturbation vector,
+    // and add its code to additionalProbingBins column 0, like in T==1.
+
+    // find location and value of smallest element of scores vector
+    double minscore = scores[0];
+    size_t minloc = 0;
+    for (size_t s = 1; s < 2 * numProj; ++s)
+    {
+      if (minscore > scores[s])
+      {
+        minscore = scores[s];
+        minloc = s;
+      }
+    }
+    
+    // add or subtract 1 to dimension corresponding to minimum score
+    additionalProbingBins(positions[minloc], 0) += actions[minloc];
+
+    // Now, find location of second smallest score and generate one more vector.
+    
+    double minscore2 = scores[0];
+    size_t minloc2 = 0;
+    for (size_t s = 0; s < 2 * numProj; ++s) // here we can't start from 0
+    {
+      if ( minscore2 > scores[s] && scores[s] > minscore) //second smallest
+      {
+        minscore2 = scores[s];
+        minloc2 = s;
+      }
+    }
+
+    // add or subtract 1 to second smallest score
+    additionalProbingBins(positions[minloc2], 1) += actions[minloc2];
+  }
+
   // sort everything in increasing order
   arma::Col<long long unsigned int> sortidx = arma::sort_index(scores);
   scores = scores(sortidx);
@@ -641,7 +715,6 @@ void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
     hashMat.set_size(1, numTablesToSearch);
     hashMat.row(0) = hashVec;
   }
-  std::cout<<hashMat<<std::endl;
 
   // Count number of points hashed in the same bucket as the query
   size_t maxNumPoints = 0;
diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index aefe3b5..c144f3d 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -532,7 +532,7 @@ BOOST_AUTO_TEST_CASE(MultiprobeTest)
   
   bool foundIncrease = 0;
 
-  for (size_t rep; rep < repetitions; ++rep)
+  for (size_t rep = 0; rep < repetitions; ++rep)
   {
     // train a model
     LSHSearch<> multiprobeTest(




More information about the mlpack-git mailing list