[mlpack-git] master: Fix error with casting negative numbers to size_t. (e6bc4b4)

gitdub at mlpack.org gitdub at mlpack.org
Tue Jun 28 18:58:22 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/8e740b02eb97c874bbc9b141d9928644bdba7c6b...e6bc4b41704e546a7495fcca90db7cd0919ca189

>---------------------------------------------------------------

commit e6bc4b41704e546a7495fcca90db7cd0919ca189
Author: Ryan Curtin <ryan at ratml.org>
Date:   Tue Jun 28 18:57:48 2016 -0400

    Fix error with casting negative numbers to size_t.


>---------------------------------------------------------------

e6bc4b41704e546a7495fcca90db7cd0919ca189
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index 149beab..64ad80a 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -166,7 +166,8 @@ void LSHSearch<SortPolicy>::Train(const arma::mat& referenceSet,
   }
 
   // We will store the second hash vectors in this matrix; the second hash
-  // vector for table i will be held in row i.
+  // vector for table i will be held in row i.  Negative hash values must be
+  // wrapped into [0, secondHashSize) before being stored as size_t.
   arma::Mat<size_t> secondHashVectors(numTables, referenceSet.n_cols);
 
   for (size_t i = 0; i < numTables; i++)
@@ -189,15 +190,20 @@ void LSHSearch<SortPolicy>::Train(const arma::mat& referenceSet,
     hashMat /= hashWidth;
 
     // Step V: Putting the points in the 'secondHashTable' by hashing the key.
-    // Now we hash every key, point ID to its corresponding bucket.
-    secondHashVectors.row(i) = arma::conv_to<arma::Row<size_t>>::from(
-        secondHashWeights.t() * arma::floor(hashMat));
+    // Now we hash every key, point ID to its corresponding bucket.  We must
+    // also normalize the hashes to the range [0, secondHashSize).
+    arma::rowvec unmodVector = secondHashWeights.t() * arma::floor(hashMat);
+    for (size_t j = 0; j < secondHashVectors.n_cols; ++j)
+    {
+      double shs = (double) secondHashSize; // Convenience cast.
+      if (unmodVector[j] >= 0.0)
+        secondHashVectors[j] = size_t(fmod(unmodVector[j], shs));
+      else
+        secondHashVectors[j] = secondHashSize -
+            size_t(fmod(-unmodVector[j], shs));
+    }
   }
 
-  // Normalize hashes (take modulus with secondHashSize).
-  secondHashVectors.transform([secondHashSize](size_t val)
-      { return val % secondHashSize; });
-
   // Now, using the hash vectors for each table, count the number of rows we
   // have in the second hash table.
   arma::Row<size_t> secondHashBinCounts(secondHashSize, arma::fill::zeros);




More information about the mlpack-git mailing list