[mlpack-git] master: Fixes a lot of style issues (cc6a5c2)

gitdub at mlpack.org gitdub at mlpack.org
Tue Jun 28 06:47:32 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eaa7182ebed8cce3fd6191dc1f8170546ea297da...812048c7c6bee0b6c8d936677f23bbb5930c6cfc

>---------------------------------------------------------------

commit cc6a5c29263e19143e1147f5fa7851cb8a0cab21
Author: Yannis Mentekidis <mentekid at gmail.com>
Date:   Tue Jun 28 11:47:32 2016 +0100

    Fixes a lot of style issues


>---------------------------------------------------------------

cc6a5c29263e19143e1147f5fa7851cb8a0cab21
 src/mlpack/methods/lsh/lsh_main.cpp        |  6 +-
 src/mlpack/methods/lsh/lsh_search.hpp      | 17 +++---
 src/mlpack/methods/lsh/lsh_search_impl.hpp | 93 +++++++++++++-----------------
 src/mlpack/tests/lsh_test.cpp              | 60 +++++++++----------
 4 files changed, 81 insertions(+), 95 deletions(-)

diff --git a/src/mlpack/methods/lsh/lsh_main.cpp b/src/mlpack/methods/lsh/lsh_main.cpp
index 0c22f93..dee9f55 100644
--- a/src/mlpack/methods/lsh/lsh_main.cpp
+++ b/src/mlpack/methods/lsh/lsh_main.cpp
@@ -64,8 +64,8 @@ PARAM_INT("tables", "The number of hash tables to be used.", "L", 30);
 PARAM_DOUBLE("hash_width", "The hash width for the first-level hashing in the "
     "LSH preprocessing. By default, the LSH class automatically estimates a "
     "hash width for its use.", "H", 0.0);
-PARAM_INT("num_probes", "Number of additional probes for Multiprobe LSH"
-    " If 0, traditional LSH is used.", "T", 0);
+PARAM_INT("num_probes", "Number of additional probes for Multiprobe LSH;"
+    " if 0, traditional LSH is used.", "T", 0);
 PARAM_INT("second_hash_size", "The size of the second level hash table.", "S",
     99901);
 PARAM_INT("bucket_size", "The maximum size of a bucket in the second level "
@@ -139,7 +139,7 @@ int main(int argc, char *argv[])
   const size_t numProj = CLI::GetParam<int>("projections");
   const size_t numTables = CLI::GetParam<int>("tables");
   const double hashWidth = CLI::GetParam<double>("hash_width");
-  const size_t numProbes = CLI::GetParam<int>("num_probes");
+  const size_t numProbes = (size_t) CLI::GetParam<int>("num_probes");
 
   arma::Mat<size_t> neighbors;
   arma::mat distances;
diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index c621710..44a7b64 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -35,7 +35,6 @@
  * }
  *
  */
-
 #ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
 #define MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
 
@@ -280,9 +279,9 @@ class LSHSearch
    *    hashing the query into all the hash tables and eventually into
    *    multiple buckets of the second hash table.
    * @param numTablesToSearch The number of tables to perform the search in. If
-   * 0, all tables are searched.
+   *    0, all tables are searched.
    * @param T The number of additional probing bins for multiprobe LSH. If 0,
-   * single-probe is used.
+   *    single-probe is used.
    */
   template<typename VecType>
   void ReturnIndicesFromTable(const VecType& queryPoint,
@@ -354,10 +353,10 @@ class LSHSearch
    *
    * @param queryCode vector containing the numProj-dimensional query code.
    * @param queryCodeNotFloored vector containing the projection location of the
-   * query.
+   *    query.
    * @param T number of additional probing bins.
    * @param additionalProbingBins matrix. Each column will hold one additional
-   * bin.
+   *    bin.
   */
   void GetAdditionalProbingBins(const arma::vec& queryCode,
                                 const arma::vec& queryCodeNotFloored,
@@ -368,6 +367,8 @@ class LSHSearch
    * Returns the score of a perturbation vector generated by perturbation set A.
    * The score of a pertubation set (vector) is the sum of scores of the
    * participating actions.
+   * @param A perturbation set to compute the score of.
+   * @param scores vector containing score of each perturbation.
   */
   double PerturbationScore(const std::vector<bool>& A,
                                   const arma::vec& scores) const;
@@ -375,6 +376,7 @@ class LSHSearch
    * Inline function used by GetAdditionalProbingBins. The vector shift operation
    * replaces the largest element of a vector A with (largest element) + 1.
    * Returns true if resulting vector is valid, otherwise false.
+   * @param A perturbation set to shift.
   */
   bool PerturbationShift(std::vector<bool>& A) const;
 
@@ -383,14 +385,15 @@ class LSHSearch
    * operation adds the element [1 + (largest_element)] to a vector A, where
    * largest_element is the largest element of A. Returns true if resulting vector
    * is valid, otherwise false.
+   * @param A perturbation set to expand.
   */
-
   bool PerturbationExpand(std::vector<bool>& A) const;
+  
   /**
    * Return true if perturbation set A is valid. A perturbation set is invalid if
    * it contains two (or more) actions for the same dimension or dimensions that
    * are larger than the queryCode's dimensions.
-   * @param A - a perturbation set
+   * @param A perturbation set to validate.
   */
   bool PerturbationValid(const std::vector<bool>& A) const;
 
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index d91aebe..5916d36 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -9,7 +9,6 @@
 
 #include <mlpack/core.hpp>
 
-
 namespace mlpack {
 namespace neighbor {
 
@@ -358,7 +357,7 @@ bool LSHSearch<SortPolicy>::PerturbationShift(std::vector<bool>& A) const
   if ( maxPos + 1 < A.size()) // otherwise, this is an invalid vector 
   {
     A[maxPos] = 0;
-    A[maxPos+1] = 1;
+    A[maxPos + 1] = 1;
     return true; // valid
   }
   return false; // invalid
@@ -374,7 +373,7 @@ bool LSHSearch<SortPolicy>::PerturbationExpand(std::vector<bool>& A) const
     if (A[i]) // marked true
       maxPos = i;
 
-  if ( maxPos + 1 < A.size()) // otherwise, this is an invalid vector
+  if (maxPos + 1 < A.size()) // otherwise, this is an invalid vector
   {
     A[maxPos + 1] = 1;
     return true;
@@ -402,12 +401,11 @@ bool LSHSearch<SortPolicy>::PerturbationValid(
       continue;
 
     // If dimesnion is unseen thus far, mark it as seen.
-    if ( check[i % numProj] == false )
+    if (check[i % numProj] == false)
       check[i % numProj] = true;
     else
       return false; // If dimension was seen before, set is not valid.
   }
-
   // If we didn't fail, set is valid.
   return true;
 }
@@ -415,10 +413,10 @@ bool LSHSearch<SortPolicy>::PerturbationValid(
 // Compute additional probing bins for a query
 template<typename SortPolicy>
 void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
-                            const arma::vec& queryCode,
-                            const arma::vec& queryCodeNotFloored,
-                            const size_t T,
-                            arma::mat& additionalProbingBins) const
+    const arma::vec& queryCode,
+    const arma::vec& queryCodeNotFloored,
+    const size_t T,
+    arma::mat& additionalProbingBins) const
 {
 
   // No additional bins requested. Our work is done.
@@ -448,15 +446,15 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
 
   // Actions vector describes what perturbation (-1/+1) corresponds to a score.
   arma::Col<short int> actions(2 * numProj); // will be [-1 ... 1 ...]
-  actions.rows(0, numProj - 1) = // first numProj rows
+  actions.rows(0, numProj - 1) = // First numProj rows.
     -1 * arma::ones< arma::Col<short int> > (numProj); // -1s
-  actions.rows(numProj, (2 * numProj) - 1) = // last numProj rows
+  actions.rows(numProj, (2 * numProj) - 1) = // Last numProj rows.
     arma::ones< arma::Col<short int> > (numProj); // 1s
 
 
   // Acting dimension vector shows which coordinate to transform according to
-  // actions described by actions vector
-  arma::Col<size_t> positions(2 * numProj); // will be [0 1 2 ... 0 1 2 ...]
+  // actions (actions are described by actions vector above).
+  arma::Col<size_t> positions(2 * numProj); // Will be [0 1 2 ... 0 1 2 ...].
   positions.rows(0, numProj - 1) =
     arma::linspace< arma::Col<size_t> >(0, numProj - 1, numProj);
   positions.rows(numProj, 2 * numProj - 1) =
@@ -465,7 +463,6 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
   // Special case: No need to create heap for 1 or 2 codes.
   if (T <= 2)
   {
-
     // First, find location of minimum score, generate 1 perturbation vector,
     // and add its code to additionalProbingBins column 0.
 
@@ -500,7 +497,7 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
     size_t minloc2 = 0;
     for (size_t s = 0; s < (2 * numProj); ++s) // here we can't start from 1
     {
-      if ( minscore2 > scores[s] && s != minloc) //second smallest
+      if (minscore2 > scores[s] && s != minloc) //second smallest
       {
         minscore2 = scores[s];
         minloc2 = s;
@@ -515,7 +512,7 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
   // General case: more than 2 perturbation vectors require use of minheap.
 
   // Sort everything in increasing order.
-  arma::Col<long long unsigned int> sortidx = arma::sort_index(scores);
+  arma::uvec sortidx = arma::sort_index(scores);
   scores = scores(sortidx);
   actions = actions(sortidx);
   positions = positions(sortidx);
@@ -568,40 +565,38 @@ void LSHSearch<SortPolicy>::GetAdditionalProbingBins(
 
       // Shift operation on Ai (replace max with max+1).
       std::vector<bool> As = Ai;
-      if ( PerturbationShift(As) && PerturbationValid(As) )
+      if (PerturbationShift(As) && PerturbationValid(As))
         // Don't add invalid sets.
       {
         perturbationSets.push_back(As); // add shifted set to sets
         minHeap.push(
-            std::make_pair(
-              PerturbationScore(As, scores), 
-              perturbationSets.size() - 1)
+            std::make_pair(PerturbationScore(As, scores), 
+            perturbationSets.size() - 1)
             );
       }
 
       // Expand operation on Ai (add max+1 to set).
       std::vector<bool> Ae = Ai;
-      if ( PerturbationExpand(Ae) && PerturbationValid(Ae)  ) 
+      if (PerturbationExpand(Ae) && PerturbationValid(Ae)) 
         // Don't add invalid sets.
       {
         perturbationSets.push_back(Ae); // add expanded set to sets
         minHeap.push(
-            std::make_pair(
-              PerturbationScore(Ae, scores),
-              perturbationSets.size() - 1)
+            std::make_pair(PerturbationScore(Ae, scores),
+            perturbationSets.size() - 1)
             );
       }
 
-    }while (! PerturbationValid(Ai)  );//Discard invalid perturbations
+    } while (!PerturbationValid(Ai));//Discard invalid perturbations
 
     // Found valid perturbation set Ai. Construct perturbation vector from set.
     for (size_t pos = 0; pos < Ai.size(); ++pos)
       // If Ai[pos] is marked, add action to probing vector.
       additionalProbingBins(positions(pos), pvec) 
-          += Ai[pos] ? actions(pos) : 0;   }
+          += Ai[pos] ? actions(pos) : 0;
+  }
 }
 
-
 template<typename SortPolicy>
 template<typename VecType>
 void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
@@ -630,8 +625,7 @@ void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
   for (size_t i = 0; i < numTablesToSearch; i++)
     queryCodesNotFloored.unsafe_col(i) = projections.slice(i).t() * queryPoint;
   queryCodesNotFloored += offsets.cols(0, numTablesToSearch - 1);
-  allProjInTables = arma::floor(queryCodesNotFloored/hashWidth);
-
+  allProjInTables = arma::floor(queryCodesNotFloored / hashWidth);
 
   // Use hashMat to store the primary probing codes and any additional codes
   // from multiprobe LSH.
@@ -640,14 +634,13 @@ void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
 
   // Compute the primary hash value of each key of the query into a bucket of
   // the secondHashTable using the secondHashWeights.
-  hashMat.row(0) = 
-    arma::conv_to< arma::Row<size_t> >:: // floor by typecasting to size_t
-    from( secondHashWeights.t() * allProjInTables );
-  // mod to compute 2nd-level codes
+  hashMat.row(0) = arma::conv_to<arma::Row<size_t>> // Floor by typecasting
+      ::from(secondHashWeights.t() * allProjInTables);
+  // Mod to compute 2nd-level codes.
   for (size_t i = 0; i < numTablesToSearch; i++)
     hashMat(0, i) = (hashMat(0, i) % secondHashSize);
 
-  // Compute hash codes of additional probing bins
+  // Compute hash codes of additional probing bins.
   if (T > 0)
   {
     for (size_t i = 0; i < numTablesToSearch; ++i)
@@ -704,19 +697,16 @@ void LSHSearch<SortPolicy>::ReturnIndicesFromTable(
 
     for (size_t i = 0; i < numTablesToSearch; ++i) // for all tables
     {
-      for (size_t p = 0; p < T + 1; ++p) // for entire probing sequence
+      for (size_t p = 0; p < T + 1; ++p) // For entire probing sequence.
       {
-
         // get the sequence code
         size_t hashInd = hashMat(p, i);
         size_t tableRow = bucketRowInHashTable[hashInd];
 
         if (tableRow < secondHashSize && bucketContentSize[tableRow] > 0)
-        {
           // Pick the indices in the bucket corresponding to hashInd.
           for (size_t j = 0; j < bucketContentSize[tableRow]; ++j)
             refPointsConsidered[ secondHashTable[tableRow](j) ]++;
-        }
       }
     }
 
@@ -795,19 +785,18 @@ void LSHSearch<SortPolicy>::Search(const arma::mat& querySet,
   // If the user requested more than the available number of additional probing
   // bins, set Teffective to maximum T. Maximum T is 2^numProj - 1
   size_t Teffective = T;
-  if (T > ( (size_t) ( (1 << numProj) - 1) ) )
+  if (T > ((size_t) ((1 << numProj) - 1)))
   {
-    Teffective = ( 1 << numProj ) - 1;
-    Log::Warn << "Requested " << T << 
-      " additional bins are more than theoretical maximum. Using " <<
-      Teffective << " instead." << std::endl;
+    Teffective = (1 << numProj) - 1;
+    Log::Warn << "Requested " << T << " additional bins are more than "
+        << "theoretical maximum. Using " << Teffective << " instead." 
+        << std::endl;
   }
 
   // If the user set multiprobe, log it
-  if (T > 0)
-    Log::Info << "Running multiprobe LSH with " << Teffective <<
-      " additional probing bins per table per query."<< std::endl;
-
+  if (Teffective > 0)
+    Log::Info << "Running multiprobe LSH with " << Teffective 
+        <<" additional probing bins per table per query." << std::endl;
 
   size_t avgIndicesReturned = 0;
 
@@ -859,12 +848,12 @@ Search(const size_t k,
   // If the user requested more than the available number of additional probing
   // bins, set Teffective to maximum T. Maximum T is 2^numProj - 1
   size_t Teffective = T;
-  if (T > ( (size_t) ( (1 << numProj) - 1) ) )
+  if (T > ((size_t) ((1 << numProj) - 1)))
   {
-    Teffective = ( 1 << numProj ) - 1;
-    Log::Warn << "Requested " << T << 
-      " additional bins are more than theoretical maximum. Using " <<
-      Teffective << " instead." << std::endl;
+    Teffective = (1 << numProj) - 1;
+    Log::Warn << "Requested " << T << " additional bins are more than "
+        << "theoretical maximum. Using " << Teffective << " instead." 
+        << std::endl;
   }
 
   // If the user set multiprobe, log it
diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index 73cc3a1..d0c65cd 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -16,8 +16,13 @@ using namespace mlpack;
 using namespace mlpack::neighbor;
 
 /**
- * Generates a point set of four clusters around (0.5, 0.5),
- * (3.5, 0.5), (0.5, 3.5), (3.5, 3.5).
+ * Generates a point set of four clusters:
+ * -C1 around (0.5, 3.5),
+ * -C2 around (3.5, 3.5),
+ * -C3 around (0.5, 0.5),
+ * -C4 around (3.5, 3.5).
+ *
+ * It then merges these clusters into one set, rdata.
  */
 void GetPointset(const size_t N, arma::mat& rdata)
 {
@@ -506,40 +511,30 @@ BOOST_AUTO_TEST_CASE(MultiprobeTest)
 
   for (size_t rep = 0; rep < repetitions; ++rep)
   {
-    // train a model
-    LSHSearch<> multiprobeTest(
-        rdata,
-        numProj,
-        numTables,
-        hashWidth,
-        secondHashSize,
-        bucketSize);
+    // Train a model.
+    LSHSearch<> multiprobeTest(rdata, numProj, numTables, hashWidth, 
+        secondHashSize, bucketSize);
 
     double prevRecall = 0;
-    // search with varying number of probes
+    // Search with varying number of probes.
     for (size_t p = 0; p < probeTrials; ++p)
     {
       arma::Mat<size_t> lshNeighbors;
-      arma::mat lshDistances; //move outside of loop for speed?
-      
-      multiprobeTest.Search(
-          qdata, 
-          k, 
-          lshNeighbors, 
-          lshDistances, 
-          0, 
+      arma::mat lshDistances;
+      
+      multiprobeTest.Search(qdata, k, lshNeighbors, lshDistances, 0, 
           numProbes[p]);
 
-      // compute recall
-      double recall = LSHSearch<>::ComputeRecall(lshNeighbors, groundTruth); //TODO: change to LSHSearch::ComputeRecall??
+      // Compute recall of this run.
+      double recall = LSHSearch<>::ComputeRecall(lshNeighbors, groundTruth); 
       if (p > 0)
       {
-        // more probes should at the very least not lower recall...
+        // More probes should at the very least not lower recall...
         BOOST_REQUIRE_GE(recall, prevRecall);
 
-        // ... and should ideally increase it a bit
+        // ... and should ideally increase it a bit.
         if (recall > prevRecall + epsilonIncrease)
-         foundIncrease = true;
+          foundIncrease = true;
         prevRecall = recall;
       }
     }
@@ -563,7 +558,7 @@ BOOST_AUTO_TEST_CASE(MultiprobeTest)
 BOOST_AUTO_TEST_CASE(MultiprobeDeterministicTest)
 {
   math::RandomSeed(std::time(NULL));
-  // generate known deterministic clusters of points
+  // Generate known deterministic clusters of points.
   const size_t N = 40;
   arma::mat rdata;
   GetPointset(N, rdata);
@@ -573,19 +568,18 @@ BOOST_AUTO_TEST_CASE(MultiprobeDeterministicTest)
   const int secondHashSize = 99901;
   const int bucketSize = 500;
 
-  // 1 table, projections on orthonormal plane
+  // 1 table, projections on orthonormal plane.
   arma::cube projections(2, 2, 1);
   projections(0, 0, 0) = 1;
   projections(1, 0, 0) = 0;
   projections(0, 1, 0) = 0;
   projections(1, 1, 0) = 1;
 
-  // construct LSH object with given tables
+  // Construct LSH object with given tables.
   LSHSearch<> lshTest(rdata, projections,
                       hashWidth, secondHashSize, bucketSize);
 
-
-  // two points near the clusters but outside their bins. q1's lowest scoring
+  // Two points near the clusters but outside their bins. q1's lowest scoring
   // perturbation vectors will map to C1 cluster's bins.
   arma::mat q1;
   q1 << 1.1 << arma::endr << 3.3; // vector [1.1, 3.3]
@@ -593,15 +587,15 @@ BOOST_AUTO_TEST_CASE(MultiprobeDeterministicTest)
   arma::Mat<size_t> neighbors;
   arma::mat distances;
 
-  // Test that q1 simple search comes up empty
+  // Test that q1 simple search comes up empty.
   lshTest.Search(q1, k, neighbors, distances);
   cout << neighbors << endl;
-  BOOST_REQUIRE( arma::all(neighbors.col(0) == N) );
+  BOOST_REQUIRE(arma::all(neighbors.col(0) == N));
 
-  // Searching with 3 additional probing bins should find neighbors
+  // Searching with 3 additional probing bins should find neighbors.
   lshTest.Search(q1, k, neighbors, distances, 0, 3);
   cout << neighbors << endl;
-  BOOST_REQUIRE( arma::all(neighbors.col(0) == N || neighbors.col(0) < 10) );
+  BOOST_REQUIRE(arma::all(neighbors.col(0) == N || neighbors.col(0) < 10));
 }
 */
 




More information about the mlpack-git mailing list