[mlpack-git] master: Style fixes for LSH test. (bc6b9e7)

gitdub at mlpack.org gitdub at mlpack.org
Fri Apr 8 20:37:52 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/f0675d7789b69746f7c337c3ec4a778cef932924...ba826b1959a3f83532e91765b2bba0705e588d39

>---------------------------------------------------------------

commit bc6b9e74766ff87fe71b60a1a3107e8139500c1d
Author: Ryan Curtin <ryan at ratml.org>
Date:   Fri Apr 8 20:37:52 2016 -0400

    Style fixes for LSH test.


>---------------------------------------------------------------

bc6b9e74766ff87fe71b60a1a3107e8139500c1d
 src/mlpack/tests/lsh_test.cpp | 206 ++++++++++++++++++++----------------------
 1 file changed, 99 insertions(+), 107 deletions(-)

diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index 2076ad5..52c0803 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -15,23 +15,23 @@ using namespace std;
 using namespace mlpack;
 using namespace mlpack::neighbor;
 
-double compute_recall(
-    const arma::Mat<size_t>& LSHneighbors,
+double ComputeRecall(
+    const arma::Mat<size_t>& lshNeighbors,
     const arma::Mat<size_t>& groundTruth)
 {
-  const size_t queries = LSHneighbors.n_cols;
-  const size_t neigh = LSHneighbors.n_rows;
+  const size_t queries = lshNeighbors.n_cols;
+  const size_t neigh = lshNeighbors.n_rows;
 
-  int same = 0;
+  size_t same = 0;
   for (size_t q = 0; q < queries; ++q)
   {
     for (size_t n = 0; n < neigh; ++n)
     {
-      same += (LSHneighbors(n,q) == groundTruth(n,q));
+      same += (lshNeighbors(n, q) == groundTruth(n, q));
     }
   }
-  return static_cast<double>(same)/
-    (static_cast<double>(queries*neigh));
+  return static_cast<double>(same) /
+    (static_cast<double>(queries * neigh));
 }
 
 BOOST_AUTO_TEST_SUITE(LSHTest);
@@ -46,115 +46,111 @@ BOOST_AUTO_TEST_SUITE(LSHTest);
  * This produces false negatives, so we attempt the test numTries times and
  * only declare failure if all of them fail.
  */
-BOOST_AUTO_TEST_CASE(numTablesTest)
+BOOST_AUTO_TEST_CASE(NumTablesTest)
 {
-
-  //math::RandomSeed(time(0));
-  //kNN and LSH parameters (use LSH default parameters)
+  // kNN and LSH parameters (use LSH default parameters).
   const int k = 4;
   const int numProj = 10;
   const double hashWidth = 0;
   const int secondHashSize = 99901;
   const int bucketSize = 500;
 
-  //test parameters
-  const double epsilon = 0.1; //allowed deviation from expected monotonicity
-  const int numTries = 5; //tries for each test before declaring failure
+  // Test parameters.
+  const double epsilon = 0.1; // Allowed deviation from expected monotonicity.
+  const int numTries = 5; // Tries for each test before declaring failure.
 
-  //read iris training and testing data as reference and query
-  const string trainSet="iris_train.csv";
-  const string testSet="iris_test.csv";
+  // Read iris training and testing data as reference and query sets.
+  const string trainSet = "iris_train.csv";
+  const string testSet = "iris_test.csv";
   arma::mat rdata;
   arma::mat qdata;
   data::Load(trainSet, rdata, true);
   data::Load(testSet, qdata, true);
 
-  //Run classic knn on reference data
+  // Run classic knn on reference data.
   AllkNN knn(rdata);
   arma::Mat<size_t> groundTruth;
   arma::mat groundDistances;
   knn.Search(qdata, k, groundTruth, groundDistances);
 
-
   bool fail;
   for (int t = 0; t < numTries; ++t)
   {
     fail = false;
 
-    const int lSize = 6; //number of runs
-    const int lValue[] = {1, 8, 16, 32, 64, 128}; //number of tables
-    double lValueRecall[lSize] = {0.0}; //recall of each LSH run
+    const int lSize = 6; // Number of runs.
+    const int lValue[] = {1, 8, 16, 32, 64, 128}; // Number of tables.
+    double lValueRecall[lSize] = {0.0}; // Recall of each LSH run.
 
-    for (size_t l=0; l < lSize; ++l)
+    for (size_t l = 0; l < lSize; ++l)
     {
-      //run LSH with only numTables varying (other values default)
-      LSHSearch<> lshTest(rdata, numProj, lValue[l],
-          hashWidth, secondHashSize, bucketSize);
-      arma::Mat<size_t> LSHneighbors;
-      arma::mat LSHdistances;
-      lshTest.Search(qdata, k, LSHneighbors, LSHdistances);
+      // Run LSH with only numTables varying (other values are defaults).
+      LSHSearch<> lshTest(rdata, numProj, lValue[l], hashWidth, secondHashSize,
+          bucketSize);
+      arma::Mat<size_t> lshNeighbors;
+      arma::mat lshDistances;
+      lshTest.Search(qdata, k, lshNeighbors, lshDistances);
 
-      //compute recall for each query
-      lValueRecall[l] = compute_recall(LSHneighbors, groundTruth);
+      // Compute recall for each query.
+      lValueRecall[l] = ComputeRecall(lshNeighbors, groundTruth);
 
       if (l > 0)
       {
-        if(lValueRecall[l] < lValueRecall[l-1] - epsilon)
+        if (lValueRecall[l] < lValueRecall[l - 1] - epsilon)
         {
-          fail = true; //if test fails at one point, stop and retry
+          fail = true; // If test fails at one point, stop and retry.
           break;
         }
       }
     }
 
-    if ( !fail )
-      break; //if test passes one time, it is sufficient
+    if (!fail)
+      break; // If test passes one time, it is sufficient.
   }
 
   BOOST_REQUIRE(fail == false);
 }
 
-/*Test: Run LSH with varying hash width, keeping all other parameters
+/**
+ * Test: Run LSH with varying hash width, keeping all other parameters
  * constant. Compute the recall, i.e. the number of reported neighbors that
  * are real neighbors of the query.
  * LSH's property is that (with high probability), increasing the hash width
  * will increase recall. Epsilon ensures that if noise lightly affects the
  * projections, the test will not fail.
  */
-BOOST_AUTO_TEST_CASE(hashWidthTest)
+BOOST_AUTO_TEST_CASE(HashWidthTest)
 {
-
-  //math::RandomSeed(time(0));
-  //kNN and LSH parameters (use LSH default parameters)
+  // kNN and LSH parameters (use LSH default parameters).
   const int k = 4;
   const int numTables = 30;
   const int numProj = 10;
   const int secondHashSize = 99901;
   const int bucketSize = 500;
 
-  //test parameters
-  const double epsilon = 0.1; //allowed deviation from expected monotonicity
+  // Test parameters.
+  const double epsilon = 0.1; // Allowed deviation from expected monotonicity.
 
-  //read iris training and testing data as reference and query
-  const string trainSet="iris_train.csv";
-  const string testSet="iris_test.csv";
+  // Read iris training and testing data as reference and query.
+  const string trainSet = "iris_train.csv";
+  const string testSet = "iris_test.csv";
   arma::mat rdata;
   arma::mat qdata;
   data::Load(trainSet, rdata, true);
   data::Load(testSet, qdata, true);
 
-  //Run classic knn on reference data
+  // Run classic knn on reference data.
   AllkNN knn(rdata);
   arma::Mat<size_t> groundTruth;
   arma::mat groundDistances;
   knn.Search(qdata, k, groundTruth, groundDistances);
-  const int hSize = 7; //number of runs
-  const double hValue[] = {0.1, 0.5, 1, 5, 10, 50, 500}; //hash width
-  double hValueRecall[hSize] = {0.0}; //recall of each run
+  const int hSize = 7; // Number of runs.
+  const double hValue[] = {0.1, 0.5, 1, 5, 10, 50, 500}; // Hash width.
+  double hValueRecall[hSize] = {0.0}; // Recall of each run.
 
   for (size_t h = 0; h < hSize; ++h)
   {
-    //run LSH with only hashWidth varying (other values default)
+    // Run LSH with only hashWidth varying (other values are defaults).
     LSHSearch<> lshTest(
         rdata,
         numProj,
@@ -163,16 +159,15 @@ BOOST_AUTO_TEST_CASE(hashWidthTest)
         secondHashSize,
         bucketSize);
 
-    arma::Mat<size_t> LSHneighbors;
-    arma::mat LSHdistances;
-    lshTest.Search(qdata, k, LSHneighbors, LSHdistances);
+    arma::Mat<size_t> lshNeighbors;
+    arma::mat lshDistances;
+    lshTest.Search(qdata, k, lshNeighbors, lshDistances);
 
-    //compute recall for each query
-    hValueRecall[h] = compute_recall(LSHneighbors, groundTruth);
+    // Compute recall for each query.
+    hValueRecall[h] = ComputeRecall(lshNeighbors, groundTruth);
 
     if (h > 0)
-        BOOST_REQUIRE_GE(hValueRecall[h], hValueRecall[h-1] - epsilon);
-
+      BOOST_REQUIRE_GE(hValueRecall[h], hValueRecall[h - 1] - epsilon);
   }
 }
 
@@ -184,42 +179,40 @@ BOOST_AUTO_TEST_CASE(hashWidthTest)
  * projections per table will decrease recall. Epsilon ensures that if noise
  * lightly affects the projections, the test will not fail.
  */
-BOOST_AUTO_TEST_CASE(numProjTest)
+BOOST_AUTO_TEST_CASE(NumProjTest)
 {
-
-  //math::RandomSeed(time(0));
-  //kNN and LSH parameters (use LSH default parameters)
+  // kNN and LSH parameters (use LSH default parameters).
   const int k = 4;
   const int numTables = 30;
   const double hashWidth = 0;
   const int secondHashSize = 99901;
   const int bucketSize = 500;
 
-  //test parameters
-  const double epsilon = 0.1; //allowed deviation from expected monotonicity
+  // Test parameters.
+  const double epsilon = 0.1; // Allowed deviation from expected monotonicity.
 
-  //read iris training and testing data as reference and query
-  const string trainSet="iris_train.csv";
-  const string testSet="iris_test.csv";
+  // Read iris training and testing data as reference and query sets.
+  const string trainSet = "iris_train.csv";
+  const string testSet = "iris_test.csv";
   arma::mat rdata;
   arma::mat qdata;
   data::Load(trainSet, rdata, true);
   data::Load(testSet, qdata, true);
 
-  //Run classic knn on reference data
+  // Run classic knn on reference data.
   AllkNN knn(rdata);
   arma::Mat<size_t> groundTruth;
   arma::mat groundDistances;
   knn.Search(qdata, k, groundTruth, groundDistances);
 
-  //LSH test parameters for numProj
-  const int pSize = 5; //number of runs
-  const int pValue[] = {1, 10, 20, 50, 100}; //number of projections
-  double pValueRecall[pSize] = {0.0}; //recall of each run
+  // LSH test parameters for numProj.
+  const int pSize = 5; // Number of runs.
+  const int pValue[] = {1, 10, 20, 50, 100}; // Number of projections.
+  double pValueRecall[pSize] = {0.0}; // Recall of each run.
 
   for (size_t p = 0; p < pSize; ++p)
   {
-    //run LSH with only numProj varying (other values default)
+    // Run LSH with only numProj varying (other values are defaults).
     LSHSearch<> lshTest(
         rdata,
         pValue[p],
@@ -228,15 +221,16 @@ BOOST_AUTO_TEST_CASE(numProjTest)
         secondHashSize,
         bucketSize);
 
-    arma::Mat<size_t> LSHneighbors;
-    arma::mat LSHdistances;
-    lshTest.Search(qdata, k, LSHneighbors, LSHdistances);
+    arma::Mat<size_t> lshNeighbors;
+    arma::mat lshDistances;
+    lshTest.Search(qdata, k, lshNeighbors, lshDistances);
 
-    //compute recall for each query
-    pValueRecall[p] = compute_recall(LSHneighbors, groundTruth);
+    // Compute recall for each query.
+    pValueRecall[p] = ComputeRecall(lshNeighbors, groundTruth);
 
-    if (p > 0) //don't check first run, only that increasing P decreases recall
-        BOOST_REQUIRE_LE(pValueRecall[p] - epsilon, pValueRecall[p-1]);
+    // Don't check the first run; only check that increasing P decreases recall.
+    if (p > 0)
+      BOOST_REQUIRE_LE(pValueRecall[p] - epsilon, pValueRecall[p - 1]);
   }
 }
 
@@ -250,33 +244,31 @@ BOOST_AUTO_TEST_CASE(numProjTest)
  * to be very low. Set the threshhold very high (recall <= 25%) to make sure
  * that a test fail means bad implementation.
  */
-BOOST_AUTO_TEST_CASE(recallTest)
+BOOST_AUTO_TEST_CASE(RecallTest)
 {
-  //math::RandomSeed(time(0));
-  //kNN and LSH parameters (use LSH default parameters)
+  // kNN and LSH parameters (use LSH default parameters).
   const int k = 4;
   const int secondHashSize = 99901;
   const int bucketSize = 500;
 
-
-  //read iris training and testing data as reference and query
-  const string trainSet="iris_train.csv";
-  const string testSet="iris_test.csv";
+  // Read iris training and testing data as reference and query sets.
+  const string trainSet = "iris_train.csv";
+  const string testSet = "iris_test.csv";
   arma::mat rdata;
   arma::mat qdata;
   data::Load(trainSet, rdata, true);
   data::Load(testSet, qdata, true);
 
-  //Run classic knn on reference data
+  // Run classic knn on reference data.
   AllkNN knn(rdata);
   arma::Mat<size_t> groundTruth;
   arma::mat groundDistances;
   knn.Search(qdata, k, groundTruth, groundDistances);
 
-  //Expensive LSH run
-  const int hExp = 10000; //first-level hash width
-  const int kExp = 1; //projections per table
-  const int tExp = 128; //number of tables
+  // Expensive LSH run.
+  const int hExp = 10000; // First-level hash width.
+  const int kExp = 1; // Projections per table.
+  const int tExp = 128; // Number of tables.
   const double recallThreshExp = 0.5;
 
   LSHSearch<> lshTestExp(
@@ -286,20 +278,20 @@ BOOST_AUTO_TEST_CASE(recallTest)
       hExp,
       secondHashSize,
       bucketSize);
-  arma::Mat<size_t> LSHneighborsExp;
-  arma::mat LSHdistancesExp;
-  lshTestExp.Search(qdata, k, LSHneighborsExp, LSHdistancesExp);
+  arma::Mat<size_t> lshNeighborsExp;
+  arma::mat lshDistancesExp;
+  lshTestExp.Search(qdata, k, lshNeighborsExp, lshDistancesExp);
 
-  const double recallExp = compute_recall(LSHneighborsExp, groundTruth);
+  const double recallExp = ComputeRecall(lshNeighborsExp, groundTruth);
 
-  //This run should have recall higher than the threshold
+  // This run should have recall higher than the threshold.
   BOOST_REQUIRE_GE(recallExp, recallThreshExp);
 
-  //Cheap LSH Run
-  const int hChp = 1; //small first-level hash width
-  const int kChp = 1000; //large number of projections per table
-  const int tChp = 1; //only one table
-  const double recallThreshChp = 0.25; //recall threshold
+  // Cheap LSH run.
+  const int hChp = 1; // Small first-level hash width.
+  const int kChp = 1000; // Large number of projections per table.
+  const int tChp = 1; // Only one table.
+  const double recallThreshChp = 0.25; // Recall threshold.
 
   LSHSearch<> lshTestChp(
       rdata,
@@ -308,13 +300,13 @@ BOOST_AUTO_TEST_CASE(recallTest)
       hChp,
       secondHashSize,
       bucketSize);
-  arma::Mat<size_t> LSHneighborsChp;
-  arma::mat LSHdistancesChp;
-  lshTestChp.Search(qdata, k, LSHneighborsChp, LSHdistancesChp);
+  arma::Mat<size_t> lshNeighborsChp;
+  arma::mat lshDistancesChp;
+  lshTestChp.Search(qdata, k, lshNeighborsChp, lshDistancesChp);
 
-  const double recallChp = compute_recall(LSHneighborsChp, groundTruth);
+  const double recallChp = ComputeRecall(lshNeighborsChp, groundTruth);
 
-  //This run should have recall lower than the threshold
+  // This run should have recall lower than the threshold.
   BOOST_REQUIRE_LE(recallChp, recallThreshChp);
 }
 




More information about the mlpack-git mailing list