[mlpack-git] master: Style fixes for LSH test. (bc6b9e7)
gitdub at mlpack.org
gitdub at mlpack.org
Fri Apr 8 20:37:52 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/f0675d7789b69746f7c337c3ec4a778cef932924...ba826b1959a3f83532e91765b2bba0705e588d39
>---------------------------------------------------------------
commit bc6b9e74766ff87fe71b60a1a3107e8139500c1d
Author: Ryan Curtin <ryan at ratml.org>
Date: Fri Apr 8 20:37:52 2016 -0400
Style fixes for LSH test.
>---------------------------------------------------------------
bc6b9e74766ff87fe71b60a1a3107e8139500c1d
src/mlpack/tests/lsh_test.cpp | 206 ++++++++++++++++++++----------------------
1 file changed, 99 insertions(+), 107 deletions(-)
diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index 2076ad5..52c0803 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -15,23 +15,23 @@ using namespace std;
using namespace mlpack;
using namespace mlpack::neighbor;
-double compute_recall(
- const arma::Mat<size_t>& LSHneighbors,
+double ComputeRecall(
+ const arma::Mat<size_t>& lshNeighbors,
const arma::Mat<size_t>& groundTruth)
{
- const size_t queries = LSHneighbors.n_cols;
- const size_t neigh = LSHneighbors.n_rows;
+ const size_t queries = lshNeighbors.n_cols;
+ const size_t neigh = lshNeighbors.n_rows;
- int same = 0;
+ size_t same = 0;
for (size_t q = 0; q < queries; ++q)
{
for (size_t n = 0; n < neigh; ++n)
{
- same += (LSHneighbors(n,q) == groundTruth(n,q));
+ same += (lshNeighbors(n, q) == groundTruth(n, q));
}
}
- return static_cast<double>(same)/
- (static_cast<double>(queries*neigh));
+ return static_cast<double>(same) /
+ (static_cast<double>(queries * neigh));
}
BOOST_AUTO_TEST_SUITE(LSHTest);
@@ -46,115 +46,111 @@ BOOST_AUTO_TEST_SUITE(LSHTest);
* This produces false negatives, so we attempt the test numTries times and
* only declare failure if all of them fail.
*/
-BOOST_AUTO_TEST_CASE(numTablesTest)
+BOOST_AUTO_TEST_CASE(NumTablesTest)
{
-
- //math::RandomSeed(time(0));
- //kNN and LSH parameters (use LSH default parameters)
+ // kNN and LSH parameters (use LSH default parameters).
const int k = 4;
const int numProj = 10;
const double hashWidth = 0;
const int secondHashSize = 99901;
const int bucketSize = 500;
- //test parameters
- const double epsilon = 0.1; //allowed deviation from expected monotonicity
- const int numTries = 5; //tries for each test before declaring failure
+ // Test parameters.
+ const double epsilon = 0.1; // Allowed deviation from expected monotonicity.
+ const int numTries = 5; // Tries for each test before declaring failure.
- //read iris training and testing data as reference and query
- const string trainSet="iris_train.csv";
- const string testSet="iris_test.csv";
+ // Read iris training and testing data as reference and query sets.
+ const string trainSet = "iris_train.csv";
+ const string testSet = "iris_test.csv";
arma::mat rdata;
arma::mat qdata;
data::Load(trainSet, rdata, true);
data::Load(testSet, qdata, true);
- //Run classic knn on reference data
+ // Run classic knn on reference data.
AllkNN knn(rdata);
arma::Mat<size_t> groundTruth;
arma::mat groundDistances;
knn.Search(qdata, k, groundTruth, groundDistances);
-
bool fail;
for (int t = 0; t < numTries; ++t)
{
fail = false;
- const int lSize = 6; //number of runs
- const int lValue[] = {1, 8, 16, 32, 64, 128}; //number of tables
- double lValueRecall[lSize] = {0.0}; //recall of each LSH run
+ const int lSize = 6; // Number of runs.
+ const int lValue[] = {1, 8, 16, 32, 64, 128}; // Number of tables.
+ double lValueRecall[lSize] = {0.0}; // Recall of each LSH run.
- for (size_t l=0; l < lSize; ++l)
+ for (size_t l = 0; l < lSize; ++l)
{
- //run LSH with only numTables varying (other values default)
- LSHSearch<> lshTest(rdata, numProj, lValue[l],
- hashWidth, secondHashSize, bucketSize);
- arma::Mat<size_t> LSHneighbors;
- arma::mat LSHdistances;
- lshTest.Search(qdata, k, LSHneighbors, LSHdistances);
+ // Run LSH with only numTables varying (other values are defaults).
+ LSHSearch<> lshTest(rdata, numProj, lValue[l], hashWidth, secondHashSize,
+ bucketSize);
+ arma::Mat<size_t> lshNeighbors;
+ arma::mat lshDistances;
+ lshTest.Search(qdata, k, lshNeighbors, lshDistances);
- //compute recall for each query
- lValueRecall[l] = compute_recall(LSHneighbors, groundTruth);
+ // Compute recall for each query.
+ lValueRecall[l] = ComputeRecall(lshNeighbors, groundTruth);
if (l > 0)
{
- if(lValueRecall[l] < lValueRecall[l-1] - epsilon)
+ if (lValueRecall[l] < lValueRecall[l - 1] - epsilon)
{
- fail = true; //if test fails at one point, stop and retry
+ fail = true; // If test fails at one point, stop and retry.
break;
}
}
}
- if ( !fail )
- break; //if test passes one time, it is sufficient
+ if (!fail)
+ break; // If test passes one time, it is sufficient.
}
BOOST_REQUIRE(fail == false);
}
-/*Test: Run LSH with varying hash width, keeping all other parameters
+/**
+ * Test: Run LSH with varying hash width, keeping all other parameters
* constant. Compute the recall, i.e. the number of reported neighbors that
* are real neighbors of the query.
* LSH's property is that (with high probability), increasing the hash width
* will increase recall. Epsilon ensures that if noise lightly affects the
* projections, the test will not fail.
*/
-BOOST_AUTO_TEST_CASE(hashWidthTest)
+BOOST_AUTO_TEST_CASE(HashWidthTest)
{
-
- //math::RandomSeed(time(0));
- //kNN and LSH parameters (use LSH default parameters)
+ // kNN and LSH parameters (use LSH default parameters).
const int k = 4;
const int numTables = 30;
const int numProj = 10;
const int secondHashSize = 99901;
const int bucketSize = 500;
- //test parameters
- const double epsilon = 0.1; //allowed deviation from expected monotonicity
+ // Test parameters.
+ const double epsilon = 0.1; // Allowed deviation from expected monotonicity.
- //read iris training and testing data as reference and query
- const string trainSet="iris_train.csv";
- const string testSet="iris_test.csv";
+ // Read iris training and testing data as reference and query.
+ const string trainSet = "iris_train.csv";
+ const string testSet = "iris_test.csv";
arma::mat rdata;
arma::mat qdata;
data::Load(trainSet, rdata, true);
data::Load(testSet, qdata, true);
- //Run classic knn on reference data
+ // Run classic knn on reference data.
AllkNN knn(rdata);
arma::Mat<size_t> groundTruth;
arma::mat groundDistances;
knn.Search(qdata, k, groundTruth, groundDistances);
- const int hSize = 7; //number of runs
- const double hValue[] = {0.1, 0.5, 1, 5, 10, 50, 500}; //hash width
- double hValueRecall[hSize] = {0.0}; //recall of each run
+ const int hSize = 7; // Number of runs.
+ const double hValue[] = {0.1, 0.5, 1, 5, 10, 50, 500}; // Hash width.
+ double hValueRecall[hSize] = {0.0}; // Recall of each run.
for (size_t h = 0; h < hSize; ++h)
{
- //run LSH with only hashWidth varying (other values default)
+ // Run LSH with only hashWidth varying (other values are defaults).
LSHSearch<> lshTest(
rdata,
numProj,
@@ -163,16 +159,15 @@ BOOST_AUTO_TEST_CASE(hashWidthTest)
secondHashSize,
bucketSize);
- arma::Mat<size_t> LSHneighbors;
- arma::mat LSHdistances;
- lshTest.Search(qdata, k, LSHneighbors, LSHdistances);
+ arma::Mat<size_t> lshNeighbors;
+ arma::mat lshDistances;
+ lshTest.Search(qdata, k, lshNeighbors, lshDistances);
- //compute recall for each query
- hValueRecall[h] = compute_recall(LSHneighbors, groundTruth);
+ // Compute recall for each query.
+ hValueRecall[h] = ComputeRecall(lshNeighbors, groundTruth);
if (h > 0)
- BOOST_REQUIRE_GE(hValueRecall[h], hValueRecall[h-1] - epsilon);
-
+ BOOST_REQUIRE_GE(hValueRecall[h], hValueRecall[h - 1] - epsilon);
}
}
@@ -184,42 +179,40 @@ BOOST_AUTO_TEST_CASE(hashWidthTest)
* projections per table will decrease recall. Epsilon ensures that if noise
* lightly affects the projections, the test will not fail.
*/
-BOOST_AUTO_TEST_CASE(numProjTest)
+BOOST_AUTO_TEST_CASE(NumProjTest)
{
-
- //math::RandomSeed(time(0));
- //kNN and LSH parameters (use LSH default parameters)
+ // kNN and LSH parameters (use LSH default parameters).
const int k = 4;
const int numTables = 30;
const double hashWidth = 0;
const int secondHashSize = 99901;
const int bucketSize = 500;
- //test parameters
- const double epsilon = 0.1; //allowed deviation from expected monotonicity
+ // Test parameters.
+ const double epsilon = 0.1; // Allowed deviation from expected monotonicity.
- //read iris training and testing data as reference and query
- const string trainSet="iris_train.csv";
- const string testSet="iris_test.csv";
+ // Read iris training and testing data as reference and query sets.
+ const string trainSet = "iris_train.csv";
+ const string testSet = "iris_test.csv";
arma::mat rdata;
arma::mat qdata;
data::Load(trainSet, rdata, true);
data::Load(testSet, qdata, true);
- //Run classic knn on reference data
+ // Run classic knn on reference data.
AllkNN knn(rdata);
arma::Mat<size_t> groundTruth;
arma::mat groundDistances;
knn.Search(qdata, k, groundTruth, groundDistances);
- //LSH test parameters for numProj
- const int pSize = 5; //number of runs
- const int pValue[] = {1, 10, 20, 50, 100}; //number of projections
- double pValueRecall[pSize] = {0.0}; //recall of each run
+ // LSH test parameters for numProj.
+ const int pSize = 5; // Number of runs.
+ const int pValue[] = {1, 10, 20, 50, 100}; // Number of projections.
+ double pValueRecall[pSize] = {0.0}; // Recall of each run.
for (size_t p = 0; p < pSize; ++p)
{
- //run LSH with only numProj varying (other values default)
+ // Run LSH with only numProj varying (other values are defaults).
LSHSearch<> lshTest(
rdata,
pValue[p],
@@ -228,15 +221,16 @@ BOOST_AUTO_TEST_CASE(numProjTest)
secondHashSize,
bucketSize);
- arma::Mat<size_t> LSHneighbors;
- arma::mat LSHdistances;
- lshTest.Search(qdata, k, LSHneighbors, LSHdistances);
+ arma::Mat<size_t> lshNeighbors;
+ arma::mat lshDistances;
+ lshTest.Search(qdata, k, lshNeighbors, lshDistances);
- //compute recall for each query
- pValueRecall[p] = compute_recall(LSHneighbors, groundTruth);
+ // Compute recall for each query.
+ pValueRecall[p] = ComputeRecall(lshNeighbors, groundTruth);
- if (p > 0) //don't check first run, only that increasing P decreases recall
- BOOST_REQUIRE_LE(pValueRecall[p] - epsilon, pValueRecall[p-1]);
+ // Don't check the first run; only check that increasing P decreases recall.
+ if (p > 0)
+ BOOST_REQUIRE_LE(pValueRecall[p] - epsilon, pValueRecall[p - 1]);
}
}
@@ -250,33 +244,31 @@ BOOST_AUTO_TEST_CASE(numProjTest)
* to be very low. Set the threshold very high (recall <= 25%) to make sure
* that a test fail means bad implementation.
*/
-BOOST_AUTO_TEST_CASE(recallTest)
+BOOST_AUTO_TEST_CASE(RecallTest)
{
- //math::RandomSeed(time(0));
- //kNN and LSH parameters (use LSH default parameters)
+ // kNN and LSH parameters (use LSH default parameters).
const int k = 4;
const int secondHashSize = 99901;
const int bucketSize = 500;
-
- //read iris training and testing data as reference and query
- const string trainSet="iris_train.csv";
- const string testSet="iris_test.csv";
+ // Read iris training and testing data as reference and query sets.
+ const string trainSet = "iris_train.csv";
+ const string testSet = "iris_test.csv";
arma::mat rdata;
arma::mat qdata;
data::Load(trainSet, rdata, true);
data::Load(testSet, qdata, true);
- //Run classic knn on reference data
+ // Run classic knn on reference data.
AllkNN knn(rdata);
arma::Mat<size_t> groundTruth;
arma::mat groundDistances;
knn.Search(qdata, k, groundTruth, groundDistances);
- //Expensive LSH run
- const int hExp = 10000; //first-level hash width
- const int kExp = 1; //projections per table
- const int tExp = 128; //number of tables
+ // Expensive LSH run.
+ const int hExp = 10000; // First-level hash width.
+ const int kExp = 1; // Projections per table.
+ const int tExp = 128; // Number of tables.
const double recallThreshExp = 0.5;
LSHSearch<> lshTestExp(
@@ -286,20 +278,20 @@ BOOST_AUTO_TEST_CASE(recallTest)
hExp,
secondHashSize,
bucketSize);
- arma::Mat<size_t> LSHneighborsExp;
- arma::mat LSHdistancesExp;
- lshTestExp.Search(qdata, k, LSHneighborsExp, LSHdistancesExp);
+ arma::Mat<size_t> lshNeighborsExp;
+ arma::mat lshDistancesExp;
+ lshTestExp.Search(qdata, k, lshNeighborsExp, lshDistancesExp);
- const double recallExp = compute_recall(LSHneighborsExp, groundTruth);
+ const double recallExp = ComputeRecall(lshNeighborsExp, groundTruth);
- //This run should have recall higher than the threshold
+ // This run should have recall higher than the threshold.
BOOST_REQUIRE_GE(recallExp, recallThreshExp);
- //Cheap LSH Run
- const int hChp = 1; //small first-level hash width
- const int kChp = 1000; //large number of projections per table
- const int tChp = 1; //only one table
- const double recallThreshChp = 0.25; //recall threshold
+ // Cheap LSH run.
+ const int hChp = 1; // Small first-level hash width.
+ const int kChp = 1000; // Large number of projections per table.
+ const int tChp = 1; // Only one table.
+ const double recallThreshChp = 0.25; // Recall threshold.
LSHSearch<> lshTestChp(
rdata,
@@ -308,13 +300,13 @@ BOOST_AUTO_TEST_CASE(recallTest)
hChp,
secondHashSize,
bucketSize);
- arma::Mat<size_t> LSHneighborsChp;
- arma::mat LSHdistancesChp;
- lshTestChp.Search(qdata, k, LSHneighborsChp, LSHdistancesChp);
+ arma::Mat<size_t> lshNeighborsChp;
+ arma::mat lshDistancesChp;
+ lshTestChp.Search(qdata, k, lshNeighborsChp, lshDistancesChp);
- const double recallChp = compute_recall(LSHneighborsChp, groundTruth);
+ const double recallChp = ComputeRecall(lshNeighborsChp, groundTruth);
- //This run should have recall lower than the threshold
+ // This run should have recall lower than the threshold.
BOOST_REQUIRE_LE(recallChp, recallThreshChp);
}
More information about the mlpack-git mailing list