[mlpack-git] master: Re-adds Deterministic Multiprobe Test (4ba6f6d)
gitdub at mlpack.org
gitdub at mlpack.org
Tue Jun 28 07:36:33 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eaa7182ebed8cce3fd6191dc1f8170546ea297da...812048c7c6bee0b6c8d936677f23bbb5930c6cfc
>---------------------------------------------------------------
commit 4ba6f6d29b61cfbc90b2a0d002ab85914b7ccb07
Author: Yannis Mentekidis <mentekid at gmail.com>
Date: Tue Jun 28 12:36:33 2016 +0100
Re-adds Deterministic Multiprobe Test
>---------------------------------------------------------------
4ba6f6d29b61cfbc90b2a0d002ab85914b7ccb07
src/mlpack/tests/lsh_test.cpp | 55 +++++++++++++++++++++++++++----------------
1 file changed, 35 insertions(+), 20 deletions(-)
diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp
index d0c65cd..f3fb242 100644
--- a/src/mlpack/tests/lsh_test.cpp
+++ b/src/mlpack/tests/lsh_test.cpp
@@ -539,31 +539,28 @@ BOOST_AUTO_TEST_CASE(MultiprobeTest)
}
}
}
-
BOOST_REQUIRE(foundIncrease);
}
/**
* Test: This is a deterministic test that verifies multiprobe LSH works
- * correctly. To do this, we generate a query , q1, that will end up
- * outside of the bucket of its nearest neighbors in C1. The neighbors will be
- * found only by searching additional probing bins.
- *
- * We verify this actually is the case.
- *
- * Note: This test doesn't work, because of the effect the random offsets of LSH
- * have on the 1st level codes. We need to think of something better.
+ * correctly. To do this, we generate two queries, q1 and q2. q1 is hashed
+ * directly under cluster C2, q2 is hashed in C2's center.
+ * We verify that:
+ * 1) q1 should have no neighbors without multiprobe.
+ * 2) q1 should have neighbors only from C2 with 1 additional probe.
+ * 3) q2 should have all neighbors found with 3 additional probes.
*/
-/*
BOOST_AUTO_TEST_CASE(MultiprobeDeterministicTest)
{
math::RandomSeed(std::time(NULL));
+
// Generate known deterministic clusters of points.
const size_t N = 40;
arma::mat rdata;
GetPointset(N, rdata);
- const int k = N/2;
+ const int k = N/4;
const double hashWidth = 1;
const int secondHashSize = 99901;
const int bucketSize = 500;
@@ -579,25 +576,43 @@ BOOST_AUTO_TEST_CASE(MultiprobeDeterministicTest)
LSHSearch<> lshTest(rdata, projections,
hashWidth, secondHashSize, bucketSize);
- // Two points near the clusters but outside their bins. q1's lowest scoring
- // perturbation vectors will map to C1 cluster's bins.
+ const arma::mat offsets = lshTest.Offsets();
+
+ // Construct q1 so it is hashed directly under C2.
arma::mat q1;
- q1 << 1.1 << arma::endr << 3.3; // vector [1.1, 3.3]
+ q1 << 3.9 << arma::endr << 2.99;
+ q1-= offsets;
+
+ // Construct q2 so it is hashed near the center of C2.
+ arma::mat q2;
+ q2 << 3.6 << arma::endr << 3.6;
+ q2-= offsets;
arma::Mat<size_t> neighbors;
arma::mat distances;
// Test that q1 simple search comes up empty.
lshTest.Search(q1, k, neighbors, distances);
- cout << neighbors << endl;
BOOST_REQUIRE(arma::all(neighbors.col(0) == N));
- // Searching with 3 additional probing bins should find neighbors.
- lshTest.Search(q1, k, neighbors, distances, 0, 3);
- cout << neighbors << endl;
- BOOST_REQUIRE(arma::all(neighbors.col(0) == N || neighbors.col(0) < 10));
+ // Test that q1 search with 1 additional probe returns some C2 points.
+ lshTest.Search(q1, k, neighbors, distances, 0, 1);
+ BOOST_REQUIRE(arma::all(
+ neighbors.col(0) == N ||
+ (neighbors.col(0) >= 10 && neighbors.col(0) < 20)));
+
+ // Test that q2 simple search returns some C2 points.
+ lshTest.Search(q2, k, neighbors, distances);
+ BOOST_REQUIRE(arma::all(
+ neighbors.col(0) == N ||
+ (neighbors.col(0) >= 10 && neighbors.col(0) < 20)));
+
+ // Test that q2 with 3 additional probes returns all C2 points.
+ lshTest.Search(q2, k, neighbors, distances, 0, 3);
+ BOOST_REQUIRE(arma::all(
+ neighbors.col(0) >= 10 && neighbors.col(0) < 20));
}
-*/
+
BOOST_AUTO_TEST_CASE(LSHTrainTest)
{
More information about the mlpack-git
mailing list