[mlpack-git] master: Add more tests for SpillSearch (bff4eca)
gitdub at mlpack.org
gitdub at mlpack.org
Thu Aug 18 13:39:03 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/0f4b25acd6aaa14294c044874ba6cc0751712baa...0a19d07bd39e6223991976474bc79671ba8aa0f0
>---------------------------------------------------------------
commit bff4ecad61e2e56d43374bb3bad1e980de8803c8
Author: MarcosPividori <marcos.pividori at gmail.com>
Date: Fri Jul 29 13:54:48 2016 -0300
Add more tests for SpillSearch
>---------------------------------------------------------------
bff4ecad61e2e56d43374bb3bad1e980de8803c8
src/mlpack/tests/knn_test.cpp | 102 ++++++++++++++++++++++++++++++++++++------
1 file changed, 88 insertions(+), 14 deletions(-)
diff --git a/src/mlpack/tests/knn_test.cpp b/src/mlpack/tests/knn_test.cpp
index c82fffd..affcbb0 100644
--- a/src/mlpack/tests/knn_test.cpp
+++ b/src/mlpack/tests/knn_test.cpp
@@ -893,7 +893,7 @@ BOOST_AUTO_TEST_CASE(DualBallTreeTest)
* nodes, and backtracking in non-overlapping nodes) against the naive method.
* This uses only a random reference dataset.
*/
-BOOST_AUTO_TEST_CASE(SingleSpillTreeTest)
+BOOST_AUTO_TEST_CASE(HybridSpillSearchTest)
{
arma::mat dataset;
dataset.randu(50, 300); // 50 dimensional, 300 points.
@@ -910,26 +910,100 @@ BOOST_AUTO_TEST_CASE(SingleSpillTreeTest)
if (distancesNaive(k - 1, i) > max_dist)
max_dist = distancesNaive(k - 1, i);
- // If we are sure that tau is a valid upper bound of the kth nearest neighbor
- // of the query points, then we can be sure that we will get an exact
+ // If we are sure that tau is a valid strict upper bound of the kth nearest
+ // neighbor of the query points, then we can be sure that we will get an exact
// solution.
- SPTree<EuclideanDistance, NeighborSearchStat<NearestNeighborSort>, arma::mat>
- referenceTree(dataset, max_dist * 1.01 /* tau parameter */);
+ SpillSearch<> spTreeSearch(dataset, false, true,
+ max_dist * 1.01 /* tau parameter */);
- NeighborSearch<NearestNeighborSort, EuclideanDistance, arma::mat, SPTree>
- spTreeSearch(&referenceTree, true);
+ for (size_t mode = 0; mode < 2; mode++)
+ {
+ switch (mode)
+ {
+ case 0: // Single Tree Search.
+ spTreeSearch.Naive() = false;
+ spTreeSearch.SingleMode() = true;
+ break;
+ case 1: // Dual Tree Search.
+ spTreeSearch.Naive() = false;
+ spTreeSearch.SingleMode() = false;
+ break;
+ }
- arma::Mat<size_t> neighborsSPTree;
- arma::mat distancesSPTree;
- spTreeSearch.Search(dataset, k, neighborsSPTree, distancesSPTree);
+ arma::Mat<size_t> neighborsSPTree;
+ arma::mat distancesSPTree;
+ spTreeSearch.Search(dataset, k, neighborsSPTree, distancesSPTree);
- for (size_t i = 0; i < neighborsSPTree.n_elem; ++i)
- {
- BOOST_REQUIRE_EQUAL(neighborsSPTree(i), neighborsNaive(i));
- BOOST_REQUIRE_CLOSE(distancesSPTree(i), distancesNaive(i), 1e-5);
+ for (size_t i = 0; i < neighborsSPTree.n_elem; ++i)
+ {
+ BOOST_REQUIRE_EQUAL(neighborsSPTree(i), neighborsNaive(i));
+ BOOST_REQUIRE_CLOSE(distancesSPTree(i), distancesNaive(i), 1e-5);
+ }
}
}
+/**
+ * Test hybrid sp-tree search doesn't repeat points.
+ * This uses only a random reference dataset.
+ */
+BOOST_AUTO_TEST_CASE(DuplicatedSpillSearchTest)
+{
+ arma::mat dataset;
+ dataset.randu(50, 300); // 50 dimensional, 300 points.
+
+ const size_t k = 15;
+ double tau = 0;
+
+ for (size_t test = 0; test < 2; test++)
+ {
+ switch (test)
+ {
+ case 0:
+ tau = 0;
+ break;
+ case 1:
+ tau = 0.1;
+ break;
+ }
+
+ SpillSearch<> spTreeSearch(dataset, false, false, tau);
+ arma::Mat<size_t> neighborsSPTree;
+ arma::mat distancesSPTree;
+
+ for (size_t mode = 0; mode < 2; mode++)
+ {
+ switch (mode)
+ {
+ case 0: // Single Tree Search.
+ spTreeSearch.Naive() = false;
+ spTreeSearch.SingleMode() = true;
+ break;
+ case 1: // Dual Tree Search.
+ spTreeSearch.Naive() = false;
+ spTreeSearch.SingleMode() = false;
+ break;
+ }
+
+ spTreeSearch.Search(dataset, k, neighborsSPTree, distancesSPTree);
+
+ for (size_t i = 0; i < neighborsSPTree.n_cols; ++i)
+ {
+ // Test that at least one point was found.
+ BOOST_REQUIRE(distancesSPTree(0, i) != DBL_MAX);
+
+ for (size_t j = 0; j < neighborsSPTree.n_rows; ++j)
+ {
+ if (distancesSPTree(j, i) == DBL_MAX)
+ break;
+ // All candidates with same distances must be different points.
+ for (size_t k = j + 1; k < neighborsSPTree.n_rows &&
+ distancesSPTree(k, i) == distancesSPTree(j, i); ++k)
+ BOOST_REQUIRE(neighborsSPTree(k, i) != neighborsSPTree(j, i));
+ }
+ }
+ }
+ }
+}
/**
* Make sure sparse nearest neighbors works with kd trees.
More information about the mlpack-git
mailing list