[mlpack-git] master: Add tests for approximate Nearest Neighbor Search. (0f65abf)

gitdub at mlpack.org gitdub at mlpack.org
Wed Jun 22 14:09:07 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/a9f5622c8a14409111f2d71bf5c0f8aaa8ad4ae1...37fda23945b4f998cd5fa6ec011ae345236c8552

>---------------------------------------------------------------

commit 0f65abf878cf37fa0fb6e8c04bd2b85524355320
Author: MarcosPividori <marcos.pividori at gmail.com>
Date:   Mon Jun 6 13:35:52 2016 -0300

    Add tests for approximate Nearest Neighbor Search.


>---------------------------------------------------------------

0f65abf878cf37fa0fb6e8c04bd2b85524355320
 src/mlpack/tests/CMakeLists.txt |   1 +
 src/mlpack/tests/aknn_test.cpp  | 397 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 398 insertions(+)

diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt
index bd204f7..1d5f61b 100644
--- a/src/mlpack/tests/CMakeLists.txt
+++ b/src/mlpack/tests/CMakeLists.txt
@@ -31,6 +31,7 @@ add_executable(mlpack_test
   kmeans_test.cpp
   knn_test.cpp
   krann_search_test.cpp
+  aknn_test.cpp
   lars_test.cpp
   lbfgs_test.cpp
   lin_alg_test.cpp
diff --git a/src/mlpack/tests/aknn_test.cpp b/src/mlpack/tests/aknn_test.cpp
new file mode 100644
index 0000000..0fab49b
--- /dev/null
+++ b/src/mlpack/tests/aknn_test.cpp
@@ -0,0 +1,397 @@
+/**
+ * @file aknn_test.cpp
+ *
+ * Test file for KNN class with different values of epsilon.
+ */
+#include <mlpack/core.hpp>
+#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
+#include <mlpack/methods/neighbor_search/unmap.hpp>
+#include <mlpack/methods/neighbor_search/ns_model.hpp>
+#include <mlpack/core/tree/cover_tree.hpp>
+#include <mlpack/core/tree/example_tree.hpp>
+#include <boost/test/unit_test.hpp>
+#include "old_boost_test_definitions.hpp"
+
+using namespace mlpack;
+using namespace mlpack::neighbor;
+using namespace mlpack::tree;
+using namespace mlpack::metric;
+using namespace mlpack::bound;
+
+BOOST_AUTO_TEST_SUITE(AKNNTest);
+
+/**
+ * Test the dual-tree nearest-neighbors method with different values for
+ * epsilon. This uses both a query and reference dataset.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(DualTreeVsNaive1)
+{
+  arma::mat dataset;
+
+  if (!data::Load("test_data_3_1000.csv", dataset))
+    BOOST_FAIL("Cannot load test dataset test_data_3_1000.csv!");
+
+  KNN naive(dataset, true);
+  arma::Mat<size_t> neighborsNaive;
+  arma::mat distancesNaive;
+  naive.Search(dataset, 15, neighborsNaive, distancesNaive);
+
+  for (size_t c = 0; c < 4; c++)
+  {
+    KNN* knn;
+    double epsilon;
+
+    switch (c)
+    {
+      case 0: // Use the dual-tree method with e=0.02.
+        epsilon = 0.02;
+        break;
+      case 1: // Use the dual-tree method with e=0.05.
+        epsilon = 0.05;
+        break;
+      case 2: // Use the dual-tree method with e=0.10.
+        epsilon = 0.10;
+        break;
+      case 3: // Use the dual-tree method with e=0.20.
+        epsilon = 0.20;
+        break;
+    }
+
+    knn = new KNN(dataset, false, false, epsilon);
+
+    // Now perform the actual calculation.
+    arma::Mat<size_t> neighborsTree;
+    arma::mat distancesTree;
+    knn->Search(dataset, 15, neighborsTree, distancesTree);
+
+    for (size_t i = 0; i < neighborsTree.n_elem; i++)
+      BOOST_REQUIRE_CLOSE(distancesTree(i), distancesNaive(i), epsilon * 100);
+
+    // Clean the memory.
+    delete knn;
+  }
+}
+
+/**
+ * Test the dual-tree nearest-neighbors method with the naive method.  This uses
+ * only a reference dataset.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(DualTreeVsNaive2)
+{
+  arma::mat dataset;
+
+  if (!data::Load("test_data_3_1000.csv", dataset))
+    BOOST_FAIL("Cannot load test dataset test_data_3_1000.csv!");
+
+  KNN naive(dataset, true);
+  arma::Mat<size_t> neighborsNaive;
+  arma::mat distancesNaive;
+  naive.Search(15, neighborsNaive, distancesNaive);
+
+  KNN knn(dataset, false, false, 0.05);
+  arma::Mat<size_t> neighborsTree;
+  arma::mat distancesTree;
+  knn.Search(15, neighborsTree, distancesTree);
+
+  for (size_t i = 0; i < neighborsTree.n_elem; i++)
+    BOOST_REQUIRE_CLOSE(distancesTree(i), distancesNaive(i), 5);
+}
+
+/**
+ * Test the single-tree nearest-neighbors method with the naive method.  This
+ * uses only a reference dataset.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(SingleTreeVsNaive)
+{
+  arma::mat dataset;
+
+  if (!data::Load("test_data_3_1000.csv", dataset))
+    BOOST_FAIL("Cannot load test dataset test_data_3_1000.csv!");
+
+  KNN naive(dataset, true);
+  arma::Mat<size_t> neighborsNaive;
+  arma::mat distancesNaive;
+  naive.Search(15, neighborsNaive, distancesNaive);
+
+  KNN knn(dataset, false, true, 0.05);
+  arma::Mat<size_t> neighborsTree;
+  arma::mat distancesTree;
+  knn.Search(15, neighborsTree, distancesTree);
+
+  for (size_t i = 0; i < neighborsTree.n_elem; i++)
+    BOOST_REQUIRE_CLOSE(distancesTree[i], distancesNaive[i], 5);
+}
+
+/**
+ * Test the cover tree single-tree nearest-neighbors method against the naive
+ * method.  This uses only a random reference dataset.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(SingleCoverTreeTest)
+{
+  arma::mat data;
+  data.randu(75, 1000); // 75 dimensional, 1000 points.
+
+  KNN naive(data, true);
+  arma::Mat<size_t> naiveNeighbors;
+  arma::mat naiveDistances;
+  naive.Search(data, 15, naiveNeighbors, naiveDistances);
+
+  StandardCoverTree<EuclideanDistance, NeighborSearchStat<NearestNeighborSort>,
+      arma::mat> tree(data);
+
+  NeighborSearch<NearestNeighborSort, LMetric<2>, arma::mat, StandardCoverTree>
+      coverTreeSearch(&tree, true, 0.05);
+
+  arma::Mat<size_t> coverTreeNeighbors;
+  arma::mat coverTreeDistances;
+  coverTreeSearch.Search(data, 15, coverTreeNeighbors, coverTreeDistances);
+
+  for (size_t i = 0; i < coverTreeNeighbors.n_elem; ++i)
+    BOOST_REQUIRE_CLOSE(coverTreeDistances[i], naiveDistances[i], 5);
+}
+
+/**
+ * Test the cover tree dual-tree nearest neighbors method against the naive
+ * method.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(DualCoverTreeTest)
+{
+  arma::mat dataset;
+  data::Load("test_data_3_1000.csv", dataset);
+
+  KNN naive(dataset, true);
+  arma::Mat<size_t> naiveNeighbors;
+  arma::mat naiveDistances;
+  naive.Search(dataset, 15, naiveNeighbors, naiveDistances);
+
+  StandardCoverTree<EuclideanDistance, NeighborSearchStat<NearestNeighborSort>,
+      arma::mat> referenceTree(dataset);
+
+  NeighborSearch<NearestNeighborSort, EuclideanDistance, arma::mat,
+      StandardCoverTree> coverTreeSearch(&referenceTree, false, 0.05);
+
+  arma::Mat<size_t> coverNeighbors;
+  arma::mat coverDistances;
+  coverTreeSearch.Search(&referenceTree, 15, coverNeighbors, coverDistances);
+
+  for (size_t i = 0; i < coverNeighbors.n_elem; ++i)
+    BOOST_REQUIRE_CLOSE(coverDistances[i], naiveDistances[i], 5);
+}
+
+/**
+ * Test the ball tree single-tree nearest-neighbors method against the naive
+ * method.  This uses only a random reference dataset.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(SingleBallTreeTest)
+{
+  arma::mat data;
+  data.randu(50, 300); // 50 dimensional, 300 points.
+
+  KNN naive(data, true);
+  arma::Mat<size_t> naiveNeighbors;
+  arma::mat naiveDistances;
+  naive.Search(data, 15, naiveNeighbors, naiveDistances);
+
+  NeighborSearch<NearestNeighborSort, EuclideanDistance, arma::mat, BallTree>
+      ballTreeSearch(data, false, true, 0.05);
+
+  arma::Mat<size_t> ballNeighbors;
+  arma::mat ballDistances;
+  ballTreeSearch.Search(data, 15, ballNeighbors, ballDistances);
+
+  for (size_t i = 0; i < ballNeighbors.n_elem; ++i)
+    BOOST_REQUIRE_CLOSE(ballDistances(i), naiveDistances(i), 5);
+}
+
+/**
+ * Test the ball tree dual-tree nearest neighbors method against the naive
+ * method.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(DualBallTreeTest)
+{
+  arma::mat dataset;
+  data::Load("test_data_3_1000.csv", dataset);
+
+  KNN naive(dataset, true);
+  arma::Mat<size_t> naiveNeighbors;
+  arma::mat naiveDistances;
+  naive.Search(15, naiveNeighbors, naiveDistances);
+
+  NeighborSearch<NearestNeighborSort, EuclideanDistance, arma::mat, BallTree>
+      ballTreeSearch(dataset, false, false, 0.05);
+  arma::Mat<size_t> ballNeighbors;
+  arma::mat ballDistances;
+  ballTreeSearch.Search(15, ballNeighbors, ballDistances);
+
+  for (size_t i = 0; i < ballNeighbors.n_elem; ++i)
+    BOOST_REQUIRE_CLOSE(ballDistances(i), naiveDistances(i), 5);
+}
+
+// Make sure sparse nearest neighbors works with kd trees.
+BOOST_AUTO_TEST_CASE(SparseKNNKDTreeTest)
+{
+  // The dimensionality of these datasets must be high so that the probability
+  // of a completely empty point is very low.  In this case, with dimensionality
+  // 70, the probability of all 70 dimensions being zero is 0.8^70 = 1.65e-7 in
+  // the reference set and 0.9^70 = 6.27e-4 in the query set.
+  arma::sp_mat queryDataset;
+  queryDataset.sprandu(70, 200, 0.2);
+  arma::sp_mat referenceDataset;
+  referenceDataset.sprandu(70, 500, 0.1);
+  arma::mat denseQuery(queryDataset);
+  arma::mat denseReference(referenceDataset);
+
+  typedef NeighborSearch<NearestNeighborSort, EuclideanDistance, arma::sp_mat,
+      KDTree> SparseKNN;
+
+  SparseKNN a(referenceDataset, false, false, 0.05);
+  KNN naive(denseReference, true);
+
+  arma::mat sparseDistances;
+  arma::Mat<size_t> sparseNeighbors;
+  a.Search(queryDataset, 10, sparseNeighbors, sparseDistances);
+
+  arma::mat naiveDistances;
+  arma::Mat<size_t> naiveNeighbors;
+  naive.Search(denseQuery, 10, naiveNeighbors, naiveDistances);
+
+  for (size_t i = 0; i < naiveNeighbors.n_cols; ++i)
+    for (size_t j = 0; j < naiveNeighbors.n_rows; ++j)
+      BOOST_REQUIRE_CLOSE(naiveDistances(j, i), sparseDistances(j, i), 5);
+}
+
+// Ensure that we can build an NSModel<NearestNeighborSearch> and get correct
+// results.
+BOOST_AUTO_TEST_CASE(KNNModelTest)
+{
+  typedef NSModel<NearestNeighborSort> KNNModel;
+
+  arma::mat queryData = arma::randu<arma::mat>(10, 50);
+  arma::mat referenceData = arma::randu<arma::mat>(10, 200);
+
+  // Build all the possible models.
+  KNNModel models[12];
+  models[0] = KNNModel(KNNModel::TreeTypes::KD_TREE, true);
+  models[1] = KNNModel(KNNModel::TreeTypes::KD_TREE, false);
+  models[2] = KNNModel(KNNModel::TreeTypes::COVER_TREE, true);
+  models[3] = KNNModel(KNNModel::TreeTypes::COVER_TREE, false);
+  models[4] = KNNModel(KNNModel::TreeTypes::R_TREE, true);
+  models[5] = KNNModel(KNNModel::TreeTypes::R_TREE, false);
+  models[6] = KNNModel(KNNModel::TreeTypes::R_STAR_TREE, true);
+  models[7] = KNNModel(KNNModel::TreeTypes::R_STAR_TREE, false);
+  models[8] = KNNModel(KNNModel::TreeTypes::X_TREE, true);
+  models[9] = KNNModel(KNNModel::TreeTypes::X_TREE, false);
+  models[10] = KNNModel(KNNModel::TreeTypes::BALL_TREE, true);
+  models[11] = KNNModel(KNNModel::TreeTypes::BALL_TREE, false);
+
+  for (size_t j = 0; j < 3; ++j)
+  {
+    // Get a baseline.
+    KNN knn(referenceData);
+    arma::Mat<size_t> baselineNeighbors;
+    arma::mat baselineDistances;
+    knn.Search(queryData, 3, baselineNeighbors, baselineDistances);
+
+    for (size_t i = 0; i < 12; ++i)
+    {
+      // We only have std::move() constructors so make a copy of our data.
+      arma::mat referenceCopy(referenceData);
+      arma::mat queryCopy(queryData);
+      if (j == 0)
+        models[i].BuildModel(std::move(referenceCopy), 20, false, false, 0.05);
+      if (j == 1)
+        models[i].BuildModel(std::move(referenceCopy), 20, false, true, 0.05);
+      if (j == 2)
+        models[i].BuildModel(std::move(referenceCopy), 20, true, false);
+
+      arma::Mat<size_t> neighbors;
+      arma::mat distances;
+
+      models[i].Search(std::move(queryCopy), 3, neighbors, distances);
+
+      BOOST_REQUIRE_EQUAL(neighbors.n_rows, baselineNeighbors.n_rows);
+      BOOST_REQUIRE_EQUAL(neighbors.n_cols, baselineNeighbors.n_cols);
+      BOOST_REQUIRE_EQUAL(neighbors.n_elem, baselineNeighbors.n_elem);
+      BOOST_REQUIRE_EQUAL(distances.n_rows, baselineDistances.n_rows);
+      BOOST_REQUIRE_EQUAL(distances.n_cols, baselineDistances.n_cols);
+      BOOST_REQUIRE_EQUAL(distances.n_elem, baselineDistances.n_elem);
+      for (size_t k = 0; k < distances.n_elem; ++k)
+        BOOST_REQUIRE_CLOSE(distances[k], baselineDistances[k], 5);
+    }
+  }
+}
+
+// Ensure that we can build an NSModel<NearestNeighborSearch> and get correct
+// results, in the case where the reference set is the same as the query set.
+BOOST_AUTO_TEST_CASE(KNNModelMonochromaticTest)
+{
+  typedef NSModel<NearestNeighborSort> KNNModel;
+
+  arma::mat referenceData = arma::randu<arma::mat>(10, 200);
+
+  // Build all the possible models.
+  KNNModel models[12];
+  models[0] = KNNModel(KNNModel::TreeTypes::KD_TREE, true);
+  models[1] = KNNModel(KNNModel::TreeTypes::KD_TREE, false);
+  models[2] = KNNModel(KNNModel::TreeTypes::COVER_TREE, true);
+  models[3] = KNNModel(KNNModel::TreeTypes::COVER_TREE, false);
+  models[4] = KNNModel(KNNModel::TreeTypes::R_TREE, true);
+  models[5] = KNNModel(KNNModel::TreeTypes::R_TREE, false);
+  models[6] = KNNModel(KNNModel::TreeTypes::R_STAR_TREE, true);
+  models[7] = KNNModel(KNNModel::TreeTypes::R_STAR_TREE, false);
+  models[8] = KNNModel(KNNModel::TreeTypes::X_TREE, true);
+  models[9] = KNNModel(KNNModel::TreeTypes::X_TREE, false);
+  models[10] = KNNModel(KNNModel::TreeTypes::BALL_TREE, true);
+  models[11] = KNNModel(KNNModel::TreeTypes::BALL_TREE, false);
+
+  for (size_t j = 0; j < 3; ++j)
+  {
+    // Get a baseline.
+    KNN knn(referenceData);
+    arma::Mat<size_t> baselineNeighbors;
+    arma::mat baselineDistances;
+    knn.Search(3, baselineNeighbors, baselineDistances);
+
+    for (size_t i = 0; i < 12; ++i)
+    {
+      // We only have a std::move() constructor... so copy the data.
+      arma::mat referenceCopy(referenceData);
+      if (j == 0)
+        models[i].BuildModel(std::move(referenceCopy), 20, false, false, 0.05);
+      if (j == 1)
+        models[i].BuildModel(std::move(referenceCopy), 20, false, true, 0.05);
+      if (j == 2)
+        models[i].BuildModel(std::move(referenceCopy), 20, true, false);
+
+      arma::Mat<size_t> neighbors;
+      arma::mat distances;
+
+      models[i].Search(3, neighbors, distances);
+
+      BOOST_REQUIRE_EQUAL(neighbors.n_rows, baselineNeighbors.n_rows);
+      BOOST_REQUIRE_EQUAL(neighbors.n_cols, baselineNeighbors.n_cols);
+      BOOST_REQUIRE_EQUAL(neighbors.n_elem, baselineNeighbors.n_elem);
+      BOOST_REQUIRE_EQUAL(distances.n_rows, baselineDistances.n_rows);
+      BOOST_REQUIRE_EQUAL(distances.n_cols, baselineDistances.n_cols);
+      BOOST_REQUIRE_EQUAL(distances.n_elem, baselineDistances.n_elem);
+      for (size_t k = 0; k < distances.n_elem; ++k)
+        BOOST_REQUIRE_CLOSE(distances[k], baselineDistances[k], 5);
+    }
+  }
+}
+
+BOOST_AUTO_TEST_SUITE_END();




More information about the mlpack-git mailing list