[mlpack-git] master: Add tests for approximate Furthest Neighbor Search. (c64bdba)

gitdub at mlpack.org gitdub at mlpack.org
Wed Jun 22 14:09:07 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/a9f5622c8a14409111f2d71bf5c0f8aaa8ad4ae1...37fda23945b4f998cd5fa6ec011ae345236c8552

>---------------------------------------------------------------

commit c64bdba5ca5fe578904e6b8ebe54cc9dda1562e3
Author: MarcosPividori <marcos.pividori at gmail.com>
Date:   Mon Jun 6 13:37:32 2016 -0300

    Add tests for approximate Furthest Neighbor Search.


>---------------------------------------------------------------

c64bdba5ca5fe578904e6b8ebe54cc9dda1562e3
 src/mlpack/tests/CMakeLists.txt |   1 +
 src/mlpack/tests/akfn_test.cpp  | 241 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 242 insertions(+)

diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt
index 1d5f61b..967edee 100644
--- a/src/mlpack/tests/CMakeLists.txt
+++ b/src/mlpack/tests/CMakeLists.txt
@@ -28,6 +28,7 @@ add_executable(mlpack_test
   kernel_pca_test.cpp
   kernel_traits_test.cpp
   kfn_test.cpp
+  akfn_test.cpp
   kmeans_test.cpp
   knn_test.cpp
   krann_search_test.cpp
diff --git a/src/mlpack/tests/akfn_test.cpp b/src/mlpack/tests/akfn_test.cpp
new file mode 100644
index 0000000..59178c5
--- /dev/null
+++ b/src/mlpack/tests/akfn_test.cpp
@@ -0,0 +1,241 @@
+/**
+ * @file akfn_test.cpp
+ *
+ * Tests for KFN (k-furthest-neighbors) with different values of epsilon.
+ */
+#include <mlpack/core.hpp>
+#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
+#include <mlpack/core/tree/cover_tree.hpp>
+#include <boost/test/unit_test.hpp>
+#include "old_boost_test_definitions.hpp"
+
+using namespace mlpack;
+using namespace mlpack::neighbor;
+using namespace mlpack::tree;
+using namespace mlpack::metric;
+using namespace mlpack::bound;
+
+BOOST_AUTO_TEST_SUITE(AKFNTest);
+
+/**
+ * Test the dual-tree furthest-neighbors method with different values for
+ * epsilon. This uses both a query and reference dataset.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(DualTreeVsNaive1)
+{
+  arma::mat dataset;
+
+  if (!data::Load("test_data_3_1000.csv", dataset))
+    BOOST_FAIL("Cannot load test dataset test_data_3_1000.csv!");
+
+  KFN naive(dataset, true);
+  arma::Mat<size_t> neighborsNaive;
+  arma::mat distancesNaive;
+  naive.Search(dataset, 15, neighborsNaive, distancesNaive);
+
+  for (size_t c = 0; c < 4; c++)
+  {
+    KFN* kfn;
+    double epsilon;
+
+    switch (c)
+    {
+      case 0: // Use the dual-tree method with e=0.02.
+        epsilon = 0.02;
+        break;
+      case 1: // Use the dual-tree method with e=0.05.
+        epsilon = 0.05;
+        break;
+      case 2: // Use the dual-tree method with e=0.10.
+        epsilon = 0.10;
+        break;
+      case 3: // Use the dual-tree method with e=0.20.
+        epsilon = 0.20;
+        break;
+    }
+
+    kfn = new KFN(dataset, false, false, epsilon);
+
+    // Now perform the actual calculation.
+    arma::Mat<size_t> neighborsTree;
+    arma::mat distancesTree;
+    kfn->Search(dataset, 15, neighborsTree, distancesTree);
+
+    for (size_t i = 0; i < neighborsTree.n_elem; i++)
+      BOOST_REQUIRE_CLOSE(distancesTree(i), distancesNaive(i), epsilon * 100);
+
+    // Clean the memory.
+    delete kfn;
+  }
+}
+
+/**
+ * Test the dual-tree furthest-neighbors method with the naive method.  This
+ * uses only a reference dataset.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(DualTreeVsNaive2)
+{
+  arma::mat dataset;
+
+  if (!data::Load("test_data_3_1000.csv", dataset))
+    BOOST_FAIL("Cannot load test dataset test_data_3_1000.csv!");
+
+  KFN naive(dataset, true);
+  arma::Mat<size_t> neighborsNaive;
+  arma::mat distancesNaive;
+  naive.Search(15, neighborsNaive, distancesNaive);
+
+  KFN kfn(dataset, false, false, 0.05);
+  arma::Mat<size_t> neighborsTree;
+  arma::mat distancesTree;
+  kfn.Search(15, neighborsTree, distancesTree);
+
+  for (size_t i = 0; i < neighborsTree.n_elem; i++)
+    BOOST_REQUIRE_CLOSE(distancesTree[i], distancesNaive[i], 5);
+}
+
+/**
+ * Test the single-tree furthest-neighbors method with the naive method.  This
+ * uses only a reference dataset.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(SingleTreeVsNaive)
+{
+  arma::mat dataset;
+
+  if (!data::Load("test_data_3_1000.csv", dataset))
+    BOOST_FAIL("Cannot load test dataset test_data_3_1000.csv!");
+
+  KFN naive(dataset, true);
+  arma::Mat<size_t> neighborsNaive;
+  arma::mat distancesNaive;
+  naive.Search(15, neighborsNaive, distancesNaive);
+
+  KFN kfn(dataset, false, true, 0.05);
+  arma::Mat<size_t> neighborsTree;
+  arma::mat distancesTree;
+  kfn.Search(15, neighborsTree, distancesTree);
+
+  for (size_t i = 0; i < neighborsTree.n_elem; i++)
+    BOOST_REQUIRE_CLOSE(distancesTree[i], distancesNaive[i], 5);
+}
+
+/**
+ * Test the cover tree single-tree furthest-neighbors method against the naive
+ * method.  This uses only a random reference dataset.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(SingleCoverTreeTest)
+{
+  arma::mat data;
+  data.randu(75, 1000); // 75 dimensional, 1000 points.
+
+  KFN naive(data, true);
+  arma::Mat<size_t> naiveNeighbors;
+  arma::mat naiveDistances;
+  naive.Search(data, 15, naiveNeighbors, naiveDistances);
+
+  StandardCoverTree<EuclideanDistance, NeighborSearchStat<FurthestNeighborSort>,
+      arma::mat> tree(data);
+
+  NeighborSearch<FurthestNeighborSort, LMetric<2>, arma::mat, StandardCoverTree>
+      coverTreeSearch(&tree, true, 0.05);
+
+  arma::Mat<size_t> coverTreeNeighbors;
+  arma::mat coverTreeDistances;
+  coverTreeSearch.Search(data, 15, coverTreeNeighbors, coverTreeDistances);
+
+  for (size_t i = 0; i < coverTreeNeighbors.n_elem; ++i)
+    BOOST_REQUIRE_CLOSE(coverTreeDistances[i], naiveDistances[i], 5);
+}
+
+/**
+ * Test the cover tree dual-tree furthest neighbors method against the naive
+ * method.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(DualCoverTreeTest)
+{
+  arma::mat dataset;
+  data::Load("test_data_3_1000.csv", dataset);
+
+  KFN naive(dataset, true);
+  arma::Mat<size_t> naiveNeighbors;
+  arma::mat naiveDistances;
+  naive.Search(dataset, 15, naiveNeighbors, naiveDistances);
+
+  StandardCoverTree<EuclideanDistance, NeighborSearchStat<FurthestNeighborSort>,
+      arma::mat> referenceTree(dataset);
+
+  NeighborSearch<FurthestNeighborSort, LMetric<2>, arma::mat, StandardCoverTree>
+      coverTreeSearch(&referenceTree, false, 0.05);
+
+  arma::Mat<size_t> coverTreeNeighbors;
+  arma::mat coverTreeDistances;
+  coverTreeSearch.Search(dataset, 15, coverTreeNeighbors, coverTreeDistances);
+
+  for (size_t i = 0; i < coverTreeNeighbors.n_elem; ++i)
+    BOOST_REQUIRE_CLOSE(coverTreeDistances[i], naiveDistances[i], 5);
+}
+
+/**
+ * Test the ball tree single-tree furthest-neighbors method against the naive
+ * method.  This uses only a random reference dataset.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(SingleBallTreeTest)
+{
+  arma::mat data;
+  data.randu(75, 1000); // 75 dimensional, 1000 points.
+
+  KFN naive(data, true);
+  arma::Mat<size_t> naiveNeighbors;
+  arma::mat naiveDistances;
+  naive.Search(data, 15, naiveNeighbors, naiveDistances);
+
+  NeighborSearch<FurthestNeighborSort, EuclideanDistance, arma::mat, BallTree>
+      ballTreeSearch(data, false, true, 0.05);
+
+  arma::Mat<size_t> ballNeighbors;
+  arma::mat ballDistances;
+  ballTreeSearch.Search(data, 15, ballNeighbors, ballDistances);
+
+  for (size_t i = 0; i < ballNeighbors.n_elem; ++i)
+    BOOST_REQUIRE_CLOSE(ballDistances(i), naiveDistances(i), 5);
+}
+
+/**
+ * Test the ball tree dual-tree furthest neighbors method against the naive
+ * method.
+ *
+ * Errors are produced if the results are not according to relative error.
+ */
+BOOST_AUTO_TEST_CASE(DualBallTreeTest)
+{
+  arma::mat dataset;
+  data::Load("test_data_3_1000.csv", dataset);
+
+  KFN naive(dataset, true);
+  arma::Mat<size_t> naiveNeighbors;
+  arma::mat naiveDistances;
+  naive.Search(15, naiveNeighbors, naiveDistances);
+
+  NeighborSearch<FurthestNeighborSort, EuclideanDistance, arma::mat, BallTree>
+      ballTreeSearch(dataset, false, false, 0.05);
+  arma::Mat<size_t> ballNeighbors;
+  arma::mat ballDistances;
+  ballTreeSearch.Search(15, ballNeighbors, ballDistances);
+
+  for (size_t i = 0; i < ballNeighbors.n_elem; ++i)
+    BOOST_REQUIRE_CLOSE(ballDistances(i), naiveDistances(i), 5);
+}
+
+BOOST_AUTO_TEST_SUITE_END();




More information about the mlpack-git mailing list