[mlpack-git] master: Test SampleInitialization. (01cf94c)

gitdub at mlpack.org gitdub at mlpack.org
Tue Apr 12 10:43:52 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eeba6bdc50ad4d785cb6880edbaba78173036ca6...8d77f4231046703d5c0c05ed4795458f98267968

>---------------------------------------------------------------

commit 01cf94c0468321bceca03f5e8bfd024458debd4f
Author: Ryan Curtin <ryan at ratml.org>
Date:   Tue Apr 12 14:42:45 2016 +0000

    Test SampleInitialization.


>---------------------------------------------------------------

01cf94c0468321bceca03f5e8bfd024458debd4f
 src/mlpack/tests/kmeans_test.cpp | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/mlpack/tests/kmeans_test.cpp b/src/mlpack/tests/kmeans_test.cpp
index 1a90fa7..3089a3e 100644
--- a/src/mlpack/tests/kmeans_test.cpp
+++ b/src/mlpack/tests/kmeans_test.cpp
@@ -11,6 +11,8 @@
 #include <mlpack/methods/kmeans/hamerly_kmeans.hpp>
 #include <mlpack/methods/kmeans/pelleg_moore_kmeans.hpp>
 #include <mlpack/methods/kmeans/dual_tree_kmeans.hpp>
+#include <mlpack/methods/kmeans/sample_initialization.hpp>
+#include <mlpack/methods/kmeans/random_partition.hpp>
 
 #include <mlpack/core/tree/cover_tree/cover_tree.hpp>
 #include <mlpack/methods/neighbor_search/neighbor_search.hpp>
@@ -63,7 +65,9 @@ arma::mat kMeansData("  0.0   0.0;" // Class 1.
  */
 BOOST_AUTO_TEST_CASE(KMeansSimpleTest)
 {
-  KMeans<> kmeans;
+  // This test was originally written to use RandomPartition, and is left that
+  // way because RandomPartition gives better initializations here.
+  KMeans<EuclideanDistance, RandomPartition> kmeans;
 
   arma::Row<size_t> assignments;
   kmeans.Cluster((arma::mat) trans(kMeansData), 3, assignments);
@@ -662,4 +666,38 @@ BOOST_AUTO_TEST_CASE(DTNNCoverTreeTest)
   }
 }
 
+/**
+ * Make sure that the sample initialization strategy picks its initial
+ * centroids from among the points of the dataset.
+ */
+BOOST_AUTO_TEST_CASE(SampleInitializationTest)
+{
+  arma::mat dataset = arma::randu<arma::mat>(5, 100);
+  const size_t clusters = 10;
+  arma::mat centroids;
+
+  SampleInitialization::Cluster(dataset, clusters, centroids);
+
+  // One centroid per cluster, each with the dimensionality of the dataset.
+  BOOST_REQUIRE_EQUAL(centroids.n_cols, clusters);
+  BOOST_REQUIRE_EQUAL(centroids.n_rows, dataset.n_rows);
+
+  // Check that each centroid is some sample from the dataset.
+  for (size_t i = 0; i < clusters; ++i)
+  {
+    // If the search finds no match, j will be equal to dataset.n_cols;
+    // otherwise the break leaves j at the index of the matching point.
+    size_t j;
+    for (j = 0; j < dataset.n_cols; ++j)
+    {
+      const double distance = metric::EuclideanDistance::Evaluate(
+          centroids.col(i), dataset.col(j));
+      if (distance < 1e-10)
+        break;
+    }
+
+    BOOST_REQUIRE_LT(j, dataset.n_cols);
+  }
+}
+
 BOOST_AUTO_TEST_SUITE_END();




More information about the mlpack-git mailing list