[mlpack-git] master: Test SampleInitialization. (01cf94c)
gitdub at mlpack.org
gitdub at mlpack.org
Tue Apr 12 10:43:52 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eeba6bdc50ad4d785cb6880edbaba78173036ca6...8d77f4231046703d5c0c05ed4795458f98267968
>---------------------------------------------------------------
commit 01cf94c0468321bceca03f5e8bfd024458debd4f
Author: Ryan Curtin <ryan at ratml.org>
Date: Tue Apr 12 14:42:45 2016 +0000
Test SampleInitialization.
>---------------------------------------------------------------
01cf94c0468321bceca03f5e8bfd024458debd4f
src/mlpack/tests/kmeans_test.cpp | 40 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/src/mlpack/tests/kmeans_test.cpp b/src/mlpack/tests/kmeans_test.cpp
index 1a90fa7..3089a3e 100644
--- a/src/mlpack/tests/kmeans_test.cpp
+++ b/src/mlpack/tests/kmeans_test.cpp
@@ -11,6 +11,8 @@
#include <mlpack/methods/kmeans/hamerly_kmeans.hpp>
#include <mlpack/methods/kmeans/pelleg_moore_kmeans.hpp>
#include <mlpack/methods/kmeans/dual_tree_kmeans.hpp>
+#include <mlpack/methods/kmeans/sample_initialization.hpp>
+#include <mlpack/methods/kmeans/random_partition.hpp>
#include <mlpack/core/tree/cover_tree/cover_tree.hpp>
#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
@@ -63,7 +65,9 @@ arma::mat kMeansData(" 0.0 0.0;" // Class 1.
*/
BOOST_AUTO_TEST_CASE(KMeansSimpleTest)
{
- KMeans<> kmeans;
+ // This test was originally written to use RandomPartition, and is left that
+ // way because RandomPartition gives better initializations here.
+ KMeans<EuclideanDistance, RandomPartition> kmeans;
arma::Row<size_t> assignments;
kmeans.Cluster((arma::mat) trans(kMeansData), 3, assignments);
@@ -662,4 +666,38 @@ BOOST_AUTO_TEST_CASE(DTNNCoverTreeTest)
}
}
+/**
+ * Make sure that every centroid produced by SampleInitialization matches some
+ * column of the dataset (Euclidean distance below 1e-10).
+ */
+BOOST_AUTO_TEST_CASE(SampleInitializationTest)
+{
+ arma::mat dataset = arma::randu<arma::mat>(5, 100);
+ const size_t clusters = 10;
+ arma::mat centroids;
+
+ SampleInitialization::Cluster(dataset, clusters, centroids);
+
+ // Check that the centroid matrix is 5 x 10 (dataset rows x clusters).
+ BOOST_REQUIRE_EQUAL(centroids.n_cols, 10);
+ BOOST_REQUIRE_EQUAL(centroids.n_rows, 5);
+
+ // Check that each centroid column matches some column of the dataset.
+ for (size_t i = 0; i < clusters; ++i)
+ {
+ // If the inner loop runs to completion without a break, j ends up equal to
+ // dataset.n_cols (no match found); a break on a match leaves j smaller.
+ size_t j;
+ for (j = 0; j < dataset.n_cols; ++j)
+ {
+ const double distance = metric::EuclideanDistance::Evaluate(
+ centroids.col(i), dataset.col(j));
+ if (distance < 1e-10)
+ break;
+ }
+
+ BOOST_REQUIRE_LT(j, dataset.n_cols);
+ }
+}
+
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-git
mailing list