[mlpack-svn] r10306 - in mlpack/trunk/src/mlpack: methods/gmm tests
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Nov 16 17:54:12 EST 2011
Author: rcurtin
Date: 2011-11-16 17:54:11 -0500 (Wed, 16 Nov 2011)
New Revision: 10306
Added:
mlpack/trunk/src/mlpack/tests/gmm_test.cpp
Removed:
mlpack/trunk/src/mlpack/methods/gmm/gmm_test.cpp
Modified:
mlpack/trunk/src/mlpack/tests/CMakeLists.txt
Log:
Move GMM test to where all the other tests are.
Deleted: mlpack/trunk/src/mlpack/methods/gmm/gmm_test.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/gmm/gmm_test.cpp 2011-11-16 22:53:40 UTC (rev 10305)
+++ mlpack/trunk/src/mlpack/methods/gmm/gmm_test.cpp 2011-11-16 22:54:11 UTC (rev 10306)
@@ -1,249 +0,0 @@
-/**
- * @file gmm_test.cpp
- * @author Ryan Curtin
- *
- * Test for the Gaussian Mixture Model class.
- */
-#include <mlpack/core.h>
-
-#include "mog_em.hpp"
-#include "mog_l2e.hpp"
-#include "phi.hpp"
-
-#define BOOST_TEST_MODULE GMMTest
-#include <boost/test/unit_test.hpp>
-
-using namespace mlpack;
-using namespace mlpack::gmm;
-
-/**
- * Test the phi() function, in the univariate Gaussian case.
- */
-BOOST_AUTO_TEST_CASE(UnivariatePhiTest)
-{
- // Simple case.
- BOOST_REQUIRE_CLOSE(phi(0.0, 0.0, 1.0), 0.398942280401433, 1e-5);
-
- // A few more cases...
- BOOST_REQUIRE_CLOSE(phi(0.0, 0.0, 2.0), 0.282094791773878, 1e-5);
-
- BOOST_REQUIRE_CLOSE(phi(1.0, 0.0, 1.0), 0.241970724519143, 1e-5);
- BOOST_REQUIRE_CLOSE(phi(-1.0, 0.0, 1.0), 0.241970724519143, 1e-5);
-
- BOOST_REQUIRE_CLOSE(phi(1.0, 0.0, 2.0), 0.219695644733861, 1e-5);
- BOOST_REQUIRE_CLOSE(phi(-1.0, 0.0, 2.0), 0.219695644733861, 1e-5);
-
- BOOST_REQUIRE_CLOSE(phi(1.0, 1.0, 1.0), 0.398942280401433, 1e-5);
-
- BOOST_REQUIRE_CLOSE(phi(-1.0, 1.0, 2.0), 0.103776874355149, 1e-5);
-}
-
-/**
- * Test the phi() function, in the multivariate Gaussian case.
- */
-BOOST_AUTO_TEST_CASE(MultivariatePhiTest)
-{
- // Simple case.
- arma::vec mean = "0 0";
- arma::mat cov = "1 0; 0 1";
- arma::vec x = "0 0";
-
- BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.159154943091895, 1e-5);
-
- cov = "2 0; 0 2";
-
- BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.0795774715459477, 1e-5);
-
- x = "1 1";
-
- BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.0482661763150270, 1e-5);
- BOOST_REQUIRE_CLOSE(phi(-x, mean, cov), 0.0482661763150270, 1e-5);
-
- mean = "1 1";
-
- BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.0795774715459477, 1e-5);
- BOOST_REQUIRE_CLOSE(phi(-x, -mean, cov), 0.0795774715459477, 1e-5);
-
- cov = "2 1.5; 1 4";
-
- BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.0624257046546403, 1e-5);
- BOOST_REQUIRE_CLOSE(phi(-x, -mean, cov), 0.0624257046546403, 1e-5);
-
- x = "-1 4";
-
- BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.00144014867515135, 1e-5);
- BOOST_REQUIRE_CLOSE(phi(-x, mean, cov), 0.00133352162064845, 1e-5);
-
- // Higher-dimensional case.
- x = "0 1 2 3 4";
- mean = "5 6 3 3 2";
- cov = "6 1 1 0 2;"
- "0 7 1 0 1;"
- "1 1 4 1 1;"
- "1 0 1 7 0;"
- "2 0 1 1 6";
-
- BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 1.02531207499358e-6, 1e-5);
- BOOST_REQUIRE_CLOSE(phi(-x, -mean, cov), 1.02531207499358e-6, 1e-5);
- BOOST_REQUIRE_CLOSE(phi(x, -mean, cov), 1.06784794079363e-8, 1e-5);
- BOOST_REQUIRE_CLOSE(phi(-x, mean, cov), 1.06784794079363e-8, 1e-5);
-}
-
-/**
- * Test training a model on only one Gaussian (randomly generated) in two
- * dimensions. We will vary the dataset size from small to large. The EM
- * algorithm is used for training the GMM.
- */
-BOOST_AUTO_TEST_CASE(GMMTrainEMOneGaussian)
-{
- // Initialize random seed.
- srand(time(NULL));
-
- for (size_t iterations = 0; iterations < 10; iterations++)
- {
- // Determine random covariance and mean.
- arma::vec mean;
- mean.randu(2);
- arma::vec covar;
- covar.randu(2);
-
- arma::mat data;
- data.randn(2 /* dimension */, 100 * pow(10, (iterations / 3.0)));
-
- // Now apply mean and covariance.
- data.row(0) *= covar(0);
- data.row(1) *= covar(1);
-
- data.row(0) += mean(0);
- data.row(1) += mean(1);
-
- // Now, train the model.
- MoGEM gmm(1, 2);
- gmm.ExpectationMaximization(data);
-
- arma::vec actual_mean = arma::mean(data, 1);
- arma::mat actual_covar = ccov(data, 1 /* biased estimator */);
-
- // Check the model to see that it is correct.
- BOOST_REQUIRE_CLOSE((gmm.Means()[0])[0], actual_mean(0), 1e-5);
- BOOST_REQUIRE_CLOSE((gmm.Means()[0])[1], actual_mean(1), 1e-5);
-
- BOOST_REQUIRE_CLOSE((gmm.Covariances()[0])(0, 0), actual_covar(0, 0), 1e-5);
- BOOST_REQUIRE_CLOSE((gmm.Covariances()[0])(0, 1), actual_covar(0, 1), 1e-5);
- BOOST_REQUIRE_CLOSE((gmm.Covariances()[0])(1, 0), actual_covar(1, 0), 1e-5);
- BOOST_REQUIRE_CLOSE((gmm.Covariances()[0])(1, 1), actual_covar(1, 1), 1e-5);
-
- BOOST_REQUIRE_CLOSE(gmm.Weights()[0], 1.0, 1e-5);
- }
-}
-
-/**
- * Test training a model on multiple Gaussians in higher dimensionality than
- * two. We will hold the dataset size constant at 1k points. The EM algorithm
- * is used for training the GMM.
- */
-BOOST_AUTO_TEST_CASE(GMMTrainEMMultipleGaussians)
-{
- // Initialize random seed... just in case.
- srand(time(NULL));
-
- for (size_t iterations = 1; iterations < 2; iterations++)
- {
- Log::Warn << "Iteration " << iterations << std::endl;
-
- // Choose dimension based on iteration number.
- int dims = iterations + 2; // Between 2 and 11 dimensions.
- int gaussians = 2 * (iterations + 1); // Between 2 and 20 Gaussians.
-
- // Generate dataset.
- arma::mat data;
- data.zeros(dims, 1000); // Constant 1k points.
-
- std::vector<arma::vec> means(gaussians);
- std::vector<arma::mat> covars(gaussians);
- arma::vec weights(gaussians);
- arma::Col<size_t> counts(gaussians);
-
- // Choose weights randomly.
- weights.randu(gaussians);
- weights /= accu(weights);
- for (size_t i = 0; i < gaussians; i++)
- counts[i] = round(weights[i] * (data.n_cols - gaussians));
- // Ensure one point minimum in each.
- counts += 1;
-
- // Account for rounding errors (possibly necessary).
- counts[gaussians - 1] += (data.n_cols - arma::accu(counts));
-
- // Build each Gaussian individually.
- size_t point = 0;
- for (int i = 0; i < gaussians; i++)
- {
- arma::mat gaussian;
- gaussian.randn(dims, counts[i]);
-
- // Randomly generate mean and covariance.
- means[i].randu(dims);
- means[i] += 50 * i;
- covars[i].randu(dims, dims);
-
- data.cols(point, point + counts[i] - 1) = (covars[i] * gaussian + means[i]
- * arma::ones<arma::rowvec>(counts[i]));
-
- // Calculate the actual means and covariances because they will probably
- // be different (this is easier to do before we shuffle the points).
- means[i] = arma::mean(data.cols(point, point + counts[i] - 1), 1);
- covars[i] = ccov(data.cols(point, point + counts[i] - 1), 1 /* biased */);
-
- point += counts[i];
- }
-
- // Calculate actual weights.
- for (size_t i = 0; i < gaussians; i++)
- weights[i] = (double) counts[i] / data.n_cols;
-
- // Now train the model.
- MoGEM gmm(gaussians, dims);
- gmm.ExpectationMaximization(data);
-
- Log::Warn << "Actual weights: " << std::endl << weights << std::endl;
- Log::Warn << "Estimated weights: " << std::endl << gmm.Weights()
- << std::endl;
-
- arma::uvec sort_ref = sort_index(weights);
- arma::uvec sort_try = sort_index(gmm.Weights());
-
- for (int i = 0; i < gaussians; i++)
- {
- Log::Warn << "Actual mean " << i << ":" << std::endl;
- Log::Warn << means[sort_ref[i]] << std::endl;
- Log::Warn << "Actual covariance " << i << ":" << std::endl;
- Log::Warn << covars[sort_ref[i]] << std::endl;
-
- Log::Warn << "Estimated mean " << i << ":" << std::endl;
- Log::Warn << gmm.Means()[sort_try[i]] << std::endl;
- Log::Warn << "Estimated covariance" << i << ":" << std::endl;
- Log::Warn << gmm.Covariances()[sort_try[i]] << std::endl;
- }
-
- // Check the model to see that it is correct.
-
- for (int i = 0; i < gaussians; i++)
- {
- // Check the mean.
- for (int j = 0; j < dims; j++)
- BOOST_REQUIRE_CLOSE((gmm.Means()[sort_try[i]])[j],
- (means[sort_ref[i]])[j], 1e-5);
-
- // Check the covariance.
- for (int row = 0; row < dims; row++)
- for (int col = 0; col < dims; col++)
- BOOST_REQUIRE_CLOSE((gmm.Covariances()[sort_try[i]])(row, col),
- (covars[sort_ref[i]])(row, col), 1e-5);
-
- // Check the weight.
- BOOST_REQUIRE_CLOSE(gmm.Weights()[sort_try[i]], weights[sort_ref[i]],
- 1e-5);
- }
- }
-}
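
For reference, the constants asserted in UnivariatePhiTest and
MultivariatePhiTest above follow directly from the Gaussian density;
judging from the values, the third argument of the univariate phi() is the
variance, not the standard deviation. A worked check of the first assertion
in each test:

    \phi(x; \mu, \sigma^2) = \frac{1}{\sqrt{2\pi\sigma^2}}
        \exp\left(-\frac{(x - \mu)^2}{2\sigma^2}\right),
    \qquad \phi(0; 0, 1) = \frac{1}{\sqrt{2\pi}} \approx 0.398942280401433

    \phi(\mathbf{x}; \boldsymbol{\mu}, \Sigma) =
        \frac{\exp\left(-\frac{1}{2}(\mathbf{x} - \boldsymbol{\mu})^\top
              \Sigma^{-1}(\mathbf{x} - \boldsymbol{\mu})\right)}
             {(2\pi)^{d/2}\,|\Sigma|^{1/2}},
    \qquad \phi(\mathbf{0}; \mathbf{0}, I_2) = \frac{1}{2\pi}
        \approx 0.159154943091895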
Modified: mlpack/trunk/src/mlpack/tests/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/tests/CMakeLists.txt 2011-11-16 22:53:40 UTC (rev 10305)
+++ mlpack/trunk/src/mlpack/tests/CMakeLists.txt 2011-11-16 22:54:11 UTC (rev 10306)
@@ -17,6 +17,7 @@
cli_test.cpp
distribution_test.cpp
emst_test.cpp
+ #gmm_test.cpp
hmm_test.cpp
infomax_ica_test.cpp
kernel_test.cpp
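
Note that the new entry above is commented out (#gmm_test.cpp), so the moved
test is not yet part of the build. A plausible reason, judging from the file
itself: gmm_test.cpp still uses quoted includes relative to its old directory
("mog_em.hpp" and friends), which will not resolve from mlpack/tests/. A
minimal sketch of the adjustment that would be needed, assuming src/ is an
include root as it already is for <mlpack/core.h> (the exact paths here are
an assumption, not part of this commit):

    // Hypothetical includes for mlpack/tests/gmm_test.cpp after the move;
    // assumes the headers stay in methods/gmm and src/ is an include root.
    #include <mlpack/methods/gmm/mog_em.hpp>   // was: #include "mog_em.hpp"
    #include <mlpack/methods/gmm/mog_l2e.hpp>  // was: #include "mog_l2e.hpp"
    #include <mlpack/methods/gmm/phi.hpp>      // was: #include "phi.hpp"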
Copied: mlpack/trunk/src/mlpack/tests/gmm_test.cpp (from rev 10304, mlpack/trunk/src/mlpack/methods/gmm/gmm_test.cpp)
===================================================================
--- mlpack/trunk/src/mlpack/tests/gmm_test.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/tests/gmm_test.cpp 2011-11-16 22:54:11 UTC (rev 10306)
@@ -0,0 +1,249 @@
+/**
+ * @file gmm_test.cpp
+ * @author Ryan Curtin
+ *
+ * Test for the Gaussian Mixture Model class.
+ */
+#include <mlpack/core.h>
+
+#include "mog_em.hpp"
+#include "mog_l2e.hpp"
+#include "phi.hpp"
+
+#define BOOST_TEST_MODULE GMMTest
+#include <boost/test/unit_test.hpp>
+
+using namespace mlpack;
+using namespace mlpack::gmm;
+
+/**
+ * Test the phi() function, in the univariate Gaussian case.
+ */
+BOOST_AUTO_TEST_CASE(UnivariatePhiTest)
+{
+ // Simple case.
+ BOOST_REQUIRE_CLOSE(phi(0.0, 0.0, 1.0), 0.398942280401433, 1e-5);
+
+ // A few more cases...
+ BOOST_REQUIRE_CLOSE(phi(0.0, 0.0, 2.0), 0.282094791773878, 1e-5);
+
+ BOOST_REQUIRE_CLOSE(phi(1.0, 0.0, 1.0), 0.241970724519143, 1e-5);
+ BOOST_REQUIRE_CLOSE(phi(-1.0, 0.0, 1.0), 0.241970724519143, 1e-5);
+
+ BOOST_REQUIRE_CLOSE(phi(1.0, 0.0, 2.0), 0.219695644733861, 1e-5);
+ BOOST_REQUIRE_CLOSE(phi(-1.0, 0.0, 2.0), 0.219695644733861, 1e-5);
+
+ BOOST_REQUIRE_CLOSE(phi(1.0, 1.0, 1.0), 0.398942280401433, 1e-5);
+
+ BOOST_REQUIRE_CLOSE(phi(-1.0, 1.0, 2.0), 0.103776874355149, 1e-5);
+}
+
+/**
+ * Test the phi() function, in the multivariate Gaussian case.
+ */
+BOOST_AUTO_TEST_CASE(MultivariatePhiTest)
+{
+ // Simple case.
+ arma::vec mean = "0 0";
+ arma::mat cov = "1 0; 0 1";
+ arma::vec x = "0 0";
+
+ BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.159154943091895, 1e-5);
+
+ cov = "2 0; 0 2";
+
+ BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.0795774715459477, 1e-5);
+
+ x = "1 1";
+
+ BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.0482661763150270, 1e-5);
+ BOOST_REQUIRE_CLOSE(phi(-x, mean, cov), 0.0482661763150270, 1e-5);
+
+ mean = "1 1";
+
+ BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.0795774715459477, 1e-5);
+ BOOST_REQUIRE_CLOSE(phi(-x, -mean, cov), 0.0795774715459477, 1e-5);
+
+ cov = "2 1.5; 1 4";
+
+ BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.0624257046546403, 1e-5);
+ BOOST_REQUIRE_CLOSE(phi(-x, -mean, cov), 0.0624257046546403, 1e-5);
+
+ x = "-1 4";
+
+ BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 0.00144014867515135, 1e-5);
+ BOOST_REQUIRE_CLOSE(phi(-x, mean, cov), 0.00133352162064845, 1e-5);
+
+ // Higher-dimensional case.
+ x = "0 1 2 3 4";
+ mean = "5 6 3 3 2";
+ cov = "6 1 1 0 2;"
+ "0 7 1 0 1;"
+ "1 1 4 1 1;"
+ "1 0 1 7 0;"
+ "2 0 1 1 6";
+
+ BOOST_REQUIRE_CLOSE(phi(x, mean, cov), 1.02531207499358e-6, 1e-5);
+ BOOST_REQUIRE_CLOSE(phi(-x, -mean, cov), 1.02531207499358e-6, 1e-5);
+ BOOST_REQUIRE_CLOSE(phi(x, -mean, cov), 1.06784794079363e-8, 1e-5);
+ BOOST_REQUIRE_CLOSE(phi(-x, mean, cov), 1.06784794079363e-8, 1e-5);
+}
+
+/**
+ * Test training a model on only one Gaussian (randomly generated) in two
+ * dimensions. We will vary the dataset size from small to large. The EM
+ * algorithm is used for training the GMM.
+ */
+BOOST_AUTO_TEST_CASE(GMMTrainEMOneGaussian)
+{
+ // Initialize random seed.
+ srand(time(NULL));
+
+ for (size_t iterations = 0; iterations < 10; iterations++)
+ {
+ // Determine random covariance and mean.
+ arma::vec mean;
+ mean.randu(2);
+ arma::vec covar;
+ covar.randu(2);
+
+ arma::mat data;
+ data.randn(2 /* dimension */, 100 * pow(10, (iterations / 3.0)));
+
+ // Now apply mean and covariance.
+ data.row(0) *= covar(0);
+ data.row(1) *= covar(1);
+
+ data.row(0) += mean(0);
+ data.row(1) += mean(1);
+
+ // Now, train the model.
+ MoGEM gmm(1, 2);
+ gmm.ExpectationMaximization(data);
+
+ arma::vec actual_mean = arma::mean(data, 1);
+ arma::mat actual_covar = ccov(data, 1 /* biased estimator */);
+
+ // Check the model to see that it is correct.
+ BOOST_REQUIRE_CLOSE((gmm.Means()[0])[0], actual_mean(0), 1e-5);
+ BOOST_REQUIRE_CLOSE((gmm.Means()[0])[1], actual_mean(1), 1e-5);
+
+ BOOST_REQUIRE_CLOSE((gmm.Covariances()[0])(0, 0), actual_covar(0, 0), 1e-5);
+ BOOST_REQUIRE_CLOSE((gmm.Covariances()[0])(0, 1), actual_covar(0, 1), 1e-5);
+ BOOST_REQUIRE_CLOSE((gmm.Covariances()[0])(1, 0), actual_covar(1, 0), 1e-5);
+ BOOST_REQUIRE_CLOSE((gmm.Covariances()[0])(1, 1), actual_covar(1, 1), 1e-5);
+
+ BOOST_REQUIRE_CLOSE(gmm.Weights()[0], 1.0, 1e-5);
+ }
+}
+
+/**
+ * Test training a model on multiple Gaussians in higher dimensionality than
+ * two. We will hold the dataset size constant at 1k points. The EM algorithm
+ * is used for training the GMM.
+ */
+BOOST_AUTO_TEST_CASE(GMMTrainEMMultipleGaussians)
+{
+ // Initialize random seed... just in case.
+ srand(time(NULL));
+
+ for (size_t iterations = 1; iterations < 2; iterations++)
+ {
+ Log::Warn << "Iteration " << iterations << std::endl;
+
+ // Choose dimension based on iteration number.
+ int dims = iterations + 2; // Between 2 and 11 dimensions.
+ int gaussians = 2 * (iterations + 1); // Between 2 and 20 Gaussians.
+
+ // Generate dataset.
+ arma::mat data;
+ data.zeros(dims, 1000); // Constant 1k points.
+
+ std::vector<arma::vec> means(gaussians);
+ std::vector<arma::mat> covars(gaussians);
+ arma::vec weights(gaussians);
+ arma::Col<size_t> counts(gaussians);
+
+ // Choose weights randomly.
+ weights.randu(gaussians);
+ weights /= accu(weights);
+ for (size_t i = 0; i < gaussians; i++)
+ counts[i] = round(weights[i] * (data.n_cols - gaussians));
+ // Ensure one point minimum in each.
+ counts += 1;
+
+ // Account for rounding errors (possibly necessary).
+ counts[gaussians - 1] += (data.n_cols - arma::accu(counts));
+
+ // Build each Gaussian individually.
+ size_t point = 0;
+ for (int i = 0; i < gaussians; i++)
+ {
+ arma::mat gaussian;
+ gaussian.randn(dims, counts[i]);
+
+ // Randomly generate mean and covariance.
+ means[i].randu(dims);
+ means[i] += 50 * i;
+ covars[i].randu(dims, dims);
+
+ data.cols(point, point + counts[i] - 1) = (covars[i] * gaussian + means[i]
+ * arma::ones<arma::rowvec>(counts[i]));
+
+ // Calculate the actual means and covariances because they will probably
+ // be different (this is easier to do before we shuffle the points).
+ means[i] = arma::mean(data.cols(point, point + counts[i] - 1), 1);
+ covars[i] = ccov(data.cols(point, point + counts[i] - 1), 1 /* biased */);
+
+ point += counts[i];
+ }
+
+ // Calculate actual weights.
+ for (size_t i = 0; i < gaussians; i++)
+ weights[i] = (double) counts[i] / data.n_cols;
+
+ // Now train the model.
+ MoGEM gmm(gaussians, dims);
+ gmm.ExpectationMaximization(data);
+
+ Log::Warn << "Actual weights: " << std::endl << weights << std::endl;
+ Log::Warn << "Estimated weights: " << std::endl << gmm.Weights()
+ << std::endl;
+
+ arma::uvec sort_ref = sort_index(weights);
+ arma::uvec sort_try = sort_index(gmm.Weights());
+
+ for (int i = 0; i < gaussians; i++)
+ {
+ Log::Warn << "Actual mean " << i << ":" << std::endl;
+ Log::Warn << means[sort_ref[i]] << std::endl;
+ Log::Warn << "Actual covariance " << i << ":" << std::endl;
+ Log::Warn << covars[sort_ref[i]] << std::endl;
+
+ Log::Warn << "Estimated mean " << i << ":" << std::endl;
+ Log::Warn << gmm.Means()[sort_try[i]] << std::endl;
+ Log::Warn << "Estimated covariance" << i << ":" << std::endl;
+ Log::Warn << gmm.Covariances()[sort_try[i]] << std::endl;
+ }
+
+ // Check the model to see that it is correct.
+
+ for (int i = 0; i < gaussians; i++)
+ {
+ // Check the mean.
+ for (int j = 0; j < dims; j++)
+ BOOST_REQUIRE_CLOSE((gmm.Means()[sort_try[i]])[j],
+ (means[sort_ref[i]])[j], 1e-5);
+
+ // Check the covariance.
+ for (int row = 0; row < dims; row++)
+ for (int col = 0; col < dims; col++)
+ BOOST_REQUIRE_CLOSE((gmm.Covariances()[sort_try[i]])(row, col),
+ (covars[sort_ref[i]])(row, col), 1e-5);
+
+ // Check the weight.
+ BOOST_REQUIRE_CLOSE(gmm.Weights()[sort_try[i]], weights[sort_ref[i]],
+ 1e-5);
+ }
+ }
+}
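
The multivariate constants in the test can be cross-checked outside mlpack
with a few lines of Armadillo. Below is a standalone editorial sketch of the
standard multivariate normal density, not mlpack's phi() implementation.
(Note, too, that the 2x2 matrix "2 1.5; 1 4" used midway through the test is
not symmetric, so it is not a valid covariance matrix in the strict sense;
the expected values were evidently generated with it as written.)

    // Standalone cross-check of the MultivariatePhiTest constants using the
    // standard multivariate normal density.  Requires only Armadillo.
    #include <armadillo>
    #include <cmath>
    #include <iostream>

    // N(x; mean, cov) = (2*pi)^(-d/2) * det(cov)^(-1/2)
    //                   * exp(-0.5 * (x - mean)' * inv(cov) * (x - mean))
    double gaussianDensity(const arma::vec& x,
                           const arma::vec& mean,
                           const arma::mat& cov)
    {
      const double d = x.n_elem;
      const arma::vec diff = x - mean;
      const double exponent =
          -0.5 * arma::as_scalar(diff.t() * arma::inv(cov) * diff);
      return std::pow(2.0 * arma::datum::pi, -d / 2.0)
          * std::pow(arma::det(cov), -0.5)
          * std::exp(exponent);
    }

    int main()
    {
      arma::vec x = "0 0";
      arma::vec mean = "0 0";
      arma::mat cov = "1 0; 0 1";

      // Expect ~0.159154943091895 (= 1 / (2*pi)); first assertion above.
      std::cout << gaussianDensity(x, mean, cov) << std::endl;

      cov = "2 0; 0 2";
      // Expect ~0.0795774715459477 (= 1 / (4*pi)).
      std::cout << gaussianDensity(x, mean, cov) << std::endl;

      return 0;
    }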