[mlpack-svn] r10325 - mlpack/trunk/src/mlpack/tests
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Nov 18 15:43:16 EST 2011
Author: rcurtin
Date: 2011-11-18 15:43:16 -0500 (Fri, 18 Nov 2011)
New Revision: 10325
Modified:
mlpack/trunk/src/mlpack/tests/CMakeLists.txt
mlpack/trunk/src/mlpack/tests/gmm_test.cpp
Log:
Tests for GMM. Multivariate multi-Gaussian training tests included (that was
the hardest one to get working right).
Modified: mlpack/trunk/src/mlpack/tests/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/tests/CMakeLists.txt 2011-11-18 20:42:58 UTC (rev 10324)
+++ mlpack/trunk/src/mlpack/tests/CMakeLists.txt 2011-11-18 20:43:16 UTC (rev 10325)
@@ -17,7 +17,7 @@
cli_test.cpp
distribution_test.cpp
emst_test.cpp
- #gmm_test.cpp
+ gmm_test.cpp
hmm_test.cpp
infomax_ica_test.cpp
kernel_test.cpp
Modified: mlpack/trunk/src/mlpack/tests/gmm_test.cpp
===================================================================
--- mlpack/trunk/src/mlpack/tests/gmm_test.cpp 2011-11-18 20:42:58 UTC (rev 10324)
+++ mlpack/trunk/src/mlpack/tests/gmm_test.cpp 2011-11-18 20:43:16 UTC (rev 10325)
@@ -6,16 +6,16 @@
*/
#include <mlpack/core.h>
-#include "mog_em.hpp"
-#include "mog_l2e.hpp"
-#include "phi.hpp"
+#include <mlpack/methods/gmm/gmm.hpp>
+#include <mlpack/methods/gmm/phi.hpp>
-#define BOOST_TEST_MODULE GMMTest
#include <boost/test/unit_test.hpp>
using namespace mlpack;
using namespace mlpack::gmm;
+BOOST_AUTO_TEST_SUITE(GMMTest);
+
/**
* Test the phi() function, in the univariate Gaussian case.
*/
@@ -90,6 +90,35 @@
}
/**
+ * Test the phi() function, for multiple points in the multivariate Gaussian
+ * case: each column of the points matrix is one point with its own phi.
+ */
+BOOST_AUTO_TEST_CASE(MultipointMultivariatePhiTest)
+{
+ // Same case as before: a 5-dimensional mean and a 5x5 covariance.
+ arma::vec mean = "5 6 3 3 2";
+ arma::mat cov = "6 1 1 0 2; 0 7 1 0 1; 1 1 4 1 1; 1 0 1 7 0; 2 0 1 1 6";
+
+ arma::mat points = "0 3 2 2 3 4;"
+ "1 2 2 1 0 0;"
+ "2 3 0 5 5 6;"
+ "3 7 8 0 1 1;"
+ "4 8 1 1 0 0;";
+
+ arma::vec phis;
+ phi(points, mean, cov, phis);
+
+ BOOST_REQUIRE_EQUAL(phis.n_elem, 6); // One phi per column of points.
+
+ BOOST_REQUIRE_CLOSE(phis(0), 1.02531207499358e-6, 1e-5); // Tolerance is in percent.
+ BOOST_REQUIRE_CLOSE(phis(1), 1.82353695848039e-7, 1e-5);
+ BOOST_REQUIRE_CLOSE(phis(2), 1.29759261892949e-6, 1e-5);
+ BOOST_REQUIRE_CLOSE(phis(3), 1.33218060268258e-6, 1e-5);
+ BOOST_REQUIRE_CLOSE(phis(4), 1.12120427975708e-6, 1e-5);
+ BOOST_REQUIRE_CLOSE(phis(5), 4.57951032485297e-7, 1e-5);
+}
+
+/**
* Test training a model on only one Gaussian (randomly generated) in two
* dimensions. We will vary the dataset size from small to large. The EM
* algorithm is used for training the GMM.
@@ -99,7 +128,7 @@
// Initialize random seed.
srand(time(NULL));
- for (size_t iterations = 0; iterations < 10; iterations++)
+ for (size_t iterations = 0; iterations < 4; iterations++)
{
// Determine random covariance and mean.
arma::vec mean;
@@ -118,7 +147,7 @@
data.row(1) += mean(1);
// Now, train the model.
- MoGEM gmm(1, 2);
+ GMM gmm(1, 2);
gmm.ExpectationMaximization(data);
arma::vec actual_mean = arma::mean(data, 1);
@@ -144,106 +173,91 @@
*/
BOOST_AUTO_TEST_CASE(GMMTrainEMMultipleGaussians)
{
- // Initialize random seed... just in case.
- srand(time(NULL));
+ // Higher dimensionality gives us a greater chance of having separated
+ // Gaussians.
+ size_t dims = 8;
+ size_t gaussians = 3;
- for (size_t iterations = 1; iterations < 2; iterations++)
- {
- Log::Warn << "Iteration " << iterations << std::endl;
+ // Generate dataset: 500 points, one per column.
+ arma::mat data;
+ data.zeros(dims, 500);
- // Choose dimension based on iteration number.
- int dims = iterations + 2; // Between 2 and 11 dimensions.
- int gaussians = 2 * (iterations + 1); // Between 2 and 20 Gaussians.
+ std::vector<arma::vec> means(gaussians);
+ std::vector<arma::mat> covars(gaussians);
+ arma::vec weights(gaussians);
+ arma::Col<size_t> counts(gaussians);
- // Generate dataset.
- arma::mat data;
- data.zeros(dims, 1000); // Constant 1k points.
-
- std::vector<arma::vec> means(gaussians);
- std::vector<arma::mat> covars(gaussians);
- arma::vec weights(gaussians);
- arma::Col<size_t> counts(gaussians);
-
- // Choose weights randomly.
+ // Choose weights randomly, redrawing until every weight is at least 0.02.
+ weights.zeros();
+ while (weights.min() < 0.02)
+ {
weights.randu(gaussians);
weights /= accu(weights);
- for (size_t i = 0; i < gaussians; i++)
- counts[i] = round(weights[i] * (data.n_cols - gaussians));
- // Ensure one point minimum in each.
- counts += 1;
+ }
- // Account for rounding errors (possibly necessary).
- counts[gaussians - 1] += (data.n_cols - arma::accu(counts));
+ for (size_t i = 0; i < gaussians; i++)
+ counts[i] = round(weights[i] * (data.n_cols - gaussians));
+ // Ensure one point minimum in each.
+ counts += 1;
- // Build each Gaussian individually.
- size_t point = 0;
- for (int i = 0; i < gaussians; i++)
- {
- arma::mat gaussian;
- gaussian.randn(dims, counts[i]);
+ // Rounding can leave the total off; the last Gaussian absorbs the difference.
+ counts[gaussians - 1] += (data.n_cols - arma::accu(counts));
- // Randomly generate mean and covariance.
- means[i].randu(dims);
- means[i] += 50 * i;
- covars[i].randu(dims, dims);
+ // Build each Gaussian individually.
+ size_t point = 0;
+ for (int i = 0; i < gaussians; i++) // NOTE(review): int index compared with size_t gaussians.
+ {
+ arma::mat gaussian;
+ gaussian.randn(dims, counts[i]);
- data.cols(point, point + counts[i] - 1) = (covars[i] * gaussian + means[i]
- * arma::ones<arma::rowvec>(counts[i]));
+ // Randomly generate mean (entries in [-25, 25]) and covariance.
+ means[i].randu(dims);
+ means[i] -= 0.5;
+ means[i] *= 50;
- // Calculate the actual means and covariances because they will probably
- // be different (this is easier to do before we shuffle the points).
- means[i] = arma::mean(data.cols(point, point + counts[i] - 1), 1);
- covars[i] = ccov(data.cols(point, point + counts[i] - 1), 1 /* biased */);
+ covars[i].randu(dims, dims);
+ covars[i] *= 2;
- point += counts[i];
- }
+ data.cols(point, point + counts[i] - 1) = (covars[i] * gaussian + means[i]
+ * arma::ones<arma::rowvec>(counts[i]));
- // Calculate actual weights.
- for (size_t i = 0; i < gaussians; i++)
- weights[i] = (double) counts[i] / data.n_cols;
+ // Calculate the actual means and covariances because they will probably
+ // be different (this is easier to do before we shuffle the points).
+ means[i] = arma::mean(data.cols(point, point + counts[i] - 1), 1);
+ covars[i] = ccov(data.cols(point, point + counts[i] - 1), 1 /* biased */);
- // Now train the model.
- MoGEM gmm(gaussians, dims);
- gmm.ExpectationMaximization(data);
+ point += counts[i];
+ }
- Log::Warn << "Actual weights: " << std::endl << weights << std::endl;
- Log::Warn << "Estimated weights: " << std::endl << gmm.Weights()
- << std::endl;
+ // Calculate actual weights.
+ for (size_t i = 0; i < gaussians; i++)
+ weights[i] = (double) counts[i] / data.n_cols;
- arma::uvec sort_ref = sort_index(weights);
- arma::uvec sort_try = sort_index(gmm.Weights());
+ // Now train the model.
+ GMM gmm(gaussians, dims);
+ gmm.ExpectationMaximization(data);
- for (int i = 0; i < gaussians; i++)
- {
- Log::Warn << "Actual mean " << i << ":" << std::endl;
- Log::Warn << means[sort_ref[i]] << std::endl;
- Log::Warn << "Actual covariance " << i << ":" << std::endl;
- Log::Warn << covars[sort_ref[i]] << std::endl;
+ arma::uvec sort_ref = sort_index(weights); // Pair true and estimated components by weight rank.
+ arma::uvec sort_try = sort_index(gmm.Weights());
- Log::Warn << "Estimated mean " << i << ":" << std::endl;
- Log::Warn << gmm.Means()[sort_try[i]] << std::endl;
- Log::Warn << "Estimated covariance" << i << ":" << std::endl;
- Log::Warn << gmm.Covariances()[sort_try[i]] << std::endl;
- }
+ // Check the model to see that it is correct.
+ for (int i = 0; i < gaussians; i++)
+ {
+ // Check the mean (BOOST_REQUIRE_CLOSE tolerances are percentages).
+ for (int j = 0; j < dims; j++)
+ BOOST_REQUIRE_CLOSE((gmm.Means()[sort_try[i]])[j],
+ (means[sort_ref[i]])[j], 1e-5);
- // Check the model to see that it is correct.
+ // Check the covariance.
+ for (int row = 0; row < dims; row++)
+ for (int col = 0; col < dims; col++)
+ BOOST_REQUIRE_CLOSE((gmm.Covariances()[sort_try[i]])(row, col),
+ (covars[sort_ref[i]])(row, col), 1e-5);
- for (int i = 0; i < gaussians; i++)
- {
- // Check the mean.
- for (int j = 0; j < dims; j++)
- BOOST_REQUIRE_CLOSE((gmm.Means()[sort_try[i]])[j],
- (means[sort_ref[i]])[j], 1e-5);
-
- // Check the covariance.
- for (int row = 0; row < dims; row++)
- for (int col = 0; col < dims; col++)
- BOOST_REQUIRE_CLOSE((gmm.Covariances()[sort_try[i]])(row, col),
- (covars[sort_ref[i]])(row, col), 1e-5);
-
- // Check the weight.
- BOOST_REQUIRE_CLOSE(gmm.Weights()[sort_try[i]], weights[sort_ref[i]],
- 1e-5);
- }
+ // Check the weight.
+ BOOST_REQUIRE_CLOSE(gmm.Weights()[sort_try[i]], weights[sort_ref[i]],
+ 1e-5);
}
}
+
+BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-svn
mailing list