[mlpack-svn] r10325 - mlpack/trunk/src/mlpack/tests

fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Nov 18 15:43:16 EST 2011


Author: rcurtin
Date: 2011-11-18 15:43:16 -0500 (Fri, 18 Nov 2011)
New Revision: 10325

Modified:
   mlpack/trunk/src/mlpack/tests/CMakeLists.txt
   mlpack/trunk/src/mlpack/tests/gmm_test.cpp
Log:
Tests for GMM.  Multivariate multi-Gaussian training tests are included (that
one was the hardest to get working correctly).
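
For reference: phi() evaluates the multivariate Gaussian density, so the
hard-coded values in the new multipoint test below are just the standard
normal pdf evaluated at each column of 'points' (this is the textbook
formula, not a quote from the implementation):

  \phi(x; \mu, \Sigma) = (2\pi)^{-d/2} |\Sigma|^{-1/2}
      \exp\left( -\tfrac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right)

with d = 5 for the mean and covariance used in that test; each of the six
columns of 'points' yields one entry of 'phis'.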


Modified: mlpack/trunk/src/mlpack/tests/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/tests/CMakeLists.txt	2011-11-18 20:42:58 UTC (rev 10324)
+++ mlpack/trunk/src/mlpack/tests/CMakeLists.txt	2011-11-18 20:43:16 UTC (rev 10325)
@@ -17,7 +17,7 @@
   cli_test.cpp
   distribution_test.cpp
   emst_test.cpp
-  #gmm_test.cpp
+  gmm_test.cpp
   hmm_test.cpp
   infomax_ica_test.cpp
   kernel_test.cpp

Modified: mlpack/trunk/src/mlpack/tests/gmm_test.cpp
===================================================================
--- mlpack/trunk/src/mlpack/tests/gmm_test.cpp	2011-11-18 20:42:58 UTC (rev 10324)
+++ mlpack/trunk/src/mlpack/tests/gmm_test.cpp	2011-11-18 20:43:16 UTC (rev 10325)
@@ -6,16 +6,16 @@
  */
 #include <mlpack/core.h>
 
-#include "mog_em.hpp"
-#include "mog_l2e.hpp"
-#include "phi.hpp"
+#include <mlpack/methods/gmm/gmm.hpp>
+#include <mlpack/methods/gmm/phi.hpp>
 
-#define BOOST_TEST_MODULE GMMTest
 #include <boost/test/unit_test.hpp>
 
 using namespace mlpack;
 using namespace mlpack::gmm;
 
+BOOST_AUTO_TEST_SUITE(GMMTest);
+
 /**
  * Test the phi() function, in the univariate Gaussian case.
  */
@@ -90,6 +90,35 @@
 }
 
 /**
+ * Test the phi() function, for multiple points in the multivariate Gaussian
+ * case.
+ */
+BOOST_AUTO_TEST_CASE(MultipointMultivariatePhiTest)
+{
+  // Same case as before.
+  arma::vec mean = "5 6 3 3 2";
+  arma::mat cov = "6 1 1 0 2; 0 7 1 0 1; 1 1 4 1 1; 1 0 1 7 0; 2 0 1 1 6";
+
+  arma::mat points = "0 3 2 2 3 4;"
+                     "1 2 2 1 0 0;"
+                     "2 3 0 5 5 6;"
+                     "3 7 8 0 1 1;"
+                     "4 8 1 1 0 0;";
+
+  arma::vec phis;
+  phi(points, mean, cov, phis);
+
+  BOOST_REQUIRE_EQUAL(phis.n_elem, 6);
+
+  BOOST_REQUIRE_CLOSE(phis(0), 1.02531207499358e-6, 1e-5);
+  BOOST_REQUIRE_CLOSE(phis(1), 1.82353695848039e-7, 1e-5);
+  BOOST_REQUIRE_CLOSE(phis(2), 1.29759261892949e-6, 1e-5);
+  BOOST_REQUIRE_CLOSE(phis(3), 1.33218060268258e-6, 1e-5);
+  BOOST_REQUIRE_CLOSE(phis(4), 1.12120427975708e-6, 1e-5);
+  BOOST_REQUIRE_CLOSE(phis(5), 4.57951032485297e-7, 1e-5);
+}
+
+/**
  * Test training a model on only one Gaussian (randomly generated) in two
  * dimensions.  We will vary the dataset size from small to large.  The EM
  * algorithm is used for training the GMM.
@@ -99,7 +128,7 @@
   // Initialize random seed.
   srand(time(NULL));
 
-  for (size_t iterations = 0; iterations < 10; iterations++)
+  for (size_t iterations = 0; iterations < 4; iterations++)
   {
     // Determine random covariance and mean.
     arma::vec mean;
@@ -118,7 +147,7 @@
     data.row(1) += mean(1);
 
     // Now, train the model.
-    MoGEM gmm(1, 2);
+    GMM gmm(1, 2);
     gmm.ExpectationMaximization(data);
 
     arma::vec actual_mean = arma::mean(data, 1);
@@ -144,106 +173,91 @@
  */
 BOOST_AUTO_TEST_CASE(GMMTrainEMMultipleGaussians)
 {
-  // Initialize random seed... just in case.
-  srand(time(NULL));
+  // Higher dimensionality gives us a greater chance of having separated
+  // Gaussians.
+  size_t dims = 8;
+  size_t gaussians = 3;
 
-  for (size_t iterations = 1; iterations < 2; iterations++)
-  {
-    Log::Warn << "Iteration " << iterations << std::endl;
+  // Generate dataset.
+  arma::mat data;
+  data.zeros(dims, 500);
 
-    // Choose dimension based on iteration number.
-    int dims = iterations + 2; // Between 2 and 11 dimensions.
-    int gaussians = 2 * (iterations + 1); // Between 2 and 20 Gaussians.
+  std::vector<arma::vec> means(gaussians);
+  std::vector<arma::mat> covars(gaussians);
+  arma::vec weights(gaussians);
+  arma::Col<size_t> counts(gaussians);
 
-    // Generate dataset.
-    arma::mat data;
-    data.zeros(dims, 1000); // Constant 1k points.
-
-    std::vector<arma::vec> means(gaussians);
-    std::vector<arma::mat> covars(gaussians);
-    arma::vec weights(gaussians);
-    arma::Col<size_t> counts(gaussians);
-
-    // Choose weights randomly.
+  // Choose weights randomly.
+  weights.zeros();
+  while (weights.min() < 0.02)
+  {
     weights.randu(gaussians);
     weights /= accu(weights);
-    for (size_t i = 0; i < gaussians; i++)
-      counts[i] = round(weights[i] * (data.n_cols - gaussians));
-    // Ensure one point minimum in each.
-    counts += 1;
+  }
 
-    // Account for rounding errors (possibly necessary).
-    counts[gaussians - 1] += (data.n_cols - arma::accu(counts));
+  for (size_t i = 0; i < gaussians; i++)
+    counts[i] = round(weights[i] * (data.n_cols - gaussians));
+  // Ensure one point minimum in each.
+  counts += 1;
 
-    // Build each Gaussian individually.
-    size_t point = 0;
-    for (int i = 0; i < gaussians; i++)
-    {
-      arma::mat gaussian;
-      gaussian.randn(dims, counts[i]);
+  // Account for rounding errors (possibly necessary).
+  counts[gaussians - 1] += (data.n_cols - arma::accu(counts));
 
-      // Randomly generate mean and covariance.
-      means[i].randu(dims);
-      means[i] += 50 * i;
-      covars[i].randu(dims, dims);
+  // Build each Gaussian individually.
+  size_t point = 0;
+  for (int i = 0; i < gaussians; i++)
+  {
+    arma::mat gaussian;
+    gaussian.randn(dims, counts[i]);
 
-      data.cols(point, point + counts[i] - 1) = (covars[i] * gaussian + means[i]
-          * arma::ones<arma::rowvec>(counts[i]));
+    // Randomly generate mean and covariance.
+    means[i].randu(dims);
+    means[i] -= 0.5;
+    means[i] *= 50;
 
-      // Calculate the actual means and covariances because they will probably
-      // be different (this is easier to do before we shuffle the points).
-      means[i] = arma::mean(data.cols(point, point + counts[i] - 1), 1);
-      covars[i] = ccov(data.cols(point, point + counts[i] - 1), 1 /* biased */);
+    covars[i].randu(dims, dims);
+    covars[i] *= 2;
 
-      point += counts[i];
-    }
+    data.cols(point, point + counts[i] - 1) = (covars[i] * gaussian + means[i]
+        * arma::ones<arma::rowvec>(counts[i]));
 
-    // Calculate actual weights.
-    for (size_t i = 0; i < gaussians; i++)
-      weights[i] = (double) counts[i] / data.n_cols;
+    // Calculate the actual means and covariances because they will probably
+    // be different (this is easier to do before we shuffle the points).
+    means[i] = arma::mean(data.cols(point, point + counts[i] - 1), 1);
+    covars[i] = ccov(data.cols(point, point + counts[i] - 1), 1 /* biased */);
 
-    // Now train the model.
-    MoGEM gmm(gaussians, dims);
-    gmm.ExpectationMaximization(data);
+    point += counts[i];
+  }
 
-    Log::Warn << "Actual weights: " << std::endl << weights << std::endl;
-    Log::Warn << "Estimated weights: " << std::endl << gmm.Weights()
-        << std::endl;
+  // Calculate actual weights.
+  for (size_t i = 0; i < gaussians; i++)
+    weights[i] = (double) counts[i] / data.n_cols;
 
-    arma::uvec sort_ref = sort_index(weights);
-    arma::uvec sort_try = sort_index(gmm.Weights());
+  // Now train the model.
+  GMM gmm(gaussians, dims);
+  gmm.ExpectationMaximization(data);
 
-    for (int i = 0; i < gaussians; i++)
-    {
-      Log::Warn << "Actual mean " << i << ":" << std::endl;
-      Log::Warn << means[sort_ref[i]] << std::endl;
-      Log::Warn << "Actual covariance " << i << ":" << std::endl;
-      Log::Warn << covars[sort_ref[i]] << std::endl;
+  arma::uvec sort_ref = sort_index(weights);
+  arma::uvec sort_try = sort_index(gmm.Weights());
 
-      Log::Warn << "Estimated mean " << i << ":" << std::endl;
-      Log::Warn << gmm.Means()[sort_try[i]] << std::endl;
-      Log::Warn << "Estimated covariance" << i << ":" << std::endl;
-      Log::Warn << gmm.Covariances()[sort_try[i]] << std::endl;
-    }
+  // Check the model to see that it is correct.
+  for (int i = 0; i < gaussians; i++)
+  {
+    // Check the mean.
+    for (int j = 0; j < dims; j++)
+      BOOST_REQUIRE_CLOSE((gmm.Means()[sort_try[i]])[j],
+          (means[sort_ref[i]])[j], 1e-5);
 
-    // Check the model to see that it is correct.
+    // Check the covariance.
+    for (int row = 0; row < dims; row++)
+      for (int col = 0; col < dims; col++)
+        BOOST_REQUIRE_CLOSE((gmm.Covariances()[sort_try[i]])(row, col),
+            (covars[sort_ref[i]])(row, col), 1e-5);
 
-    for (int i = 0; i < gaussians; i++)
-    {
-      // Check the mean.
-      for (int j = 0; j < dims; j++)
-        BOOST_REQUIRE_CLOSE((gmm.Means()[sort_try[i]])[j],
-            (means[sort_ref[i]])[j], 1e-5);
-
-      // Check the covariance.
-      for (int row = 0; row < dims; row++)
-        for (int col = 0; col < dims; col++)
-          BOOST_REQUIRE_CLOSE((gmm.Covariances()[sort_try[i]])(row, col),
-              (covars[sort_ref[i]])(row, col), 1e-5);
-
-      // Check the weight.
-      BOOST_REQUIRE_CLOSE(gmm.Weights()[sort_try[i]], weights[sort_ref[i]],
-          1e-5);
-    }
+    // Check the weight.
+    BOOST_REQUIRE_CLOSE(gmm.Weights()[sort_try[i]], weights[sort_ref[i]],
+        1e-5);
   }
 }
+
+BOOST_AUTO_TEST_SUITE_END();
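
For anyone who wants to try the re-enabled class outside the test suite,
here is a minimal, self-contained sketch (not part of the commit) of the
usage these tests exercise, assuming only the GMM constructor and methods
visible in the diff above:

#include <mlpack/core.h>
#include <mlpack/methods/gmm/gmm.hpp>

using namespace mlpack::gmm;

int main()
{
  // Sketch only: assumes the GMM API shown in the diff above.
  // 1000 two-dimensional points drawn from a standard Gaussian.
  arma::mat data;
  data.randn(2, 1000);

  // One Gaussian, two dimensions; train with EM as in the tests.
  GMM gmm(1, 2);
  gmm.ExpectationMaximization(data);

  // The estimates should be close to a zero mean and identity covariance.
  gmm.Means()[0].print("estimated mean:");
  gmm.Covariances()[0].print("estimated covariance:");

  return 0;
}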



