[mlpack-svn] r10340 - mlpack/trunk/src/mlpack/tests
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Nov 21 14:05:39 EST 2011
Author: rcurtin
Date: 2011-11-21 14:05:38 -0500 (Mon, 21 Nov 2011)
New Revision: 10340
Modified:
mlpack/trunk/src/mlpack/tests/hmm_test.cpp
Log:
Add test for Gaussian HMMs where we are training for both the labeled and
unlabeled case. Almost done with the Gaussian HMM tests.
Modified: mlpack/trunk/src/mlpack/tests/hmm_test.cpp
===================================================================
--- mlpack/trunk/src/mlpack/tests/hmm_test.cpp 2011-11-21 18:39:28 UTC (rev 10339)
+++ mlpack/trunk/src/mlpack/tests/hmm_test.cpp 2011-11-21 19:05:38 UTC (rev 10340)
@@ -360,7 +360,7 @@
// Now that our data is generated, we give the HMM the labeled data to train
// on.
- HMM<DiscreteDistribution> hmm(3, 6);
+ HMM<DiscreteDistribution> hmm(3, DiscreteDistribution(6));
hmm.Train(observations, states);
@@ -582,4 +582,128 @@
BOOST_REQUIRE_EQUAL(predictedClasses[i], classes[i]);
}
+/**
+ * Ensure that Gaussian HMMs can be trained properly, for the labeled training
+ * case and also for the unlabeled training case.
+ */
+BOOST_AUTO_TEST_CASE(GaussianHMMTrainTest)
+{
+  // NOTE(review): seeding from the clock makes this test nondeterministic;
+  // the tolerances below must be loose enough to absorb sampling noise.
+  srand(time(NULL));
+
+  // Three emission Gaussians and three internal states. The goal is to
+  // estimate the transition matrix correctly, and each distribution correctly.
+  std::vector<GaussianDistribution> emission;
+  emission.push_back(GaussianDistribution("0.0 0.0 0.0", "1.0 0.2 0.2;"
+                                                         "0.2 1.5 0.0;"
+                                                         "0.2 0.0 1.1"));
+  emission.push_back(GaussianDistribution("2.0 1.0 5.0", "0.7 0.3 0.0;"
+                                                         "0.3 2.6 0.0;"
+                                                         "0.0 0.0 1.0"));
+  emission.push_back(GaussianDistribution("5.0 0.0 0.5", "1.0 0.0 0.0;"
+                                                         "0.0 1.0 0.0;"
+                                                         "0.0 0.0 1.0"));
+
+  // Column-stochastic transition matrix: column j holds P(next state | j).
+  arma::mat transition("0.3 0.5 0.7;"
+                       "0.3 0.4 0.1;"
+                       "0.4 0.1 0.2");
+
+  // Now generate observations: 100 sequences of length 1000.
+  std::vector<std::vector<arma::vec> > observations(100);
+  std::vector<std::vector<size_t> > states(100);
+
+  for (size_t obs = 0; obs < 100; obs++)
+  {
+    observations[obs].resize(1000);
+    states[obs].resize(1000);
+
+    // Always start in state zero.
+    states[obs][0] = 0;
+    observations[obs][0] = emission[0].Random();
+
+    for (size_t t = 1; t < 1000; t++)
+    {
+      // Sample the next state by inverse-CDF sampling down the column of the
+      // transition matrix corresponding to the previous state.
+      double randValue = (double) rand() / (double) RAND_MAX;
+      double probSum = 0;
+      for (size_t state = 0; state < 3; state++)
+      {
+        probSum += transition(state, states[obs][t - 1]);
+        if (probSum >= randValue)
+        {
+          states[obs][t] = state;
+          break;
+        }
+      }
+
+      // Now sample the emission from the chosen state's Gaussian.
+      observations[obs][t] = emission[states[obs][t]].Random();
+    }
+  }
+
+  // Now that the data is generated, train the HMM on the labeled data.
+  HMM<GaussianDistribution> hmm(3, GaussianDistribution(3));
+
+  hmm.Train(observations, states);
+
+  // We use an absolute tolerance of 0.01 for the transition matrices.
+  // Check that the transition matrix is correct.
+  for (size_t row = 0; row < 3; row++)
+    for (size_t col = 0; col < 3; col++)
+      BOOST_REQUIRE_SMALL(transition(row, col) - hmm.Transition()(row, col),
+          0.01);
+
+  // Check that each distribution is correct.
+  for (size_t dist = 0; dist < 3; dist++)
+  {
+    // Check that the mean is correct. Absolute tolerance of 0.04.
+    for (size_t dim = 0; dim < 3; dim++)
+      BOOST_REQUIRE_SMALL(hmm.Emission()[dist].Mean()(dim) -
+          emission[dist].Mean()(dim), 0.04);
+
+    // Check that the covariance is correct. Absolute tolerance of 0.075.
+    for (size_t row = 0; row < 3; row++)
+      for (size_t col = 0; col < 3; col++)
+        BOOST_REQUIRE_SMALL(hmm.Emission()[dist].Covariance()(row, col) -
+            emission[dist].Covariance()(row, col), 0.075);
+  }
+
+  // Now let's try it all again, but this time, unlabeled. Everything will fail
+  // if we don't have a decent guess at the Gaussians, so we'll take a "poor"
+  // guess at it ourselves. I won't use K-Means because we can't afford to add
+  // the instability of that to our test. We'll leave the covariances as the
+  // identity.
+  HMM<GaussianDistribution> hmm2(3, GaussianDistribution(3));
+  hmm2.Emission()[0].Mean() = "0.3 -0.2 0.1"; // Actual: [0 0 0].
+  hmm2.Emission()[1].Mean() = "1.0 1.4 3.2"; // Actual: [2 1 5].
+  hmm2.Emission()[2].Mean() = "3.1 -0.2 6.1"; // Actual: [5 0 0.5].
+
+  // We'll only use 20 observation sequences to try and keep training time
+  // shorter.
+  observations.resize(20);
+
+  // Train the second HMM on the unlabeled observations. (This previously
+  // retrained 'hmm' by mistake, so 'hmm2' was never exercised at all.)
+  hmm2.Train(observations);
+
+  // We use an absolute tolerance of 0.01 for the transition matrices.
+  // Check that the transition matrix is correct.
+  for (size_t row = 0; row < 3; row++)
+    for (size_t col = 0; col < 3; col++)
+      BOOST_REQUIRE_SMALL(transition(row, col) - hmm2.Transition()(row, col),
+          0.01);
+
+  // Check that each distribution is correct.
+  for (size_t dist = 0; dist < 3; dist++)
+  {
+    // Check that the mean is correct. Absolute tolerance of 0.04.
+    for (size_t dim = 0; dim < 3; dim++)
+      BOOST_REQUIRE_SMALL(hmm2.Emission()[dist].Mean()(dim) -
+          emission[dist].Mean()(dim), 0.04);
+
+    // Check that the covariance is correct. Absolute tolerance of 0.075.
+    for (size_t row = 0; row < 3; row++)
+      for (size_t col = 0; col < 3; col++)
+        BOOST_REQUIRE_SMALL(hmm2.Emission()[dist].Covariance()(row, col) -
+            emission[dist].Covariance()(row, col), 0.075);
+  }
+}
+
+
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-svn
mailing list