[mlpack-svn] r10256 - mlpack/trunk/src/mlpack/tests
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Sat Nov 12 14:16:25 EST 2011
Author: rcurtin
Date: 2011-11-12 14:16:25 -0500 (Sat, 12 Nov 2011)
New Revision: 10256
Added:
mlpack/trunk/src/mlpack/tests/hmm_test.cpp
Modified:
mlpack/trunk/src/mlpack/tests/CMakeLists.txt
Log:
Add HMM test. A bunch of stuff does work.
Modified: mlpack/trunk/src/mlpack/tests/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/tests/CMakeLists.txt 2011-11-12 17:17:47 UTC (rev 10255)
+++ mlpack/trunk/src/mlpack/tests/CMakeLists.txt 2011-11-12 19:16:25 UTC (rev 10256)
@@ -16,7 +16,7 @@
allknn_test.cpp
cli_test.cpp
emst_test.cpp
-# hmm_test.cpp
+ hmm_test.cpp
infomax_ica_test.cpp
kernel_test.cpp
lin_alg_test.cpp
Added: mlpack/trunk/src/mlpack/tests/hmm_test.cpp
===================================================================
--- mlpack/trunk/src/mlpack/tests/hmm_test.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/tests/hmm_test.cpp 2011-11-12 19:16:25 UTC (rev 10256)
@@ -0,0 +1,293 @@
+/**
+ * @file hmm_test.cpp
+ *
+ * Test file for HMMs.
+ */
+#include <mlpack/core.h>
+#include <mlpack/methods/hmm/discreteHMM.hpp>
+#include <mlpack/methods/hmm/hmm.hpp>
+#include <boost/test/unit_test.hpp>
+
+using namespace mlpack;
+using namespace mlpack::hmm;
+
+BOOST_AUTO_TEST_SUITE(HMMTest);
+
+/**
+ * We will use the simple case proposed by Russell and Norvig in Artificial
+ * Intelligence: A Modern Approach, 2nd Edition, around p.549.
+ */
+BOOST_AUTO_TEST_CASE(SimpleDiscreteHMMTestViterbi)
+{
+ // We have two hidden states: rain/dry. Two emission states: umbrella/no
+ // umbrella.
+ // In this example, the transition matrix is
+ // rain dry
+ // [[0.7 0.3] rain
+ // [0.3 0.7]] dry
+ // and the emission probability is
+ // rain dry
+ // [[0.9 0.2] umbrella
+ // [0.1 0.8]] no umbrella
+ arma::mat transition("0.7 0.3; 0.3 0.7");
+ arma::mat emission("0.9 0.2; 0.1 0.8");
+
+ HMM<int> hmm(transition, emission);
+
+ // Now let's take a sequence and find what the most likely state is.
+ // We'll use the sequence [U U N U U] (U = umbrella, N = no umbrella) like on
+ // p. 547.
+ arma::vec observation("0 0 1 0 0");
+
+ arma::Col<size_t> states;
+ hmm.Viterbi(observation, states);
+
+ // Check each state.
+ BOOST_REQUIRE_EQUAL(states[0], 0); // Rain.
+ BOOST_REQUIRE_EQUAL(states[1], 0); // Rain.
+ BOOST_REQUIRE_EQUAL(states[2], 1); // No rain.
+ BOOST_REQUIRE_EQUAL(states[3], 0); // Rain.
+ BOOST_REQUIRE_EQUAL(states[4], 0); // Rain.
+}
+
+/**
+ * This example is from Borodovsky & Ekisheva, p. 80-81. It is just slightly
+ * more complex.
+ */
+BOOST_AUTO_TEST_CASE(BorodovskyHMMTestViterbi)
+{
+ // Two hidden states: H (high GC content) and L (low GC content), as well as a
+ // start state.
+ arma::mat transition("0.0 0.0 0.0;"
+ "0.5 0.5 0.4;"
+ "0.5 0.5 0.6");
+ // Four emission states: A, C, G, T. Start state doesn't emit...
+ arma::mat emission("0.25 0.20 0.30;"
+ "0.25 0.30 0.20;"
+ "0.25 0.30 0.20;"
+ "0.25 0.20 0.30");
+
+ HMM<int> hmm(transition, emission);
+
+ // GGCACTGAA.
+ arma::vec observation("2 2 1 0 1 3 2 0 0");
+
+ arma::Col<size_t> states;
+ hmm.Viterbi(observation, states);
+
+ // Most probable path is HHHLLLLLL.
+ BOOST_REQUIRE_EQUAL(states[0], 1);
+ BOOST_REQUIRE_EQUAL(states[1], 1);
+ BOOST_REQUIRE_EQUAL(states[2], 1);
+ BOOST_REQUIRE_EQUAL(states[3], 2);
+ // This could actually be one of two states (equal probability).
+ BOOST_REQUIRE((states[4] == 1) || (states[4] == 2));
+ BOOST_REQUIRE_EQUAL(states[5], 2);
+ // This could also be one of two states.
+ BOOST_REQUIRE((states[6] == 1) || (states[6] == 2));
+ BOOST_REQUIRE_EQUAL(states[7], 2);
+ BOOST_REQUIRE_EQUAL(states[8], 2);
+}
+
+/**
+ * Ensure that the forward-backward algorithm is correct.
+ */
+BOOST_AUTO_TEST_CASE(ForwardBackwardTwoState)
+{
+ arma::vec obs("3 3 2 1 1 1 1 3 3 1");
+ arma::mat transm("0.1 0.9; 0.4 0.6");
+ arma::mat emis("0.85 0; 0.15 0; 0 0.5; 0 0.5");
+
+ HMM<int> hmm(2, 4);
+
+ hmm.Transition() = transm;
+ hmm.Emission() = emis;
+
+ // Now check we are getting the same results as MATLAB for this sequence.
+ arma::mat stateProb;
+ arma::mat forwardProb;
+ arma::mat backwardProb;
+ arma::vec scales;
+
+ double log = hmm.Estimate(obs, stateProb, forwardProb, backwardProb, scales);
+
+ // All values obtained from MATLAB hmmdecode().
+ BOOST_REQUIRE_CLOSE(log, -23.4349, 1e-3);
+
+ BOOST_REQUIRE_SMALL(stateProb(0, 0), 1e-5);
+ BOOST_REQUIRE_CLOSE(stateProb(1, 0), 1.0, 1e-5);
+ BOOST_REQUIRE_SMALL(stateProb(0, 1), 1e-5);
+ BOOST_REQUIRE_CLOSE(stateProb(1, 1), 1.0, 1e-5);
+ BOOST_REQUIRE_SMALL(stateProb(0, 2), 1e-5);
+ BOOST_REQUIRE_CLOSE(stateProb(1, 2), 1.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(stateProb(0, 3), 1.0, 1e-5);
+ BOOST_REQUIRE_SMALL(stateProb(1, 3), 1e-5);
+ BOOST_REQUIRE_CLOSE(stateProb(0, 4), 1.0, 1e-5);
+ BOOST_REQUIRE_SMALL(stateProb(1, 4), 1e-5);
+ BOOST_REQUIRE_CLOSE(stateProb(0, 5), 1.0, 1e-5);
+ BOOST_REQUIRE_SMALL(stateProb(1, 5), 1e-5);
+ BOOST_REQUIRE_CLOSE(stateProb(0, 6), 1.0, 1e-5);
+ BOOST_REQUIRE_SMALL(stateProb(1, 6), 1e-5);
+ BOOST_REQUIRE_SMALL(stateProb(0, 7), 1e-5);
+ BOOST_REQUIRE_CLOSE(stateProb(1, 7), 1.0, 1e-5);
+ BOOST_REQUIRE_SMALL(stateProb(0, 8), 1e-5);
+ BOOST_REQUIRE_CLOSE(stateProb(1, 8), 1.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(stateProb(0, 9), 1.0, 1e-5);
+ BOOST_REQUIRE_SMALL(stateProb(1, 9), 1e-5);
+}
+
+/**
+ * In this example we try to estimate the transmission and emission matrices
+ * based on some observations. We use the simplest possible model.
+ */
+BOOST_AUTO_TEST_CASE(SimplestBaumWelchDiscreteHMM)
+{
+ // Don't yet require a useful distribution.
+ HMM<int> hmm(1, 1); // 1 state, 1 emission.
+
+ std::vector<arma::vec> observations;
+ observations.push_back("0 0 0 0 0 0 0 0 0");
+ observations.push_back("0 0 0 0 0 0 0");
+ observations.push_back("0 0 0 0 0 0 0 0 0 0 0 0");
+ observations.push_back("0 0 0 0 0 0 0 0 0 0");
+
+ hmm.EstimateModel(observations);
+
+ BOOST_REQUIRE_CLOSE(hmm.Emission()(0, 0), 1.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(hmm.Transition()(0, 0), 1.0, 1e-5);
+}
+
+/**
+ * A slightly more complex model to estimate.
+ */
+BOOST_AUTO_TEST_CASE(SimpleBaumWelchDiscreteHMM)
+{
+ HMM<int> hmm(1, 2); // 1 state, 2 emissions.
+ // Randomize the emission matrix.
+ hmm.Emission().randu();
+ hmm.Emission().col(0) /= accu(hmm.Emission().col(0));
+
+ // P(each emission) = 0.5.
+ // I've been careful to make P(first emission = 0) = P(first emission = 1).
+ std::vector<arma::vec> observations;
+ observations.push_back("0 1 0 1 0 1 0 1 0 1 0 1");
+ observations.push_back("0 0 0 0 0 0 1 1 1 1 1 1");
+ observations.push_back("1 1 1 1 1 1 0 0 0 0 0 0");
+ observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
+ observations.push_back("0 0 1 1 0 0 0 0 1 1 1 1");
+ observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
+ observations.push_back("0 1 0 1 0 1 0 1 0 1 0 1");
+ observations.push_back("0 0 0 0 0 0 1 1 1 1 1 1");
+ observations.push_back("1 1 1 1 1 1 0 0 0 0 0 0");
+ observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
+ observations.push_back("0 0 1 1 0 0 0 0 1 1 1 1");
+ observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
+ observations.push_back("0 1 0 1 0 1 0 1 0 1 0 1");
+ observations.push_back("0 0 0 0 0 0 1 1 1 1 1 1");
+ observations.push_back("1 1 1 1 1 1 0 0 0 0 0 0");
+ observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
+ observations.push_back("0 0 1 1 0 0 0 0 1 1 1 1");
+ observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
+
+ hmm.EstimateModel(observations);
+
+ BOOST_REQUIRE_CLOSE(hmm.Emission()(0, 0), 0.5, 1e-5);
+ BOOST_REQUIRE_CLOSE(hmm.Emission()(1, 0), 0.5, 1e-5);
+ BOOST_REQUIRE_CLOSE(hmm.Transition()(0, 0), 1.0, 1e-5);
+}
+
+/**
+ * Increasing complexity, but still simple; 4 emissions, 2 states; the state can
+ * be determined directly by the emission.
+ */
+BOOST_AUTO_TEST_CASE(SimpleBaumWelchDiscreteHMM_2)
+{
+ HMM<int> hmm(2, 4); // 3 states (first state is start state), 5 emissions.
+
+ // A little bit of obfuscation to the solution.
+ hmm.Transition() = arma::mat("0.1 0.4; 0.9 0.6");
+ hmm.Emission() = arma::mat("0.85 0.0; 0.15 0.0; 0.0 0.5; 0.0 0.5");
+
+ // True emission matrix:
+ // [[1 0 0 ]
+ // [0 0.4 0 ]
+ // [0 0.6 0 ]
+ // [0 0 0.2]
+ // [0 0 0.8]]
+
+ // True transmission matrix:
+ // [[0 0 0 ]
+ // [0.5 0.5 0.5]
+ // [0.5 0.5 0.5]]
+
+ // Generate observations randomly by hand. This is kinda ugly, but it works.
+ std::vector<arma::vec> observations;
+ size_t obsNum = 250; // Number of observations.
+ size_t obsLen = 500; // Number of elements in each observation.
+ for (size_t i = 0; i < obsNum; i++)
+ {
+ arma::vec observation(obsLen);
+
+ size_t state = 0;
+ size_t emission = 0;
+
+ for (size_t obs = 0; obs < obsLen; obs++)
+ {
+ // See if state changed.
+ double r = (double) rand() / (double) RAND_MAX;
+
+ if (r <= 0.5)
+ state = 0;
+ else
+ state = 1;
+
+ // Now set the observation.
+ r = (double) rand() / (double) RAND_MAX;
+
+ switch (state)
+ {
+ // case 0 is not possible.
+ case 0:
+ if (r <= 0.4)
+ emission = 0;
+ else
+ emission = 1;
+ break;
+ case 1:
+ if (r <= 0.2)
+ emission = 2;
+ else
+ emission = 3;
+ break;
+ }
+
+ observation[obs] = emission;
+ }
+
+ observations.push_back(observation);
+ }
+
+ arma::mat out(obsLen, obsNum);
+ for (size_t i = 0; i < obsNum; i++)
+ out.col(i) = observations[i];
+ data::Save("out.csv", out);
+
+ hmm.EstimateModel(observations);
+
+ // Only require 0.75% tolerance, because this is a little fuzzier.
+ BOOST_REQUIRE_CLOSE(hmm.Transition()(0, 0), 0.5, 0.75);
+ BOOST_REQUIRE_CLOSE(hmm.Transition()(1, 0), 0.5, 0.75);
+ BOOST_REQUIRE_CLOSE(hmm.Transition()(0, 1), 0.5, 0.75);
+ BOOST_REQUIRE_CLOSE(hmm.Transition()(1, 1), 0.5, 0.75);
+
+ BOOST_REQUIRE_CLOSE(hmm.Emission()(0, 0), 0.4, 0.75);
+ BOOST_REQUIRE_CLOSE(hmm.Emission()(1, 0), 0.6, 0.75);
+ BOOST_REQUIRE_SMALL(hmm.Emission()(2, 0), 0.75);
+ BOOST_REQUIRE_SMALL(hmm.Emission()(3, 0), 0.75);
+ BOOST_REQUIRE_SMALL(hmm.Emission()(0, 1), 0.75);
+ BOOST_REQUIRE_SMALL(hmm.Emission()(1, 1), 0.75);
+ BOOST_REQUIRE_CLOSE(hmm.Emission()(2, 1), 0.2, 0.75);
+ BOOST_REQUIRE_CLOSE(hmm.Emission()(3, 1), 0.8, 0.75);
+}
+
+BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-svn
mailing list