[mlpack-svn] r10299 - mlpack/trunk/src/mlpack/tests
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Nov 16 14:16:36 EST 2011
Author: rcurtin
Date: 2011-11-16 14:16:36 -0500 (Wed, 16 Nov 2011)
New Revision: 10299
Modified:
mlpack/trunk/src/mlpack/tests/hmm_test.cpp
Log:
Use new HMM syntax, with DiscreteDistribution instead of int.
Modified: mlpack/trunk/src/mlpack/tests/hmm_test.cpp
===================================================================
--- mlpack/trunk/src/mlpack/tests/hmm_test.cpp 2011-11-16 19:16:21 UTC (rev 10298)
+++ mlpack/trunk/src/mlpack/tests/hmm_test.cpp 2011-11-16 19:16:36 UTC (rev 10299)
@@ -4,12 +4,13 @@
* Test file for HMMs.
*/
#include <mlpack/core.h>
-#include <mlpack/methods/hmm/discreteHMM.hpp>
#include <mlpack/methods/hmm/hmm.hpp>
+#include <mlpack/methods/hmm/distributions/discrete_distribution.hpp>
#include <boost/test/unit_test.hpp>
using namespace mlpack;
using namespace mlpack::hmm;
+using namespace mlpack::distribution;
BOOST_AUTO_TEST_SUITE(HMMTest);
@@ -30,16 +31,23 @@
// [[0.9 0.2] umbrella
// [0.1 0.8]] no umbrella
arma::mat transition("0.7 0.3; 0.3 0.7");
- arma::mat emission("0.9 0.2; 0.1 0.8");
+ std::vector<DiscreteDistribution> emission(2);
+ emission[0] = DiscreteDistribution("0.9 0.2");
+ emission[1] = DiscreteDistribution("0.1 0.8");
- HMM<int> hmm(transition, emission);
+ HMM<DiscreteDistribution> hmm(transition, emission);
// Now let's take a sequence and find what the most likely state is.
// We'll use the sequence [U U N U U] (U = umbrella, N = no umbrella) like on
// p. 547.
- arma::vec observation("0 0 1 0 0");
+ std::vector<size_t> observation;
+ observation.push_back(0);
+ observation.push_back(0);
+ observation.push_back(1);
+ observation.push_back(0);
+ observation.push_back(0);
- arma::Col<size_t> states;
+ std::vector<size_t> states;
hmm.Predict(observation, states);
// Check each state.
@@ -62,17 +70,26 @@
"0.5 0.5 0.4;"
"0.5 0.5 0.6");
// Four emission states: A, C, G, T. Start state doesn't emit...
- arma::mat emission("0.25 0.20 0.30;"
- "0.25 0.30 0.20;"
- "0.25 0.30 0.20;"
- "0.25 0.20 0.30");
+ std::vector<DiscreteDistribution> emission(3);
+ emission[0] = DiscreteDistribution("0.25 0.25 0.25 0.25");
+ emission[1] = DiscreteDistribution("0.20 0.30 0.30 0.20");
+ emission[2] = DiscreteDistribution("0.30 0.20 0.20 0.30");
- HMM<int> hmm(transition, emission);
+ HMM<DiscreteDistribution> hmm(transition, emission);
// GGCACTGAA.
- arma::vec observation("2 2 1 0 1 3 2 0 0");
+ std::vector<size_t> observation(9);
+ observation[0] = 2;
+ observation[1] = 2;
+ observation[2] = 1;
+ observation[3] = 0;
+ observation[4] = 1;
+ observation[5] = 3;
+ observation[6] = 2;
+ observation[7] = 0;
+ observation[8] = 0;
- arma::Col<size_t> states;
+ std::vector<size_t> states;
hmm.Predict(observation, states);
// Most probable path is HHHLLLLLL.
@@ -94,14 +111,24 @@
*/
BOOST_AUTO_TEST_CASE(ForwardBackwardTwoState)
{
- arma::vec obs("3 3 2 1 1 1 1 3 3 1");
- arma::mat transm("0.1 0.9; 0.4 0.6");
- arma::mat emis("0.85 0; 0.15 0; 0 0.5; 0 0.5");
+ std::vector<size_t> obs(10);
+ obs[0] = 3;
+ obs[1] = 3;
+ obs[2] = 2;
+ obs[3] = 1;
+ obs[4] = 1;
+ obs[5] = 1;
+ obs[6] = 1;
+ obs[7] = 3;
+ obs[8] = 3;
+ obs[9] = 1;
- HMM<int> hmm(2, 4);
+ arma::mat transition("0.1 0.9; 0.4 0.6");
+ std::vector<DiscreteDistribution> emis(2);
+ emis[0] = DiscreteDistribution("0.85 0.15 0.00 0.00");
+ emis[1] = DiscreteDistribution("0.00 0.00 0.50 0.50");
- hmm.Transition() = transm;
- hmm.Emission() = emis;
+ HMM<DiscreteDistribution> hmm(transition, emis);
// Now check we are getting the same results as MATLAB for this sequence.
arma::mat stateProb;
@@ -142,18 +169,18 @@
*/
BOOST_AUTO_TEST_CASE(SimplestBaumWelchDiscreteHMM)
{
- // Don't yet require a useful distribution.
- HMM<int> hmm(1, 1); // 1 state, 1 emission.
+ // Don't yet require a useful distribution. 1 state, 1 emission.
+ HMM<DiscreteDistribution> hmm(1, DiscreteDistribution(1));
- std::vector<arma::vec> observations;
- observations.push_back("0 0 0 0 0 0 0 0 0");
- observations.push_back("0 0 0 0 0 0 0");
- observations.push_back("0 0 0 0 0 0 0 0 0 0 0 0");
- observations.push_back("0 0 0 0 0 0 0 0 0 0");
+ std::vector<std::vector<size_t> > observations;
+ observations.push_back(std::vector<size_t>(8, 0)); // 8 zeros.
+ observations.push_back(std::vector<size_t>(7, 0)); // 7 zeros.
+ observations.push_back(std::vector<size_t>(12, 0)); // 12 zeros.
+ observations.push_back(std::vector<size_t>(10, 0)); // 10 zeros.
hmm.Train(observations);
- BOOST_REQUIRE_CLOSE(hmm.Emission()(0, 0), 1.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(hmm.Emission()[0].Probability(0), 1.0, 1e-5);
BOOST_REQUIRE_CLOSE(hmm.Transition()(0, 0), 1.0, 1e-5);
}
@@ -162,37 +189,27 @@
*/
BOOST_AUTO_TEST_CASE(SimpleBaumWelchDiscreteHMM)
{
- HMM<int> hmm(1, 2); // 1 state, 2 emissions.
+ HMM<DiscreteDistribution> hmm(1, 2); // 1 state, 2 emissions.
// Randomize the emission matrix.
- hmm.Emission().randu();
- hmm.Emission().col(0) /= accu(hmm.Emission().col(0));
+ hmm.Emission()[0].Probabilities(arma::randu<arma::vec>(2));
// P(each emission) = 0.5.
// I've been careful to make P(first emission = 0) = P(first emission = 1).
- std::vector<arma::vec> observations;
- observations.push_back("0 1 0 1 0 1 0 1 0 1 0 1");
- observations.push_back("0 0 0 0 0 0 1 1 1 1 1 1");
- observations.push_back("1 1 1 1 1 1 0 0 0 0 0 0");
- observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
- observations.push_back("0 0 1 1 0 0 0 0 1 1 1 1");
- observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
- observations.push_back("0 1 0 1 0 1 0 1 0 1 0 1");
- observations.push_back("0 0 0 0 0 0 1 1 1 1 1 1");
- observations.push_back("1 1 1 1 1 1 0 0 0 0 0 0");
- observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
- observations.push_back("0 0 1 1 0 0 0 0 1 1 1 1");
- observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
- observations.push_back("0 1 0 1 0 1 0 1 0 1 0 1");
- observations.push_back("0 0 0 0 0 0 1 1 1 1 1 1");
- observations.push_back("1 1 1 1 1 1 0 0 0 0 0 0");
- observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
- observations.push_back("0 0 1 1 0 0 0 0 1 1 1 1");
- observations.push_back("1 1 1 0 0 0 1 1 1 0 0 0");
+ std::vector<std::vector<size_t> > observations;
+ size_t obs[6][12] = {{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
+ {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
+ {1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0},
+ {0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1},
+ {1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0}};
+ for (size_t i = 0; i < 18; i++)
+ observations.push_back(std::vector<size_t>(obs[i % 6], obs[i % 6] + 12));
+
hmm.Train(observations);
- BOOST_REQUIRE_CLOSE(hmm.Emission()(0, 0), 0.5, 1e-5);
- BOOST_REQUIRE_CLOSE(hmm.Emission()(1, 0), 0.5, 1e-5);
+ BOOST_REQUIRE_CLOSE(hmm.Emission()[0].Probability(0), 0.5, 1e-5);
+ BOOST_REQUIRE_CLOSE(hmm.Emission()[0].Probability(1), 0.5, 1e-5);
BOOST_REQUIRE_CLOSE(hmm.Transition()(0, 0), 1.0, 1e-5);
}
@@ -202,31 +219,30 @@
*/
BOOST_AUTO_TEST_CASE(SimpleBaumWelchDiscreteHMM_2)
{
- HMM<int> hmm(2, 4); // 3 states (first state is start state), 5 emissions.
+ HMM<DiscreteDistribution> hmm(2, DiscreteDistribution(4));
// A little bit of obfuscation to the solution.
hmm.Transition() = arma::mat("0.1 0.4; 0.9 0.6");
- hmm.Emission() = arma::mat("0.85 0.0; 0.15 0.0; 0.0 0.5; 0.0 0.5");
+ hmm.Emission()[0].Probabilities("0.85 0.15 0.00 0.00");
+ hmm.Emission()[1].Probabilities("0.00 0.00 0.50 0.50");
// True emission matrix:
- // [[1 0 0 ]
- // [0 0.4 0 ]
- // [0 0.6 0 ]
- // [0 0 0.2]
- // [0 0 0.8]]
+ // [[0.4 0 ]
+ // [0.6 0 ]
+ // [0 0.2]
+ // [0 0.8]]
// True transmission matrix:
- // [[0 0 0 ]
- // [0.5 0.5 0.5]
- // [0.5 0.5 0.5]]
+ // [[0.5 0.5]
+ // [0.5 0.5]]
// Generate observations randomly by hand. This is kinda ugly, but it works.
- std::vector<arma::vec> observations;
+ std::vector<std::vector<size_t> > observations;
size_t obsNum = 250; // Number of observations.
size_t obsLen = 500; // Number of elements in each observation.
for (size_t i = 0; i < obsNum; i++)
{
- arma::vec observation(obsLen);
+ std::vector<size_t> observation(obsLen);
size_t state = 0;
size_t emission = 0;
@@ -267,11 +283,6 @@
observations.push_back(observation);
}
- arma::mat out(obsLen, obsNum);
- for (size_t i = 0; i < obsNum; i++)
- out.col(i) = observations[i];
- data::Save("out.csv", out);
-
hmm.Train(observations);
// Only require 2.5% tolerance, because this is a little fuzzier.
@@ -280,65 +291,53 @@
BOOST_REQUIRE_CLOSE(hmm.Transition()(0, 1), 0.5, 2.5);
BOOST_REQUIRE_CLOSE(hmm.Transition()(1, 1), 0.5, 2.5);
- BOOST_REQUIRE_CLOSE(hmm.Emission()(0, 0), 0.4, 2.5);
- BOOST_REQUIRE_CLOSE(hmm.Emission()(1, 0), 0.6, 2.5);
- BOOST_REQUIRE_SMALL(hmm.Emission()(2, 0), 2.5);
- BOOST_REQUIRE_SMALL(hmm.Emission()(3, 0), 2.5);
- BOOST_REQUIRE_SMALL(hmm.Emission()(0, 1), 2.5);
- BOOST_REQUIRE_SMALL(hmm.Emission()(1, 1), 2.5);
- BOOST_REQUIRE_CLOSE(hmm.Emission()(2, 1), 0.2, 2.5);
- BOOST_REQUIRE_CLOSE(hmm.Emission()(3, 1), 0.8, 2.5);
+ BOOST_REQUIRE_CLOSE(hmm.Emission()[0].Probability(0), 0.4, 2.5);
+ BOOST_REQUIRE_CLOSE(hmm.Emission()[0].Probability(1), 0.6, 2.5);
+ BOOST_REQUIRE_SMALL(hmm.Emission()[0].Probability(2), 2.5);
+ BOOST_REQUIRE_SMALL(hmm.Emission()[0].Probability(3), 2.5);
+ BOOST_REQUIRE_SMALL(hmm.Emission()[1].Probability(0), 2.5);
+ BOOST_REQUIRE_SMALL(hmm.Emission()[1].Probability(1), 2.5);
+ BOOST_REQUIRE_CLOSE(hmm.Emission()[1].Probability(2), 0.2, 2.5);
+ BOOST_REQUIRE_CLOSE(hmm.Emission()[1].Probability(3), 0.8, 2.5);
}
BOOST_AUTO_TEST_CASE(DiscreteHMMLabeledTrainTest)
{
// Generate a random Markov model with 3 hidden states and 6 observations.
arma::mat transition;
- arma::mat emission;
+ std::vector<DiscreteDistribution> emission(3);
transition.randu(3, 3);
- emission.randu(6, 3);
+ emission[0].Probabilities(arma::randu<arma::vec>(6));
+ emission[1].Probabilities(arma::randu<arma::vec>(6));
+ emission[2].Probabilities(arma::randu<arma::vec>(6));
- // Normalize so they are correct transition and emission matrices.
+ // Normalize so they we have a correct transition matrix.
for (size_t col = 0; col < 3; col++)
- {
transition.col(col) /= accu(transition.col(col));
- emission.col(col) /= accu(emission.col(col));
- }
// Now generate sequences.
size_t obsNum = 250;
size_t obsLen = 800;
- std::vector<arma::vec> observations(obsNum);
- std::vector<arma::Col<size_t> > states(obsNum);
+ std::vector<std::vector<size_t> > observations(obsNum);
+ std::vector<std::vector<size_t> > states(obsNum);
for (size_t n = 0; n < obsNum; n++)
{
- observations[n].set_size(obsLen);
- states[n].set_size(obsLen);
+ observations[n].resize(obsLen);
+ states[n].resize(obsLen);
// Random starting state.
states[n][0] = rand() % 3;
// Random starting observation.
- double obs = (double) rand() / (double) RAND_MAX;
- double sumProb = 0;
- for (size_t em = 0; em < 6; em++)
- {
- sumProb += emission(em, states[n][0]);
- if (sumProb >= obs)
- {
- observations[n][0] = em;
- break;
- }
- }
+ observations[n][0] = emission[states[n][0]].Random();
// Now the rest of the observations.
for (size_t t = 1; t < obsLen; t++)
{
- // Choose random numbers for state transition and for emission transition.
- double obs = (double) rand() / (double) RAND_MAX;
+ // Choose random number for state transition.
double state = (double) rand() / (double) RAND_MAX;
// Decide next state.
@@ -354,22 +353,13 @@
}
// Decide observation.
- sumProb = 0;
- for (size_t em = 0; em < 6; em++)
- {
- sumProb += emission(em, states[n][t]);
- if (sumProb >= obs)
- {
- observations[n][t] = em;
- break;
- }
- }
+ observations[n][t] = emission[states[n][t]].Random();
}
}
// Now that our data is generated, we give the HMM the labeled data to train
// on.
- HMM<int> hmm(3, 6);
+ HMM<DiscreteDistribution> hmm(3, 6);
hmm.Train(observations, states);
@@ -381,9 +371,11 @@
BOOST_REQUIRE_SMALL(hmm.Transition()(row, col) - transition(row, col),
0.009);
- for (size_t row = 0; row < hmm.Emission().n_rows; row++)
- for (size_t col = 0; col < hmm.Emission().n_cols; col++)
- BOOST_REQUIRE_SMALL(hmm.Emission()(row, col) - emission(row, col), 0.009);
+ for (size_t col = 0; col < hmm.Emission().size(); col++)
+ for (size_t row = 0; row < hmm.Emission()[col].Probabilities().n_elem;
+ row++)
+ BOOST_REQUIRE_SMALL(hmm.Emission()[col].Probability(row) -
+ emission[col].Probability(row), 0.009);
}
/**
@@ -395,22 +387,22 @@
// Very simple HMM. 4 emissions with equal probability and 2 states with
// equal probability. The default transition and emission matrices satisfy
// this property.
- HMM<int> hmm(2, 4);
+ HMM<DiscreteDistribution> hmm(2, DiscreteDistribution(4));
// Now generate a really, really long sequence.
- arma::vec dataSeq;
- arma::Col<size_t> stateSeq;
+ std::vector<size_t> dataSeq;
+ std::vector<size_t> stateSeq;
hmm.Generate(100000, dataSeq, stateSeq);
// Now find the empirical probabilities of each state.
arma::vec emissionProb(4);
- for (size_t em = 0; em < 4; em++)
- emissionProb[em] = accu(dataSeq == em);
-
arma::vec stateProb(2);
- for (size_t st = 0; st < 2; st++)
- stateProb[st] = accu(stateSeq == st);
+ for (size_t i = 0; i < 100000; i++)
+ {
+ emissionProb[dataSeq[i]]++;
+ stateProb[stateSeq[i]]++;
+ }
// Normalize so these are probabilities.
emissionProb /= accu(emissionProb);
@@ -433,26 +425,26 @@
{
// 6 emissions, 4 states. Random transition and emission probability.
arma::mat transition(4, 4);
- arma::mat emission(6, 4);
+ std::vector<DiscreteDistribution> emission(4);
+ emission[0].Probabilities(arma::randu<arma::vec>(6));
+ emission[1].Probabilities(arma::randu<arma::vec>(6));
+ emission[2].Probabilities(arma::randu<arma::vec>(6));
+ emission[3].Probabilities(arma::randu<arma::vec>(6));
transition.randu();
- emission.randu();
- // Normalize matrices.
+ // Normalize matrix.
for (size_t col = 0; col < 4; col++)
- {
transition.col(col) /= accu(transition.col(col));
- emission.col(col) /= accu(emission.col(col));
- }
// Create HMM object.
- HMM<int> hmm(transition, emission);
+ HMM<DiscreteDistribution> hmm(transition, emission);
// We'll create a bunch of sequences.
int numSeq = 400;
int numObs = 3000;
- std::vector<arma::vec> sequences(numSeq);
- std::vector<arma::Col<size_t> > states(numSeq);
+ std::vector<std::vector<size_t> > sequences(numSeq);
+ std::vector<std::vector<size_t> > states(numSeq);
for (int i = 0; i < numSeq; i++)
{
// Random starting state.
@@ -462,7 +454,7 @@
}
// Now we will calculate the full probabilities.
- HMM<int> hmm2(4, 6);
+ HMM<DiscreteDistribution> hmm2(4, 6);
hmm2.Train(sequences, states);
// Check that training gives the same result. Exact tolerance of 0.005.
@@ -473,8 +465,8 @@
for (size_t row = 0; row < 6; row++)
for (size_t col = 0; col < 4; col++)
- BOOST_REQUIRE_SMALL(hmm.Emission()(row, col) - hmm2.Emission()(row, col),
- 0.005);
+ BOOST_REQUIRE_SMALL(hmm.Emission()[col].Probability(row) -
+ hmm2.Emission()[col].Probability(row), 0.005);
}
BOOST_AUTO_TEST_CASE(DiscreteHMMLogLikelihoodTest)
@@ -483,20 +475,53 @@
arma::mat transition("0.5 0.0 0.1;"
"0.2 0.6 0.2;"
"0.3 0.4 0.7");
- arma::mat emission("0.75 0.0 0.1;"
- "0.25 0.25 0.4;"
- "0.0 0.25 0.4;"
- "0.0 0.5 0.1");
+ std::vector<DiscreteDistribution> emission(3);
+ emission[0].Probabilities("0.75 0.25 0.00 0.00");
+ emission[1].Probabilities("0.00 0.25 0.25 0.50");
+ emission[2].Probabilities("0.10 0.40 0.40 0.10");
- HMM<int> hmm(transition, emission);
+ HMM<DiscreteDistribution> hmm(transition, emission);
// Now generate some sequences and check that the log-likelihood is the same
// as MATLAB gives for this HMM.
- BOOST_REQUIRE_CLOSE(hmm.LogLikelihood("0 1 2 3"), -4.9887223949, 1e-5);
- BOOST_REQUIRE_CLOSE(hmm.LogLikelihood("1 2 0 0"), -6.0288487077, 1e-5);
- BOOST_REQUIRE_CLOSE(hmm.LogLikelihood("3 3 3 3"), -5.5544000018, 1e-5);
- BOOST_REQUIRE_CLOSE(hmm.LogLikelihood("0 2 2 1 2 3 0 0 1 3 1 0 0 3 1 2 2"),
- -24.51556128368, 1e-5);
+ std::vector<size_t> seq(4);
+ seq[0] = 0;
+ seq[1] = 1;
+ seq[2] = 2;
+ seq[3] = 3;
+ BOOST_REQUIRE_CLOSE(hmm.LogLikelihood(seq), -4.9887223949, 1e-5);
+
+ seq[0] = 1;
+ seq[1] = 2;
+ seq[2] = 0;
+ seq[3] = 0;
+ BOOST_REQUIRE_CLOSE(hmm.LogLikelihood(seq), -6.0288487077, 1e-5);
+
+ seq[0] = 3;
+ seq[1] = 3;
+ seq[2] = 3;
+ seq[3] = 3;
+ BOOST_REQUIRE_CLOSE(hmm.LogLikelihood(seq), -5.5544000018, 1e-5);
+
+ seq.resize(17);
+ seq[0] = 0;
+ seq[1] = 2;
+ seq[2] = 2;
+ seq[3] = 1;
+ seq[4] = 2;
+ seq[5] = 3;
+ seq[6] = 0;
+ seq[7] = 0;
+ seq[8] = 1;
+ seq[9] = 3;
+ seq[10] = 1;
+ seq[11] = 0;
+ seq[12] = 0;
+ seq[13] = 3;
+ seq[14] = 1;
+ seq[15] = 2;
+ seq[16] = 2;
+ BOOST_REQUIRE_CLOSE(hmm.LogLikelihood(seq), -24.51556128368, 1e-5);
}
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-svn
mailing list