[mlpack-svn] r10298 - mlpack/trunk/src/mlpack/methods/hmm
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Nov 16 14:16:21 EST 2011
Author: rcurtin
Date: 2011-11-16 14:16:21 -0500 (Wed, 16 Nov 2011)
New Revision: 10298
Modified:
mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp
mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp
Log:
Abstract away from the discrete case. Now, everything depends on Distribution
(I think).
Modified: mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp 2011-11-16 19:15:59 UTC (rev 10297)
+++ mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp 2011-11-16 19:16:21 UTC (rev 10298)
@@ -23,18 +23,21 @@
//! Transition probability matrix.
arma::mat transition;
- //! Emission probability matrix (for each state).
- arma::mat emission;
+ //! Set of emission probability distributions; one for each state.
+ std::vector<Distribution> emission;
public:
+ //! Convenience typedef for the type of observation we are using.
+ typedef typename Distribution::DataType Observation;
+
/**
* Create the Hidden Markov Model with the given number of hidden states and
- * the given number of emission states.
+ * the given default distribution for emissions.
*
* @param states Number of states.
- * @param emissions Number of possible emissions.
+ * @param emissions Default distribution for emissions.
*/
- HMM(const size_t states, const size_t emissions);
+ HMM(const size_t states, const Distribution emissions);
/**
* Create the Hidden Markov Model with the given transition matrix and the
@@ -50,7 +53,7 @@
* @param transition Transition matrix.
* @param emission Emission probability matrix.
*/
- HMM(const arma::mat& transition, const arma::mat& emission);
+ HMM(const arma::mat& transition, const std::vector<Distribution>& emission);
/**
* Train the model using the Baum-Welch algorithm, with only the given
@@ -59,7 +62,7 @@
*
* @param dataSeq Vector of observation sequences.
*/
- void Train(const std::vector<arma::vec>& dataSeq);
+ void Train(const std::vector<std::vector<Observation> >& dataSeq);
/**
* Train the model using the given labeled observations; the transition and
@@ -69,8 +72,8 @@
* @param stateSeq Vector of state sequences, corresponding to each
* observation.
*/
- void Train(const std::vector<arma::vec>& dataSeq,
- const std::vector<arma::Col<size_t> >& stateSeq);
+ void Train(const std::vector<std::vector<Observation> >& dataSeq,
+ const std::vector<std::vector<size_t> >& stateSeq);
/**
* Estimate the probabilities of each hidden state at each time step for each
@@ -90,7 +93,7 @@
* will be stored.
* @return Log-likelihood of most likely state sequence.
*/
- double Estimate(const arma::vec& dataSeq,
+ double Estimate(const std::vector<Observation>& dataSeq,
arma::mat& stateProb,
arma::mat& forwardProb,
arma::mat& backwardProb,
@@ -107,7 +110,8 @@
* @param stateProb Probabilities of each state at each time interval.
* @return Log-likelihood of most likely state sequence.
*/
- double Estimate(const arma::vec& dataSeq, arma::mat& stateProb) const;
+ double Estimate(const std::vector<Observation>& dataSeq,
+ arma::mat& stateProb) const;
/**
* Generate a random data sequence of the given length. The data sequence is
@@ -120,8 +124,8 @@
* @param startState Hidden state to start sequence in (default 0).
*/
void Generate(const size_t length,
- arma::vec& dataSequence,
- arma::Col<size_t>& stateSequence,
+ std::vector<Observation>& dataSequence,
+ std::vector<size_t>& stateSequence,
const size_t startState = 0) const;
/**
@@ -134,7 +138,8 @@
* stored.
* @return Log-likelihood of most probable state sequence.
*/
- double Predict(const arma::vec& dataSeq, arma::Col<size_t>& stateSeq) const;
+ double Predict(const std::vector<Observation>& dataSeq,
+ std::vector<size_t>& stateSeq) const;
/**
* Compute the log-likelihood of the given data sequence.
@@ -142,7 +147,7 @@
* @param dataSeq Data sequence to evaluate the likelihood of.
* @return Log-likelihood of the given sequence.
*/
- double LogLikelihood(const arma::vec& dataSeq) const;
+ double LogLikelihood(const std::vector<Observation>& dataSeq) const;
/**
* Return the transition matrix.
@@ -155,22 +160,46 @@
arma::mat& Transition() { return transition; }
/**
- * Return the emission probability matrix.
+ * Return the emission distributions.
*/
- const arma::mat& Emission() const { return emission; }
+ const std::vector<Distribution>& Emission() const { return emission; }
/**
* Return a modifiable emission probability matrix reference.
*/
- arma::mat& Emission() { return emission; }
+ std::vector<Distribution>& Emission() { return emission; }
private:
// Helper functions.
- void Forward(const arma::vec& data_seq, arma::vec& scales, arma::mat& fs)
- const;
- void Backward(const arma::vec& data_seq, const arma::vec& scales,
- arma::mat& bs) const;
+ /**
+ * The Forward algorithm (part of the Forward-Backward algorithm). Computes
+ * forward probabilities for each state for each observation in the given data
+ * sequence. The returned matrix has rows equal to the number of hidden
+ * states and columns equal to the number of observations.
+ *
+ * @param dataSeq Data sequence to compute probabilities for.
+ * @param scales Vector in which scaling factors will be saved.
+ * @param forwardProb Matrix in which forward probabilities will be saved.
+ */
+ void Forward(const std::vector<Observation>& dataSeq,
+ arma::vec& scales,
+ arma::mat& forwardProb) const;
+
+ /**
+ * The Backward algorithm (part of the Forward-Backward algorithm). Computes
+ * backward probabilities for each state for each observation in the given
+ * data sequence, using the scaling factors found (presumably) by Forward().
+ * The returned matrix has rows equal to the number of hidden states and
+ * columns equal to the number of observations.
+ *
+ * @param dataSeq Data sequence to compute probabilities for.
+ * @param scales Vector of scaling factors.
+ * @param backwardProb Matrix in which backward probabilities will be saved.
+ */
+ void Backward(const std::vector<Observation>& dataSeq,
+ const arma::vec& scales,
+ arma::mat& backwardProb) const;
};
}; // namespace hmm
Modified: mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp 2011-11-16 19:15:59 UTC (rev 10297)
+++ mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp 2011-11-16 19:16:21 UTC (rev 10298)
@@ -19,9 +19,9 @@
* given number of emission states.
*/
template<typename Distribution>
-HMM<Distribution>::HMM(const size_t states, const size_t emissions) :
+HMM<Distribution>::HMM(const size_t states, const Distribution emissions) :
transition(arma::ones<arma::mat>(states, states) / (double) states),
- emission(arma::ones<arma::mat>(emissions, states) / (double) emissions)
+ emission(states, /* default distribution */ emissions)
{ /* nothing to do */ }
/**
@@ -29,17 +29,19 @@
* emission probability matrix.
*/
template<typename Distribution>
-HMM<Distribution>::HMM(const arma::mat& transition, const arma::mat& emission) :
+HMM<Distribution>::HMM(const arma::mat& transition,
+ const std::vector<Distribution>& emission) :
transition(transition),
emission(emission)
{ /* nothing to do */ }
/**
- * Train the model using the Baum-Welch algorith, with only the given unlabeled
+ * Train the model using the Baum-Welch algorithm, with only the given unlabeled
* observations.
*/
template<typename Distribution>
-void HMM<Distribution>::Train(const std::vector<arma::vec>& dataSeq)
+void HMM<Distribution>::Train(
+ const std::vector<std::vector<Observation> >& dataSeq)
{
// We should allow a guess at the transition and emission matrices.
double loglik = 0;
@@ -53,16 +55,16 @@
// Markov Models: Estimation and Control", pp. 36-40.
for (size_t iter = 0; iter < iterations; iter++)
{
- // Clear new transition and emission matrices.
+ // Clear new transition matrix and emission probabilities.
arma::mat newTransition(transition.n_rows, transition.n_cols);
- arma::mat newEmission(emission.n_rows, emission.n_cols);
newTransition.zeros();
- newEmission.zeros();
// Reset log likelihood.
loglik = 0;
// Loop over each sequence.
+ std::vector<std::vector<double> > emissionProb(transition.n_cols);
+ std::vector<std::vector<size_t> > emissionList(transition.n_cols);
for (size_t seq = 0; seq < dataSeq.size(); seq++)
{
arma::mat stateProb;
@@ -78,37 +80,39 @@
// t + 1)))
// E_ij = sum_d ((1 / P(seq[d])) sum_{t | seq[d][t] = j} f(i, t) b(i, t)
// We store the new estimates in a different matrix.
- for (size_t t = 0; t < dataSeq[seq].n_elem; t++)
+ for (size_t t = 0; t < dataSeq[seq].size(); t++)
{
for (size_t j = 0; j < transition.n_cols; j++)
{
- if (t < dataSeq[seq].n_elem - 1)
+ if (t < dataSeq[seq].size() - 1)
{
// Estimate of T_ij (probability of transition from state j to state
// i). We postpone multiplication of the old T_ij until later.
for (size_t i = 0; i < transition.n_rows; i++)
newTransition(i, j) += forward(j, t) * backward(i, t + 1) *
- emission((size_t) dataSeq[seq][t + 1], i) / scales[t + 1];
+ emission[i].Probability(dataSeq[seq][t + 1]) / scales[t + 1];
}
- // Estimate of E_ij (probability of emission i while in state j).
- newEmission((size_t) dataSeq[seq][t], j) += stateProb(j, t);
+ // Add to list of emission observations, for Distribution::Estimate().
+ emissionList[j].push_back(dataSeq[seq][t]);
+ emissionProb[j].push_back(stateProb(j, t));
}
}
}
- // Assign the new matrices. We use %= (element-wise multiplication) because
- // every element of the new transition matrix must still be multiplied by
- // the old elements (this is the multiplication we earlier postponed).
+ // Assign the new transition matrix. We use %= (element-wise
+ // multiplication) because every element of the new transition matrix must
+ // still be multiplied by the old elements (this is the multiplication we
+ // earlier postponed).
transition %= newTransition;
- emission = newEmission;
- // Now we normalize the transition matrices.
+ // Now we normalize the transition matrix.
for (size_t i = 0; i < transition.n_cols; i++)
transition.col(i) /= accu(transition.col(i));
- for (size_t i = 0; i < emission.n_cols; i++)
- emission.col(i) /= accu(emission.col(i));
+ // Now estimate emission probabilities.
+ for (size_t state = 0; state < transition.n_cols; state++)
+ emission[state].Estimate(emissionList[state], emissionProb[state]);
Log::Debug << "Iteration " << iter << ": log-likelihood " << loglik
<< std::endl;
@@ -128,8 +132,9 @@
* emission matrices are directly estimated.
*/
template<typename Distribution>
-void HMM<Distribution>::Train(const std::vector<arma::vec>& dataSeq,
- const std::vector<arma::Col<size_t> >& stateSeq)
+void HMM<Distribution>::Train(
+ const std::vector<std::vector<Observation> >& dataSeq,
+ const std::vector<std::vector<size_t> >& stateSeq)
{
// Simple error checking.
if (dataSeq.size() != stateSeq.size())
@@ -137,31 +142,31 @@
"of state sequences." << std::endl;
transition.zeros();
- emission.zeros();
// Estimate the transition and emission matrices directly from the
// observations.
+ std::vector<std::vector<size_t> > emissionList(transition.n_cols);
for (size_t seq = 0; seq < dataSeq.size(); seq++)
{
// Simple error checking.
- if (dataSeq[seq].n_elem != stateSeq[seq].n_elem)
+ if (dataSeq[seq].size() != stateSeq[seq].size())
Log::Fatal << "HMM::Train(): number of observations in sequence " << seq
<< " not equal to number of states" << std::endl;
// Loop over each observation in the sequence. For estimation of the
// transition matrix, we must ignore the last observation.
- for (size_t t = 0; t < dataSeq[seq].n_elem - 1; t++)
+ for (size_t t = 0; t < dataSeq[seq].size() - 1; t++)
{
transition(stateSeq[seq][t + 1], stateSeq[seq][t])++;
- emission(dataSeq[seq][t], stateSeq[seq][t])++;
+ emissionList[stateSeq[seq][t]].push_back(dataSeq[seq][t]);
}
// Last observation.
- emission(dataSeq[seq][dataSeq[seq].n_elem - 1],
- stateSeq[seq][stateSeq[seq].n_elem - 1])++;
+ emissionList[stateSeq[seq][stateSeq[seq].size() - 1]].push_back(
+ dataSeq[seq][dataSeq[seq].size() - 1]);
}
- // Normalize transition matrix and emission matrix..
+ // Normalize transition matrix.
for (size_t col = 0; col < transition.n_cols; col++)
{
// If the transition probability sum is greater than 0 in this column, the
@@ -169,11 +174,12 @@
// division by 0.
double sum = accu(transition.col(col));
if (sum > 0)
- {
transition.col(col) /= sum;
- emission.col(col) /= accu(emission.col(col));
- }
}
+
+ // Estimate emission matrix.
+ for (size_t state = 0; state < transition.n_cols; state++)
+ emission[state].Estimate(emissionList[state]);
}
/**
@@ -181,22 +187,22 @@
* given data observation.
*/
template<typename Distribution>
-double HMM<Distribution>::Estimate(const arma::vec& data_seq,
- arma::mat& state_prob_mat,
- arma::mat& forward_prob_mat,
- arma::mat& backward_prob_mat,
- arma::vec& scale_vec) const
+double HMM<Distribution>::Estimate(const std::vector<Observation>& dataSeq,
+ arma::mat& stateProb,
+ arma::mat& forwardProb,
+ arma::mat& backwardProb,
+ arma::vec& scales) const
{
// First run the forward-backward algorithm.
- Forward(data_seq, scale_vec, forward_prob_mat);
- Backward(data_seq, scale_vec, backward_prob_mat);
+ Forward(dataSeq, scales, forwardProb);
+ Backward(dataSeq, scales, backwardProb);
// Now assemble the state probability matrix based on the forward and backward
// probabilities.
- state_prob_mat = forward_prob_mat % backward_prob_mat;
+ stateProb = forwardProb % backwardProb;
// Finally assemble the log-likelihood and return it.
- return accu(log(scale_vec));
+ return accu(log(scales));
}
/**
@@ -204,7 +210,7 @@
* given data observation.
*/
template<typename Distribution>
-double HMM<Distribution>::Estimate(const arma::vec& dataSeq,
+double HMM<Distribution>::Estimate(const std::vector<Observation>& dataSeq,
arma::mat& stateProb) const
{
// We don't need to save these.
@@ -221,13 +227,13 @@
*/
template<typename Distribution>
void HMM<Distribution>::Generate(const size_t length,
- arma::vec& dataSequence,
- arma::Col<size_t>& stateSequence,
+ std::vector<Observation>& dataSequence,
+ std::vector<size_t>& stateSequence,
const size_t startState) const
{
// Set vectors to the right size.
- stateSequence.set_size(length);
- dataSequence.set_size(length);
+ stateSequence.resize(length);
+ dataSequence.resize(length);
// Set start state (default is 0).
stateSequence[0] = startState;
@@ -237,16 +243,7 @@
// We just have to find where our random value sits in the probability
// distribution of emissions for our starting state.
- double probSum = 0;
- for (size_t em = 0; em < emission.n_rows; em++)
- {
- probSum += emission(em, startState);
- if (randValue <= probSum)
- {
- dataSequence[0] = em;
- break;
- }
- }
+ dataSequence[0] = emission[startState].Random();
// Now choose the states and emissions for the rest of the sequence.
for (size_t t = 1; t < length; t++)
@@ -256,7 +253,7 @@
// Now find where our random value sits in the probability distribution of
// state changes.
- probSum = 0;
+ double probSum = 0;
for (size_t st = 0; st < transition.n_rows; st++)
{
probSum += transition(st, stateSequence[t - 1]);
@@ -268,20 +265,7 @@
}
// Now choose the emission.
- randValue = (double) rand() / (double) RAND_MAX;
-
- // Now find where our random value sits in the probability distribution of
- // emissions for the state we just chose.
- probSum = 0;
- for (size_t em = 0; em < emission.n_rows; em++)
- {
- probSum += emission(em, stateSequence[t]);
- if (randValue <= probSum)
- {
- dataSequence[t] = em;
- break;
- }
- }
+ dataSequence[t] = emission[stateSequence[t]].Random();
}
}
@@ -291,28 +275,30 @@
* sequence.
*/
template<typename Distribution>
-double HMM<Distribution>::Predict(const arma::vec& dataSeq,
- arma::Col<size_t>& stateSeq) const
+double HMM<Distribution>::Predict(const std::vector<Observation>& dataSeq,
+ std::vector<size_t>& stateSeq) const
{
// This is an implementation of the Viterbi algorithm for finding the most
// probable sequence of states to produce the observed data sequence. We
// don't use log-likelihoods to save that little bit of time, but we'll
// calculate the log-likelihood at the end of it all.
- stateSeq.set_size(dataSeq.n_elem);
- arma::mat stateProb(transition.n_rows, dataSeq.n_elem);
+ stateSeq.resize(dataSeq.size());
+ arma::mat stateProb(transition.n_rows, dataSeq.size());
// The calculation of the first state is slightly different; the probability
// of the first state being state j is the maximum probability that the state
- // came to be j from another state. We can do that in one line.
- stateProb.col(0) = transition.col(0) %
- trans(emission.row((size_t) dataSeq[0]));
+ // came to be j from another state.
+ stateProb.col(0).zeros();
+ for (size_t state = 0; state < transition.n_rows; state++)
+ stateProb[state] = transition(state, 0) *
+ emission[state].Probability(dataSeq[0]);
// Store the best first state.
arma::u32 index;
stateProb.unsafe_col(0).max(index);
stateSeq[0] = index;
- for (size_t t = 1; t < dataSeq.n_elem; t++)
+ for (size_t t = 1; t < dataSeq.size(); t++)
{
// Assemble the state probability for this element.
// Given that we are in state j, we state with the highest probability of
@@ -320,7 +306,7 @@
for (size_t j = 0; j < transition.n_rows; j++)
{
arma::vec prob = stateProb.col(t - 1) % trans(transition.row(j));
- stateProb(j, t) = prob.max() * emission((size_t) dataSeq[t], j);
+ stateProb(j, t) = prob.max() * emission[j].Probability(dataSeq[t]);
}
// Store the best state.
@@ -328,14 +314,15 @@
stateSeq[t] = index;
}
- return log(stateProb(stateSeq[dataSeq.n_elem - 1], dataSeq.n_elem - 1));
+ return log(stateProb(stateSeq[dataSeq.size() - 1], dataSeq.size() - 1));
}
/**
* Compute the log-likelihood of the given data sequence.
*/
template<typename Distribution>
-double HMM<Distribution>::LogLikelihood(const arma::vec& dataSeq) const
+double HMM<Distribution>::LogLikelihood(
+ const std::vector<Observation>& dataSeq) const
{
arma::mat forward;
arma::vec scales;
@@ -350,72 +337,70 @@
* The Forward procedure (part of the Forward-Backward algorithm).
*/
template<typename Distribution>
-void HMM<Distribution>::Forward(const arma::vec& dataSeq,
+void HMM<Distribution>::Forward(const std::vector<Observation>& dataSeq,
arma::vec& scales,
- arma::mat& forwardProbabilities) const
+ arma::mat& forwardProb) const
{
// Our goal is to calculate the forward probabilities:
// P(X_k | o_{1:k}) for all possible states X_k, for each time point k.
- forwardProbabilities.zeros(transition.n_rows, dataSeq.n_elem);
- scales.zeros(dataSeq.n_elem);
+ forwardProb.zeros(transition.n_rows, dataSeq.size());
+ scales.zeros(dataSeq.size());
// Starting state (at t = -1) is assumed to be state 0. This is what MATLAB
// does in their hmmdecode() function, so we will emulate that behavior.
- size_t obs = (size_t) dataSeq[0];
- forwardProbabilities.col(0) = transition.col(0) % trans(emission.row(obs));
+ for (size_t state = 0; state < transition.n_rows; state++)
+ forwardProb(state, 0) = transition(state, 0) *
+ emission[state].Probability(dataSeq[0]);
+
// Then normalize the column.
- scales[0] = accu(forwardProbabilities.col(0));
- forwardProbabilities.col(0) /= scales[0];
+ scales[0] = accu(forwardProb.col(0));
+ forwardProb.col(0) /= scales[0];
// Now compute the probabilities for each successive observation.
- for (size_t t = 1; t < dataSeq.n_elem; t++)
+ for (size_t t = 1; t < dataSeq.size(); t++)
{
- obs = (size_t) dataSeq[t];
-
for (size_t j = 0; j < transition.n_rows; j++)
{
// The forward probability of state j at time t is the sum over all states
// of the probability of the previous state transitioning to the current
// state and emitting the given observation.
- forwardProbabilities(j, t) = accu(forwardProbabilities.col(t - 1) %
- trans(transition.row(j))) * emission(obs, j);
+ forwardProb(j, t) = accu(forwardProb.col(t - 1) %
+ trans(transition.row(j))) * emission[j].Probability(dataSeq[t]);
}
// Normalize probability.
- scales[t] = accu(forwardProbabilities.col(t));
- forwardProbabilities.col(t) /= scales[t];
+ scales[t] = accu(forwardProb.col(t));
+ forwardProb.col(t) /= scales[t];
}
}
template<typename Distribution>
-void HMM<Distribution>::Backward(const arma::vec& dataSeq,
+void HMM<Distribution>::Backward(const std::vector<Observation>& dataSeq,
const arma::vec& scales,
- arma::mat& backwardProbabilities) const
+ arma::mat& backwardProb) const
{
// Our goal is to calculate the backward probabilities:
// P(X_k | o_{k + 1:T}) for all possible states X_k, for each time point k.
- backwardProbabilities.zeros(transition.n_rows, dataSeq.n_elem);
+ backwardProb.zeros(transition.n_rows, dataSeq.size());
// The last element probability is 1.
- backwardProbabilities.col(dataSeq.n_elem - 1).fill(1);
+ backwardProb.col(dataSeq.size() - 1).fill(1);
// Now step backwards through all other observations.
- for (size_t t = dataSeq.n_elem - 2; t + 1 > 0; t--)
+ for (size_t t = dataSeq.size() - 2; t + 1 > 0; t--)
{
- // This will eventually need to depend on the distribution.
- size_t obs = (size_t) dataSeq[t + 1];
-
for (size_t j = 0; j < transition.n_rows; j++)
{
// The backward probability of state j at time t is the sum over all state
// of the probability of the next state having been a transition from the
// current state multiplied by the probability of each of those states
// emitting the given observation.
- backwardProbabilities(j, t) += accu(transition.col(j) %
- backwardProbabilities.col(t + 1) % trans(emission.row(obs)));
+ for (size_t state = 0; state < transition.n_rows; state++)
+ backwardProb(j, t) += transition(state, j) * backwardProb(state, t + 1)
+ * emission[state].Probability(dataSeq[t + 1]);
// Normalize by the weights from the forward algorithm.
- backwardProbabilities(j, t) /= scales[t + 1];
+ backwardProb(j, t) /= scales[t + 1];
}
}
}
More information about the mlpack-svn
mailing list