[mlpack-svn] r16789 - mlpack/trunk/src/mlpack/methods/hmm

Tue Jul 8 15:25:22 EDT 2014

Author: rcurtin
Date: Tue Jul  8 15:25:22 2014
New Revision: 16789

Log:
Add support for HMM initial states.  Slight modification of API for creating
HMMs by hand -- the initial parameter is now required.  This change may affect
some existing results, and the new results may not perfectly agree with MATLAB,
but MATLAB does not have the flexibility to seriously support initial
probabilities.  It is possible to set the initial parameters such that it will
emulate MATLAB behavior right, probably by setting the initial probabilities to
the first column of the transition matrix.


Modified:
   mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp
   mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp

Modified: mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp
==============================================================================

--- mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp	Tue Jul  8 15:25:22 2014
@@ -87,6 +87,9 @@
    * Optionally, the tolerance for convergence of the Baum-Welch algorithm can
    * be set.
    *
+   * By default, the transition matrix and initial probability vector are set to
+   * contain equal probability for each state.
+   *
    * @param states Number of states.
    * @param emissions Default distribution for emissions.
    * @param tolerance Tolerance for convergence of training algorithm
@@ -97,10 +100,15 @@
       const double tolerance = 1e-5);
 
   /**
-   * Create the Hidden Markov Model with the given transition matrix and the
-   * given emission distributions.  The dimensionality of the observations of
-   * the HMM are taken from the given emission distributions.  Alternately, the
-   * dimensionality can be set with Dimensionality().
+   * Create the Hidden Markov Model with the given initial probability vector,
+   * the given transition matrix, and the given emission distributions.  The
+   * dimensionality of the observations of the HMM are taken from the given
+   * emission distributions.  Alternately, the dimensionality can be set with
+   * Dimensionality().
+   *
+   * The initial state probability vector should have length equal to the number
+   * of states, and each entry represents the probability of being in the given
+   * state at time T = 0 (the beginning of a sequence).
    *
    * The transition matrix should be such that T(i, j) is the probability of
    * transition to state i from state j.  The columns of the matrix should sum
@@ -112,12 +120,14 @@
    * Optionally, the tolerance for convergence of the Baum-Welch algorithm can
    * be set.
    *
+   * @param initial Initial state probabilities.
    * @param transition Transition matrix.
    * @param emission Emission distributions.
    * @param tolerance Tolerance for convergence of training algorithm
    *      (Baum-Welch).
    */
-  HMM(const arma::mat& transition,
+  HMM(const arma::vec& initial,
+      const arma::mat& transition,
       const std::vector<Distribution>& emission,
       const double tolerance = 1e-5);
 
@@ -250,6 +260,11 @@
    */
   double LogLikelihood(const arma::mat& dataSeq) const;
 
+  //! Return the vector of initial state probabilities.
+  const arma::vec& Initial() const { return initial; }
+  //! Modify the vector of initial state probabilities.
+  arma::vec& Initial() { return initial; }
+
   //! Return the transition matrix.
   const arma::mat& Transition() const { return transition; }
   //! Return a modifiable transition matrix reference.
@@ -307,6 +322,9 @@
                 const arma::vec& scales,
                 arma::mat& backwardProb) const;
 
+  //! Initial state probability vector.
+  arma::vec initial;
+
   //! Transition probability matrix.
   arma::mat transition;
 

Modified: mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp	Tue Jul  8 15:25:22 2014
@@ -22,6 +22,7 @@
 HMM<Distribution>::HMM(const size_t states,
                        const Distribution emissions,
                        const double tolerance) :
+    initial(arma::ones<arma::vec>(states) / (double) states),
     transition(arma::ones<arma::mat>(states, states) / (double) states),
     emission(states, /* default distribution */ emissions),
     dimensionality(emissions.Dimensionality()),
@@ -33,9 +34,11 @@
  * emission probability matrix.
  */
 template<typename Distribution>
-HMM<Distribution>::HMM(const arma::mat& transition,
+HMM<Distribution>::HMM(const arma::vec& initial,
+                       const arma::mat& transition,
                        const std::vector<Distribution>& emission,
                        const double tolerance) :
+    initial(initial),
     transition(transition),
     emission(emission),
     tolerance(tolerance)
@@ -101,6 +104,8 @@
   for (size_t iter = 0; iter < iterations; iter++)
   {
     // Clear new transition matrix and emission probabilities.
+    arma::vec newInitial(transition.n_rows);
+    newInitial.zeros();
     arma::mat newTransition(transition.n_rows, transition.n_cols);
     newTransition.zeros();
 
@@ -122,6 +127,7 @@
       loglik += Estimate(dataSeq[seq], stateProb, forward, backward, scales);
 
       // Now re-estimate the parameters.  This is the M-step.
+      //   pi_i = sum_d ((1 / P(seq[d])) sum_t (f(i, 0) b(i, 0))
       //   T_ij = sum_d ((1 / P(seq[d])) sum_t (f(i, t) T_ij E_i(seq[d][t]) b(i,
       //           t + 1)))
       //   E_ij = sum_d ((1 / P(seq[d])) sum_{t | seq[d][t] = j} f(i, t) b(i, t)
@@ -130,6 +136,9 @@
       {
         for (size_t j = 0; j < transition.n_cols; j++)
         {
+          // Add to estimate of initial probability for state j.
+          newInitial[j] = stateProb(j, 0);
+
           if (t < dataSeq[seq].n_cols - 1)
           {
             // Estimate of T_ij (probability of transition from state j to state
@@ -148,6 +157,10 @@
       }
     }
 
+    // Normalize the new initial probabilities.
+    if (dataSeq.size() == 0)
+      initial = newInitial / dataSeq.size();
+
     // Assign the new transition matrix.  We use %= (element-wise
     // multiplication) because every element of the new transition matrix must
     // still be multiplied by the old elements (this is the multiplication we
@@ -191,6 +204,7 @@
         << ")." << std::endl;
   }
 
+  initial.zeros();
   transition.zeros();
 
   // Estimate the transition and emission matrices directly from the
@@ -218,6 +232,7 @@
 
     // Loop over each observation in the sequence.  For estimation of the
     // transition matrix, we must ignore the last observation.
+    initial[stateSeq[seq][0]]++;
     for (size_t t = 0; t < dataSeq[seq].n_cols - 1; t++)
     {
       transition(stateSeq[seq][t + 1], stateSeq[seq][t])++;
@@ -229,6 +244,9 @@
         std::make_pair(seq, stateSeq[seq].n_elem - 1));
   }
 
+  // Normalize initial weights.
+  initial /= accu(initial);
+
   // Normalize transition matrix.
   for (size_t col = 0; col < transition.n_cols; col++)
   {
@@ -370,9 +388,9 @@
   logStateProb.col(0).zeros();
   for (size_t state = 0; state < transition.n_rows; state++)
   {
-    logStateProb[state] = log(transition.unsafe_col(state).max() *
+    logStateProb(state, 0) = log(initial[state] *
         emission[state].Probability(dataSeq.unsafe_col(0)));
-    stateSeqBack[state] = state;
+    stateSeqBack(state, 0) = state;
   }
 
   // Store the best first state.
@@ -429,10 +447,13 @@
   forwardProb.zeros(transition.n_rows, dataSeq.n_cols);
   scales.zeros(dataSeq.n_cols);
 
-  // Starting state (at t = -1) is assumed to be state 0.  This is what MATLAB
-  // does in their hmmdecode() function, so we will emulate that behavior.
+  // The first entry in the forward algorithm uses the initial state
+  // probabilities.  Note that MATLAB assumes that the starting state (at
+  // t = -1) is state 0; this is not our assumption here.  To force that
+  // behavior, you could append a single starting state to every single data
+  // sequence and that should produce results in line with MATLAB.
   for (size_t state = 0; state < transition.n_rows; state++)
-    forwardProb(state, 0) = transition(state, 0) *
+    forwardProb(state, 0) = initial(state) *
         emission[state].Probability(dataSeq.unsafe_col(0));
 
   // Then normalize the column.