Thu Mar 5 21:52:08 EST 2015

Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 6e825ddf1c287f820e6db25d0417b03618c360d0
Author: Ryan Curtin <ryan at ratml.org>
Date:   Tue Jul 8 19:25:22 2014 +0000

    Add support for HMM initial states.  Slight modification of API for creating
    HMMs by hand -- the initial parameter is now required.  This change may affect
    some existing results, and the new results may not perfectly agree with MATLAB,
    but MATLAB does not have the flexibility to seriously support initial
    probabilities.  It is possible to set the initial parameters such that it will
    emulate MATLAB behavior right, probably by setting the initial probabilities to
    the first column of the transition matrix.


>---------------------------------------------------------------

6e825ddf1c287f820e6db25d0417b03618c360d0
 src/mlpack/methods/hmm/hmm.hpp      | 28 +++++++++++++++++++++++-----
 src/mlpack/methods/hmm/hmm_impl.hpp | 33 +++++++++++++++++++++++++++------
 2 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/src/mlpack/methods/hmm/hmm.hpp b/src/mlpack/methods/hmm/hmm.hpp
index 4ea62f2..f416875 100644
--- a/src/mlpack/methods/hmm/hmm.hpp
+++ b/src/mlpack/methods/hmm/hmm.hpp
@@ -87,6 +87,9 @@ class HMM
    * Optionally, the tolerance for convergence of the Baum-Welch algorithm can
    * be set.
    *
+   * By default, the transition matrix and initial probability vector are set to
+   * contain equal probability for each state.
+   *
    * @param states Number of states.
    * @param emissions Default distribution for emissions.
    * @param tolerance Tolerance for convergence of training algorithm
@@ -97,10 +100,15 @@ class HMM
       const double tolerance = 1e-5);
 
   /**
-   * Create the Hidden Markov Model with the given transition matrix and the
-   * given emission distributions.  The dimensionality of the observations of
-   * the HMM are taken from the given emission distributions.  Alternately, the
-   * dimensionality can be set with Dimensionality().
+   * Create the Hidden Markov Model with the given initial probability vector,
+   * the given transition matrix, and the given emission distributions.  The
+   * dimensionality of the observations of the HMM are taken from the given
+   * emission distributions.  Alternately, the dimensionality can be set with
+   * Dimensionality().
+   *
+   * The initial state probability vector should have length equal to the number
+   * of states, and each entry represents the probability of being in the given
+   * state at time T = 0 (the beginning of a sequence).
    *
    * The transition matrix should be such that T(i, j) is the probability of
    * transition to state i from state j.  The columns of the matrix should sum
@@ -112,12 +120,14 @@ class HMM
    * Optionally, the tolerance for convergence of the Baum-Welch algorithm can
    * be set.
    *
+   * @param initial Initial state probabilities.
    * @param transition Transition matrix.
    * @param emission Emission distributions.
    * @param tolerance Tolerance for convergence of training algorithm
    *      (Baum-Welch).
    */
-  HMM(const arma::mat& transition,
+  HMM(const arma::vec& initial,
+      const arma::mat& transition,
       const std::vector<Distribution>& emission,
       const double tolerance = 1e-5);
 
@@ -250,6 +260,11 @@ class HMM
    */
   double LogLikelihood(const arma::mat& dataSeq) const;
 
+  //! Return the vector of initial state probabilities.
+  const arma::vec& Initial() const { return initial; }
+  //! Modify the vector of initial state probabilities.
+  arma::vec& Initial() { return initial; }
+
   //! Return the transition matrix.
   const arma::mat& Transition() const { return transition; }
   //! Return a modifiable transition matrix reference.
@@ -307,6 +322,9 @@ class HMM
                 const arma::vec& scales,
                 arma::mat& backwardProb) const;
 
+  //! Initial state probability vector.
+  arma::vec initial;
+
   //! Transition probability matrix.
   arma::mat transition;
 
diff --git a/src/mlpack/methods/hmm/hmm_impl.hpp b/src/mlpack/methods/hmm/hmm_impl.hpp
index 2992470..a8644f2 100644
--- a/src/mlpack/methods/hmm/hmm_impl.hpp
+++ b/src/mlpack/methods/hmm/hmm_impl.hpp
@@ -22,6 +22,7 @@ template<typename Distribution>
 HMM<Distribution>::HMM(const size_t states,
                        const Distribution emissions,
                        const double tolerance) :
+    initial(arma::ones<arma::vec>(states) / (double) states),
     transition(arma::ones<arma::mat>(states, states) / (double) states),
     emission(states, /* default distribution */ emissions),
     dimensionality(emissions.Dimensionality()),
@@ -33,9 +34,11 @@ HMM<Distribution>::HMM(const size_t states,
  * emission probability matrix.
  */
 template<typename Distribution>
-HMM<Distribution>::HMM(const arma::mat& transition,
+HMM<Distribution>::HMM(const arma::vec& initial,
+                       const arma::mat& transition,
                        const std::vector<Distribution>& emission,
                        const double tolerance) :
+    initial(initial),
     transition(transition),
     emission(emission),
     tolerance(tolerance)
@@ -101,6 +104,8 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
   for (size_t iter = 0; iter < iterations; iter++)
   {
     // Clear new transition matrix and emission probabilities.
+    arma::vec newInitial(transition.n_rows);
+    newInitial.zeros();
     arma::mat newTransition(transition.n_rows, transition.n_cols);
     newTransition.zeros();
 
@@ -122,6 +127,7 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
       loglik += Estimate(dataSeq[seq], stateProb, forward, backward, scales);
 
       // Now re-estimate the parameters.  This is the M-step.
+      //   pi_i = sum_d ((1 / P(seq[d])) sum_t (f(i, 0) b(i, 0))
       //   T_ij = sum_d ((1 / P(seq[d])) sum_t (f(i, t) T_ij E_i(seq[d][t]) b(i,
       //           t + 1)))
       //   E_ij = sum_d ((1 / P(seq[d])) sum_{t | seq[d][t] = j} f(i, t) b(i, t)
@@ -130,6 +136,9 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
       {
         for (size_t j = 0; j < transition.n_cols; j++)
         {
+          // Add to estimate of initial probability for state j.
+          newInitial[j] = stateProb(j, 0);
+
           if (t < dataSeq[seq].n_cols - 1)
           {
             // Estimate of T_ij (probability of transition from state j to state
@@ -148,6 +157,10 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
       }
     }
 
+    // Normalize the new initial probabilities.
+    if (dataSeq.size() == 0)
+      initial = newInitial / dataSeq.size();
+
     // Assign the new transition matrix.  We use %= (element-wise
     // multiplication) because every element of the new transition matrix must
     // still be multiplied by the old elements (this is the multiplication we
@@ -191,6 +204,7 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq,
         << ")." << std::endl;
   }
 
+  initial.zeros();
   transition.zeros();
 
   // Estimate the transition and emission matrices directly from the
@@ -218,6 +232,7 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq,
 
     // Loop over each observation in the sequence.  For estimation of the
     // transition matrix, we must ignore the last observation.
+    initial[stateSeq[seq][0]]++;
     for (size_t t = 0; t < dataSeq[seq].n_cols - 1; t++)
     {
       transition(stateSeq[seq][t + 1], stateSeq[seq][t])++;
@@ -229,6 +244,9 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq,
         std::make_pair(seq, stateSeq[seq].n_elem - 1));
   }
 
+  // Normalize initial weights.
+  initial /= accu(initial);
+
   // Normalize transition matrix.
   for (size_t col = 0; col < transition.n_cols; col++)
   {
@@ -370,9 +388,9 @@ double HMM<Distribution>::Predict(const arma::mat& dataSeq,
   logStateProb.col(0).zeros();
   for (size_t state = 0; state < transition.n_rows; state++)
   {
-    logStateProb[state] = log(transition.unsafe_col(state).max() *
+    logStateProb(state, 0) = log(initial[state] *
         emission[state].Probability(dataSeq.unsafe_col(0)));
-    stateSeqBack[state] = state;
+    stateSeqBack(state, 0) = state;
   }
 
   // Store the best first state.
@@ -429,10 +447,13 @@ void HMM<Distribution>::Forward(const arma::mat& dataSeq,
   forwardProb.zeros(transition.n_rows, dataSeq.n_cols);
   scales.zeros(dataSeq.n_cols);
 
-  // Starting state (at t = -1) is assumed to be state 0.  This is what MATLAB
-  // does in their hmmdecode() function, so we will emulate that behavior.
+  // The first entry in the forward algorithm uses the initial state
+  // probabilities.  Note that MATLAB assumes that the starting state (at
+  // t = -1) is state 0; this is not our assumption here.  To force that
+  // behavior, you could append a single starting state to every single data
+  // sequence and that should produce results in line with MATLAB.
   for (size_t state = 0; state < transition.n_rows; state++)
-    forwardProb(state, 0) = transition(state, 0) *
+    forwardProb(state, 0) = initial(state) *
         emission[state].Probability(dataSeq.unsafe_col(0));
 
   // Then normalize the column.