[mlpack-svn] r10524 - mlpack/trunk/src/mlpack/methods/hmm

Sat Dec 3 03:06:00 EST 2011

Author: rcurtin
Date: 2011-12-03 03:06:00 -0500 (Sat, 03 Dec 2011)
New Revision: 10524

Modified:
   mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp
   mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp
Log:
Adapt HMM to only use arma::vec as observations.


Modified: mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp
===================================================================

--- mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp	2011-12-03 08:05:33 UTC (rev 10523)
+++ mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp	2011-12-03 08:06:00 UTC (rev 10524)
@@ -60,9 +60,6 @@
   std::vector<Distribution> emission;
 
  public:
-  //! Convenience typedef for the type of observation we are using.
-  typedef typename Distribution::DataType Observation;
-
   /**
    * Create the Hidden Markov Model with the given number of hidden states and
    * the given default distribution for emissions.
@@ -95,7 +92,7 @@
    *
    * @param dataSeq Vector of observation sequences.
    */
-  void Train(const std::vector<std::vector<Observation> >& dataSeq);
+  void Train(const std::vector<arma::mat>& dataSeq);
 
   /**
    * Train the model using the given labeled observations; the transition and
@@ -105,8 +102,8 @@
    * @param stateSeq Vector of state sequences, corresponding to each
    *    observation.
    */
-  void Train(const std::vector<std::vector<Observation> >& dataSeq,
-             const std::vector<std::vector<size_t> >& stateSeq);
+  void Train(const std::vector<arma::mat>& dataSeq,
+             const std::vector<arma::Col<size_t> >& stateSeq);
 
   /**
    * Estimate the probabilities of each hidden state at each time step for each
@@ -126,7 +123,7 @@
    *    will be stored.
    * @return Log-likelihood of most likely state sequence.
    */
-  double Estimate(const std::vector<Observation>& dataSeq,
+  double Estimate(const arma::mat& dataSeq,
                   arma::mat& stateProb,
                   arma::mat& forwardProb,
                   arma::mat& backwardProb,
@@ -143,7 +140,7 @@
    * @param stateProb Probabilities of each state at each time interval.
    * @return Log-likelihood of most likely state sequence.
    */
-  double Estimate(const std::vector<Observation>& dataSeq,
+  double Estimate(const arma::mat& dataSeq,
                   arma::mat& stateProb) const;
 
   /**
@@ -157,8 +154,8 @@
    * @param startState Hidden state to start sequence in (default 0).
    */
   void Generate(const size_t length,
-                std::vector<Observation>& dataSequence,
-                std::vector<size_t>& stateSequence,
+                arma::mat& dataSequence,
+                arma::Col<size_t>& stateSequence,
                 const size_t startState = 0) const;
 
   /**
@@ -171,8 +168,8 @@
    *    stored.
    * @return Log-likelihood of most probable state sequence.
    */
-  double Predict(const std::vector<Observation>& dataSeq,
-                 std::vector<size_t>& stateSeq) const;
+  double Predict(const arma::mat& dataSeq,
+                 arma::Col<size_t>& stateSeq) const;
 
   /**
    * Compute the log-likelihood of the given data sequence.
@@ -180,7 +177,7 @@
    * @param dataSeq Data sequence to evaluate the likelihood of.
    * @return Log-likelihood of the given sequence.
    */
-  double LogLikelihood(const std::vector<Observation>& dataSeq) const;
+  double LogLikelihood(const arma::mat& dataSeq) const;
 
   /**
    * Return the transition matrix.
@@ -215,7 +212,7 @@
    * @param scales Vector in which scaling factors will be saved.
    * @param forwardProb Matrix in which forward probabilities will be saved.
    */
-  void Forward(const std::vector<Observation>& dataSeq,
+  void Forward(const arma::mat& dataSeq,
                arma::vec& scales,
                arma::mat& forwardProb) const;
 
@@ -230,9 +227,12 @@
    * @param scales Vector of scaling factors.
    * @param backwardProb Matrix in which backward probabilities will be saved.
    */
-  void Backward(const std::vector<Observation>& dataSeq,
+  void Backward(const arma::mat& dataSeq,
                 const arma::vec& scales,
                 arma::mat& backwardProb) const;
+
+  //! Dimensionality of observations.
+  size_t dimensionality;
 };
 
 }; // namespace hmm

Modified: mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp	2011-12-03 08:05:33 UTC (rev 10523)
+++ mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp	2011-12-03 08:06:00 UTC (rev 10524)
@@ -21,7 +21,8 @@
 template<typename Distribution>
 HMM<Distribution>::HMM(const size_t states, const Distribution emissions) :
     transition(arma::ones<arma::mat>(states, states) / (double) states),
-    emission(states, /* default distribution */ emissions)
+    emission(states, /* default distribution */ emissions),
+    dimensionality(emissions.Dimensionality())
 { /* nothing to do */ }
 
 /**
@@ -33,15 +34,18 @@
                        const std::vector<Distribution>& emission) :
     transition(transition),
     emission(emission)
-{ /* nothing to do */ }
+{
+  // Set the dimensionality, if we can.
+  if (emission.size() > 0)
+    dimensionality = emission[0].Dimensionality();
+}
 
 /**
  * Train the model using the Baum-Welch algorithm, with only the given unlabeled
  * observations.
  */
 template<typename Distribution>
-void HMM<Distribution>::Train(
-    const std::vector<std::vector<Observation> >& dataSeq)
+void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
 {
   // We should allow a guess at the transition and emission matrices.
   double loglik = 0;
@@ -50,6 +54,21 @@
   // Maximum iterations?
   size_t iterations = 1000;
 
+  // Find length of all sequences.
+  size_t totalLength = 0;
+  for (size_t seq = 0; seq < dataSeq.size(); seq++)
+    totalLength += dataSeq[seq].n_cols;
+
+  // Re-set the dimensionality, if we need to.
+  if (dataSeq.size() > 0) // Just in case a user passed an empty sequence...
+    dimensionality = dataSeq[0].n_rows;
+
+  // These are used later for training of each distribution.  We initialize it
+  // all now so we don't have to do any allocation later on.
+  std::vector<arma::vec> emissionProb(transition.n_cols,
+      arma::vec(totalLength));
+  arma::mat emissionList(dimensionality, totalLength);
+
   // This should be the Baum-Welch algorithm (EM for HMM estimation). This
   // follows the procedure outlined in Elliot, Aggoun, and Moore's book "Hidden
   // Markov Models: Estimation and Control", pp. 36-40.
@@ -62,9 +81,10 @@
     // Reset log likelihood.
     loglik = 0;
 
+    // Sum over time.
+    size_t sumTime = 0;
+
     // Loop over each sequence.
-    std::vector<std::vector<double> > emissionProb(transition.n_cols);
-    std::vector<std::vector<Observation> > emissionList(transition.n_cols);
     for (size_t seq = 0; seq < dataSeq.size(); seq++)
     {
       arma::mat stateProb;
@@ -80,23 +100,25 @@
       //           t + 1)))
       //   E_ij = sum_d ((1 / P(seq[d])) sum_{t | seq[d][t] = j} f(i, t) b(i, t)
       // We store the new estimates in a different matrix.
-      for (size_t t = 0; t < dataSeq[seq].size(); t++)
+      for (size_t t = 0; t < dataSeq[seq].n_cols; t++)
       {
         for (size_t j = 0; j < transition.n_cols; j++)
         {
-          if (t < dataSeq[seq].size() - 1)
+          if (t < dataSeq[seq].n_cols - 1)
           {
             // Estimate of T_ij (probability of transition from state j to state
             // i).  We postpone multiplication of the old T_ij until later.
             for (size_t i = 0; i < transition.n_rows; i++)
               newTransition(i, j) += forward(j, t) * backward(i, t + 1) *
-                  emission[i].Probability(dataSeq[seq][t + 1]) / scales[t + 1];
+                  emission[i].Probability(dataSeq[seq].unsafe_col(t + 1)) /
+                  scales[t + 1];
           }
 
           // Add to list of emission observations, for Distribution::Estimate().
-          emissionList[j].push_back(dataSeq[seq][t]);
-          emissionProb[j].push_back(stateProb(j, t));
+          emissionList.col(sumTime) = dataSeq[seq].col(t);
+          emissionProb[j][sumTime] = stateProb(j, t);
         }
+        sumTime++;
       }
     }
 
@@ -112,7 +134,7 @@
 
     // Now estimate emission probabilities.
     for (size_t state = 0; state < transition.n_cols; state++)
-      emission[state].Estimate(emissionList[state], emissionProb[state]);
+      emission[state].Estimate(emissionList, emissionProb[state]);
 
     Log::Debug << "Iteration " << iter << ": log-likelihood " << loglik
         << std::endl;
@@ -132,38 +154,50 @@
  * emission matrices are directly estimated.
  */
 template<typename Distribution>
-void HMM<Distribution>::Train(
-    const std::vector<std::vector<Observation> >& dataSeq,
-    const std::vector<std::vector<size_t> >& stateSeq)
+void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq,
+                              const std::vector<arma::Col<size_t> >& stateSeq)
 {
   // Simple error checking.
   if (dataSeq.size() != stateSeq.size())
-    Log::Fatal << "HMM::Train(): number of data sequences not equal to number "
-        "of state sequences." << std::endl;
+  {
+    Log::Fatal << "HMM::Train(): number of data sequences (" << dataSeq.size()
+        << ") not equal to number of state sequences (" << stateSeq.size()
+        << ")." << std::endl;
+  }
 
   transition.zeros();
 
+  // Re-set the dimensionality, if we need to.
+  if (dataSeq.size() > 0)
+    dimensionality = dataSeq[0].n_rows;
+
   // Estimate the transition and emission matrices directly from the
-  // observations.
-  std::vector<std::vector<Observation> > emissionList(transition.n_cols);
+  // observations.  The emission list holds the time indices for observations
+  // from each state.
+  std::vector<std::vector<std::pair<size_t, size_t> > >
+      emissionList(transition.n_cols);
   for (size_t seq = 0; seq < dataSeq.size(); seq++)
   {
     // Simple error checking.
-    if (dataSeq[seq].size() != stateSeq[seq].size())
-      Log::Fatal << "HMM::Train(): number of observations in sequence " << seq
-          << " not equal to number of states" << std::endl;
+    if (dataSeq[seq].n_cols != stateSeq[seq].n_elem)
+    {
+      Log::Fatal << "HMM::Train(): number of observations ("
+          << dataSeq[seq].n_cols << ") in sequence " << seq
+          << " not equal to number of states (" << stateSeq[seq].n_cols
+          << ") in sequence " << seq << "." << std::endl;
+    }
 
     // Loop over each observation in the sequence.  For estimation of the
     // transition matrix, we must ignore the last observation.
-    for (size_t t = 0; t < dataSeq[seq].size() - 1; t++)
+    for (size_t t = 0; t < dataSeq[seq].n_cols - 1; t++)
     {
       transition(stateSeq[seq][t + 1], stateSeq[seq][t])++;
-      emissionList[stateSeq[seq][t]].push_back(dataSeq[seq][t]);
+      emissionList[stateSeq[seq][t]].push_back(std::make_pair(seq, t));
     }
 
     // Last observation.
-    emissionList[stateSeq[seq][stateSeq[seq].size() - 1]].push_back(
-        dataSeq[seq][dataSeq[seq].size() - 1]);
+    emissionList[stateSeq[seq][stateSeq[seq].n_elem - 1]].push_back(
+        std::make_pair(seq, stateSeq[seq].n_elem - 1));
   }
 
   // Normalize transition matrix.
@@ -179,7 +213,18 @@
 
   // Estimate emission matrix.
   for (size_t state = 0; state < transition.n_cols; state++)
-    emission[state].Estimate(emissionList[state]);
+  {
+    // Generate full sequence of observations for this state from the list of
+    // emissions that are from this state.
+    arma::mat emissions(dimensionality, emissionList[state].size());
+    for (size_t i = 0; i < emissions.n_cols; i++)
+    {
+      emissions.col(i) = dataSeq[emissionList[state][i].first].col(
+          emissionList[state][i].second);
+    }
+
+    emission[state].Estimate(emissions);
+  }
 }
 
 /**
@@ -187,7 +232,7 @@
  * given data observation.
  */
 template<typename Distribution>
-double HMM<Distribution>::Estimate(const std::vector<Observation>& dataSeq,
+double HMM<Distribution>::Estimate(const arma::mat& dataSeq,
                                    arma::mat& stateProb,
                                    arma::mat& forwardProb,
                                    arma::mat& backwardProb,
@@ -210,7 +255,7 @@
  * given data observation.
  */
 template<typename Distribution>
-double HMM<Distribution>::Estimate(const std::vector<Observation>& dataSeq,
+double HMM<Distribution>::Estimate(const arma::mat& dataSeq,
                                    arma::mat& stateProb) const
 {
   // We don't need to save these.
@@ -227,13 +272,13 @@
  */
 template<typename Distribution>
 void HMM<Distribution>::Generate(const size_t length,
-                                 std::vector<Observation>& dataSequence,
-                                 std::vector<size_t>& stateSequence,
+                                 arma::mat& dataSequence,
+                                 arma::Col<size_t>& stateSequence,
                                  const size_t startState) const
 {
   // Set vectors to the right size.
-  stateSequence.resize(length);
-  dataSequence.resize(length);
+  stateSequence.set_size(length);
+  dataSequence.set_size(dimensionality, length);
 
   // Set start state (default is 0).
   stateSequence[0] = startState;
@@ -243,7 +288,7 @@
 
   // We just have to find where our random value sits in the probability
   // distribution of emissions for our starting state.
-  dataSequence[0] = emission[startState].Random();
+  dataSequence.col(0) = emission[startState].Random();
 
   // Now choose the states and emissions for the rest of the sequence.
   for (size_t t = 1; t < length; t++)
@@ -265,7 +310,7 @@
     }
 
     // Now choose the emission.
-    dataSequence[t] = emission[stateSequence[t]].Random();
+    dataSequence.col(t) = emission[stateSequence[t]].Random();
   }
 }
 
@@ -275,15 +320,15 @@
  * sequence.
  */
 template<typename Distribution>
-double HMM<Distribution>::Predict(const std::vector<Observation>& dataSeq,
-                                  std::vector<size_t>& stateSeq) const
+double HMM<Distribution>::Predict(const arma::mat& dataSeq,
+                                  arma::Col<size_t>& stateSeq) const
 {
   // This is an implementation of the Viterbi algorithm for finding the most
   // probable sequence of states to produce the observed data sequence.  We
   // don't use log-likelihoods to save that little bit of time, but we'll
   // calculate the log-likelihood at the end of it all.
-  stateSeq.resize(dataSeq.size());
-  arma::mat logStateProb(transition.n_rows, dataSeq.size());
+  stateSeq.set_size(dataSeq.n_cols);
+  arma::mat logStateProb(transition.n_rows, dataSeq.n_cols);
 
   // Store the logs of the transposed transition matrix.  This is because we
   // will be using the rows of the transition matrix.
@@ -295,14 +340,14 @@
   logStateProb.col(0).zeros();
   for (size_t state = 0; state < transition.n_rows; state++)
     logStateProb[state] = log(transition(state, 0) *
-        emission[state].Probability(dataSeq[0]));
+        emission[state].Probability(dataSeq.unsafe_col(0)));
 
   // Store the best first state.
   arma::u32 index;
   logStateProb.unsafe_col(0).max(index);
   stateSeq[0] = index;
 
-  for (size_t t = 1; t < dataSeq.size(); t++)
+  for (size_t t = 1; t < dataSeq.n_cols; t++)
   {
     // Assemble the state probability for this element.
     // Given that we are in state j, we state with the highest probability of
@@ -311,7 +356,7 @@
     {
       arma::vec prob = logStateProb.col(t - 1) + logTrans.col(j);
       logStateProb(j, t) = prob.max() +
-          log(emission[j].Probability(dataSeq[t]));
+          log(emission[j].Probability(dataSeq.unsafe_col(t)));
     }
 
     // Store the best state.
@@ -319,15 +364,14 @@
     stateSeq[t] = index;
   }
 
-  return logStateProb(stateSeq[dataSeq.size() - 1], dataSeq.size() - 1);
+  return logStateProb(stateSeq(dataSeq.n_cols - 1), dataSeq.n_cols - 1);
 }
 
 /**
  * Compute the log-likelihood of the given data sequence.
  */
 template<typename Distribution>
-double HMM<Distribution>::LogLikelihood(
-    const std::vector<Observation>& dataSeq) const
+double HMM<Distribution>::LogLikelihood(const arma::mat& dataSeq) const
 {
   arma::mat forward;
   arma::vec scales;
@@ -342,27 +386,27 @@
  * The Forward procedure (part of the Forward-Backward algorithm).
  */
 template<typename Distribution>
-void HMM<Distribution>::Forward(const std::vector<Observation>& dataSeq,
+void HMM<Distribution>::Forward(const arma::mat& dataSeq,
                                 arma::vec& scales,
                                 arma::mat& forwardProb) const
 {
   // Our goal is to calculate the forward probabilities:
   //  P(X_k | o_{1:k}) for all possible states X_k, for each time point k.
-  forwardProb.zeros(transition.n_rows, dataSeq.size());
-  scales.zeros(dataSeq.size());
+  forwardProb.zeros(transition.n_rows, dataSeq.n_cols);
+  scales.zeros(dataSeq.n_cols);
 
   // Starting state (at t = -1) is assumed to be state 0.  This is what MATLAB
   // does in their hmmdecode() function, so we will emulate that behavior.
   for (size_t state = 0; state < transition.n_rows; state++)
     forwardProb(state, 0) = transition(state, 0) *
-        emission[state].Probability(dataSeq[0]);
+        emission[state].Probability(dataSeq.unsafe_col(0));
 
   // Then normalize the column.
   scales[0] = accu(forwardProb.col(0));
   forwardProb.col(0) /= scales[0];
 
   // Now compute the probabilities for each successive observation.
-  for (size_t t = 1; t < dataSeq.size(); t++)
+  for (size_t t = 1; t < dataSeq.n_cols; t++)
   {
     for (size_t j = 0; j < transition.n_rows; j++)
     {
@@ -370,7 +414,8 @@
       // of the probability of the previous state transitioning to the current
       // state and emitting the given observation.
       forwardProb(j, t) = accu(forwardProb.col(t - 1) %
-          trans(transition.row(j))) * emission[j].Probability(dataSeq[t]);
+          trans(transition.row(j))) *
+          emission[j].Probability(dataSeq.unsafe_col(t));
     }
 
     // Normalize probability.
@@ -380,19 +425,19 @@
 }
 
 template<typename Distribution>
-void HMM<Distribution>::Backward(const std::vector<Observation>& dataSeq,
+void HMM<Distribution>::Backward(const arma::mat& dataSeq,
                                  const arma::vec& scales,
                                  arma::mat& backwardProb) const
 {
   // Our goal is to calculate the backward probabilities:
   //  P(X_k | o_{k + 1:T}) for all possible states X_k, for each time point k.
-  backwardProb.zeros(transition.n_rows, dataSeq.size());
+  backwardProb.zeros(transition.n_rows, dataSeq.n_cols);
 
   // The last element probability is 1.
-  backwardProb.col(dataSeq.size() - 1).fill(1);
+  backwardProb.col(dataSeq.n_cols - 1).fill(1);
 
   // Now step backwards through all other observations.
-  for (size_t t = dataSeq.size() - 2; t + 1 > 0; t--)
+  for (size_t t = dataSeq.n_cols - 2; t + 1 > 0; t--)
   {
     for (size_t j = 0; j < transition.n_rows; j++)
     {
@@ -402,7 +447,7 @@
       // emitting the given observation.
       for (size_t state = 0; state < transition.n_rows; state++)
         backwardProb(j, t) += transition(state, j) * backwardProb(state, t + 1)
-            * emission[state].Probability(dataSeq[t + 1]);
+            * emission[state].Probability(dataSeq.unsafe_col(t + 1));
 
       // Normalize by the weights from the forward algorithm.
       backwardProb(j, t) /= scales[t + 1];