[mlpack-svn] r10298 - mlpack/trunk/src/mlpack/methods/hmm

Wed Nov 16 14:16:21 EST 2011

Author: rcurtin
Date: 2011-11-16 14:16:21 -0500 (Wed, 16 Nov 2011)
New Revision: 10298

Modified:
   mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp
   mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp
Log:
Abstract away from the discrete case.  Now, everything depends on Distribution
(I think).


Modified: mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp
===================================================================

--- mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp	2011-11-16 19:15:59 UTC (rev 10297)
+++ mlpack/trunk/src/mlpack/methods/hmm/hmm.hpp	2011-11-16 19:16:21 UTC (rev 10298)
@@ -23,18 +23,21 @@
   //! Transition probability matrix.
   arma::mat transition;
 
-  //! Emission probability matrix (for each state).
-  arma::mat emission;
+  //! Set of emission probability distributions; one for each state.
+  std::vector<Distribution> emission;
 
  public:
+  //! Convenience typedef for the type of observation we are using.
+  typedef typename Distribution::DataType Observation;
+
   /**
    * Create the Hidden Markov Model with the given number of hidden states and
-   * the given number of emission states.
+   * the given default distribution for emissions.
    *
    * @param states Number of states.
-   * @param emissions Number of possible emissions.
+   * @param emissions Default distribution for emissions.
    */
-  HMM(const size_t states, const size_t emissions);
+  HMM(const size_t states, const Distribution emissions);
 
   /**
    * Create the Hidden Markov Model with the given transition matrix and the
@@ -50,7 +53,7 @@
    * @param transition Transition matrix.
    * @param emission Emission probability matrix.
    */
-  HMM(const arma::mat& transition, const arma::mat& emission);
+  HMM(const arma::mat& transition, const std::vector<Distribution>& emission);
 
   /**
    * Train the model using the Baum-Welch algorithm, with only the given
@@ -59,7 +62,7 @@
    *
    * @param dataSeq Vector of observation sequences.
    */
-  void Train(const std::vector<arma::vec>& dataSeq);
+  void Train(const std::vector<std::vector<Observation> >& dataSeq);
 
   /**
    * Train the model using the given labeled observations; the transition and
@@ -69,8 +72,8 @@
    * @param stateSeq Vector of state sequences, corresponding to each
    *    observation.
    */
-  void Train(const std::vector<arma::vec>& dataSeq,
-             const std::vector<arma::Col<size_t> >& stateSeq);
+  void Train(const std::vector<std::vector<Observation> >& dataSeq,
+             const std::vector<std::vector<size_t> >& stateSeq);
 
   /**
    * Estimate the probabilities of each hidden state at each time step for each
@@ -90,7 +93,7 @@
    *    will be stored.
    * @return Log-likelihood of most likely state sequence.
    */
-  double Estimate(const arma::vec& dataSeq,
+  double Estimate(const std::vector<Observation>& dataSeq,
                   arma::mat& stateProb,
                   arma::mat& forwardProb,
                   arma::mat& backwardProb,
@@ -107,7 +110,8 @@
    * @param stateProb Probabilities of each state at each time interval.
    * @return Log-likelihood of most likely state sequence.
    */
-  double Estimate(const arma::vec& dataSeq, arma::mat& stateProb) const;
+  double Estimate(const std::vector<Observation>& dataSeq,
+                  arma::mat& stateProb) const;
 
   /**
    * Generate a random data sequence of the given length.  The data sequence is
@@ -120,8 +124,8 @@
    * @param startState Hidden state to start sequence in (default 0).
    */
   void Generate(const size_t length,
-                arma::vec& dataSequence,
-                arma::Col<size_t>& stateSequence,
+                std::vector<Observation>& dataSequence,
+                std::vector<size_t>& stateSequence,
                 const size_t startState = 0) const;
 
   /**
@@ -134,7 +138,8 @@
    *    stored.
    * @return Log-likelihood of most probable state sequence.
    */
-  double Predict(const arma::vec& dataSeq, arma::Col<size_t>& stateSeq) const;
+  double Predict(const std::vector<Observation>& dataSeq,
+                 std::vector<size_t>& stateSeq) const;
 
   /**
    * Compute the log-likelihood of the given data sequence.
@@ -142,7 +147,7 @@
    * @param dataSeq Data sequence to evaluate the likelihood of.
    * @return Log-likelihood of the given sequence.
    */
-  double LogLikelihood(const arma::vec& dataSeq) const;
+  double LogLikelihood(const std::vector<Observation>& dataSeq) const;
 
   /**
    * Return the transition matrix.
@@ -155,22 +160,46 @@
   arma::mat& Transition() { return transition; }
 
   /**
-   * Return the emission probability matrix.
+   * Return the emission distributions.
    */
-  const arma::mat& Emission() const { return emission; }
+  const std::vector<Distribution>& Emission() const { return emission; }
 
   /**
    * Return a modifiable emission probability matrix reference.
    */
-  arma::mat& Emission() { return emission; }
+  std::vector<Distribution>& Emission() { return emission; }
 
  private:
   // Helper functions.
 
-  void Forward(const arma::vec& data_seq, arma::vec& scales, arma::mat& fs)
-      const;
-  void Backward(const arma::vec& data_seq, const arma::vec& scales,
-                arma::mat& bs) const;
+  /**
+   * The Forward algorithm (part of the Forward-Backward algorithm).  Computes
+   * forward probabilities for each state for each observation in the given data
+   * sequence.  The returned matrix has rows equal to the number of hidden
+   * states and columns equal to the number of observations.
+   *
+   * @param dataSeq Data sequence to compute probabilities for.
+   * @param scales Vector in which scaling factors will be saved.
+   * @param forwardProb Matrix in which forward probabilities will be saved.
+   */
+  void Forward(const std::vector<Observation>& dataSeq,
+               arma::vec& scales,
+               arma::mat& forwardProb) const;
+
+  /**
+   * The Backward algorithm (part of the Forward-Backward algorithm).  Computes
+   * backward probabilities for each state for each observation in the given
+   * data sequence, using the scaling factors found (presumably) by Forward().
+   * The returned matrix has rows equal to the number of hidden states and
+   * columns equal to the number of observations.
+   *
+   * @param dataSeq Data sequence to compute probabilities for.
+   * @param scales Vector of scaling factors.
+   * @param backwardProb Matrix in which backward probabilities will be saved.
+   */
+  void Backward(const std::vector<Observation>& dataSeq,
+                const arma::vec& scales,
+                arma::mat& backwardProb) const;
 };
 
 }; // namespace hmm

Modified: mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp	2011-11-16 19:15:59 UTC (rev 10297)
+++ mlpack/trunk/src/mlpack/methods/hmm/hmm_impl.hpp	2011-11-16 19:16:21 UTC (rev 10298)
@@ -19,9 +19,9 @@
  * given number of emission states.
  */
 template<typename Distribution>
-HMM<Distribution>::HMM(const size_t states, const size_t emissions) :
+HMM<Distribution>::HMM(const size_t states, const Distribution emissions) :
     transition(arma::ones<arma::mat>(states, states) / (double) states),
-    emission(arma::ones<arma::mat>(emissions, states) / (double) emissions)
+    emission(states, /* default distribution */ emissions)
 { /* nothing to do */ }
 
 /**
@@ -29,17 +29,19 @@
  * emission probability matrix.
  */
 template<typename Distribution>
-HMM<Distribution>::HMM(const arma::mat& transition, const arma::mat& emission) :
+HMM<Distribution>::HMM(const arma::mat& transition,
+                       const std::vector<Distribution>& emission) :
     transition(transition),
     emission(emission)
 { /* nothing to do */ }
 
 /**
- * Train the model using the Baum-Welch algorith, with only the given unlabeled
+ * Train the model using the Baum-Welch algorithm, with only the given unlabeled
  * observations.
  */
 template<typename Distribution>
-void HMM<Distribution>::Train(const std::vector<arma::vec>& dataSeq)
+void HMM<Distribution>::Train(
+    const std::vector<std::vector<Observation> >& dataSeq)
 {
   // We should allow a guess at the transition and emission matrices.
   double loglik = 0;
@@ -53,16 +55,16 @@
   // Markov Models: Estimation and Control", pp. 36-40.
   for (size_t iter = 0; iter < iterations; iter++)
   {
-    // Clear new transition and emission matrices.
+    // Clear new transition matrix and emission probabilities.
     arma::mat newTransition(transition.n_rows, transition.n_cols);
-    arma::mat newEmission(emission.n_rows, emission.n_cols);
     newTransition.zeros();
-    newEmission.zeros();
 
     // Reset log likelihood.
     loglik = 0;
 
     // Loop over each sequence.
+    std::vector<std::vector<double> > emissionProb(transition.n_cols);
+    std::vector<std::vector<size_t> > emissionList(transition.n_cols);
     for (size_t seq = 0; seq < dataSeq.size(); seq++)
     {
       arma::mat stateProb;
@@ -78,37 +80,39 @@
       //           t + 1)))
       //   E_ij = sum_d ((1 / P(seq[d])) sum_{t | seq[d][t] = j} f(i, t) b(i, t)
       // We store the new estimates in a different matrix.
-      for (size_t t = 0; t < dataSeq[seq].n_elem; t++)
+      for (size_t t = 0; t < dataSeq[seq].size(); t++)
       {
         for (size_t j = 0; j < transition.n_cols; j++)
         {
-          if (t < dataSeq[seq].n_elem - 1)
+          if (t < dataSeq[seq].size() - 1)
           {
             // Estimate of T_ij (probability of transition from state j to state
             // i).  We postpone multiplication of the old T_ij until later.
             for (size_t i = 0; i < transition.n_rows; i++)
               newTransition(i, j) += forward(j, t) * backward(i, t + 1) *
-                  emission((size_t) dataSeq[seq][t + 1], i) / scales[t + 1];
+                  emission[i].Probability(dataSeq[seq][t + 1]) / scales[t + 1];
           }
 
-          // Estimate of E_ij (probability of emission i while in state j).
-          newEmission((size_t) dataSeq[seq][t], j) += stateProb(j, t);
+          // Add to list of emission observations, for Distribution::Estimate().
+          emissionList[j].push_back(dataSeq[seq][t]);
+          emissionProb[j].push_back(stateProb(j, t));
         }
       }
     }
 
-    // Assign the new matrices.  We use %= (element-wise multiplication) because
-    // every element of the new transition matrix must still be multiplied by
-    // the old elements (this is the multiplication we earlier postponed).
+    // Assign the new transition matrix.  We use %= (element-wise
+    // multiplication) because every element of the new transition matrix must
+    // still be multiplied by the old elements (this is the multiplication we
+    // earlier postponed).
     transition %= newTransition;
-    emission = newEmission;
 
-    // Now we normalize the transition matrices.
+    // Now we normalize the transition matrix.
     for (size_t i = 0; i < transition.n_cols; i++)
       transition.col(i) /= accu(transition.col(i));
 
-    for (size_t i = 0; i < emission.n_cols; i++)
-      emission.col(i) /= accu(emission.col(i));
+    // Now estimate emission probabilities.
+    for (size_t state = 0; state < transition.n_cols; state++)
+      emission[state].Estimate(emissionList[state], emissionProb[state]);
 
     Log::Debug << "Iteration " << iter << ": log-likelihood " << loglik
         << std::endl;
@@ -128,8 +132,9 @@
  * emission matrices are directly estimated.
  */
 template<typename Distribution>
-void HMM<Distribution>::Train(const std::vector<arma::vec>& dataSeq,
-                              const std::vector<arma::Col<size_t> >& stateSeq)
+void HMM<Distribution>::Train(
+    const std::vector<std::vector<Observation> >& dataSeq,
+    const std::vector<std::vector<size_t> >& stateSeq)
 {
   // Simple error checking.
   if (dataSeq.size() != stateSeq.size())
@@ -137,31 +142,31 @@
         "of state sequences." << std::endl;
 
   transition.zeros();
-  emission.zeros();
 
   // Estimate the transition and emission matrices directly from the
   // observations.
+  std::vector<std::vector<size_t> > emissionList(transition.n_cols);
   for (size_t seq = 0; seq < dataSeq.size(); seq++)
   {
     // Simple error checking.
-    if (dataSeq[seq].n_elem != stateSeq[seq].n_elem)
+    if (dataSeq[seq].size() != stateSeq[seq].size())
       Log::Fatal << "HMM::Train(): number of observations in sequence " << seq
           << " not equal to number of states" << std::endl;
 
     // Loop over each observation in the sequence.  For estimation of the
     // transition matrix, we must ignore the last observation.
-    for (size_t t = 0; t < dataSeq[seq].n_elem - 1; t++)
+    for (size_t t = 0; t < dataSeq[seq].size() - 1; t++)
     {
       transition(stateSeq[seq][t + 1], stateSeq[seq][t])++;
-      emission(dataSeq[seq][t], stateSeq[seq][t])++;
+      emissionList[stateSeq[seq][t]].push_back(dataSeq[seq][t]);
     }
 
     // Last observation.
-    emission(dataSeq[seq][dataSeq[seq].n_elem - 1],
-        stateSeq[seq][stateSeq[seq].n_elem - 1])++;
+    emissionList[stateSeq[seq][stateSeq[seq].size() - 1]].push_back(
+        dataSeq[seq][dataSeq[seq].size() - 1]);
   }
 
-  // Normalize transition matrix and emission matrix..
+  // Normalize transition matrix.
   for (size_t col = 0; col < transition.n_cols; col++)
   {
     // If the transition probability sum is greater than 0 in this column, the
@@ -169,11 +174,12 @@
     // division by 0.
     double sum = accu(transition.col(col));
     if (sum > 0)
-    {
       transition.col(col) /= sum;
-      emission.col(col) /= accu(emission.col(col));
-    }
   }
+
+  // Estimate emission matrix.
+  for (size_t state = 0; state < transition.n_cols; state++)
+    emission[state].Estimate(emissionList[state]);
 }
 
 /**
@@ -181,22 +187,22 @@
  * given data observation.
  */
 template<typename Distribution>
-double HMM<Distribution>::Estimate(const arma::vec& data_seq,
-                                   arma::mat& state_prob_mat,
-                                   arma::mat& forward_prob_mat,
-                                   arma::mat& backward_prob_mat,
-                                   arma::vec& scale_vec) const
+double HMM<Distribution>::Estimate(const std::vector<Observation>& dataSeq,
+                                   arma::mat& stateProb,
+                                   arma::mat& forwardProb,
+                                   arma::mat& backwardProb,
+                                   arma::vec& scales) const
 {
   // First run the forward-backward algorithm.
-  Forward(data_seq, scale_vec, forward_prob_mat);
-  Backward(data_seq, scale_vec, backward_prob_mat);
+  Forward(dataSeq, scales, forwardProb);
+  Backward(dataSeq, scales, backwardProb);
 
   // Now assemble the state probability matrix based on the forward and backward
   // probabilities.
-  state_prob_mat = forward_prob_mat % backward_prob_mat;
+  stateProb = forwardProb % backwardProb;
 
   // Finally assemble the log-likelihood and return it.
-  return accu(log(scale_vec));
+  return accu(log(scales));
 }
 
 /**
@@ -204,7 +210,7 @@
  * given data observation.
  */
 template<typename Distribution>
-double HMM<Distribution>::Estimate(const arma::vec& dataSeq,
+double HMM<Distribution>::Estimate(const std::vector<Observation>& dataSeq,
                                    arma::mat& stateProb) const
 {
   // We don't need to save these.
@@ -221,13 +227,13 @@
  */
 template<typename Distribution>
 void HMM<Distribution>::Generate(const size_t length,
-                                 arma::vec& dataSequence,
-                                 arma::Col<size_t>& stateSequence,
+                                 std::vector<Observation>& dataSequence,
+                                 std::vector<size_t>& stateSequence,
                                  const size_t startState) const
 {
   // Set vectors to the right size.
-  stateSequence.set_size(length);
-  dataSequence.set_size(length);
+  stateSequence.resize(length);
+  dataSequence.resize(length);
 
   // Set start state (default is 0).
   stateSequence[0] = startState;
@@ -237,16 +243,7 @@
 
   // We just have to find where our random value sits in the probability
   // distribution of emissions for our starting state.
-  double probSum = 0;
-  for (size_t em = 0; em < emission.n_rows; em++)
-  {
-    probSum += emission(em, startState);
-    if (randValue <= probSum)
-    {
-      dataSequence[0] = em;
-      break;
-    }
-  }
+  dataSequence[0] = emission[startState].Random();
 
   // Now choose the states and emissions for the rest of the sequence.
   for (size_t t = 1; t < length; t++)
@@ -256,7 +253,7 @@
 
     // Now find where our random value sits in the probability distribution of
     // state changes.
-    probSum = 0;
+    double probSum = 0;
     for (size_t st = 0; st < transition.n_rows; st++)
     {
       probSum += transition(st, stateSequence[t - 1]);
@@ -268,20 +265,7 @@
     }
 
     // Now choose the emission.
-    randValue = (double) rand() / (double) RAND_MAX;
-
-    // Now find where our random value sits in the probability distribution of
-    // emissions for the state we just chose.
-    probSum = 0;
-    for (size_t em = 0; em < emission.n_rows; em++)
-    {
-      probSum += emission(em, stateSequence[t]);
-      if (randValue <= probSum)
-      {
-        dataSequence[t] = em;
-        break;
-      }
-    }
+    dataSequence[t] = emission[stateSequence[t]].Random();
   }
 }
 
@@ -291,28 +275,30 @@
  * sequence.
  */
 template<typename Distribution>
-double HMM<Distribution>::Predict(const arma::vec& dataSeq,
-                                  arma::Col<size_t>& stateSeq) const
+double HMM<Distribution>::Predict(const std::vector<Observation>& dataSeq,
+                                  std::vector<size_t>& stateSeq) const
 {
   // This is an implementation of the Viterbi algorithm for finding the most
   // probable sequence of states to produce the observed data sequence.  We
   // don't use log-likelihoods to save that little bit of time, but we'll
   // calculate the log-likelihood at the end of it all.
-  stateSeq.set_size(dataSeq.n_elem);
-  arma::mat stateProb(transition.n_rows, dataSeq.n_elem);
+  stateSeq.resize(dataSeq.size());
+  arma::mat stateProb(transition.n_rows, dataSeq.size());
 
   // The calculation of the first state is slightly different; the probability
   // of the first state being state j is the maximum probability that the state
-  // came to be j from another state.  We can do that in one line.
-  stateProb.col(0) = transition.col(0) %
-      trans(emission.row((size_t) dataSeq[0]));
+  // came to be j from another state.
+  stateProb.col(0).zeros();
+  for (size_t state = 0; state < transition.n_rows; state++)
+    stateProb[state] = transition(state, 0) *
+        emission[state].Probability(dataSeq[0]);
 
   // Store the best first state.
   arma::u32 index;
   stateProb.unsafe_col(0).max(index);
   stateSeq[0] = index;
 
-  for (size_t t = 1; t < dataSeq.n_elem; t++)
+  for (size_t t = 1; t < dataSeq.size(); t++)
   {
     // Assemble the state probability for this element.
     // Given that we are in state j, we state with the highest probability of
@@ -320,7 +306,7 @@
     for (size_t j = 0; j < transition.n_rows; j++)
     {
       arma::vec prob = stateProb.col(t - 1) % trans(transition.row(j));
-      stateProb(j, t) = prob.max() * emission((size_t) dataSeq[t], j);
+      stateProb(j, t) = prob.max() * emission[j].Probability(dataSeq[t]);
     }
 
     // Store the best state.
@@ -328,14 +314,15 @@
     stateSeq[t] = index;
   }
 
-  return log(stateProb(stateSeq[dataSeq.n_elem - 1], dataSeq.n_elem - 1));
+  return log(stateProb(stateSeq[dataSeq.size() - 1], dataSeq.size() - 1));
 }
 
 /**
  * Compute the log-likelihood of the given data sequence.
  */
 template<typename Distribution>
-double HMM<Distribution>::LogLikelihood(const arma::vec& dataSeq) const
+double HMM<Distribution>::LogLikelihood(
+    const std::vector<Observation>& dataSeq) const
 {
   arma::mat forward;
   arma::vec scales;
@@ -350,72 +337,70 @@
  * The Forward procedure (part of the Forward-Backward algorithm).
  */
 template<typename Distribution>
-void HMM<Distribution>::Forward(const arma::vec& dataSeq,
+void HMM<Distribution>::Forward(const std::vector<Observation>& dataSeq,
                                 arma::vec& scales,
-                                arma::mat& forwardProbabilities) const
+                                arma::mat& forwardProb) const
 {
   // Our goal is to calculate the forward probabilities:
   //  P(X_k | o_{1:k}) for all possible states X_k, for each time point k.
-  forwardProbabilities.zeros(transition.n_rows, dataSeq.n_elem);
-  scales.zeros(dataSeq.n_elem);
+  forwardProb.zeros(transition.n_rows, dataSeq.size());
+  scales.zeros(dataSeq.size());
 
   // Starting state (at t = -1) is assumed to be state 0.  This is what MATLAB
   // does in their hmmdecode() function, so we will emulate that behavior.
-  size_t obs = (size_t) dataSeq[0];
-  forwardProbabilities.col(0) = transition.col(0) % trans(emission.row(obs));
+  for (size_t state = 0; state < transition.n_rows; state++)
+    forwardProb(state, 0) = transition(state, 0) *
+        emission[state].Probability(dataSeq[0]);
+
   // Then normalize the column.
-  scales[0] = accu(forwardProbabilities.col(0));
-  forwardProbabilities.col(0) /= scales[0];
+  scales[0] = accu(forwardProb.col(0));
+  forwardProb.col(0) /= scales[0];
 
   // Now compute the probabilities for each successive observation.
-  for (size_t t = 1; t < dataSeq.n_elem; t++)
+  for (size_t t = 1; t < dataSeq.size(); t++)
   {
-    obs = (size_t) dataSeq[t];
-
     for (size_t j = 0; j < transition.n_rows; j++)
     {
       // The forward probability of state j at time t is the sum over all states
       // of the probability of the previous state transitioning to the current
       // state and emitting the given observation.
-      forwardProbabilities(j, t) = accu(forwardProbabilities.col(t - 1) %
-          trans(transition.row(j))) * emission(obs, j);
+      forwardProb(j, t) = accu(forwardProb.col(t - 1) %
+          trans(transition.row(j))) * emission[j].Probability(dataSeq[t]);
     }
 
     // Normalize probability.
-    scales[t] = accu(forwardProbabilities.col(t));
-    forwardProbabilities.col(t) /= scales[t];
+    scales[t] = accu(forwardProb.col(t));
+    forwardProb.col(t) /= scales[t];
   }
 }
 
 template<typename Distribution>
-void HMM<Distribution>::Backward(const arma::vec& dataSeq,
+void HMM<Distribution>::Backward(const std::vector<Observation>& dataSeq,
                                  const arma::vec& scales,
-                                 arma::mat& backwardProbabilities) const
+                                 arma::mat& backwardProb) const
 {
   // Our goal is to calculate the backward probabilities:
   //  P(X_k | o_{k + 1:T}) for all possible states X_k, for each time point k.
-  backwardProbabilities.zeros(transition.n_rows, dataSeq.n_elem);
+  backwardProb.zeros(transition.n_rows, dataSeq.size());
 
   // The last element probability is 1.
-  backwardProbabilities.col(dataSeq.n_elem - 1).fill(1);
+  backwardProb.col(dataSeq.size() - 1).fill(1);
 
   // Now step backwards through all other observations.
-  for (size_t t = dataSeq.n_elem - 2; t + 1 > 0; t--)
+  for (size_t t = dataSeq.size() - 2; t + 1 > 0; t--)
   {
-    // This will eventually need to depend on the distribution.
-    size_t obs = (size_t) dataSeq[t + 1];
-
     for (size_t j = 0; j < transition.n_rows; j++)
     {
       // The backward probability of state j at time t is the sum over all state
       // of the probability of the next state having been a transition from the
       // current state multiplied by the probability of each of those states
       // emitting the given observation.
-      backwardProbabilities(j, t) += accu(transition.col(j) %
-          backwardProbabilities.col(t + 1) % trans(emission.row(obs)));
+      for (size_t state = 0; state < transition.n_rows; state++)
+        backwardProb(j, t) += transition(state, j) * backwardProb(state, t + 1)
+            * emission[state].Probability(dataSeq[t + 1]);
 
       // Normalize by the weights from the forward algorithm.
-      backwardProbabilities(j, t) /= scales[t + 1];
+      backwardProb(j, t) /= scales[t + 1];
     }
   }
 }