[mlpack-svn] r10083 - mlpack/trunk/src/mlpack/methods/hmm
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Oct 31 00:12:31 EDT 2011
Author: jcline3
Date: 2011-10-31 00:12:31 -0400 (Mon, 31 Oct 2011)
New Revision: 10083
Added:
mlpack/trunk/src/mlpack/methods/hmm/discreteDST.cpp
mlpack/trunk/src/mlpack/methods/hmm/discreteDST.hpp
mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.cpp
mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.hpp
mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.cpp
mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.hpp
mlpack/trunk/src/mlpack/methods/hmm/generate.cpp
mlpack/trunk/src/mlpack/methods/hmm/loglik.cpp
mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.cpp
mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.hpp
mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.cpp
mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.hpp
mlpack/trunk/src/mlpack/methods/hmm/support.cpp
mlpack/trunk/src/mlpack/methods/hmm/support.hpp
mlpack/trunk/src/mlpack/methods/hmm/train.cpp
mlpack/trunk/src/mlpack/methods/hmm/viterbi.cpp
Removed:
mlpack/trunk/src/mlpack/methods/hmm/discreteDST.cc
mlpack/trunk/src/mlpack/methods/hmm/discreteDST.h
mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.cc
mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.h
mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.cc
mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.h
mlpack/trunk/src/mlpack/methods/hmm/generate.cc
mlpack/trunk/src/mlpack/methods/hmm/loglik.cc
mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.cc
mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.h
mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.cc
mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.h
mlpack/trunk/src/mlpack/methods/hmm/support.cc
mlpack/trunk/src/mlpack/methods/hmm/support.h
mlpack/trunk/src/mlpack/methods/hmm/train.cc
mlpack/trunk/src/mlpack/methods/hmm/viterbi.cc
Modified:
mlpack/trunk/src/mlpack/methods/hmm/CMakeLists.txt
Log:
Rename files to .hpp, .cpp
Modified: mlpack/trunk/src/mlpack/methods/hmm/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/CMakeLists.txt 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/CMakeLists.txt 2011-10-31 04:12:31 UTC (rev 10083)
@@ -3,16 +3,16 @@
# Define the files we need to compile.
# Anything not in this list will not be compiled into MLPACK.
set(SOURCES
- discreteHMM.h
- discreteHMM.cc
- gaussianHMM.h
- gaussianHMM.cc
- mixgaussHMM.h
- mixgaussHMM.cc
- mixtureDST.h
- mixtureDST.cc
- support.h
- support.cc
+ discreteHMM.hpp
+ discreteHMM.cpp
+ gaussianHMM.hpp
+ gaussianHMM.cpp
+ mixgaussHMM.hpp
+ mixgaussHMM.cpp
+ mixtureDST.hpp
+ mixtureDST.cpp
+ support.hpp
+ support.cpp
)
# Add directory name to sources.
@@ -26,28 +26,28 @@
# executables
add_executable(generate
- generate.cc
+ generate.cpp
)
target_link_libraries(generate
mlpack
)
add_executable(loglik
- loglik.cc
+ loglik.cpp
)
target_link_libraries(loglik
mlpack
)
add_executable(viterbi
- viterbi.cc
+ viterbi.cpp
)
target_link_libraries(viterbi
mlpack
)
add_executable(train
- train.cc
+ train.cpp
)
target_link_libraries(train
mlpack
Deleted: mlpack/trunk/src/mlpack/methods/hmm/discreteDST.cc
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/discreteDST.cc 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/discreteDST.cc 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,55 +0,0 @@
-#include <mlpack/core.h>
-#include "discreteDST.h"
-#include "support.h"
-
-namespace mlpack {
-namespace hmm {
-
-void DiscreteDST::Init(int n) {
- p.set_size(n);
- acc_p.set_size(n);
-
- double s = 1;
-
- for (int i = 0; i < n - 1; i++) {
- p[i] = RAND_UNIFORM(s * 0.2, s * 0.8);
- s -= p[i];
- }
-
- p[n - 1] = s;
-}
-
-void DiscreteDST::generate(int* v) {
- int n = p.length();
-
- double r = RAND_UNIFORM_01;
- double s = 0;
-
- for (int i = 0; i < N; i++) {
- s += p[i];
- if (s >= r) {
- *v = i;
- return;
- }
- }
-
- *v = n - 1;
-}
-
-void DiscreteDST::end_accumulate() {
- int n = p.length();
-
- double s = 0;
-
- for (int i = 0; i < N; i++)
- s += acc_p[i];
-
- if (s == 0)
- s = -INFINITY;
-
- for (int i = 0; i < N; i++)
- p[i] = ACC_p[i] / s;
-}
-
-}; // namespace hmm
-}; // namespace mlpack
Copied: mlpack/trunk/src/mlpack/methods/hmm/discreteDST.cpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/discreteDST.cc)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/discreteDST.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/discreteDST.cpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,55 @@
+#include <mlpack/core.h>
+#include "discreteDST.hpp"
+#include "support.hpp"
+
+namespace mlpack {
+namespace hmm {
+
+void DiscreteDST::Init(int n) {
+ p.set_size(n);
+ acc_p.set_size(n);
+
+ double s = 1;
+
+ for (int i = 0; i < n - 1; i++) {
+ p[i] = RAND_UNIFORM(s * 0.2, s * 0.8);
+ s -= p[i];
+ }
+
+ p[n - 1] = s;
+}
+
+void DiscreteDST::generate(int* v) {
+ int n = p.length();
+
+ double r = RAND_UNIFORM_01;
+ double s = 0;
+
+ for (int i = 0; i < N; i++) {
+ s += p[i];
+ if (s >= r) {
+ *v = i;
+ return;
+ }
+ }
+
+ *v = n - 1;
+}
+
+void DiscreteDST::end_accumulate() {
+ int n = p.length();
+
+ double s = 0;
+
+ for (int i = 0; i < N; i++)
+ s += acc_p[i];
+
+ if (s == 0)
+ s = -INFINITY;
+
+ for (int i = 0; i < N; i++)
+ p[i] = ACC_p[i] / s;
+}
+
+}; // namespace hmm
+}; // namespace mlpack
Deleted: mlpack/trunk/src/mlpack/methods/hmm/discreteDST.h
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/discreteDST.h 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/discreteDST.h 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,33 +0,0 @@
-#ifndef __MLPACK_METHODS_HMM_DISCRETE_DISTRIBUTCLIN_H
-#define __MLPACK_METHODS_HMM_DISCRETE_DISTRIBUTCLIN_H
-
-#include <mlpack/core.h>
-
-namespace mlpack {
-namespace hmm {
-
-class DiscreteDST {
- private:
- arma::vec p;
- arma::vec acc_p;
-
- public:
- void Init(int n = 2);
-
- void generate(int* v);
-
- double get(int i) { return p[i]; }
-
- void set(const arma::vec& p_) { p = p; }
-
- void start_accumulate() { acc_p.zeros(); }
-
- void accumulate(int i, double v) { acc_p[i] += v; }
-
- void end_accumulate();
-};
-
-}; // namespace hmm
-}; // namespace mlpack
-
-#endif
Copied: mlpack/trunk/src/mlpack/methods/hmm/discreteDST.hpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/discreteDST.h)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/discreteDST.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/discreteDST.hpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,33 @@
+#ifndef __MLPACK_METHODS_HMM_DISCRETE_DISTRIBUTION_HPP
+#define __MLPACK_METHODS_HMM_DISCRETE_DISTRIBUTION_HPP
+
+#include <mlpack/core.h>
+
+namespace mlpack {
+namespace hmm {
+
+class DiscreteDST {
+ private:
+ arma::vec p;
+ arma::vec acc_p;
+
+ public:
+ void Init(int n = 2);
+
+ void generate(int* v);
+
+ double get(int i) { return p[i]; }
+
+ void set(const arma::vec& p_) { p = p; }
+
+ void start_accumulate() { acc_p.zeros(); }
+
+ void accumulate(int i, double v) { acc_p[i] += v; }
+
+ void end_accumulate();
+};
+
+}; // namespace hmm
+}; // namespace mlpack
+
+#endif // __MLPACK_METHODS_HMM_DISCRETE_DISTRIBUTINO_HPP
Deleted: mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.cc
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.cc 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.cc 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,578 +0,0 @@
-/**
- * @file discreteHMM.cc
- *
- * This file contains the implementation of functions in discreteHMM.h
- */
-#include <mlpack/core.h>
-
-#include "support.h"
-#include "discreteHMM.h"
-
-namespace mlpack {
-namespace hmm {
-
-using namespace hmm_support;
-
-void DiscreteHMM::setModel(const arma::mat& transmission, const arma::mat& emission) {
- mlpack::Log::Assert(transmission.n_rows == transmission.n_cols);
- mlpack::Log::Assert(transmission.n_rows == emission.n_rows);
-
- transmission_ = transmission;
- emission_ = emission;
-}
-
-void DiscreteHMM::Init(const arma::mat& transmission, const arma::mat& emission) {
- transmission_ = transmission;
- emission_ = emission;
-
- mlpack::Log::Assert(transmission.n_rows == transmission.n_cols);
- mlpack::Log::Assert(transmission.n_rows == emission.n_rows);
-}
-
-void DiscreteHMM::InitFromFile(const char* profile) {
- std::vector<arma::mat> list_mat;
- load_matrix_list(profile, list_mat);
- if (list_mat.size() < 2)
- mlpack::Log::Fatal << "Number of matrices in the file should be at least 2."
- << std::endl;
- else if (list_mat.size() > 2)
- mlpack::Log::Warn << "Number of matrices in the file should be 2 (any after the second are ignored."
- << std::endl;
-
- transmission_ = list_mat[0];
- emission_ = list_mat[1];
-
- mlpack::Log::Assert(transmission_.n_rows == transmission_.n_cols);
- mlpack::Log::Assert(transmission_.n_rows == emission_.n_rows);
-}
-
-void DiscreteHMM::InitFromData(const std::vector<arma::vec>& list_data_seq, size_t numstate) {
- size_t numsymbol = 0;
- size_t maxseq = 0;
-
- for (size_t i = 0; i < list_data_seq.size(); i++) {
- if (list_data_seq[i].n_elem > list_data_seq[maxseq].n_elem)
- maxseq = i;
- }
-
- for (size_t i = 0; i < list_data_seq[maxseq].n_elem; i++) {
- if (list_data_seq[maxseq][i] > numsymbol)
- numsymbol = (size_t) list_data_seq[maxseq][i];
- }
- numsymbol++;
-
- arma::vec states;
- size_t L = list_data_seq[maxseq].n_elem;
- states.set_size(L);
- for (size_t i = 0; i < L; i++)
- states[i] = rand() % numstate;
-
- DiscreteHMM::EstimateInit(numsymbol, numstate, list_data_seq[maxseq], states, transmission_, emission_);
-}
-
-void DiscreteHMM::LoadProfile(const char* profile) {
- InitFromFile(profile);
-}
-
-void DiscreteHMM::SaveProfile(const char* profile) const {
- TextWriter w_pro;
- if (!(w_pro.Open(profile))) {
- mlpack::Log::Warn << "Couldn't open " << profile << " for writing." <<
- std::endl;
- return;
- }
-
- print_matrix(w_pro, transmission_, "%% transmission", "%f,");
- print_matrix(w_pro, emission_, "%% emission", "%f,");
-}
-
-void DiscreteHMM::GenerateSequence(size_t length, arma::vec& data_seq, arma::vec& state_seq) const {
- DiscreteHMM::GenerateInit(length, transmission_, emission_, data_seq, state_seq);
-}
-
-void DiscreteHMM::EstimateModel(const arma::vec& data_seq, const arma::vec& state_seq) {
- DiscreteHMM::EstimateInit(data_seq, state_seq, transmission_, emission_);
-}
-
-void DiscreteHMM::EstimateModel(size_t numstate, size_t numsymbol, const arma::vec& data_seq, const arma::vec& state_seq) {
- DiscreteHMM::EstimateInit(numsymbol, numstate, data_seq, state_seq, transmission_, emission_);
-}
-
-void DiscreteHMM::DecodeOverwrite(const arma::vec& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const {
- DiscreteHMM::Decode(data_seq, transmission_, emission_, state_prob_mat, forward_prob_mat, backward_prob_mat, scale_vec);
-}
-
-void DiscreteHMM::DecodeInit(const arma::vec& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const {
- size_t M = transmission_.n_rows;
- size_t L = data_seq.n_elem;
-
- state_prob_mat.set_size(M, L);
- forward_prob_mat.set_size(M, L);
- backward_prob_mat.set_size(M, L);
- scale_vec.set_size(L);
-
- DiscreteHMM::Decode(data_seq, transmission_, emission_, state_prob_mat, forward_prob_mat, backward_prob_mat, scale_vec);
-}
-
-void forward_procedure(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& scales, arma::mat& fs);
-
-double DiscreteHMM::ComputeLogLikelihood(const arma::vec& data_seq) const {
- size_t L = data_seq.n_elem;
- size_t M = transmission_.n_rows;
-
- arma::mat fs(M, L);
- arma::vec sc(L);
-
- DiscreteHMM::ForwardProcedure(data_seq, transmission_, emission_, sc, fs);
-
- double loglik = 0;
- for (size_t t = 0; t < L; t++)
- loglik += log(sc[t]);
- return loglik;
-}
-
-void DiscreteHMM::ComputeLogLikelihood(const std::vector<arma::vec>& list_data_seq, std::vector<double>& list_likelihood) const {
- size_t L = 0;
- for (size_t i = 0; i < list_data_seq.size(); i++) {
- if (list_data_seq[i].n_elem > L)
- L = list_data_seq[i].n_elem;
- }
-
- size_t M = transmission_.n_rows;
-
- arma::mat fs(M, L);
- arma::vec sc(L);
-
- for (size_t i = 0; i < list_data_seq.size(); i++) {
- DiscreteHMM::ForwardProcedure(list_data_seq[i], transmission_, emission_, sc, fs);
-
- size_t L = list_data_seq[i].n_elem;
- double loglik = 0;
- for (size_t t = 0; t < L; t++)
- loglik += log(sc[t]);
-
- list_likelihood.push_back(loglik);
- }
-}
-
-void DiscreteHMM::ComputeViterbiStateSequence(const arma::vec& data_seq, arma::vec& state_seq) const {
- DiscreteHMM::ViterbiInit(data_seq, transmission_, emission_, state_seq);
-}
-
-void DiscreteHMM::TrainBaumWelch(const std::vector<arma::vec>& list_data_seq, size_t max_iteration, double tolerance) {
- DiscreteHMM::Train(list_data_seq, transmission_, emission_, max_iteration, tolerance);
-}
-
-void DiscreteHMM::TrainViterbi(const std::vector<arma::vec>& list_data_seq, size_t max_iteration, double tolerance) {
- DiscreteHMM::TrainViterbi(list_data_seq, transmission_, emission_, max_iteration, tolerance);
-}
-
-void DiscreteHMM::GenerateInit(size_t L, const arma::mat& trans, const arma::mat& emis, arma::vec& seq, arma::vec& states) {
- mlpack::Log::Assert((trans.n_rows == trans.n_cols && trans.n_rows == emis.n_rows),
- "DiscreteHMM::GenerateInit(): matrix sizes do not match");
-
- arma::mat trsum, esum;
-
- size_t M, N;
- size_t cur_state;
-
- M = trans.n_rows;
- N = emis.n_cols;
-
- trsum = trans;
- esum = emis;
-
- for (size_t i = 0; i < M; i++) {
- for (size_t j = 1; j < M; j++)
- trsum(i, j) += trsum(i, j - 1);
- for (size_t j = 1; j < N; j++)
- esum(i, j) += esum(i, j - 1);
- }
-
- seq.set_size(L);
- states.set_size(L);
-
- cur_state = 0; // starting state is 0
-
- for (size_t i = 0; i < L; i++) {
- size_t j;
- double r;
-
- // next state
- r = (double) rand() / (double) RAND_MAX;
- for (j = 0; j < M; j++) {
- if (r <= trsum(cur_state, j))
- break;
- }
- cur_state = j;
-
- // emission
- r = (double) rand() / (double) RAND_MAX;
- for (j = 0; j < N; j++) {
- if (r <= esum(cur_state, j))
- break;
- }
-
- seq[i] = j;
- states[i] = cur_state;
- }
-}
-
-void DiscreteHMM::EstimateInit(const arma::vec& seq, const arma::vec& states, arma::mat& trans, arma::mat& emis) {
- mlpack::Log::Assert((seq.n_elem == states.n_elem),
- "DiscreteHMM::EstimateInit(): sequence and states length must be the same");
-
- size_t M = 0;
- size_t N = 0;
-
- for (size_t i = 0; i < seq.n_elem; i++) {
- if (seq[i] > N)
- N = (size_t) seq[i];
- if (states[i] > M)
- M = (size_t) states[i];
- }
-
- M++;
- N++;
-
- DiscreteHMM::EstimateInit(N, M, seq, states, trans, emis);
-}
-
-void DiscreteHMM::EstimateInit(size_t numSymbols, size_t numStates, const arma::vec& seq, const arma::vec& states, arma::mat& trans, arma::mat& emis){
- mlpack::Log::Assert((seq.n_elem == states.n_elem),
- "DiscreteHMM::EstimateInit(): sequence and states length must be the same");
-
- size_t N = numSymbols;
- size_t M = numStates;
- size_t L = seq.n_elem;
-
- arma::vec stateSum;
-
- trans.zeros(M, M);
- emis.zeros(M, N);
- stateSum.zeros(M);
-
- for (size_t i = 0; i < L - 1; i++) {
- size_t state = (size_t) states[i];
- size_t next_state = (size_t) states[i + 1];
- stateSum[state]++;
- trans(state, next_state)++;
- }
-
- for (size_t i = 0; i < M; i++) {
- if (stateSum[i] == 0)
- stateSum[i] = -INFINITY;
-
- for (size_t j = 0; j < M; j++)
- trans(i, j) /= stateSum[i];
- }
-
- stateSum.zeros();
-
- for (size_t i = 0; i < L; i++) {
- size_t state = (size_t) states[i];
- size_t emission = (size_t) seq[i];
- stateSum[state]++;
- emis(state, emission)++;
- }
-
- for (size_t i = 0; i < M; i++) {
- if (stateSum[i] == 0)
- stateSum[i] = -INFINITY;
- for (size_t j = 0; j < N; j++)
- emis(i, j) /= stateSum[i];
- }
-}
-
-void DiscreteHMM::ForwardProcedure(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& scales, arma::mat& fs) {
- size_t L = seq.n_elem;
- size_t M = trans.n_rows;
-
- fs.zeros();
- scales.zeros();
-
- // NOTE: start state is 0
- // time t = 0
- size_t e = (size_t) seq[0];
- for (size_t i = 0; i < M; i++) {
- fs(i, 0) = trans(0, i) * emis(i, e);
- scales[0] += fs(i, 0);
- }
- for (size_t i = 0; i < M; i++)
- fs(i, 0) /= scales[0];
-
- // time t = 1 -> L-1
- for (size_t t = 1; t < L; t++) {
- e = (size_t) seq[t];
- for (size_t j = 0; j < M; j++) {
- for (size_t i = 0; i < M; i++)
- fs(j, t) += fs(i, t - 1) * trans(i, j);
- fs(j, t) *= emis(j, e);
- scales[t] += fs(j, t);
- }
- for (size_t j = 0; j < M; j++)
- fs(j, t) /= scales[t];
- }
-}
-
-void DiscreteHMM::BackwardProcedure(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, const arma::vec& scales, arma::mat& bs) {
- size_t L = seq.n_elem;
- size_t M = trans.n_rows;
-
- bs.zeros();
-
- for (size_t i = 0; i < M; i++)
- bs(i, L - 1) = 1.0;
-
- for (size_t t = L - 2; t >= 0; t--) {
- size_t e = (size_t) seq[t + 1];
- for (size_t i = 0; i < M; i++) {
- for (size_t j = 0; j < M; j++)
- bs(i, t) += trans(i, j) * bs(j, t + 1) * emis(j, e);
- bs(i, t) /= scales[t + 1];
- }
- }
-}
-
-double DiscreteHMM::Decode(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales) {
- size_t L = seq.n_elem;
- size_t M = trans.n_rows;
-
- mlpack::Log::Assert((L == pstates.n_cols && L == fs.n_cols && L == bs.n_cols &&
- M == trans.n_cols && M == emis.n_rows),
- "DiscreteHMM::Decode(): sizes do not match");
-
- DiscreteHMM::ForwardProcedure(seq, trans, emis, scales, fs);
- DiscreteHMM::BackwardProcedure(seq, trans, emis, scales, bs);
-
- for (size_t i = 0; i < M; i++) {
- for (size_t t = 0; t < L; t++)
- pstates(i, t) = fs(i,t) * bs(i,t);
- }
-
- double logpseq = 0;
- for (size_t t = 0; t < L; t++)
- logpseq += log(scales[t]);
-
- return logpseq;
-}
-
-double DiscreteHMM::ViterbiInit(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& states) {
- size_t L = seq.n_elem;
-
- return DiscreteHMM::ViterbiInit(L, seq, trans, emis, states);
-}
-
-double DiscreteHMM::ViterbiInit(size_t L, const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& states) {
- size_t M = trans.n_rows;
- size_t N = emis.n_cols;
-
- mlpack::Log::Assert((M == trans.n_cols && M == emis.n_rows),
- "DiscreteHMM::ViterbiInit(): sizes do not match");
-
- states.set_size(L);
-
- arma::vec v(M);
- v.fill(-INFINITY);
- v[0] = 0;
-
- arma::vec v_old = v;
-
- arma::mat w(M, L);
-
- arma::mat logtrans(M, M), logemis(M, N);
-
- for (size_t i = 0; i < M; i++) {
- for (size_t j = 0; j < M; j++)
- logtrans(i, j) = log(trans(i, j));
-
- for (size_t j = 0; j < N; j++)
- logemis(i, j) = log(emis(i, j));
- }
-
- for (size_t t = 0; t < L; t++) {
- size_t e = (size_t) seq[t];
-
- for (size_t j = 0; j < M; j++) {
- double bestVal = -INFINITY;
- double bestPtr = -1;
-
- for (size_t i = 0; i < M; i++) {
- double val = v_old[i] + logtrans(i, j);
- if (val > bestVal) {
- bestVal = val;
- bestPtr = i;
- }
- }
- v[j] = bestVal + logemis(j, e);
- w(j, t) = bestPtr;
- }
- v_old = v;
- }
-
- double bestVal = -INFINITY;
- double bestPtr = -1;
-
- for (size_t i = 0; i < M; i++)
- if (v[i] > bestVal) {
- bestVal = v[i];
- bestPtr = i;
- }
-
- states[L - 1] = bestPtr;
- for (size_t t = L - 2; t >= 0; t--)
- states[t] = w((size_t) states[t + 1], t + 1);
-
- return bestVal;
-}
-
-void DiscreteHMM::Train(const std::vector<arma::vec>& seqs, arma::mat& guessTR, arma::mat& guessEM, size_t max_iter, double tol) {
- size_t L = -1;
- size_t M = guessTR.n_rows;
- size_t N = guessEM.n_cols;
-
- mlpack::Log::Assert((M == guessTR.n_cols && M == guessEM.n_rows),
- "DiscreteHMM::Train(): sizes do not match");
-
- for (size_t i = 0; i < seqs.size(); i++) {
- if (seqs[i].n_elem > L)
- L = seqs[i].n_elem;
- }
-
- arma::mat TR(M, M), EM(M, N); // guess transition and emission matrix
-
- arma::mat ps(M, L), fs(M, L), bs(M, L);
- arma::vec s(L);
-
- double loglik = 0, oldlog;
-
- for (size_t iter = 0; iter < max_iter; iter++) {
- oldlog = loglik;
- loglik = 0;
-
- TR.zeros();
- EM.zeros();
- for (size_t idx = 0; idx < seqs.size(); idx++) {
- L = seqs[idx].n_elem;
- loglik += DiscreteHMM::Decode(seqs[idx], guessTR, guessEM, ps, fs, bs, s);
-
- for (size_t t = 0; t < L - 1; t++) {
- size_t e = (size_t) seqs[idx][t + 1];
- for (size_t i = 0; i < M; i++) {
- for (size_t j = 0; j < M; j++)
- TR(i, j) += fs(i, t) * guessTR(i, j) * guessEM(j, e) * bs(j, t + 1) / s[t + 1];
- }
- }
-
- for (size_t t = 0; t < L; t++) {
- size_t e = (size_t) seqs[idx][t];
- for (size_t i = 0; i < M; i++)
- EM(i, e) += ps(i, t);
- }
- }
-
- double s;
- for (size_t i = 0; i < M; i++) {
- s = 0;
- for (size_t j = 0; j < M; j++)
- s += TR(i, j);
- if (s == 0) {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = 0;
- guessTR(i, i) = 1;
- }
- else {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = TR(i, j) / s;
- }
-
- s = 0;
- for (size_t j = 0; j < N; j++)
- s += EM(i, j);
- for (size_t j = 0; j < N; j++)
- guessEM(i, j) = EM(i, j) / s;
- }
-
- printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
- if (fabs(oldlog - loglik) < tol) {
- printf("\nConverged after %zu iterations\n", iter);
- break;
- }
- oldlog = loglik;
- }
-}
-
-void DiscreteHMM::TrainViterbi(const std::vector<arma::vec>& seqs, arma::mat& guessTR, arma::mat& guessEM, size_t max_iter, double tol) {
- size_t L = -1;
- size_t M = guessTR.n_rows;
- size_t N = guessEM.n_cols;
-
- mlpack::Log::Assert((M == guessTR.n_cols && M == guessEM.n_rows),
- "DiscreteHMM::TrainViterbi(): sizes do not match");
-
- for (size_t i = 0; i < seqs.size(); i++) {
- if (seqs[i].n_elem > L)
- L = seqs[i].n_elem;
- }
-
- arma::mat TR(M, M), EM(M, N); // guess transition and emission matrix
-
- double loglik = 0, oldlog;
- for (size_t iter = 0; iter < max_iter; iter++) {
- oldlog = loglik;
- loglik = 0;
-
- TR.fill(1e-4);
- EM.fill(1e-4);
- for (size_t idx = 0; idx < seqs.size(); idx++) {
- arma::vec states;
- L = seqs[idx].n_elem;
- loglik += DiscreteHMM::ViterbiInit(L, seqs[idx], guessTR, guessEM, states);
-
- for (size_t t = 0; t < L-1; t++) {
- size_t i = (size_t) states[t];
- size_t j = (size_t) states[t + 1];
- TR(i, j)++;
- }
-
- for (size_t t = 0; t < L; t++) {
- size_t e = (size_t) seqs[idx][t];
- size_t i = (size_t) states[t];
- EM(i, e)++;
- }
- }
-
- double s;
- print_matrix(TR, "TR");
- for (size_t i = 0; i < M; i++) {
- s = 0;
- for (size_t j = 0; j < M; j++)
- s += TR(i, j);
-
- if (s == 0) {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = 0;
- guessTR(i, i) = 1;
- } else {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = TR(i, j) / s;
- }
-
- s = 0;
- for (size_t j = 0; j < N; j++)
- s += EM(i, j);
- for (size_t j = 0; j < N; j++)
- guessEM(i, j) = EM(i, j) / s;
- }
-
- printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
- if (fabs(oldlog - loglik) < tol) {
- printf("\nConverged after %zu iterations\n", iter);
- break;
- }
- oldlog = loglik;
- }
-}
-
-}; // namespace hmm
-}; // namespace mlpack
Copied: mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.cpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.cc)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.cpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,578 @@
+/**
+ * @file discreteHMM.cc
+ *
+ * This file contains the implementation of functions in discreteHMM.h
+ */
+#include <mlpack/core.h>
+
+#include "support.hpp"
+#include "discreteHMM.hpp"
+
+namespace mlpack {
+namespace hmm {
+
+using namespace hmm_support;
+
+void DiscreteHMM::setModel(const arma::mat& transmission, const arma::mat& emission) {
+ mlpack::Log::Assert(transmission.n_rows == transmission.n_cols);
+ mlpack::Log::Assert(transmission.n_rows == emission.n_rows);
+
+ transmission_ = transmission;
+ emission_ = emission;
+}
+
+void DiscreteHMM::Init(const arma::mat& transmission, const arma::mat& emission) {
+ transmission_ = transmission;
+ emission_ = emission;
+
+ mlpack::Log::Assert(transmission.n_rows == transmission.n_cols);
+ mlpack::Log::Assert(transmission.n_rows == emission.n_rows);
+}
+
+void DiscreteHMM::InitFromFile(const char* profile) {
+ std::vector<arma::mat> list_mat;
+ load_matrix_list(profile, list_mat);
+ if (list_mat.size() < 2)
+ mlpack::Log::Fatal << "Number of matrices in the file should be at least 2."
+ << std::endl;
+ else if (list_mat.size() > 2)
+ mlpack::Log::Warn << "Number of matrices in the file should be 2 (any after the second are ignored."
+ << std::endl;
+
+ transmission_ = list_mat[0];
+ emission_ = list_mat[1];
+
+ mlpack::Log::Assert(transmission_.n_rows == transmission_.n_cols);
+ mlpack::Log::Assert(transmission_.n_rows == emission_.n_rows);
+}
+
+void DiscreteHMM::InitFromData(const std::vector<arma::vec>& list_data_seq, size_t numstate) {
+ size_t numsymbol = 0;
+ size_t maxseq = 0;
+
+ for (size_t i = 0; i < list_data_seq.size(); i++) {
+ if (list_data_seq[i].n_elem > list_data_seq[maxseq].n_elem)
+ maxseq = i;
+ }
+
+ for (size_t i = 0; i < list_data_seq[maxseq].n_elem; i++) {
+ if (list_data_seq[maxseq][i] > numsymbol)
+ numsymbol = (size_t) list_data_seq[maxseq][i];
+ }
+ numsymbol++;
+
+ arma::vec states;
+ size_t L = list_data_seq[maxseq].n_elem;
+ states.set_size(L);
+ for (size_t i = 0; i < L; i++)
+ states[i] = rand() % numstate;
+
+ DiscreteHMM::EstimateInit(numsymbol, numstate, list_data_seq[maxseq], states, transmission_, emission_);
+}
+
+void DiscreteHMM::LoadProfile(const char* profile) {
+ InitFromFile(profile);
+}
+
+void DiscreteHMM::SaveProfile(const char* profile) const {
+ TextWriter w_pro;
+ if (!(w_pro.Open(profile))) {
+ mlpack::Log::Warn << "Couldn't open " << profile << " for writing." <<
+ std::endl;
+ return;
+ }
+
+ print_matrix(w_pro, transmission_, "%% transmission", "%f,");
+ print_matrix(w_pro, emission_, "%% emission", "%f,");
+}
+
+void DiscreteHMM::GenerateSequence(size_t length, arma::vec& data_seq, arma::vec& state_seq) const {
+ DiscreteHMM::GenerateInit(length, transmission_, emission_, data_seq, state_seq);
+}
+
+void DiscreteHMM::EstimateModel(const arma::vec& data_seq, const arma::vec& state_seq) {
+ DiscreteHMM::EstimateInit(data_seq, state_seq, transmission_, emission_);
+}
+
+void DiscreteHMM::EstimateModel(size_t numstate, size_t numsymbol, const arma::vec& data_seq, const arma::vec& state_seq) {
+ DiscreteHMM::EstimateInit(numsymbol, numstate, data_seq, state_seq, transmission_, emission_);
+}
+
+void DiscreteHMM::DecodeOverwrite(const arma::vec& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const {
+ DiscreteHMM::Decode(data_seq, transmission_, emission_, state_prob_mat, forward_prob_mat, backward_prob_mat, scale_vec);
+}
+
+void DiscreteHMM::DecodeInit(const arma::vec& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const {
+ size_t M = transmission_.n_rows;
+ size_t L = data_seq.n_elem;
+
+ state_prob_mat.set_size(M, L);
+ forward_prob_mat.set_size(M, L);
+ backward_prob_mat.set_size(M, L);
+ scale_vec.set_size(L);
+
+ DiscreteHMM::Decode(data_seq, transmission_, emission_, state_prob_mat, forward_prob_mat, backward_prob_mat, scale_vec);
+}
+
+void forward_procedure(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& scales, arma::mat& fs);
+
+double DiscreteHMM::ComputeLogLikelihood(const arma::vec& data_seq) const {
+ size_t L = data_seq.n_elem;
+ size_t M = transmission_.n_rows;
+
+ arma::mat fs(M, L);
+ arma::vec sc(L);
+
+ DiscreteHMM::ForwardProcedure(data_seq, transmission_, emission_, sc, fs);
+
+ double loglik = 0;
+ for (size_t t = 0; t < L; t++)
+ loglik += log(sc[t]);
+ return loglik;
+}
+
+void DiscreteHMM::ComputeLogLikelihood(const std::vector<arma::vec>& list_data_seq, std::vector<double>& list_likelihood) const {
+ size_t L = 0;
+ for (size_t i = 0; i < list_data_seq.size(); i++) {
+ if (list_data_seq[i].n_elem > L)
+ L = list_data_seq[i].n_elem;
+ }
+
+ size_t M = transmission_.n_rows;
+
+ arma::mat fs(M, L);
+ arma::vec sc(L);
+
+ for (size_t i = 0; i < list_data_seq.size(); i++) {
+ DiscreteHMM::ForwardProcedure(list_data_seq[i], transmission_, emission_, sc, fs);
+
+ size_t L = list_data_seq[i].n_elem;
+ double loglik = 0;
+ for (size_t t = 0; t < L; t++)
+ loglik += log(sc[t]);
+
+ list_likelihood.push_back(loglik);
+ }
+}
+
+void DiscreteHMM::ComputeViterbiStateSequence(const arma::vec& data_seq, arma::vec& state_seq) const {
+ DiscreteHMM::ViterbiInit(data_seq, transmission_, emission_, state_seq);
+}
+
+void DiscreteHMM::TrainBaumWelch(const std::vector<arma::vec>& list_data_seq, size_t max_iteration, double tolerance) {
+ DiscreteHMM::Train(list_data_seq, transmission_, emission_, max_iteration, tolerance);
+}
+
+void DiscreteHMM::TrainViterbi(const std::vector<arma::vec>& list_data_seq, size_t max_iteration, double tolerance) {
+ DiscreteHMM::TrainViterbi(list_data_seq, transmission_, emission_, max_iteration, tolerance);
+}
+
+void DiscreteHMM::GenerateInit(size_t L, const arma::mat& trans, const arma::mat& emis, arma::vec& seq, arma::vec& states) {
+ mlpack::Log::Assert((trans.n_rows == trans.n_cols && trans.n_rows == emis.n_rows),
+ "DiscreteHMM::GenerateInit(): matrix sizes do not match");
+
+ arma::mat trsum, esum;
+
+ size_t M, N;
+ size_t cur_state;
+
+ M = trans.n_rows;
+ N = emis.n_cols;
+
+ trsum = trans;
+ esum = emis;
+
+ for (size_t i = 0; i < M; i++) {
+ for (size_t j = 1; j < M; j++)
+ trsum(i, j) += trsum(i, j - 1);
+ for (size_t j = 1; j < N; j++)
+ esum(i, j) += esum(i, j - 1);
+ }
+
+ seq.set_size(L);
+ states.set_size(L);
+
+ cur_state = 0; // starting state is 0
+
+ for (size_t i = 0; i < L; i++) {
+ size_t j;
+ double r;
+
+ // next state
+ r = (double) rand() / (double) RAND_MAX;
+ for (j = 0; j < M; j++) {
+ if (r <= trsum(cur_state, j))
+ break;
+ }
+ cur_state = j;
+
+ // emission
+ r = (double) rand() / (double) RAND_MAX;
+ for (j = 0; j < N; j++) {
+ if (r <= esum(cur_state, j))
+ break;
+ }
+
+ seq[i] = j;
+ states[i] = cur_state;
+ }
+}
+
+void DiscreteHMM::EstimateInit(const arma::vec& seq, const arma::vec& states, arma::mat& trans, arma::mat& emis) {
+ mlpack::Log::Assert((seq.n_elem == states.n_elem),
+ "DiscreteHMM::EstimateInit(): sequence and states length must be the same");
+
+ size_t M = 0;
+ size_t N = 0;
+
+ for (size_t i = 0; i < seq.n_elem; i++) {
+ if (seq[i] > N)
+ N = (size_t) seq[i];
+ if (states[i] > M)
+ M = (size_t) states[i];
+ }
+
+ M++;
+ N++;
+
+ DiscreteHMM::EstimateInit(N, M, seq, states, trans, emis);
+}
+
+void DiscreteHMM::EstimateInit(size_t numSymbols, size_t numStates, const arma::vec& seq, const arma::vec& states, arma::mat& trans, arma::mat& emis){
+ mlpack::Log::Assert((seq.n_elem == states.n_elem),
+ "DiscreteHMM::EstimateInit(): sequence and states length must be the same");
+
+ size_t N = numSymbols;
+ size_t M = numStates;
+ size_t L = seq.n_elem;
+
+ arma::vec stateSum;
+
+ trans.zeros(M, M);
+ emis.zeros(M, N);
+ stateSum.zeros(M);
+
+ for (size_t i = 0; i < L - 1; i++) {
+ size_t state = (size_t) states[i];
+ size_t next_state = (size_t) states[i + 1];
+ stateSum[state]++;
+ trans(state, next_state)++;
+ }
+
+ for (size_t i = 0; i < M; i++) {
+ if (stateSum[i] == 0)
+ stateSum[i] = -INFINITY;
+
+ for (size_t j = 0; j < M; j++)
+ trans(i, j) /= stateSum[i];
+ }
+
+ stateSum.zeros();
+
+ for (size_t i = 0; i < L; i++) {
+ size_t state = (size_t) states[i];
+ size_t emission = (size_t) seq[i];
+ stateSum[state]++;
+ emis(state, emission)++;
+ }
+
+ for (size_t i = 0; i < M; i++) {
+ if (stateSum[i] == 0)
+ stateSum[i] = -INFINITY;
+ for (size_t j = 0; j < N; j++)
+ emis(i, j) /= stateSum[i];
+ }
+}
+
+void DiscreteHMM::ForwardProcedure(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& scales, arma::mat& fs) {
+ size_t L = seq.n_elem;
+ size_t M = trans.n_rows;
+
+ fs.zeros();
+ scales.zeros();
+
+ // NOTE: start state is 0
+ // time t = 0
+ size_t e = (size_t) seq[0];
+ for (size_t i = 0; i < M; i++) {
+ fs(i, 0) = trans(0, i) * emis(i, e);
+ scales[0] += fs(i, 0);
+ }
+ for (size_t i = 0; i < M; i++)
+ fs(i, 0) /= scales[0];
+
+ // time t = 1 -> L-1
+ for (size_t t = 1; t < L; t++) {
+ e = (size_t) seq[t];
+ for (size_t j = 0; j < M; j++) {
+ for (size_t i = 0; i < M; i++)
+ fs(j, t) += fs(i, t - 1) * trans(i, j);
+ fs(j, t) *= emis(j, e);
+ scales[t] += fs(j, t);
+ }
+ for (size_t j = 0; j < M; j++)
+ fs(j, t) /= scales[t];
+ }
+}
+
+void DiscreteHMM::BackwardProcedure(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, const arma::vec& scales, arma::mat& bs) {
+ size_t L = seq.n_elem;
+ size_t M = trans.n_rows;
+
+ bs.zeros();
+
+ for (size_t i = 0; i < M; i++)
+ bs(i, L - 1) = 1.0;
+
+ for (size_t t = L - 2; t >= 0; t--) {
+ size_t e = (size_t) seq[t + 1];
+ for (size_t i = 0; i < M; i++) {
+ for (size_t j = 0; j < M; j++)
+ bs(i, t) += trans(i, j) * bs(j, t + 1) * emis(j, e);
+ bs(i, t) /= scales[t + 1];
+ }
+ }
+}
+
+double DiscreteHMM::Decode(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales) {
+ size_t L = seq.n_elem;
+ size_t M = trans.n_rows;
+
+ mlpack::Log::Assert((L == pstates.n_cols && L == fs.n_cols && L == bs.n_cols &&
+ M == trans.n_cols && M == emis.n_rows),
+ "DiscreteHMM::Decode(): sizes do not match");
+
+ DiscreteHMM::ForwardProcedure(seq, trans, emis, scales, fs);
+ DiscreteHMM::BackwardProcedure(seq, trans, emis, scales, bs);
+
+ for (size_t i = 0; i < M; i++) {
+ for (size_t t = 0; t < L; t++)
+ pstates(i, t) = fs(i,t) * bs(i,t);
+ }
+
+ double logpseq = 0;
+ for (size_t t = 0; t < L; t++)
+ logpseq += log(scales[t]);
+
+ return logpseq;
+}
+
+double DiscreteHMM::ViterbiInit(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& states) {
+ size_t L = seq.n_elem;
+
+ return DiscreteHMM::ViterbiInit(L, seq, trans, emis, states);
+}
+
+double DiscreteHMM::ViterbiInit(size_t L, const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& states) {
+ size_t M = trans.n_rows;
+ size_t N = emis.n_cols;
+
+ mlpack::Log::Assert((M == trans.n_cols && M == emis.n_rows),
+ "DiscreteHMM::ViterbiInit(): sizes do not match");
+
+ states.set_size(L);
+
+ arma::vec v(M);
+ v.fill(-INFINITY);
+ v[0] = 0;
+
+ arma::vec v_old = v;
+
+ arma::mat w(M, L);
+
+ arma::mat logtrans(M, M), logemis(M, N);
+
+ for (size_t i = 0; i < M; i++) {
+ for (size_t j = 0; j < M; j++)
+ logtrans(i, j) = log(trans(i, j));
+
+ for (size_t j = 0; j < N; j++)
+ logemis(i, j) = log(emis(i, j));
+ }
+
+ for (size_t t = 0; t < L; t++) {
+ size_t e = (size_t) seq[t];
+
+ for (size_t j = 0; j < M; j++) {
+ double bestVal = -INFINITY;
+ double bestPtr = -1;
+
+ for (size_t i = 0; i < M; i++) {
+ double val = v_old[i] + logtrans(i, j);
+ if (val > bestVal) {
+ bestVal = val;
+ bestPtr = i;
+ }
+ }
+ v[j] = bestVal + logemis(j, e);
+ w(j, t) = bestPtr;
+ }
+ v_old = v;
+ }
+
+ double bestVal = -INFINITY;
+ double bestPtr = -1;
+
+ for (size_t i = 0; i < M; i++)
+ if (v[i] > bestVal) {
+ bestVal = v[i];
+ bestPtr = i;
+ }
+
+ states[L - 1] = bestPtr;
+ for (size_t t = L - 2; t >= 0; t--)
+ states[t] = w((size_t) states[t + 1], t + 1);
+
+ return bestVal;
+}
+
+void DiscreteHMM::Train(const std::vector<arma::vec>& seqs, arma::mat& guessTR, arma::mat& guessEM, size_t max_iter, double tol) {
+ size_t L = -1;
+ size_t M = guessTR.n_rows;
+ size_t N = guessEM.n_cols;
+
+ mlpack::Log::Assert((M == guessTR.n_cols && M == guessEM.n_rows),
+ "DiscreteHMM::Train(): sizes do not match");
+
+ for (size_t i = 0; i < seqs.size(); i++) {
+ if (seqs[i].n_elem > L)
+ L = seqs[i].n_elem;
+ }
+
+ arma::mat TR(M, M), EM(M, N); // guess transition and emission matrix
+
+ arma::mat ps(M, L), fs(M, L), bs(M, L);
+ arma::vec s(L);
+
+ double loglik = 0, oldlog;
+
+ for (size_t iter = 0; iter < max_iter; iter++) {
+ oldlog = loglik;
+ loglik = 0;
+
+ TR.zeros();
+ EM.zeros();
+ for (size_t idx = 0; idx < seqs.size(); idx++) {
+ L = seqs[idx].n_elem;
+ loglik += DiscreteHMM::Decode(seqs[idx], guessTR, guessEM, ps, fs, bs, s);
+
+ for (size_t t = 0; t < L - 1; t++) {
+ size_t e = (size_t) seqs[idx][t + 1];
+ for (size_t i = 0; i < M; i++) {
+ for (size_t j = 0; j < M; j++)
+ TR(i, j) += fs(i, t) * guessTR(i, j) * guessEM(j, e) * bs(j, t + 1) / s[t + 1];
+ }
+ }
+
+ for (size_t t = 0; t < L; t++) {
+ size_t e = (size_t) seqs[idx][t];
+ for (size_t i = 0; i < M; i++)
+ EM(i, e) += ps(i, t);
+ }
+ }
+
+ double s;
+ for (size_t i = 0; i < M; i++) {
+ s = 0;
+ for (size_t j = 0; j < M; j++)
+ s += TR(i, j);
+ if (s == 0) {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = 0;
+ guessTR(i, i) = 1;
+ }
+ else {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = TR(i, j) / s;
+ }
+
+ s = 0;
+ for (size_t j = 0; j < N; j++)
+ s += EM(i, j);
+ for (size_t j = 0; j < N; j++)
+ guessEM(i, j) = EM(i, j) / s;
+ }
+
+ printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
+ if (fabs(oldlog - loglik) < tol) {
+ printf("\nConverged after %zu iterations\n", iter);
+ break;
+ }
+ oldlog = loglik;
+ }
+}
+
+void DiscreteHMM::TrainViterbi(const std::vector<arma::vec>& seqs, arma::mat& guessTR, arma::mat& guessEM, size_t max_iter, double tol) {
+ size_t L = -1;
+ size_t M = guessTR.n_rows;
+ size_t N = guessEM.n_cols;
+
+ mlpack::Log::Assert((M == guessTR.n_cols && M == guessEM.n_rows),
+ "DiscreteHMM::TrainViterbi(): sizes do not match");
+
+ for (size_t i = 0; i < seqs.size(); i++) {
+ if (seqs[i].n_elem > L)
+ L = seqs[i].n_elem;
+ }
+
+ arma::mat TR(M, M), EM(M, N); // guess transition and emission matrix
+
+ double loglik = 0, oldlog;
+ for (size_t iter = 0; iter < max_iter; iter++) {
+ oldlog = loglik;
+ loglik = 0;
+
+ TR.fill(1e-4);
+ EM.fill(1e-4);
+ for (size_t idx = 0; idx < seqs.size(); idx++) {
+ arma::vec states;
+ L = seqs[idx].n_elem;
+ loglik += DiscreteHMM::ViterbiInit(L, seqs[idx], guessTR, guessEM, states);
+
+ for (size_t t = 0; t < L-1; t++) {
+ size_t i = (size_t) states[t];
+ size_t j = (size_t) states[t + 1];
+ TR(i, j)++;
+ }
+
+ for (size_t t = 0; t < L; t++) {
+ size_t e = (size_t) seqs[idx][t];
+ size_t i = (size_t) states[t];
+ EM(i, e)++;
+ }
+ }
+
+ double s;
+ print_matrix(TR, "TR");
+ for (size_t i = 0; i < M; i++) {
+ s = 0;
+ for (size_t j = 0; j < M; j++)
+ s += TR(i, j);
+
+ if (s == 0) {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = 0;
+ guessTR(i, i) = 1;
+ } else {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = TR(i, j) / s;
+ }
+
+ s = 0;
+ for (size_t j = 0; j < N; j++)
+ s += EM(i, j);
+ for (size_t j = 0; j < N; j++)
+ guessEM(i, j) = EM(i, j) / s;
+ }
+
+ printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
+ if (fabs(oldlog - loglik) < tol) {
+ printf("\nConverged after %zu iterations\n", iter);
+ break;
+ }
+ oldlog = loglik;
+ }
+}
+
+}; // namespace hmm
+}; // namespace mlpack
Deleted: mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.h
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.h 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.h 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,153 +0,0 @@
-/**
- * @file discreteHMM.h
- *
- * This file contains functions of a discrete Hidden Markov Models (in C) and a
- * wrapper class (in C++) for these functions. It implements log-likelihood
- * computation, viterbi algorithm for the most probable sequence, Baum-Welch
- * algorithm and Viterbi-like algorithm for parameter estimation. It can also
- * generate sequences from a Hidden Markov Model.
- */
-#ifndef __MLPACK_METHODS_HMM_DISCRETE_HMM_H
-#define __MLPACK_METHODS_HMM_DISCRETE_HMM_H
-
-#include <mlpack/core.h>
-
-namespace mlpack {
-namespace hmm {
-
-/**
- * A wrapper class for HMM functionals in discrete case
- *
- * This class maintains transition probabilities and emission probabilities
- * matrices and performs basic HMM functionals, more details below.
- *
- */
-
-class DiscreteHMM {
- /////////// Member variables /////////////////////////////////////
- private:
- /** Transmission probabilities matrix between states */
- arma::mat transmission_;
-
- /** Emission probabilities in each state */
- arma::mat emission_;
-
- public:
- /** Basic getters */
- const arma::mat& transmission() const { return transmission_; }
- const arma::mat& emission() const { return emission_; }
-
- /** Setters used when already initialized */
- void setModel(const arma::mat& transmission, const arma::mat& emission);
-
- /** Initializes from computed transmission and emission matrices */
- void Init(const arma::mat& transmission, const arma::mat& emission);
-
- /** Initializes by loading from a file */
- void InitFromFile(const char* profile);
-
- /** Initializes randomly using data as a guide */
- void InitFromData(const std::vector<arma::vec>& list_data_seq, size_t numstate);
-
- /** Load from file, used when already initialized */
- void LoadProfile(const char* profile);
-
- /** Save matrices to file */
- void SaveProfile(const char* profile) const;
-
- /** Generate a random data sequence of a given length */
- void GenerateSequence(size_t length, arma::vec& data_seq, arma::vec& state_seq) const;
-
- /**
- * Estimate the matrices by a data sequence and a state sequence
- * Must be already initialized
- */
- void EstimateModel(const arma::vec& data_seq, const arma::vec& state_seq);
- void EstimateModel(size_t numstate, size_t numsymbol, const arma::vec& data_seq, const arma::vec& state_seq);
-
- /**
- * Decode a sequence into probabilities of each state at each time step
- * using scaled forward-backward algorithm.
- * Also return forward, backward probabilities and scale factors
- */
- void DecodeOverwrite(const arma::vec& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
-
- /** A decode version that initialized the out matrices */
- void DecodeInit(const arma::vec& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
-
- /** Compute the log-likelihood of a sequence */
- double ComputeLogLikelihood(const arma::vec& data_seq) const;
-
- /** Compute the log-likelihood of a list of sequences */
- void ComputeLogLikelihood(const std::vector<arma::vec>& list_data_seq, std::vector<double>& list_likelihood) const;
-
- /** Compute the most probable sequence (Viterbi) */
- void ComputeViterbiStateSequence(const arma::vec& data_seq, arma::vec& state_seq) const;
-
- /**
- * Train the model with a list of sequences, must be already initialized
- * using Baum-Welch EM algorithm
- */
- void TrainBaumWelch(const std::vector<arma::vec>& list_data_seq, size_t max_iteration, double tolerance);
-
- /**
- * Train the model with a list of sequences, must be already initialized
- * using Viterbi algorithm to determine the state sequence of each sequence
- */
- void TrainViterbi(const std::vector<arma::vec>& list_data_seq, size_t max_iteration, double tolerance);
-
-
- ///////// Static helper functions ///////////////////////////////////////
-
- /**
- * Generating a sequence and states using transition and emission probabilities.
- * L: sequence length
- * trans: Matrix M x M (M states)
- * emis: Matrix M x N (N emissions)
- * seq: uninitialized vector, will have length L
- * states: uninitialized vector, will have length L
- */
- static void GenerateInit(size_t L, const arma::mat& trans, const arma::mat& emis, arma::vec& seq, arma::vec& states);
-
- /** Estimate transition and emission probabilities from sequence and states */
- static void EstimateInit(const arma::vec& seq, const arma::vec& states, arma::mat& trans, arma::mat& emis);
- static void EstimateInit(size_t numSymbols, size_t numStates, const arma::vec& seq, const arma::vec& states, arma::mat& trans, arma::mat& emis);
-
- /** Calculate posteriori probabilities of states at each steps
- * Scaled Forward - Backward procedure
- * seq: Vector of length L of emissions
- * trans: Transition probabilities, size M x M
- * emis: Emission probabilities, size M x N
- * pstates: size M x L
- * fs: scaled forward probabities, size M x L
- * bs: scaled backward probabities, size M x L
- * scales: scale factors, length L
- * RETURN: log probabilities of sequence
- */
- static void ForwardProcedure(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& scales, arma::mat& fs);
- static void BackwardProcedure(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, const arma::vec& scales, arma::mat& bs);
- static double Decode(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales);
-
- /** Calculate the most probable states for a sequence
- * Viterbi algorithm
- * seq: Vector of length L of emissions
- * trans: Transition probabilities, size M x M
- * emis: Emission probabilities, size M x N
- * states: Unitialized, will have length L
- * RETURN: log probability of the most probable sequence
- */
- static double ViterbiInit(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& states);
- static double ViterbiInit(size_t L, const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& states);
-
- /** Baum-Welch estimation of transition and emission probabilities */
- static void Train(const std::vector<arma::vec>& seqs, arma::mat& guessTR, arma::mat& guessEM, size_t max_iter, double tol);
-
- /** Viterbi estimation of transition and emission probabilities */
- static void TrainViterbi(const std::vector<arma::vec>& seqs, arma::mat& guessTR, arma::mat& guessEM, size_t max_iter, double tol);
-
-};
-
-}; // namespace hmm
-}; // namespace mlpack
-
-#endif
Copied: mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.hpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.h)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/discreteHMM.hpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,153 @@
+/**
+ * @file discreteHMM.h
+ *
+ * This file contains functions of a discrete Hidden Markov Models (in C) and a
+ * wrapper class (in C++) for these functions. It implements log-likelihood
+ * computation, viterbi algorithm for the most probable sequence, Baum-Welch
+ * algorithm and Viterbi-like algorithm for parameter estimation. It can also
+ * generate sequences from a Hidden Markov Model.
+ */
+#ifndef __MLPACK_METHODS_HMM_DISCRETE_HMM_H
+#define __MLPACK_METHODS_HMM_DISCRETE_HMM_H
+
+#include <mlpack/core.h>
+
+namespace mlpack {
+namespace hmm {
+
+/**
+ * A wrapper class for HMM functionals in discrete case
+ *
+ * This class maintains transition probabilities and emission probabilities
+ * matrices and performs basic HMM functionals, more details below.
+ *
+ */
+
+class DiscreteHMM {
+ /////////// Member variables /////////////////////////////////////
+ private:
+ /** Transmission probabilities matrix between states */
+ arma::mat transmission_;
+
+ /** Emission probabilities in each state */
+ arma::mat emission_;
+
+ public:
+ /** Basic getters */
+ const arma::mat& transmission() const { return transmission_; }
+ const arma::mat& emission() const { return emission_; }
+
+ /** Setters used when already initialized */
+ void setModel(const arma::mat& transmission, const arma::mat& emission);
+
+ /** Initializes from computed transmission and emission matrices */
+ void Init(const arma::mat& transmission, const arma::mat& emission);
+
+ /** Initializes by loading from a file */
+ void InitFromFile(const char* profile);
+
+ /** Initializes randomly using data as a guide */
+ void InitFromData(const std::vector<arma::vec>& list_data_seq, size_t numstate);
+
+ /** Load from file, used when already initialized */
+ void LoadProfile(const char* profile);
+
+ /** Save matrices to file */
+ void SaveProfile(const char* profile) const;
+
+ /** Generate a random data sequence of a given length */
+ void GenerateSequence(size_t length, arma::vec& data_seq, arma::vec& state_seq) const;
+
+ /**
+ * Estimate the matrices by a data sequence and a state sequence
+ * Must be already initialized
+ */
+ void EstimateModel(const arma::vec& data_seq, const arma::vec& state_seq);
+ void EstimateModel(size_t numstate, size_t numsymbol, const arma::vec& data_seq, const arma::vec& state_seq);
+
+ /**
+ * Decode a sequence into probabilities of each state at each time step
+ * using scaled forward-backward algorithm.
+ * Also return forward, backward probabilities and scale factors
+ */
+ void DecodeOverwrite(const arma::vec& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
+
+ /** A decode version that initialized the out matrices */
+ void DecodeInit(const arma::vec& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
+
+ /** Compute the log-likelihood of a sequence */
+ double ComputeLogLikelihood(const arma::vec& data_seq) const;
+
+ /** Compute the log-likelihood of a list of sequences */
+ void ComputeLogLikelihood(const std::vector<arma::vec>& list_data_seq, std::vector<double>& list_likelihood) const;
+
+ /** Compute the most probable sequence (Viterbi) */
+ void ComputeViterbiStateSequence(const arma::vec& data_seq, arma::vec& state_seq) const;
+
+ /**
+ * Train the model with a list of sequences, must be already initialized
+ * using Baum-Welch EM algorithm
+ */
+ void TrainBaumWelch(const std::vector<arma::vec>& list_data_seq, size_t max_iteration, double tolerance);
+
+ /**
+ * Train the model with a list of sequences, must be already initialized
+ * using Viterbi algorithm to determine the state sequence of each sequence
+ */
+ void TrainViterbi(const std::vector<arma::vec>& list_data_seq, size_t max_iteration, double tolerance);
+
+
+ ///////// Static helper functions ///////////////////////////////////////
+
+ /**
+ * Generating a sequence and states using transition and emission probabilities.
+ * L: sequence length
+ * trans: Matrix M x M (M states)
+ * emis: Matrix M x N (N emissions)
+ * seq: uninitialized vector, will have length L
+ * states: uninitialized vector, will have length L
+ */
+ static void GenerateInit(size_t L, const arma::mat& trans, const arma::mat& emis, arma::vec& seq, arma::vec& states);
+
+ /** Estimate transition and emission probabilities from sequence and states */
+ static void EstimateInit(const arma::vec& seq, const arma::vec& states, arma::mat& trans, arma::mat& emis);
+ static void EstimateInit(size_t numSymbols, size_t numStates, const arma::vec& seq, const arma::vec& states, arma::mat& trans, arma::mat& emis);
+
+ /** Calculate posteriori probabilities of states at each steps
+ * Scaled Forward - Backward procedure
+ * seq: Vector of length L of emissions
+ * trans: Transition probabilities, size M x M
+ * emis: Emission probabilities, size M x N
+ * pstates: size M x L
+ * fs: scaled forward probabities, size M x L
+ * bs: scaled backward probabities, size M x L
+ * scales: scale factors, length L
+ * RETURN: log probabilities of sequence
+ */
+ static void ForwardProcedure(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& scales, arma::mat& fs);
+ static void BackwardProcedure(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, const arma::vec& scales, arma::mat& bs);
+ static double Decode(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales);
+
+ /** Calculate the most probable states for a sequence
+ * Viterbi algorithm
+ * seq: Vector of length L of emissions
+ * trans: Transition probabilities, size M x M
+ * emis: Emission probabilities, size M x N
+ * states: Unitialized, will have length L
+ * RETURN: log probability of the most probable sequence
+ */
+ static double ViterbiInit(const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& states);
+ static double ViterbiInit(size_t L, const arma::vec& seq, const arma::mat& trans, const arma::mat& emis, arma::vec& states);
+
+ /** Baum-Welch estimation of transition and emission probabilities */
+ static void Train(const std::vector<arma::vec>& seqs, arma::mat& guessTR, arma::mat& guessEM, size_t max_iter, double tol);
+
+ /** Viterbi estimation of transition and emission probabilities */
+ static void TrainViterbi(const std::vector<arma::vec>& seqs, arma::mat& guessTR, arma::mat& guessEM, size_t max_iter, double tol);
+
+};
+
+}; // namespace hmm
+}; // namespace mlpack
+
+#endif
Deleted: mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.cc
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.cc 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.cc 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,784 +0,0 @@
-/**
- * @file gaussianHMM.cc
- *
- * This file contains the implementation of functions in gaussianHMM.h
- */
-#include <mlpack/core.h>
-
-#include "support.h"
-#include "gaussianHMM.h"
-
-namespace mlpack {
-namespace hmm {
-
-using namespace hmm_support;
-
-void GaussianHMM::setModel(const arma::mat& transmission, const std::vector<arma::vec>& list_mean_vec, const std::vector<arma::mat>& list_covariance_mat) {
- mlpack::Log::Assert(transmission.n_rows == transmission.n_cols);
- mlpack::Log::Assert(transmission.n_rows == list_mean_vec.size());
- mlpack::Log::Assert(transmission.n_rows == list_covariance_mat.size());
-
- for (size_t i = 1; i < list_mean_vec.size(); i++) {
- mlpack::Log::Assert(list_mean_vec[0].n_elem == list_covariance_mat[i].n_rows);
- mlpack::Log::Assert(list_mean_vec[0].n_elem == list_covariance_mat[i].n_cols);
- mlpack::Log::Assert(list_mean_vec[0].n_elem == list_mean_vec[i].n_elem);
- }
-
- transmission_ = transmission;
- list_mean_vec_ = list_mean_vec;
- list_covariance_mat_ = list_covariance_mat;
- CalculateInverse();
-}
-
-void GaussianHMM::Init(const arma::mat& transmission, const std::vector<arma::vec>& list_mean_vec,const std::vector<arma::mat>& list_covariance_mat) {
- transmission_ = transmission;
- list_mean_vec_ = list_mean_vec;
- list_covariance_mat_ = list_covariance_mat;
-
- mlpack::Log::Assert(transmission.n_rows == transmission.n_cols);
- mlpack::Log::Assert(transmission.n_rows == list_mean_vec.size());
- mlpack::Log::Assert(transmission.n_rows == list_covariance_mat.size());
- for (size_t i = 1; i < list_mean_vec.size(); i++) {
- mlpack::Log::Assert(list_mean_vec[0].n_elem == list_covariance_mat[i].n_rows);
- mlpack::Log::Assert(list_mean_vec[0].n_elem == list_covariance_mat[i].n_cols);
- mlpack::Log::Assert(list_mean_vec[0].n_elem == list_mean_vec[i].n_elem);
- }
-
- CalculateInverse();
-}
-
-void GaussianHMM::InitFromFile(const char* profile) {
- if (!(GaussianHMM::LoadProfile(profile, transmission_, list_mean_vec_, list_covariance_mat_)))
- mlpack::Log::Fatal << "Couldn't open " << profile << " for reading." <<
- std::endl;
-
- list_inverse_cov_mat_ = list_covariance_mat_;
- gauss_const_vec_.set_size(list_covariance_mat_.size());
-
- CalculateInverse();
-}
-
-void GaussianHMM::InitFromData(const std::vector<arma::mat>& list_data_seq, size_t numstate) {
- GaussianHMM::InitGaussParameter(numstate, list_data_seq, transmission_, list_mean_vec_, list_covariance_mat_);
-
- list_inverse_cov_mat_ = list_covariance_mat_;
- gauss_const_vec_.set_size(list_covariance_mat_.size());
-
- CalculateInverse();
-}
-
-void GaussianHMM::InitFromData(const arma::mat& data_seq, const arma::vec& state_seq) {
- GaussianHMM::EstimateInit(data_seq, state_seq, transmission_, list_mean_vec_, list_covariance_mat_);
- CalculateInverse();
-}
-
-void GaussianHMM::LoadProfile(const char* profile) {
- /*
- transmission_.Destruct();
- list_mean_vec_.Renew();
- list_covariance_mat_.Renew();
- list_inverse_cov_mat_.Renew();
- gauss_const_vec_.Destruct();
- */
- InitFromFile(profile);
-}
-
-void GaussianHMM::SaveProfile(const char* profile) const {
- GaussianHMM::SaveProfile(profile, transmission_, list_mean_vec_, list_covariance_mat_);
-}
-
-void GaussianHMM::CalculateInverse() {
- size_t M = transmission_.n_rows;
- size_t N = list_mean_vec_[0].n_elem;
- for (size_t i = 0; i < M; i++) {
- list_inverse_cov_mat_[i] = inv(list_covariance_mat_[i]); // inverse
- gauss_const_vec_[i] = pow(2.0 * M_PI, -N / 2.0) * pow(det(list_covariance_mat_[i]), -0.5);
- }
-}
-
-void GaussianHMM::GenerateSequence(size_t L, arma::mat& data_seq, arma::vec& state_seq) const {
- GaussianHMM::GenerateInit(L, transmission_, list_mean_vec_, list_covariance_mat_, data_seq, state_seq);
-}
-
-void GaussianHMM::EstimateModel(const arma::mat& data_seq, const arma::vec& state_seq) {
- GaussianHMM::EstimateInit(data_seq, state_seq, transmission_, list_mean_vec_, list_covariance_mat_);
- CalculateInverse();
-}
-
-void GaussianHMM::EstimateModel(size_t numstate, const arma::mat& data_seq, const arma::vec& state_seq) {
- GaussianHMM::EstimateInit(numstate, data_seq, state_seq, transmission_, list_mean_vec_, list_covariance_mat_);
- CalculateInverse();
-}
-
-void GaussianHMM::DecodeOverwrite(const arma::mat& data_seq, arma::mat& state_prob_mat,
- arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const {
- size_t M = transmission_.n_rows;
- size_t L = data_seq.n_cols;
- arma::mat emission_prob_mat(M, L);
- GaussianHMM::CalculateEmissionProb(data_seq, list_mean_vec_, list_inverse_cov_mat_,
- gauss_const_vec_, emission_prob_mat);
- GaussianHMM::Decode(transmission_, emission_prob_mat, state_prob_mat, forward_prob_mat,
- backward_prob_mat, scale_vec);
-}
-
-void GaussianHMM::DecodeInit(const arma::mat& data_seq, arma::mat& state_prob_mat,
- arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const {
- size_t M = transmission_.n_rows;
- size_t L = data_seq.n_cols;
-
- state_prob_mat.set_size(M, L);
- forward_prob_mat.set_size(M, L);
- backward_prob_mat.set_size(M, L);
- scale_vec.set_size(L);
-
- arma::mat emission_prob_mat(M, L);
-
- GaussianHMM::CalculateEmissionProb(data_seq, list_mean_vec_, list_inverse_cov_mat_,
- gauss_const_vec_, emission_prob_mat);
- GaussianHMM::Decode(transmission_, emission_prob_mat, state_prob_mat, forward_prob_mat,
- backward_prob_mat, scale_vec);
-}
-
-double GaussianHMM::ComputeLogLikelihood(const arma::mat& data_seq) const {
- size_t L = data_seq.n_cols;
- size_t M = transmission_.n_rows;
-
- arma::mat fs(M, L), emis_prob(M, L);
- arma::vec sc(L);
-
- GaussianHMM::CalculateEmissionProb(data_seq, list_mean_vec_, list_inverse_cov_mat_, gauss_const_vec_, emis_prob);
- GaussianHMM::ForwardProcedure(L, transmission_, emis_prob, sc, fs);
- double loglik = 0;
- for (size_t t = 0; t < L; t++)
- loglik += log(sc[t]);
- return loglik;
-}
-
-void GaussianHMM::ComputeLogLikelihood(const std::vector<arma::mat>& list_data_seq, std::vector<double>& list_likelihood) const {
- size_t L = 0;
- for (size_t i = 0; i < list_data_seq.size(); i++) {
- if (list_data_seq[i].n_cols > L)
- L = list_data_seq[i].n_cols;
- }
-
- size_t M = transmission_.n_rows;
-
- arma::mat fs(M, L), emis_prob(M, L);
- arma::vec sc(L);
-
- for (size_t i = 0; i < list_data_seq.size(); i++) {
- size_t L = list_data_seq[i].n_cols;
-
- GaussianHMM::CalculateEmissionProb(list_data_seq[i], list_mean_vec_, list_inverse_cov_mat_, gauss_const_vec_, emis_prob);
- GaussianHMM::ForwardProcedure(L, transmission_, emis_prob, sc, fs);
-
- double loglik = 0;
- for (size_t t = 0; t < L; t++)
- loglik += log(sc[t]);
- list_likelihood.push_back(loglik);
- }
-}
-
-void GaussianHMM::ComputeViterbiStateSequence(const arma::mat& data_seq, arma::vec& state_seq) const {
- size_t M = transmission_.n_rows;
- size_t L = data_seq.n_cols;
-
- arma::mat emis_prob(M, L);
-
- GaussianHMM::CalculateEmissionProb(data_seq, list_mean_vec_, list_inverse_cov_mat_, gauss_const_vec_, emis_prob);
- GaussianHMM::ViterbiInit(transmission_, emis_prob, state_seq);
-}
-
-void GaussianHMM::TrainBaumWelch(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance) {
- GaussianHMM::Train(list_data_seq, transmission_, list_mean_vec_, list_covariance_mat_, max_iteration, tolerance);
- CalculateInverse();
-}
-
-void GaussianHMM::TrainViterbi(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance) {
- GaussianHMM::TrainViterbi(list_data_seq, transmission_, list_mean_vec_, list_covariance_mat_, max_iteration, tolerance);
- CalculateInverse();
-}
-
-bool GaussianHMM::LoadProfile(const char* profile, arma::mat& trans, std::vector<arma::vec>& means, std::vector<arma::mat>& covs) {
- std::vector<arma::mat> matlst;
- if (!(load_matrix_list(profile, matlst)))
- return false;
-
- mlpack::Log::Assert(matlst.size() > 0);
-
- trans = matlst[0];
- size_t M = trans.n_rows; // num of states
- mlpack::Log::Assert(matlst.size() == (2 * M + 1));
- size_t N = matlst[1].n_rows; // dimension
-
- for (size_t i = 1; i < (2 * M + 1); i += 2) {
- mlpack::Log::Assert(matlst[i].n_rows == N && matlst[i].n_cols == 1);
- mlpack::Log::Assert(matlst[i + 1].n_rows == N && matlst[i + 1].n_cols == N);
-
- arma::vec m = matlst[i].unsafe_col(0);
-
- means.push_back(m);
- covs.push_back(matlst[i + 1]);
- }
-
- return true;
-}
-
-bool GaussianHMM::SaveProfile(const char* profile, const arma::mat& trans, const std::vector<arma::vec>& means, const std::vector<arma::mat>& covs) {
- TextWriter w_pro;
- if (!(w_pro.Open(profile))) {
- mlpack::Log::Warn << "Couldn't open " << profile << " for writing." <<
- std::endl;
- return false;
- }
-
- size_t M = trans.n_rows; // num of states
- mlpack::Log::Assert(means.size() == M && covs.size() == M);
- size_t N = means[0].n_elem; // dimension
- print_matrix(w_pro, trans, "% transmission", "%f,");
- for (size_t i = 0; i < M; i++) {
- mlpack::Log::Assert(means[i].n_elem == N);
- mlpack::Log::Assert(covs[i].n_rows == N && covs[i].n_cols == N);
- char s[100];
- sprintf(s, "%% mean - state %zu", i);
- print_vector(w_pro, means[i], s, "%f,");
- sprintf(s, "%% covariance - state%zu", i);
- print_matrix(w_pro, covs[i], s, "%f,");
- }
-
- return true;
-}
-
-void GaussianHMM::GenerateInit(size_t L, const arma::mat& trans, const std::vector<arma::vec>& means, const std::vector<arma::mat>& covs, arma::mat& seq, arma::vec& states){
- mlpack::Log::Assert((trans.n_rows == trans.n_cols && trans.n_rows == means.size() && trans.n_rows == covs.size()),
- "GaussianHMM::GenerateInit(): matrix sizes do not match");
-
- arma::mat trsum;
- size_t M, N;
- size_t cur_state;
-
- M = trans.n_rows;
- N = means[0].n_elem; // emission vector length
-
- trsum = trans;
-
- for (size_t i = 0; i < M; i++) {
- for (size_t j = 1; j < M; j++) {
- trsum(i, j) += trsum(i, j - 1);
- }
- }
-
- seq.set_size(N, L);
- states.set_size(L);
-
- cur_state = 0; // starting state is 0
-
- for (size_t i = 0; i < L; i++) {
- size_t j;
-
- // next state
- double r = (double) rand() / (double) RAND_MAX;
- for (j = 0; j < M; j++) {
- if (r <= trsum(cur_state, j))
- break;
- }
- cur_state = j;
-
- // emission
- arma::vec e;
- RAND_NORMAL_INIT(means[cur_state], covs[cur_state], e);
- for (j = 0; j < N; j++)
- seq(j, i) = e[j];
- states[i] = cur_state;
- }
-}
-
-void GaussianHMM::EstimateInit(const arma::mat& seq, const arma::vec& states, arma::mat& trans, std::vector<arma::vec>& means, std::vector<arma::mat>& covs) {
- mlpack::Log::Assert((seq.n_cols == states.n_elem), "GaussianHMM::EstimateInit(): sequence and states length must be the same");
-
- size_t M = 0;
- for (size_t i = 0; i < seq.n_cols; i++) {
- if (states[i] > M)
- M = (size_t) states[i];
- }
- M++;
- GaussianHMM::EstimateInit(M, seq, states, trans, means, covs);
-}
-
-void GaussianHMM::EstimateInit(size_t numStates, const arma::mat& seq, const arma::vec& states, arma::mat& trans, std::vector<arma::vec>& means, std::vector<arma::mat>& covs) {
- mlpack::Log::Assert((seq.n_cols == states.n_elem), "GaussianHMM::EstimateInit(): sequence and states length must be the same");
-
- size_t N = seq.n_rows; // emission vector length
- size_t M = numStates; // number of states
- size_t L = seq.n_cols; // sequence length
-
- arma::vec stateSum;
-
- trans.zeros(M, M);
- stateSum.zeros(M);
-
- for (size_t i = 0; i < M; i++) {
- arma::vec m;
- m.zeros(N);
- means.push_back(m);
-
- arma::mat c;
- c.zeros(N, N);
- covs.push_back(c);
- }
-
- for (size_t i = 0; i < L - 1; i++) {
- size_t state = (size_t) states[i];
- size_t next_state = (size_t) states[i + 1];
- stateSum[state]++;
- trans(state, next_state)++;
- }
-
- for (size_t i = 0; i < M; i++) {
- if (stateSum[i] == 0)
- stateSum[i] = -INFINITY;
-
- for (size_t j = 0; j < M; j++)
- trans(i, j) /= stateSum[i];
- }
-
- stateSum.zeros();
- for (size_t i = 0; i < L; i++) {
- size_t state = (size_t) states[i];
- arma::vec e = seq.unsafe_col(i);
-
- stateSum[state]++;
- means[state] += e;
- }
-
- for (size_t i = 0; i < M; i++) {
- if (stateSum[i] != 0)
- means[i] /= stateSum[i];
- }
-
- for (size_t i = 0; i < L; i++) {
- size_t state = (size_t) states[i];
- arma::vec e = seq.unsafe_col(i);
- arma::vec d = means[state] - e;
- covs[state] += d * arma::trans(d);
- }
-
- for (size_t i = 0; i < M; i++) {
- if (stateSum[i] != 0)
- covs[i] /= stateSum[i];
- }
-}
-
-void GaussianHMM::ForwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& scales, arma::mat& fs) {
- size_t M = trans.n_rows;
-
- fs.zeros();
- scales.zeros();
-
- // NOTE: start state is 0
- // time t = 0
- for (size_t i = 0; i < M; i++) {
- fs(i, 0) = trans(0, i) * emis_prob(i, 0);
- scales[0] += fs(i, 0);
- }
-
- for (size_t i = 0; i < M; i++)
- fs(i, 0) /= scales[0];
-
- // time t = 1 -> L-1
- for (size_t t = 1; t < L; t++) {
- for (size_t j = 0; j < M; j++) {
- for (size_t i = 0; i < M; i++)
- fs(j, t) += fs(i, t - 1) * trans(i, j);
- fs(j, t) *= emis_prob(j, t);
- scales[t] += fs(j, t);
- }
- for (size_t j = 0; j < M; j++)
- fs(j, t) /= scales[t];
- }
-}
-
-void GaussianHMM::BackwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob, const arma::vec& scales, arma::mat& bs) {
- size_t M = trans.n_rows;
-
- bs.zeros();
-
- for (size_t i = 0; i < M; i++)
- bs(i, L - 1) = 1.0;
-
- for (size_t t = L - 2; t >= 0; t--) {
- for (size_t i = 0; i < M; i++) {
- for (size_t j = 0; j < M; j++)
- bs(i, t) += trans(i, j) * bs(j, t + 1) * emis_prob(j, t + 1);
- bs(i, t) /= scales[t + 1];
- }
- }
-}
-
-double GaussianHMM::Decode(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales) {
- size_t M = trans.n_rows;
-
- mlpack::Log::Assert((L == pstates.n_cols && L == fs.n_cols && L == bs.n_cols &&
- M == trans.n_cols && M == emis_prob.n_rows),
- "GaussianHMM::Decode(): sizes do not match");
-
- GaussianHMM::ForwardProcedure(L, trans, emis_prob, scales, fs);
- GaussianHMM::BackwardProcedure(L, trans, emis_prob, scales, bs);
-
- for (size_t i = 0; i < M; i++)
- for (size_t t = 0; t < L; t++)
- pstates(i, t) = fs(i,t) * bs(i,t);
-
- double logpseq = 0;
- for (size_t t = 0; t < L; t++)
- logpseq += log(scales[t]);
-
- return logpseq;
-}
-
-double GaussianHMM::Decode(const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales) {
- size_t L = emis_prob.n_cols;
- return GaussianHMM::Decode(L, trans, emis_prob, pstates, fs, bs, scales);
-}
-
-double GaussianHMM::ViterbiInit(const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states) {
- size_t L = emis_prob.n_cols;
- return GaussianHMM::ViterbiInit(L, trans, emis_prob, states);
-}
-
-double GaussianHMM::ViterbiInit(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states) {
- size_t M = trans.n_rows;
- mlpack::Log::Assert((M == trans.n_cols && M == emis_prob.n_rows),
- "GaussianHMM::ViterbitInit(): sizes do not match");
-
- states.set_size(L);
-
- arma::vec v(M), v_old;
- v.fill(-INFINITY);
- v[0] = 0;
- v_old = v;
-
- arma::mat w(M, L);
- arma::mat logtrans(M, M);
-
- for (size_t i = 0; i < M; i++) {
- for (size_t j = 0; j < M; j++)
- logtrans(i, j) = log(trans(i, j));
- }
-
-
- for (size_t t = 0; t < L; t++) {
- for (size_t j = 0; j < M; j++) {
- double bestVal = -INFINITY;
- double bestPtr = -1;
- for (size_t i = 0; i < M; i++) {
- double val = v_old[i] + logtrans(i, j);
- if (val > bestVal) {
- bestVal = val;
- bestPtr = i;
- }
- }
- v[j] = bestVal + log(emis_prob(j, t));
- w(j, t) = bestPtr;
- }
- v_old = v;
- }
-
- double bestVal = -INFINITY;
- double bestPtr = -1;
- for (size_t i = 0; i < M; i++)
- if (v[i] > bestVal) {
- bestVal = v[i];
- bestPtr = i;
- }
-
- states[L - 1] = bestPtr;
- for (size_t t = L - 2; t >= 0; t--)
- states[t] = w((size_t) states[t + 1], t + 1);
-
- return bestVal;
-}
-
-void GaussianHMM::CalculateEmissionProb(const arma::mat& seq, const std::vector<arma::vec>& means, const std::vector<arma::mat>& inv_covs, const arma::vec& det, arma::mat& emis_prob) {
- size_t L = seq.n_cols;
- size_t M = means.size();
- for (size_t t = 0; t < L; t++) {
- arma::vec e = seq.unsafe_col(t);
- for (size_t i = 0; i < M; i++)
- emis_prob(i, t) = NORMAL_DENSITY(e, means[i], inv_covs[i], det[i]);
- }
-}
-
-void GaussianHMM::InitGaussParameter(size_t M, const std::vector<arma::mat>& seqs, arma::mat& guessTR, std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO) {
- size_t N = seqs[0].n_rows;
-
- std::vector<size_t> labels;
- arma::vec sumState;
-
- kmeans(seqs, M, labels, guessME, 1000, 1e-5);
-
- //for (size_t i = 0; i < labels.size(); i++) printf("%8d", labels[i]);
- //printf("---1---\n");
-
- guessTR.zeros(M, M);
- sumState.zeros(M);
- for (size_t i = 0; i < M; i++) {
- arma::mat m;
- m.zeros(N, N);
- guessCO.push_back(m);
- }
- //printf("---2---\n");
-
- size_t t = 0;
- for (size_t p = 0; p < seqs.size(); p++) {
- for (size_t q = 0; q < seqs[p].n_cols; q++, t++) {
- if (q == seqs[p].n_cols - 1)
- continue;
-
- size_t i = labels[t];
- size_t j = labels[t + 1];
-
- guessTR(i, j)++;
- sumState[i]++;
-
- arma::vec data_j_Vec = seqs[p].unsafe_col(q);
- arma::mat tmp_cov;
-
- arma::vec sub_Vec = data_j_Vec - guessME[i];
- tmp_cov = sub_Vec;
- //printf("t = %d x = %8.3f\n", t, sub_Vec[0]);
- guessCO[i] += tmp_cov * trans(tmp_cov);
- }
- }
- //printf("---3---\n");
-
- for (size_t i = 0; i < M; i++)
- if (sumState[i] == 0) {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = 0;
-
- guessTR(i, i) = 1;
- guessME[i].zeros();
- guessCO[i].zeros();
-
- for (size_t j = 0; j < N; j++)
- guessCO[i](j, j) = 1;
- }
- else {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) /= sumState[i];
-
- guessCO[i] /= sumState[i];
-
- for (size_t j = 0; j < N; j++)
- guessCO[i](j, j) += 1e-3; // make sure the diagonal elements are not too small
- }
- //printf("---4---\n");
-}
-
-void GaussianHMM::TrainViterbi(const std::vector<arma::mat>& seqs, arma::mat& guessTR, std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO, size_t max_iter, double tol) {
- size_t L = -1;
- size_t M = guessTR.n_rows;
- size_t N = guessME[0].n_elem;
- mlpack::Log::Assert((M == guessTR.n_cols && M == guessME.size() && M == guessCO.size()),
- "GaussianHMM::TrainViterbi(): sizes do not match");
-
- for (size_t i = 0; i < seqs.size(); i++) {
- if (seqs[i].n_cols > L)
- L = seqs[i].n_cols;
- }
-
- arma::mat TR(M, M); // accumulating transition
- std::vector<arma::vec> ME = guessME; // accumulating mean
- std::vector<arma::mat> CO = guessCO; // accumulating covariance
- std::vector<arma::mat> INV_CO = CO; // inverse matrix of the covariance
- arma::vec DET(M); // the determinant * constant of the Normal PDF formula
-
- arma::mat emis_prob(M, L);
- arma::vec sumState(M); // the denominator for each state
-
- double loglik = 0, oldlog;
- for (size_t iter = 0; iter < max_iter; iter++) {
- oldlog = loglik;
- loglik = 0;
-
- // set the accumulating values to zeros and compute the inverse matrices and determinant constants
- TR.zeros();
- for (size_t i = 0; i < M; i++) {
- ME[i].zeros();
- CO[i].zeros();
- INV_CO[i] = inv(guessCO[i]);
- DET[i] = pow(2.0 * M_PI, -N / 2.0) * pow(det(guessCO[i]), -0.5);
- }
- sumState.zeros();
-
- // for each sequence, we will use forward-backward procedure and then accumulate
- for (size_t idx = 0; idx < seqs.size(); idx++) {
- L = seqs[idx].n_cols;
- arma::vec states;
- GaussianHMM::CalculateEmissionProb(seqs[idx], guessME, INV_CO, DET, emis_prob); // first calculate the emission probabilities of the sequence
- loglik += GaussianHMM::ViterbiInit(L, guessTR, emis_prob, states); // get the most probable state sequence
-
- // accumulate expected transition & mean & covariance
- for (size_t t = 0; t < L - 1; t++) {
- size_t i = (size_t) states[t];
- size_t j = (size_t) states[t + 1];
- TR(i, j)++;
- }
-
- for (size_t t = 0; t < L; t++) {
- arma::vec e = seqs[idx].unsafe_col(t);
- size_t i = (size_t) states[t];
- sumState[i]++;
- ME[i] += e;
-
- arma::vec d = guessME[i] - e;
- arma::mat D = d;
- CO[i] += D * trans(D);
- }
- // end accumulate
- }
-
- // after accumulate all sequences: re-estimate transition & mean & covariance for the next iteration
- for (size_t i = 0; i < M; i++) {
- double s = 0;
- for (size_t j = 0; j < M; j++)
- s += TR(i, j);
-
- if (s == 0) {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = 0;
-
- guessTR(i, i) = 1;
- } else {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = TR(i, j) / s;
- }
-
- if (sumState[i] != 0) {
- guessME[i] = ME[i] / sumState[i];
- guessCO[i] = CO[i] / sumState[i];
- }
- }
- // end re-estimate
-
- printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
- if (fabs(oldlog - loglik) < tol) {
- printf("\nConverged after %zu iterations\n", iter);
- break;
- }
- oldlog = loglik;
- }
-}
-
-
-void GaussianHMM::Train(const std::vector<arma::mat>& seqs, arma::mat& guessTR, std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO, size_t max_iter, double tol) {
- size_t L = -1;
- size_t M = guessTR.n_rows;
- size_t N = guessME[0].n_elem;
-
- mlpack::Log::Assert((M == guessTR.n_cols && M == guessME.size() && M == guessCO.size()),
- "GaussianHMM::Train(): sizes do not match");
-
- for (size_t i = 0; i < seqs.size(); i++) {
- if (seqs[i].n_cols > L)
- L = seqs[i].n_cols;
- }
-
- arma::mat TR(M, M);; // guess transition and emission matrix
- std::vector<arma::vec> ME = guessME; // accumulating mean
- std::vector<arma::mat> CO = guessCO; // accumulating covariance
- std::vector<arma::mat> INV_CO = CO; // inverse matrix of the covariance
- arma::vec DET(M); // the determinant * constant of the Normal PDF formula
-
- arma::mat ps(M, L), fs(M, L), bs(M, L), emis_prob(M, L); // to hold hmm_decodeG results
- arma::vec s(L); // scaling factors
- arma::vec sumState(M); // the denominator for each state
-
- double loglik = 0, oldlog;
- for (size_t iter = 0; iter < max_iter; iter++) {
- oldlog = loglik;
- loglik = 0;
-
- // set the accumulating values to zeros and compute the inverse matrices and determinant constants
- TR.zeros();
- for (size_t i = 0; i < M; i++) {
- ME[i].zeros();
- CO[i].zeros();
- INV_CO[i] = inv(guessCO[i]);
- DET[i] = pow(2.0 * M_PI, -N / 2.0) * pow(det(guessCO[i]), -0.5);
- }
- sumState.zeros();
-
- // for each sequence, we will use forward-backward procedure and then accumulate
- for (size_t idx = 0; idx < seqs.size(); idx++) {
- // first calculate the emission probabilities of the sequence
- L = seqs[idx].n_cols;
- for (size_t t = 0; t < L; t++) {
- arma::vec e = seqs[idx].unsafe_col(t);
- for (size_t i = 0; i < M; i++)
- emis_prob(i, t) = NORMAL_DENSITY(e, guessME[i], INV_CO[i], DET[i]);
- }
-
- loglik += GaussianHMM::Decode(L, guessTR, emis_prob, ps, fs, bs, s); // forward - backward procedure
-
- // accumulate expected transition & mean & covariance
- for (size_t t = 0; t < L - 1; t++) {
- for (size_t i = 0; i < M; i++) {
- for (size_t j = 0; j < M; j++) {
- TR(i, j) += fs(i, t) * guessTR(i, j) * emis_prob(j, t + 1) * bs(j, t + 1) / s[t + 1];
- }
- }
- }
-
- for (size_t t = 0; t < L; t++) {
- arma::vec e = seqs[idx].unsafe_col(t);
- for (size_t i = 0; i < M; i++) {
- sumState[i] += ps(i, t);
- ME[i] += ps(i, t) * e;
-
- arma::mat D = e;
- CO[i] += ps(i, t) * (D * trans(D));
- }
- }
- // end accumulate
- }
-
- // after accumulate all sequences: re-estimate transition & mean & covariance for the next iteration
- for (size_t i = 0; i < M; i++) {
- double s = 0;
- for (size_t j = 0; j < M; j++)
- s += TR(i, j);
-
- if (s == 0) {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = 0;
-
- guessTR(i, i) = 1;
- }
- else {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = TR(i, j) / s;
- }
-
- if (sumState[i] != 0) {
- guessME[i] = ME[i] / sumState[i];
- arma::mat D = guessME[i];
- CO[i] /= sumState[i];
- CO[i] -= D * trans(D);
- guessCO[i] = CO[i];
- }
- }
- // end re-estimate
-
- printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
- if (fabs(oldlog - loglik) < tol) {
- printf("\nConverged after %zu iterations\n", iter);
- break;
- }
- oldlog = loglik;
- }
-}
-
-}; // namespace hmm
-}; // namespace mlpack
Copied: mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.cpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.cc)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.cpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,784 @@
+/**
+ * @file gaussianHMM.cc
+ *
+ * This file contains the implementation of functions in gaussianHMM.h
+ */
+#include <mlpack/core.h>
+
+#include "support.hpp"
+#include "gaussianHMM.hpp"
+
+namespace mlpack {
+namespace hmm {
+
+using namespace hmm_support;
+
+void GaussianHMM::setModel(const arma::mat& transmission, const std::vector<arma::vec>& list_mean_vec, const std::vector<arma::mat>& list_covariance_mat) {
+ mlpack::Log::Assert(transmission.n_rows == transmission.n_cols);
+ mlpack::Log::Assert(transmission.n_rows == list_mean_vec.size());
+ mlpack::Log::Assert(transmission.n_rows == list_covariance_mat.size());
+
+ for (size_t i = 1; i < list_mean_vec.size(); i++) {
+ mlpack::Log::Assert(list_mean_vec[0].n_elem == list_covariance_mat[i].n_rows);
+ mlpack::Log::Assert(list_mean_vec[0].n_elem == list_covariance_mat[i].n_cols);
+ mlpack::Log::Assert(list_mean_vec[0].n_elem == list_mean_vec[i].n_elem);
+ }
+
+ transmission_ = transmission;
+ list_mean_vec_ = list_mean_vec;
+ list_covariance_mat_ = list_covariance_mat;
+ CalculateInverse();
+}
+
+void GaussianHMM::Init(const arma::mat& transmission, const std::vector<arma::vec>& list_mean_vec,const std::vector<arma::mat>& list_covariance_mat) {
+ transmission_ = transmission;
+ list_mean_vec_ = list_mean_vec;
+ list_covariance_mat_ = list_covariance_mat;
+
+ mlpack::Log::Assert(transmission.n_rows == transmission.n_cols);
+ mlpack::Log::Assert(transmission.n_rows == list_mean_vec.size());
+ mlpack::Log::Assert(transmission.n_rows == list_covariance_mat.size());
+ for (size_t i = 1; i < list_mean_vec.size(); i++) {
+ mlpack::Log::Assert(list_mean_vec[0].n_elem == list_covariance_mat[i].n_rows);
+ mlpack::Log::Assert(list_mean_vec[0].n_elem == list_covariance_mat[i].n_cols);
+ mlpack::Log::Assert(list_mean_vec[0].n_elem == list_mean_vec[i].n_elem);
+ }
+
+ CalculateInverse();
+}
+
+void GaussianHMM::InitFromFile(const char* profile) {
+ if (!(GaussianHMM::LoadProfile(profile, transmission_, list_mean_vec_, list_covariance_mat_)))
+ mlpack::Log::Fatal << "Couldn't open " << profile << " for reading." <<
+ std::endl;
+
+ list_inverse_cov_mat_ = list_covariance_mat_;
+ gauss_const_vec_.set_size(list_covariance_mat_.size());
+
+ CalculateInverse();
+}
+
+void GaussianHMM::InitFromData(const std::vector<arma::mat>& list_data_seq, size_t numstate) {
+ GaussianHMM::InitGaussParameter(numstate, list_data_seq, transmission_, list_mean_vec_, list_covariance_mat_);
+
+ list_inverse_cov_mat_ = list_covariance_mat_;
+ gauss_const_vec_.set_size(list_covariance_mat_.size());
+
+ CalculateInverse();
+}
+
+void GaussianHMM::InitFromData(const arma::mat& data_seq, const arma::vec& state_seq) {
+ GaussianHMM::EstimateInit(data_seq, state_seq, transmission_, list_mean_vec_, list_covariance_mat_);
+ CalculateInverse();
+}
+
+void GaussianHMM::LoadProfile(const char* profile) {
+ /*
+ transmission_.Destruct();
+ list_mean_vec_.Renew();
+ list_covariance_mat_.Renew();
+ list_inverse_cov_mat_.Renew();
+ gauss_const_vec_.Destruct();
+ */
+ InitFromFile(profile);
+}
+
+void GaussianHMM::SaveProfile(const char* profile) const {
+ GaussianHMM::SaveProfile(profile, transmission_, list_mean_vec_, list_covariance_mat_);
+}
+
+void GaussianHMM::CalculateInverse() {
+ size_t M = transmission_.n_rows;
+ size_t N = list_mean_vec_[0].n_elem;
+ for (size_t i = 0; i < M; i++) {
+ list_inverse_cov_mat_[i] = inv(list_covariance_mat_[i]); // inverse
+ gauss_const_vec_[i] = pow(2.0 * M_PI, -N / 2.0) * pow(det(list_covariance_mat_[i]), -0.5);
+ }
+}
+
+void GaussianHMM::GenerateSequence(size_t L, arma::mat& data_seq, arma::vec& state_seq) const {
+ GaussianHMM::GenerateInit(L, transmission_, list_mean_vec_, list_covariance_mat_, data_seq, state_seq);
+}
+
+void GaussianHMM::EstimateModel(const arma::mat& data_seq, const arma::vec& state_seq) {
+ GaussianHMM::EstimateInit(data_seq, state_seq, transmission_, list_mean_vec_, list_covariance_mat_);
+ CalculateInverse();
+}
+
+void GaussianHMM::EstimateModel(size_t numstate, const arma::mat& data_seq, const arma::vec& state_seq) {
+ GaussianHMM::EstimateInit(numstate, data_seq, state_seq, transmission_, list_mean_vec_, list_covariance_mat_);
+ CalculateInverse();
+}
+
+void GaussianHMM::DecodeOverwrite(const arma::mat& data_seq, arma::mat& state_prob_mat,
+ arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const {
+ size_t M = transmission_.n_rows;
+ size_t L = data_seq.n_cols;
+ arma::mat emission_prob_mat(M, L);
+ GaussianHMM::CalculateEmissionProb(data_seq, list_mean_vec_, list_inverse_cov_mat_,
+ gauss_const_vec_, emission_prob_mat);
+ GaussianHMM::Decode(transmission_, emission_prob_mat, state_prob_mat, forward_prob_mat,
+ backward_prob_mat, scale_vec);
+}
+
+void GaussianHMM::DecodeInit(const arma::mat& data_seq, arma::mat& state_prob_mat,
+ arma::mat& forward_prob_mat, arma::mat& backward_prob_mat, arma::vec& scale_vec) const {
+ size_t M = transmission_.n_rows;
+ size_t L = data_seq.n_cols;
+
+ state_prob_mat.set_size(M, L);
+ forward_prob_mat.set_size(M, L);
+ backward_prob_mat.set_size(M, L);
+ scale_vec.set_size(L);
+
+ arma::mat emission_prob_mat(M, L);
+
+ GaussianHMM::CalculateEmissionProb(data_seq, list_mean_vec_, list_inverse_cov_mat_,
+ gauss_const_vec_, emission_prob_mat);
+ GaussianHMM::Decode(transmission_, emission_prob_mat, state_prob_mat, forward_prob_mat,
+ backward_prob_mat, scale_vec);
+}
+
+double GaussianHMM::ComputeLogLikelihood(const arma::mat& data_seq) const {
+ size_t L = data_seq.n_cols;
+ size_t M = transmission_.n_rows;
+
+ arma::mat fs(M, L), emis_prob(M, L);
+ arma::vec sc(L);
+
+ GaussianHMM::CalculateEmissionProb(data_seq, list_mean_vec_, list_inverse_cov_mat_, gauss_const_vec_, emis_prob);
+ GaussianHMM::ForwardProcedure(L, transmission_, emis_prob, sc, fs);
+ double loglik = 0;
+ for (size_t t = 0; t < L; t++)
+ loglik += log(sc[t]);
+ return loglik;
+}
+
+void GaussianHMM::ComputeLogLikelihood(const std::vector<arma::mat>& list_data_seq, std::vector<double>& list_likelihood) const {
+ size_t L = 0;
+ for (size_t i = 0; i < list_data_seq.size(); i++) {
+ if (list_data_seq[i].n_cols > L)
+ L = list_data_seq[i].n_cols;
+ }
+
+ size_t M = transmission_.n_rows;
+
+ arma::mat fs(M, L), emis_prob(M, L);
+ arma::vec sc(L);
+
+ for (size_t i = 0; i < list_data_seq.size(); i++) {
+ size_t L = list_data_seq[i].n_cols;
+
+ GaussianHMM::CalculateEmissionProb(list_data_seq[i], list_mean_vec_, list_inverse_cov_mat_, gauss_const_vec_, emis_prob);
+ GaussianHMM::ForwardProcedure(L, transmission_, emis_prob, sc, fs);
+
+ double loglik = 0;
+ for (size_t t = 0; t < L; t++)
+ loglik += log(sc[t]);
+ list_likelihood.push_back(loglik);
+ }
+}
+
+void GaussianHMM::ComputeViterbiStateSequence(const arma::mat& data_seq, arma::vec& state_seq) const {
+ size_t M = transmission_.n_rows;
+ size_t L = data_seq.n_cols;
+
+ arma::mat emis_prob(M, L);
+
+ GaussianHMM::CalculateEmissionProb(data_seq, list_mean_vec_, list_inverse_cov_mat_, gauss_const_vec_, emis_prob);
+ GaussianHMM::ViterbiInit(transmission_, emis_prob, state_seq);
+}
+
+void GaussianHMM::TrainBaumWelch(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance) {
+ GaussianHMM::Train(list_data_seq, transmission_, list_mean_vec_, list_covariance_mat_, max_iteration, tolerance);
+ CalculateInverse();
+}
+
+void GaussianHMM::TrainViterbi(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance) {
+ GaussianHMM::TrainViterbi(list_data_seq, transmission_, list_mean_vec_, list_covariance_mat_, max_iteration, tolerance);
+ CalculateInverse();
+}
+
+bool GaussianHMM::LoadProfile(const char* profile, arma::mat& trans, std::vector<arma::vec>& means, std::vector<arma::mat>& covs) {
+ std::vector<arma::mat> matlst;
+ if (!(load_matrix_list(profile, matlst)))
+ return false;
+
+ mlpack::Log::Assert(matlst.size() > 0);
+
+ trans = matlst[0];
+ size_t M = trans.n_rows; // num of states
+ mlpack::Log::Assert(matlst.size() == (2 * M + 1));
+ size_t N = matlst[1].n_rows; // dimension
+
+ for (size_t i = 1; i < (2 * M + 1); i += 2) {
+ mlpack::Log::Assert(matlst[i].n_rows == N && matlst[i].n_cols == 1);
+ mlpack::Log::Assert(matlst[i + 1].n_rows == N && matlst[i + 1].n_cols == N);
+
+ arma::vec m = matlst[i].unsafe_col(0);
+
+ means.push_back(m);
+ covs.push_back(matlst[i + 1]);
+ }
+
+ return true;
+}
+
+bool GaussianHMM::SaveProfile(const char* profile, const arma::mat& trans, const std::vector<arma::vec>& means, const std::vector<arma::mat>& covs) {
+ TextWriter w_pro;
+ if (!(w_pro.Open(profile))) {
+ mlpack::Log::Warn << "Couldn't open " << profile << " for writing." <<
+ std::endl;
+ return false;
+ }
+
+ size_t M = trans.n_rows; // num of states
+ mlpack::Log::Assert(means.size() == M && covs.size() == M);
+ size_t N = means[0].n_elem; // dimension
+ print_matrix(w_pro, trans, "% transmission", "%f,");
+ for (size_t i = 0; i < M; i++) {
+ mlpack::Log::Assert(means[i].n_elem == N);
+ mlpack::Log::Assert(covs[i].n_rows == N && covs[i].n_cols == N);
+ char s[100];
+ sprintf(s, "%% mean - state %zu", i);
+ print_vector(w_pro, means[i], s, "%f,");
+ sprintf(s, "%% covariance - state%zu", i);
+ print_matrix(w_pro, covs[i], s, "%f,");
+ }
+
+ return true;
+}
+
+void GaussianHMM::GenerateInit(size_t L, const arma::mat& trans, const std::vector<arma::vec>& means, const std::vector<arma::mat>& covs, arma::mat& seq, arma::vec& states){
+ mlpack::Log::Assert((trans.n_rows == trans.n_cols && trans.n_rows == means.size() && trans.n_rows == covs.size()),
+ "GaussianHMM::GenerateInit(): matrix sizes do not match");
+
+ arma::mat trsum;
+ size_t M, N;
+ size_t cur_state;
+
+ M = trans.n_rows;
+ N = means[0].n_elem; // emission vector length
+
+ trsum = trans;
+
+ for (size_t i = 0; i < M; i++) {
+ for (size_t j = 1; j < M; j++) {
+ trsum(i, j) += trsum(i, j - 1);
+ }
+ }
+
+ seq.set_size(N, L);
+ states.set_size(L);
+
+ cur_state = 0; // starting state is 0
+
+ for (size_t i = 0; i < L; i++) {
+ size_t j;
+
+ // next state
+ double r = (double) rand() / (double) RAND_MAX;
+ for (j = 0; j < M; j++) {
+ if (r <= trsum(cur_state, j))
+ break;
+ }
+ cur_state = j;
+
+ // emission
+ arma::vec e;
+ RAND_NORMAL_INIT(means[cur_state], covs[cur_state], e);
+ for (j = 0; j < N; j++)
+ seq(j, i) = e[j];
+ states[i] = cur_state;
+ }
+}
+
+void GaussianHMM::EstimateInit(const arma::mat& seq, const arma::vec& states, arma::mat& trans, std::vector<arma::vec>& means, std::vector<arma::mat>& covs) {
+ mlpack::Log::Assert((seq.n_cols == states.n_elem), "GaussianHMM::EstimateInit(): sequence and states length must be the same");
+
+ size_t M = 0;
+ for (size_t i = 0; i < seq.n_cols; i++) {
+ if (states[i] > M)
+ M = (size_t) states[i];
+ }
+ M++;
+ GaussianHMM::EstimateInit(M, seq, states, trans, means, covs);
+}
+
+void GaussianHMM::EstimateInit(size_t numStates, const arma::mat& seq, const arma::vec& states, arma::mat& trans, std::vector<arma::vec>& means, std::vector<arma::mat>& covs) {
+ mlpack::Log::Assert((seq.n_cols == states.n_elem), "GaussianHMM::EstimateInit(): sequence and states length must be the same");
+
+ size_t N = seq.n_rows; // emission vector length
+ size_t M = numStates; // number of states
+ size_t L = seq.n_cols; // sequence length
+
+ arma::vec stateSum;
+
+ trans.zeros(M, M);
+ stateSum.zeros(M);
+
+ for (size_t i = 0; i < M; i++) {
+ arma::vec m;
+ m.zeros(N);
+ means.push_back(m);
+
+ arma::mat c;
+ c.zeros(N, N);
+ covs.push_back(c);
+ }
+
+ for (size_t i = 0; i < L - 1; i++) {
+ size_t state = (size_t) states[i];
+ size_t next_state = (size_t) states[i + 1];
+ stateSum[state]++;
+ trans(state, next_state)++;
+ }
+
+ for (size_t i = 0; i < M; i++) {
+ if (stateSum[i] == 0)
+ stateSum[i] = -INFINITY;
+
+ for (size_t j = 0; j < M; j++)
+ trans(i, j) /= stateSum[i];
+ }
+
+ stateSum.zeros();
+ for (size_t i = 0; i < L; i++) {
+ size_t state = (size_t) states[i];
+ arma::vec e = seq.unsafe_col(i);
+
+ stateSum[state]++;
+ means[state] += e;
+ }
+
+ for (size_t i = 0; i < M; i++) {
+ if (stateSum[i] != 0)
+ means[i] /= stateSum[i];
+ }
+
+ for (size_t i = 0; i < L; i++) {
+ size_t state = (size_t) states[i];
+ arma::vec e = seq.unsafe_col(i);
+ arma::vec d = means[state] - e;
+ covs[state] += d * arma::trans(d);
+ }
+
+ for (size_t i = 0; i < M; i++) {
+ if (stateSum[i] != 0)
+ covs[i] /= stateSum[i];
+ }
+}
+
+void GaussianHMM::ForwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& scales, arma::mat& fs) {
+ size_t M = trans.n_rows;
+
+ fs.zeros();
+ scales.zeros();
+
+ // NOTE: start state is 0
+ // time t = 0
+ for (size_t i = 0; i < M; i++) {
+ fs(i, 0) = trans(0, i) * emis_prob(i, 0);
+ scales[0] += fs(i, 0);
+ }
+
+ for (size_t i = 0; i < M; i++)
+ fs(i, 0) /= scales[0];
+
+ // time t = 1 -> L-1
+ for (size_t t = 1; t < L; t++) {
+ for (size_t j = 0; j < M; j++) {
+ for (size_t i = 0; i < M; i++)
+ fs(j, t) += fs(i, t - 1) * trans(i, j);
+ fs(j, t) *= emis_prob(j, t);
+ scales[t] += fs(j, t);
+ }
+ for (size_t j = 0; j < M; j++)
+ fs(j, t) /= scales[t];
+ }
+}
+
+void GaussianHMM::BackwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob, const arma::vec& scales, arma::mat& bs) {
+ size_t M = trans.n_rows;
+
+ bs.zeros();
+
+ for (size_t i = 0; i < M; i++)
+ bs(i, L - 1) = 1.0;
+
+ for (size_t t = L - 2; t >= 0; t--) {
+ for (size_t i = 0; i < M; i++) {
+ for (size_t j = 0; j < M; j++)
+ bs(i, t) += trans(i, j) * bs(j, t + 1) * emis_prob(j, t + 1);
+ bs(i, t) /= scales[t + 1];
+ }
+ }
+}
+
+double GaussianHMM::Decode(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales) {
+ size_t M = trans.n_rows;
+
+ mlpack::Log::Assert((L == pstates.n_cols && L == fs.n_cols && L == bs.n_cols &&
+ M == trans.n_cols && M == emis_prob.n_rows),
+ "GaussianHMM::Decode(): sizes do not match");
+
+ GaussianHMM::ForwardProcedure(L, trans, emis_prob, scales, fs);
+ GaussianHMM::BackwardProcedure(L, trans, emis_prob, scales, bs);
+
+ for (size_t i = 0; i < M; i++)
+ for (size_t t = 0; t < L; t++)
+ pstates(i, t) = fs(i,t) * bs(i,t);
+
+ double logpseq = 0;
+ for (size_t t = 0; t < L; t++)
+ logpseq += log(scales[t]);
+
+ return logpseq;
+}
+
+double GaussianHMM::Decode(const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales) {
+ size_t L = emis_prob.n_cols;
+ return GaussianHMM::Decode(L, trans, emis_prob, pstates, fs, bs, scales);
+}
+
+double GaussianHMM::ViterbiInit(const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states) {
+ size_t L = emis_prob.n_cols;
+ return GaussianHMM::ViterbiInit(L, trans, emis_prob, states);
+}
+
+double GaussianHMM::ViterbiInit(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states) {
+ size_t M = trans.n_rows;
+ mlpack::Log::Assert((M == trans.n_cols && M == emis_prob.n_rows),
+ "GaussianHMM::ViterbitInit(): sizes do not match");
+
+ states.set_size(L);
+
+ arma::vec v(M), v_old;
+ v.fill(-INFINITY);
+ v[0] = 0;
+ v_old = v;
+
+ arma::mat w(M, L);
+ arma::mat logtrans(M, M);
+
+ for (size_t i = 0; i < M; i++) {
+ for (size_t j = 0; j < M; j++)
+ logtrans(i, j) = log(trans(i, j));
+ }
+
+
+ for (size_t t = 0; t < L; t++) {
+ for (size_t j = 0; j < M; j++) {
+ double bestVal = -INFINITY;
+ double bestPtr = -1;
+ for (size_t i = 0; i < M; i++) {
+ double val = v_old[i] + logtrans(i, j);
+ if (val > bestVal) {
+ bestVal = val;
+ bestPtr = i;
+ }
+ }
+ v[j] = bestVal + log(emis_prob(j, t));
+ w(j, t) = bestPtr;
+ }
+ v_old = v;
+ }
+
+ double bestVal = -INFINITY;
+ double bestPtr = -1;
+ for (size_t i = 0; i < M; i++)
+ if (v[i] > bestVal) {
+ bestVal = v[i];
+ bestPtr = i;
+ }
+
+ states[L - 1] = bestPtr;
+ for (size_t t = L - 2; t >= 0; t--)
+ states[t] = w((size_t) states[t + 1], t + 1);
+
+ return bestVal;
+}
+
+void GaussianHMM::CalculateEmissionProb(const arma::mat& seq, const std::vector<arma::vec>& means, const std::vector<arma::mat>& inv_covs, const arma::vec& det, arma::mat& emis_prob) {
+ size_t L = seq.n_cols;
+ size_t M = means.size();
+ for (size_t t = 0; t < L; t++) {
+ arma::vec e = seq.unsafe_col(t);
+ for (size_t i = 0; i < M; i++)
+ emis_prob(i, t) = NORMAL_DENSITY(e, means[i], inv_covs[i], det[i]);
+ }
+}
+
+void GaussianHMM::InitGaussParameter(size_t M, const std::vector<arma::mat>& seqs, arma::mat& guessTR, std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO) {
+ size_t N = seqs[0].n_rows;
+
+ std::vector<size_t> labels;
+ arma::vec sumState;
+
+ kmeans(seqs, M, labels, guessME, 1000, 1e-5);
+
+ //for (size_t i = 0; i < labels.size(); i++) printf("%8d", labels[i]);
+ //printf("---1---\n");
+
+ guessTR.zeros(M, M);
+ sumState.zeros(M);
+ for (size_t i = 0; i < M; i++) {
+ arma::mat m;
+ m.zeros(N, N);
+ guessCO.push_back(m);
+ }
+ //printf("---2---\n");
+
+ size_t t = 0;
+ for (size_t p = 0; p < seqs.size(); p++) {
+ for (size_t q = 0; q < seqs[p].n_cols; q++, t++) {
+ if (q == seqs[p].n_cols - 1)
+ continue;
+
+ size_t i = labels[t];
+ size_t j = labels[t + 1];
+
+ guessTR(i, j)++;
+ sumState[i]++;
+
+ arma::vec data_j_Vec = seqs[p].unsafe_col(q);
+ arma::mat tmp_cov;
+
+ arma::vec sub_Vec = data_j_Vec - guessME[i];
+ tmp_cov = sub_Vec;
+ //printf("t = %d x = %8.3f\n", t, sub_Vec[0]);
+ guessCO[i] += tmp_cov * trans(tmp_cov);
+ }
+ }
+ //printf("---3---\n");
+
+ for (size_t i = 0; i < M; i++)
+ if (sumState[i] == 0) {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = 0;
+
+ guessTR(i, i) = 1;
+ guessME[i].zeros();
+ guessCO[i].zeros();
+
+ for (size_t j = 0; j < N; j++)
+ guessCO[i](j, j) = 1;
+ }
+ else {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) /= sumState[i];
+
+ guessCO[i] /= sumState[i];
+
+ for (size_t j = 0; j < N; j++)
+ guessCO[i](j, j) += 1e-3; // make sure the diagonal elements are not too small
+ }
+ //printf("---4---\n");
+}
+
+void GaussianHMM::TrainViterbi(const std::vector<arma::mat>& seqs, arma::mat& guessTR, std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO, size_t max_iter, double tol) {
+ size_t L = -1;
+ size_t M = guessTR.n_rows;
+ size_t N = guessME[0].n_elem;
+ mlpack::Log::Assert((M == guessTR.n_cols && M == guessME.size() && M == guessCO.size()),
+ "GaussianHMM::TrainViterbi(): sizes do not match");
+
+ for (size_t i = 0; i < seqs.size(); i++) {
+ if (seqs[i].n_cols > L)
+ L = seqs[i].n_cols;
+ }
+
+ arma::mat TR(M, M); // accumulating transition
+ std::vector<arma::vec> ME = guessME; // accumulating mean
+ std::vector<arma::mat> CO = guessCO; // accumulating covariance
+ std::vector<arma::mat> INV_CO = CO; // inverse matrix of the covariance
+ arma::vec DET(M); // the determinant * constant of the Normal PDF formula
+
+ arma::mat emis_prob(M, L);
+ arma::vec sumState(M); // the denominator for each state
+
+ double loglik = 0, oldlog;
+ for (size_t iter = 0; iter < max_iter; iter++) {
+ oldlog = loglik;
+ loglik = 0;
+
+ // set the accumulating values to zeros and compute the inverse matrices and determinant constants
+ TR.zeros();
+ for (size_t i = 0; i < M; i++) {
+ ME[i].zeros();
+ CO[i].zeros();
+ INV_CO[i] = inv(guessCO[i]);
+ DET[i] = pow(2.0 * M_PI, -N / 2.0) * pow(det(guessCO[i]), -0.5);
+ }
+ sumState.zeros();
+
+ // for each sequence, we will use forward-backward procedure and then accumulate
+ for (size_t idx = 0; idx < seqs.size(); idx++) {
+ L = seqs[idx].n_cols;
+ arma::vec states;
+ GaussianHMM::CalculateEmissionProb(seqs[idx], guessME, INV_CO, DET, emis_prob); // first calculate the emission probabilities of the sequence
+ loglik += GaussianHMM::ViterbiInit(L, guessTR, emis_prob, states); // get the most probable state sequence
+
+ // accumulate expected transition & mean & covariance
+ for (size_t t = 0; t < L - 1; t++) {
+ size_t i = (size_t) states[t];
+ size_t j = (size_t) states[t + 1];
+ TR(i, j)++;
+ }
+
+ for (size_t t = 0; t < L; t++) {
+ arma::vec e = seqs[idx].unsafe_col(t);
+ size_t i = (size_t) states[t];
+ sumState[i]++;
+ ME[i] += e;
+
+ arma::vec d = guessME[i] - e;
+ arma::mat D = d;
+ CO[i] += D * trans(D);
+ }
+ // end accumulate
+ }
+
+ // after accumulate all sequences: re-estimate transition & mean & covariance for the next iteration
+ for (size_t i = 0; i < M; i++) {
+ double s = 0;
+ for (size_t j = 0; j < M; j++)
+ s += TR(i, j);
+
+ if (s == 0) {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = 0;
+
+ guessTR(i, i) = 1;
+ } else {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = TR(i, j) / s;
+ }
+
+ if (sumState[i] != 0) {
+ guessME[i] = ME[i] / sumState[i];
+ guessCO[i] = CO[i] / sumState[i];
+ }
+ }
+ // end re-estimate
+
+ printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
+ if (fabs(oldlog - loglik) < tol) {
+ printf("\nConverged after %zu iterations\n", iter);
+ break;
+ }
+ oldlog = loglik;
+ }
+}
+
+
+void GaussianHMM::Train(const std::vector<arma::mat>& seqs, arma::mat& guessTR, std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO, size_t max_iter, double tol) {
+ size_t L = -1;
+ size_t M = guessTR.n_rows;
+ size_t N = guessME[0].n_elem;
+
+ mlpack::Log::Assert((M == guessTR.n_cols && M == guessME.size() && M == guessCO.size()),
+ "GaussianHMM::Train(): sizes do not match");
+
+ for (size_t i = 0; i < seqs.size(); i++) {
+ if (seqs[i].n_cols > L)
+ L = seqs[i].n_cols;
+ }
+
+ arma::mat TR(M, M);; // guess transition and emission matrix
+ std::vector<arma::vec> ME = guessME; // accumulating mean
+ std::vector<arma::mat> CO = guessCO; // accumulating covariance
+ std::vector<arma::mat> INV_CO = CO; // inverse matrix of the covariance
+ arma::vec DET(M); // the determinant * constant of the Normal PDF formula
+
+ arma::mat ps(M, L), fs(M, L), bs(M, L), emis_prob(M, L); // to hold hmm_decodeG results
+ arma::vec s(L); // scaling factors
+ arma::vec sumState(M); // the denominator for each state
+
+ double loglik = 0, oldlog;
+ for (size_t iter = 0; iter < max_iter; iter++) {
+ oldlog = loglik;
+ loglik = 0;
+
+ // set the accumulating values to zeros and compute the inverse matrices and determinant constants
+ TR.zeros();
+ for (size_t i = 0; i < M; i++) {
+ ME[i].zeros();
+ CO[i].zeros();
+ INV_CO[i] = inv(guessCO[i]);
+ DET[i] = pow(2.0 * M_PI, -N / 2.0) * pow(det(guessCO[i]), -0.5);
+ }
+ sumState.zeros();
+
+ // for each sequence, we will use forward-backward procedure and then accumulate
+ for (size_t idx = 0; idx < seqs.size(); idx++) {
+ // first calculate the emission probabilities of the sequence
+ L = seqs[idx].n_cols;
+ for (size_t t = 0; t < L; t++) {
+ arma::vec e = seqs[idx].unsafe_col(t);
+ for (size_t i = 0; i < M; i++)
+ emis_prob(i, t) = NORMAL_DENSITY(e, guessME[i], INV_CO[i], DET[i]);
+ }
+
+ loglik += GaussianHMM::Decode(L, guessTR, emis_prob, ps, fs, bs, s); // forward - backward procedure
+
+ // accumulate expected transition & mean & covariance
+ for (size_t t = 0; t < L - 1; t++) {
+ for (size_t i = 0; i < M; i++) {
+ for (size_t j = 0; j < M; j++) {
+ TR(i, j) += fs(i, t) * guessTR(i, j) * emis_prob(j, t + 1) * bs(j, t + 1) / s[t + 1];
+ }
+ }
+ }
+
+ for (size_t t = 0; t < L; t++) {
+ arma::vec e = seqs[idx].unsafe_col(t);
+ for (size_t i = 0; i < M; i++) {
+ sumState[i] += ps(i, t);
+ ME[i] += ps(i, t) * e;
+
+ arma::mat D = e;
+ CO[i] += ps(i, t) * (D * trans(D));
+ }
+ }
+ // end accumulate
+ }
+
+ // after accumulate all sequences: re-estimate transition & mean & covariance for the next iteration
+ for (size_t i = 0; i < M; i++) {
+ double s = 0;
+ for (size_t j = 0; j < M; j++)
+ s += TR(i, j);
+
+ if (s == 0) {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = 0;
+
+ guessTR(i, i) = 1;
+ }
+ else {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = TR(i, j) / s;
+ }
+
+ if (sumState[i] != 0) {
+ guessME[i] = ME[i] / sumState[i];
+ arma::mat D = guessME[i];
+ CO[i] /= sumState[i];
+ CO[i] -= D * trans(D);
+ guessCO[i] = CO[i];
+ }
+ }
+ // end re-estimate
+
+ printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
+ if (fabs(oldlog - loglik) < tol) {
+ printf("\nConverged after %zu iterations\n", iter);
+ break;
+ }
+ oldlog = loglik;
+ }
+}
+
+}; // namespace hmm
+}; // namespace mlpack
Deleted: mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.h
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.h 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.h 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,203 +0,0 @@
-/**
- * @file gaussianHMM.h
- *
- * This file contains functions of a Gaussian Hidden Markov Models.
- * It implements log-likelihood computation, viterbi algorithm for the most
- * probable sequence, Baum-Welch algorithm and Viterbi-like algorithm for
- * parameter estimation. It can also generate sequences from a Hidden Markov
- * Model.
- */
-#ifndef __MLPACK_METHODS_HMM_GAUSSIAN_HMM_H
-#define __MLPACK_METHODS_HMM_GAUSSIAN_HMM_H
-
-#include <mlpack/core.h>
-
-namespace mlpack {
-namespace hmm {
-
-/**
- * A wrapper class for HMM functionals in single Gaussian case
- *
- * This class maintains transition probabilities and Gaussian parameters (mean and
- * covariance) for each state, more details below.
- */
-
-class GaussianHMM {
- //////////// Member variables ////////////////////////////////////////////////
- private:
- /** Transmission probabilities matrix between states */
- arma::mat transmission_;
-
- /** List of mean vectors */
- std::vector<arma::vec> list_mean_vec_;
-
- /** List of covariance matrices */
- std::vector<arma::mat> list_covariance_mat_;
-
- /** List of inverse of the covariances */
- std::vector<arma::mat> list_inverse_cov_mat_;
-
- /** Vector of constant in the gaussian density fomular */
- arma::vec gauss_const_vec_;
-
- /** Calculate the inverse covariance and the constant in gaussian fomular */
- void CalculateInverse();
- public:
- /** Getters */
- const arma::mat& transmission() const { return transmission_; }
- const std::vector<arma::vec>& list_mean_vec() const { return list_mean_vec_; }
- const std::vector<arma::mat>& list_covariance_mat() const { return list_covariance_mat_; }
-
- /** Setter used when already initialized */
- void setModel(const arma::mat& transmission, const std::vector<arma::vec>& list_mean_vec,
- const std::vector<arma::mat>& list_covariance_mat);
-
- /** Initializes from computed transmission and Gaussian parameters */
- void Init(const arma::mat& transmission, const std::vector<arma::vec>& list_mean_vec,
- const std::vector<arma::mat>& list_covariance_mat);
-
- /** Initializes by loading from a file */
- void InitFromFile(const char* profile);
-
- /** Initializes using K-means algorithm using data as a guide */
- void InitFromData(const std::vector<arma::mat>& list_data_seq, size_t numstate);
-
- /** Initializes using data and state sequence as a guide */
- void InitFromData(const arma::mat& data_seq, const arma::vec& state_seq);
-
- /** Load from file, used when already initialized */
- void LoadProfile(const char* profile);
-
- /** Save matrices to file */
- void SaveProfile(const char* profile) const;
-
- /** Generate a random data sequence of a given length */
- void GenerateSequence(size_t L, arma::mat& data_seq, arma::vec& state_seq) const;
-
- /**
- * Estimate the matrices by a data sequence and a state sequence
- * Must be already initialized
- */
-
- void EstimateModel(const arma::mat& data_seq, const arma::vec& state_seq);
- void EstimateModel(size_t numstate,
- const arma::mat& data_seq, const arma::vec& state_seq);
-
- /**
- * Decode a sequence into probabilities of each state at each time step
- * using scaled forward-backward algorithm.
- * Also return forward, backward probabilities and scale factors
- */
- void DecodeOverwrite(const arma::mat& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat,
- arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
-
- /** A decode version that initialized the output matrices */
- void DecodeInit(const arma::mat& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat,
- arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
-
- /** Compute the log-likelihood of a sequence */
- double ComputeLogLikelihood(const arma::mat& data_seq) const;
-
- /** Compute the log-likelihood of a list of sequences */
- void ComputeLogLikelihood(const std::vector<arma::mat>& list_data_seq,
- std::vector<double>& list_likelihood) const;
-
- /** Compute the most probable sequence (Viterbi) */
- void ComputeViterbiStateSequence(const arma::mat& data_seq, arma::vec& state_seq) const;
-
- /**
- * Train the model with a list of sequences, must be already initialized
- * using Baum-Welch EM algorithm
- */
- void TrainBaumWelch(const std::vector<arma::mat>& list_data_seq,
- size_t max_iteration, double tolerance);
-
- /**
- * Train the model with a list of sequences, must be already initialized
- * using Viterbi algorithm to determine the state sequence of each sequence
- */
- void TrainViterbi(const std::vector<arma::mat>& list_data_seq,
- size_t max_iteration, double tolerance);
-
-
- ////////// Static helper functions ///////////////////////////////////////
-
- static bool LoadProfile(const char* profile, arma::mat& trans,
- std::vector<arma::vec>& means, std::vector<arma::mat>& covs);
- static bool SaveProfile(const char* profile, const arma::mat& trans,
- const std::vector<arma::vec>& means,
- const std::vector<arma::mat>& covs);
- /**
- * Generating a sequence and states using transition and emission probabilities.
- * L: sequence length
- * trans: Matrix M x M (M states)
- * means: list of mean vectors length N (emission vector of length N)
- * covs: list of square root of covariance matrices size N x N
- * seq: generated sequence, uninitialized matrix, will have size N x L
- * states: generated states, uninitialized vector, will have length L
- */
- static void GenerateInit(size_t L, const arma::mat& trans, const std::vector<arma::vec>& means,
- const std::vector<arma::mat>& covs, arma::mat& seq, arma::vec& states);
-
- /** Estimate transition and emission distribution from sequence and states */
- static void EstimateInit(const arma::mat& seq, const arma::vec& states, arma::mat& trans,
- std::vector<arma::vec>& means, std::vector<arma::mat>& covs);
- static void EstimateInit(size_t numStates, const arma::mat& seq, const arma::vec& states,
- arma::mat& trans, std::vector<arma::vec>& means,
- std::vector<arma::mat>& covs);
-
- /**
- * Calculate posteriori probabilities of states at each steps
- * Scaled Forward - Backward procedure
- * trans: Transition probabilities, size M x M
- * emis_prob: Emission probabilities along the sequence,
- * size M x L (L is the sequence length)
- * pstates: size M x L
- * fs: scaled forward probabities, size M x L
- * bs: scaled backward probabities, size M x L
- * scales: scale factors, length L
- * RETURN: log probabilities of sequence
- */
- static void ForwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
- arma::vec& scales, arma::mat& fs);
- static void BackwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
- const arma::vec& scales, arma::mat& bs);
- static double Decode(const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates,
- arma::mat& fs, arma::mat& bs, arma::vec& scales);
- static double Decode(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
- arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales);
- static void CalculateEmissionProb(const arma::mat& seq, const std::vector<arma::vec>& means,
- const std::vector<arma::mat>& inv_covs, const arma::vec& det,
- arma::mat& emis_prob);
-
- /**
- * Calculate the most probable states for a sequence
- * Viterbi algorithm
- * trans: Transition probabilities, size M x M
- * emis_prob: Emission probabilities, size M x L
- * states: Unitialized, will have length L
- * RETURN: log probability of the most probable sequence
- */
- static double ViterbiInit(const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states);
- static double ViterbiInit(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states);
-
- /**
- * Baum-Welch and Viterbi estimation of transition and emission
- * distribution (Gaussian)
- */
- static void InitGaussParameter(size_t M, const std::vector<arma::mat>& seqs,
- arma::mat& guessTR, std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO);
-
- static void Train(const std::vector<arma::mat>& seqs, arma::mat& guessTR,
- std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO,
- size_t max_iter, double tol);
-
- static void TrainViterbi(const std::vector<arma::mat>& seqs, arma::mat& guessTR,
- std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO,
- size_t max_iter, double tol);
-};
-
-}; // namespace hmm
-}; // namespace mlpack
-
-#endif
Copied: mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.hpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.h)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/gaussianHMM.hpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,203 @@
+/**
+ * @file gaussianHMM.h
+ *
+ * This file contains functions of a Gaussian Hidden Markov Models.
+ * It implements log-likelihood computation, viterbi algorithm for the most
+ * probable sequence, Baum-Welch algorithm and Viterbi-like algorithm for
+ * parameter estimation. It can also generate sequences from a Hidden Markov
+ * Model.
+ */
+#ifndef __MLPACK_METHODS_HMM_GAUSSIAN_HMM_H
+#define __MLPACK_METHODS_HMM_GAUSSIAN_HMM_H
+
+#include <mlpack/core.h>
+
+namespace mlpack {
+namespace hmm {
+
+/**
+ * A wrapper class for HMM functionals in single Gaussian case
+ *
+ * This class maintains transition probabilities and Gaussian parameters (mean and
+ * covariance) for each state, more details below.
+ */
+
+class GaussianHMM {
+ //////////// Member variables ////////////////////////////////////////////////
+ private:
+ /** Transmission probabilities matrix between states */
+ arma::mat transmission_;
+
+ /** List of mean vectors */
+ std::vector<arma::vec> list_mean_vec_;
+
+ /** List of covariance matrices */
+ std::vector<arma::mat> list_covariance_mat_;
+
+ /** List of inverse of the covariances */
+ std::vector<arma::mat> list_inverse_cov_mat_;
+
+ /** Vector of constant in the gaussian density fomular */
+ arma::vec gauss_const_vec_;
+
+ /** Calculate the inverse covariance and the constant in gaussian fomular */
+ void CalculateInverse();
+ public:
+ /** Getters */
+ const arma::mat& transmission() const { return transmission_; }
+ const std::vector<arma::vec>& list_mean_vec() const { return list_mean_vec_; }
+ const std::vector<arma::mat>& list_covariance_mat() const { return list_covariance_mat_; }
+
+ /** Setter used when already initialized */
+ void setModel(const arma::mat& transmission, const std::vector<arma::vec>& list_mean_vec,
+ const std::vector<arma::mat>& list_covariance_mat);
+
+ /** Initializes from computed transmission and Gaussian parameters */
+ void Init(const arma::mat& transmission, const std::vector<arma::vec>& list_mean_vec,
+ const std::vector<arma::mat>& list_covariance_mat);
+
+ /** Initializes by loading from a file */
+ void InitFromFile(const char* profile);
+
+ /** Initializes using K-means algorithm using data as a guide */
+ void InitFromData(const std::vector<arma::mat>& list_data_seq, size_t numstate);
+
+ /** Initializes using data and state sequence as a guide */
+ void InitFromData(const arma::mat& data_seq, const arma::vec& state_seq);
+
+ /** Load from file, used when already initialized */
+ void LoadProfile(const char* profile);
+
+ /** Save matrices to file */
+ void SaveProfile(const char* profile) const;
+
+ /** Generate a random data sequence of a given length */
+ void GenerateSequence(size_t L, arma::mat& data_seq, arma::vec& state_seq) const;
+
+ /**
+ * Estimate the matrices by a data sequence and a state sequence
+ * Must be already initialized
+ */
+
+ void EstimateModel(const arma::mat& data_seq, const arma::vec& state_seq);
+ void EstimateModel(size_t numstate,
+ const arma::mat& data_seq, const arma::vec& state_seq);
+
+ /**
+ * Decode a sequence into probabilities of each state at each time step
+ * using scaled forward-backward algorithm.
+ * Also return forward, backward probabilities and scale factors
+ */
+ void DecodeOverwrite(const arma::mat& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat,
+ arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
+
+ /** A decode version that initialized the output matrices */
+ void DecodeInit(const arma::mat& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat,
+ arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
+
+ /** Compute the log-likelihood of a sequence */
+ double ComputeLogLikelihood(const arma::mat& data_seq) const;
+
+ /** Compute the log-likelihood of a list of sequences */
+ void ComputeLogLikelihood(const std::vector<arma::mat>& list_data_seq,
+ std::vector<double>& list_likelihood) const;
+
+ /** Compute the most probable sequence (Viterbi) */
+ void ComputeViterbiStateSequence(const arma::mat& data_seq, arma::vec& state_seq) const;
+
+ /**
+ * Train the model with a list of sequences, must be already initialized
+ * using Baum-Welch EM algorithm
+ */
+ void TrainBaumWelch(const std::vector<arma::mat>& list_data_seq,
+ size_t max_iteration, double tolerance);
+
+ /**
+ * Train the model with a list of sequences, must be already initialized
+ * using Viterbi algorithm to determine the state sequence of each sequence
+ */
+ void TrainViterbi(const std::vector<arma::mat>& list_data_seq,
+ size_t max_iteration, double tolerance);
+
+
+ ////////// Static helper functions ///////////////////////////////////////
+
+ static bool LoadProfile(const char* profile, arma::mat& trans,
+ std::vector<arma::vec>& means, std::vector<arma::mat>& covs);
+ static bool SaveProfile(const char* profile, const arma::mat& trans,
+ const std::vector<arma::vec>& means,
+ const std::vector<arma::mat>& covs);
+ /**
+ * Generating a sequence and states using transition and emission probabilities.
+ * L: sequence length
+ * trans: Matrix M x M (M states)
+ * means: list of mean vectors length N (emission vector of length N)
+ * covs: list of square root of covariance matrices size N x N
+ * seq: generated sequence, uninitialized matrix, will have size N x L
+ * states: generated states, uninitialized vector, will have length L
+ */
+ static void GenerateInit(size_t L, const arma::mat& trans, const std::vector<arma::vec>& means,
+ const std::vector<arma::mat>& covs, arma::mat& seq, arma::vec& states);
+
+ /** Estimate transition and emission distribution from sequence and states */
+ static void EstimateInit(const arma::mat& seq, const arma::vec& states, arma::mat& trans,
+ std::vector<arma::vec>& means, std::vector<arma::mat>& covs);
+ static void EstimateInit(size_t numStates, const arma::mat& seq, const arma::vec& states,
+ arma::mat& trans, std::vector<arma::vec>& means,
+ std::vector<arma::mat>& covs);
+
+ /**
+ * Calculate posteriori probabilities of states at each steps
+ * Scaled Forward - Backward procedure
+ * trans: Transition probabilities, size M x M
+ * emis_prob: Emission probabilities along the sequence,
+ * size M x L (L is the sequence length)
+ * pstates: size M x L
+ * fs: scaled forward probabities, size M x L
+ * bs: scaled backward probabities, size M x L
+ * scales: scale factors, length L
+ * RETURN: log probabilities of sequence
+ */
+ static void ForwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
+ arma::vec& scales, arma::mat& fs);
+ static void BackwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
+ const arma::vec& scales, arma::mat& bs);
+ static double Decode(const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates,
+ arma::mat& fs, arma::mat& bs, arma::vec& scales);
+ static double Decode(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
+ arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales);
+ static void CalculateEmissionProb(const arma::mat& seq, const std::vector<arma::vec>& means,
+ const std::vector<arma::mat>& inv_covs, const arma::vec& det,
+ arma::mat& emis_prob);
+
+ /**
+ * Calculate the most probable states for a sequence
+ * Viterbi algorithm
+ * trans: Transition probabilities, size M x M
+ * emis_prob: Emission probabilities, size M x L
+ * states: Unitialized, will have length L
+ * RETURN: log probability of the most probable sequence
+ */
+ static double ViterbiInit(const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states);
+ static double ViterbiInit(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states);
+
+ /**
+ * Baum-Welch and Viterbi estimation of transition and emission
+ * distribution (Gaussian)
+ */
+ static void InitGaussParameter(size_t M, const std::vector<arma::mat>& seqs,
+ arma::mat& guessTR, std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO);
+
+ static void Train(const std::vector<arma::mat>& seqs, arma::mat& guessTR,
+ std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO,
+ size_t max_iter, double tol);
+
+ static void TrainViterbi(const std::vector<arma::mat>& seqs, arma::mat& guessTR,
+ std::vector<arma::vec>& guessME, std::vector<arma::mat>& guessCO,
+ size_t max_iter, double tol);
+};
+
+}; // namespace hmm
+}; // namespace mlpack
+
+#endif
Deleted: mlpack/trunk/src/mlpack/methods/hmm/generate.cc
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/generate.cc 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/generate.cc 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,246 +0,0 @@
-/**
- * @file generate.cc
- *
- * This file contains the program to generate sequences from a Hidden Markov
- * Model.
- *
- * Usage:
- * generate --type=TYPE --profile=PROFILE [OPTCLINS]
- * See the usage() function for complete option list
- */
-#include <mlpack/core.h>
-
-#include "support.h"
-#include "discreteHMM.h"
-#include "gaussianHMM.h"
-#include "mixgaussHMM.h"
-#include "mixtureDST.h"
-
-using namespace mlpack;
-using namespace hmm;
-using namespace hmm_support;
-
-bool generate_discrete();
-bool generate_gaussian();
-bool generate_mixture();
-void usage();
-
-/*const fx_entry_doc hmm_generate_main_entries[] = {
- {"type", FX_REQUIRED, FX_STR, NULL,
- " HMM type : discrete | gaussian | mixture.\n"},
- {"profile", FX_REQUIRED, FX_STR, NULL,
- " A file containing HMM profile.\n"},
- {"length", FX_PARAM, FX_INT, NULL,
- " Sequence length, default = 10.\n"},
- {"lenmax", FX_PARAM, FX_INT, NULL,
- " Maximum sequence length, default = length\n"},
- {"numseq", FX_PARAM, FX_INT, NULL,
- " Number of sequance, default = 10.\n"},
- {"seqfile", FX_PARAM, FX_STR, NULL,
- " Output file for the generated sequences.\n"},
- {"statefile", FX_PARAM, FX_STR, NULL,
- " Output file for the generated state sequences.\n"},
- FX_ENTRY_DOC_DONE
-}; */
-
-PARAM_STRING_REQ("type", "HMM type : discrete | gaussian | mixture.", "hmm");
-PARAM_STRING_REQ("profile", "A file containing HMM profile.", "hmm");
-PARAM_STRING_REQ("seqfile", "Output file for the generated sequences.", "hmm");
-PARAM_STRING_REQ("statefile", "Output file for the generated state sequences.",
- "hmm");
-
-PARAM_INT("length", "Sequence length, default = 10.", "hmm", 10);
-PARAM_INT("lenmax", "Maximum sequence length, default = 10", "hmm", 10);
-PARAM_INT("numseq", "Number of sequance, default = 10.\n", "hmm", 10);
-
-PARAM_MODULE("hmm", "This is a program generating sequences from HMM models.");
-
-/* const fx_submodule_doc hmm_generate_main_submodules[] = {
- FX_SUBMODULE_DOC_DONE
-}; */
-
-/* const fx_module_doc hmm_generate_main_doc = {
- hmm_generate_main_entries, hmm_generate_main_submodules,
- "This is a program generating sequences from HMM models.\n"
-}; */
-
-int main(int argc, char* argv[]) {
-
- CLI::ParseCommandLine(argc, argv);
- bool s = true;
- if (CLI::HasParam("hmm/type")) {
- const char* type = CLI::GetParam<std::string>("hmm/type").c_str();
- if (strcmp(type, "discrete")==0)
- s = generate_discrete();
- else if (strcmp(type, "gaussian")==0)
- s = generate_gaussian();
- else if (strcmp(type, "mixture")==0)
- s = generate_mixture();
- else {
- Log::Fatal << "Unrecognized type: must be: " <<
- "discrete | gaussian | mixture !!!" << std::endl;
- return true;
- }
- }
- else {
- Log::Fatal << "Unrecognized type: must be: " <<
- "discrete | gaussian | mixture !!!" << std::endl;
- s = false;
- }
- if (!(s)) usage();
-
-}
-
-void usage() {
- Log::Info << std::endl << "Usage:" << std::endl;
- Log::Info << " generate --hmm/type=={discrete|gaussian|mixture} OPTCLINS" << std::endl;
- Log::Info << "[OPTCLINS]" << std::endl;
- Log::Info << " --hmm/profile=file : file contains HMM profile" << std::endl;
- Log::Info << " --hmm/length=NUM : sequence length" << std::endl;
- Log::Info << " --hmm/lenmax=NUM : maximum sequence length, default = length" << std::endl;
- Log::Info << " --hmm/numseq=NUM : number of sequence" << std::endl;
- Log::Info << " --hmm/seqfile=file : output file for generated sequences" << std::endl;
- Log::Info << " --hmm/statefile=file : output file for generated state sequences" << std::endl;
-}
-
-bool generate_mixture() {
- if (!CLI::HasParam("hmm/profile")) {
- Log::Fatal << "--hmm/profile must be defined." << std::endl;
- return false;
- }
- const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
- const int seqlen = CLI::GetParam<int>("hmm/length");
- const int seqlmax = CLI::GetParam<int>("hmm/lenmax");
- const int numseq = CLI::GetParam<int>("hmm/numseq");
- const char* seqout = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str();
-
- Log::Assert(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
- Log::Assert(numseq > 0, "NUMSEQ must be positive");
-
- double step = (double) (seqlmax - seqlen) / numseq;
-
- MixtureofGaussianHMM hmm;
- hmm.InitFromFile(profile);
-
- TextWriter w_seq, w_state;
- if (!(w_seq.Open(seqout))) {
- Log::Warn << "Couldn't open '" << seqout << "' for writing." << std::endl;
- return false;
- }
-
- if (!(w_state.Open(stateout))) {
- Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
- return false;
- }
-
- double L = seqlen;
- for (int i = 0; i < numseq; i++, L += step) {
- arma::mat seq;
- arma::vec states;
- char s[100];
-
- hmm.GenerateSequence((int)L, seq, states);
-
- sprintf(s, "%% sequence %d", i);
- print_matrix(w_seq, seq, s, "%E,");
- sprintf(s, "%% state sequence %d", i);
- print_vector(w_state, states, s, "%.0f,");
- }
-
- //printf("---END---");
- return true;
-}
-
-bool generate_gaussian() {
- if (!CLI::HasParam("hmm/profile")) {
- Log::Fatal << "--hmm/profile must be defined." << std::endl;
- return false;
- }
- const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
- const int seqlen = CLI::GetParam<int>("hmm/length");
- const int seqlmax = CLI::GetParam<int>("hmm/lenmax");
- const int numseq = CLI::GetParam<int>("hmm/numseq");
- const char* seqout = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str();
-
- Log::Assert(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
- Log::Assert(numseq > 0, "NUMSEQ must be positive");
-
- double step = (double) (seqlmax - seqlen) / numseq;
-
- GaussianHMM hmm;
- hmm.InitFromFile(profile);
-
- TextWriter w_seq, w_state;
- if (!(w_seq.Open(seqout))) {
- Log::Warn << "Couldn't open '" << seqout << "' for writing." << std::endl;
- return false;
- }
-
- if (!(w_state.Open(stateout))) {
- Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
- return false;
- }
-
- double L = seqlen;
- for (int i = 0; i < numseq; i++, L+=step) {
- arma::mat seq;
- arma::vec states;
- char s[100];
-
- hmm.GenerateSequence((int) L, seq, states);
-
- sprintf(s, "%% sequence %d", i);
- print_matrix(w_seq, seq, s, "%E,");
- sprintf(s, "%% state sequence %d", i);
- print_vector(w_state, states, s, "%.0f,");
- }
- return true;
-}
-
-bool generate_discrete() {
- if (!CLI::HasParam("hmm/profile")) {
- Log::Fatal << "--hmm/profile must be defined." << std::endl;
- return false;
- }
- const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
- const int seqlen = CLI::GetParam<int>("hmm/length");
- const int seqlmax = CLI::GetParam<int>("hmm/lenmax");
- const int numseq = CLI::GetParam<int>("hmm/numseq");
- const char* seqout = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str();
-
- Log::Assert(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
- Log::Assert(numseq > 0, "NUMSEQ must be positive");
-
- double step = (double) (seqlmax - seqlen) / numseq;
-
- DiscreteHMM hmm;
- hmm.InitFromFile(profile);
-
- TextWriter w_seq, w_state;
- if (!(w_seq.Open(seqout))) {
- Log::Warn << "Couldn't open '" << seqout << "' for writing." << std::endl;
- return false;
- }
-
- if (!(w_state.Open(stateout))) {
- Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
- return false;
- }
-
- double L = seqlen;
- for (int i = 0; i < numseq; i++, L+=step) {
- arma::vec seq, states;
- char s[100];
-
- hmm.GenerateSequence((int) L, seq, states);
-
- sprintf(s, "%% sequence %d", i);
- print_vector(w_seq, seq, s, "%.0f,");
- sprintf(s, "%% state sequence %d", i);
- print_vector(w_state, states, s, "%.0f,");
- }
- return true;
-}
Copied: mlpack/trunk/src/mlpack/methods/hmm/generate.cpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/generate.cc)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/generate.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/generate.cpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,246 @@
+/**
+ * @file generate.cc
+ *
+ * This file contains the program to generate sequences from a Hidden Markov
+ * Model.
+ *
+ * Usage:
+ * generate --type=TYPE --profile=PROFILE [OPTCLINS]
+ * See the usage() function for complete option list
+ */
+#include <mlpack/core.h>
+
+#include "support.hpp"
+#include "discreteHMM.hpp"
+#include "gaussianHMM.hpp"
+#include "mixgaussHMM.hpp"
+#include "mixtureDST.hpp"
+
+using namespace mlpack;
+using namespace hmm;
+using namespace hmm_support;
+
+bool generate_discrete();
+bool generate_gaussian();
+bool generate_mixture();
+void usage();
+
+/*const fx_entry_doc hmm_generate_main_entries[] = {
+ {"type", FX_REQUIRED, FX_STR, NULL,
+ " HMM type : discrete | gaussian | mixture.\n"},
+ {"profile", FX_REQUIRED, FX_STR, NULL,
+ " A file containing HMM profile.\n"},
+ {"length", FX_PARAM, FX_INT, NULL,
+ " Sequence length, default = 10.\n"},
+ {"lenmax", FX_PARAM, FX_INT, NULL,
+ " Maximum sequence length, default = length\n"},
+ {"numseq", FX_PARAM, FX_INT, NULL,
+ " Number of sequance, default = 10.\n"},
+ {"seqfile", FX_PARAM, FX_STR, NULL,
+ " Output file for the generated sequences.\n"},
+ {"statefile", FX_PARAM, FX_STR, NULL,
+ " Output file for the generated state sequences.\n"},
+ FX_ENTRY_DOC_DONE
+}; */
+
+PARAM_STRING_REQ("type", "HMM type : discrete | gaussian | mixture.", "hmm");
+PARAM_STRING_REQ("profile", "A file containing HMM profile.", "hmm");
+PARAM_STRING_REQ("seqfile", "Output file for the generated sequences.", "hmm");
+PARAM_STRING_REQ("statefile", "Output file for the generated state sequences.",
+ "hmm");
+
+PARAM_INT("length", "Sequence length, default = 10.", "hmm", 10);
+PARAM_INT("lenmax", "Maximum sequence length, default = 10", "hmm", 10);
+PARAM_INT("numseq", "Number of sequance, default = 10.\n", "hmm", 10);
+
+PARAM_MODULE("hmm", "This is a program generating sequences from HMM models.");
+
+/* const fx_submodule_doc hmm_generate_main_submodules[] = {
+ FX_SUBMODULE_DOC_DONE
+}; */
+
+/* const fx_module_doc hmm_generate_main_doc = {
+ hmm_generate_main_entries, hmm_generate_main_submodules,
+ "This is a program generating sequences from HMM models.\n"
+}; */
+
+int main(int argc, char* argv[]) {
+
+ CLI::ParseCommandLine(argc, argv);
+ bool s = true;
+ if (CLI::HasParam("hmm/type")) {
+ const char* type = CLI::GetParam<std::string>("hmm/type").c_str();
+ if (strcmp(type, "discrete")==0)
+ s = generate_discrete();
+ else if (strcmp(type, "gaussian")==0)
+ s = generate_gaussian();
+ else if (strcmp(type, "mixture")==0)
+ s = generate_mixture();
+ else {
+ Log::Fatal << "Unrecognized type: must be: " <<
+ "discrete | gaussian | mixture !!!" << std::endl;
+ return true;
+ }
+ }
+ else {
+ Log::Fatal << "Unrecognized type: must be: " <<
+ "discrete | gaussian | mixture !!!" << std::endl;
+ s = false;
+ }
+ if (!(s)) usage();
+
+}
+
+void usage() {
+ Log::Info << std::endl << "Usage:" << std::endl;
+ Log::Info << " generate --hmm/type=={discrete|gaussian|mixture} OPTCLINS" << std::endl;
+ Log::Info << "[OPTCLINS]" << std::endl;
+ Log::Info << " --hmm/profile=file : file contains HMM profile" << std::endl;
+ Log::Info << " --hmm/length=NUM : sequence length" << std::endl;
+ Log::Info << " --hmm/lenmax=NUM : maximum sequence length, default = length" << std::endl;
+ Log::Info << " --hmm/numseq=NUM : number of sequence" << std::endl;
+ Log::Info << " --hmm/seqfile=file : output file for generated sequences" << std::endl;
+ Log::Info << " --hmm/statefile=file : output file for generated state sequences" << std::endl;
+}
+
+bool generate_mixture() {
+ if (!CLI::HasParam("hmm/profile")) {
+ Log::Fatal << "--hmm/profile must be defined." << std::endl;
+ return false;
+ }
+ const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
+ const int seqlen = CLI::GetParam<int>("hmm/length");
+ const int seqlmax = CLI::GetParam<int>("hmm/lenmax");
+ const int numseq = CLI::GetParam<int>("hmm/numseq");
+ const char* seqout = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str();
+
+ Log::Assert(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
+ Log::Assert(numseq > 0, "NUMSEQ must be positive");
+
+ double step = (double) (seqlmax - seqlen) / numseq;
+
+ MixtureofGaussianHMM hmm;
+ hmm.InitFromFile(profile);
+
+ TextWriter w_seq, w_state;
+ if (!(w_seq.Open(seqout))) {
+ Log::Warn << "Couldn't open '" << seqout << "' for writing." << std::endl;
+ return false;
+ }
+
+ if (!(w_state.Open(stateout))) {
+ Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
+ return false;
+ }
+
+ double L = seqlen;
+ for (int i = 0; i < numseq; i++, L += step) {
+ arma::mat seq;
+ arma::vec states;
+ char s[100];
+
+ hmm.GenerateSequence((int)L, seq, states);
+
+ sprintf(s, "%% sequence %d", i);
+ print_matrix(w_seq, seq, s, "%E,");
+ sprintf(s, "%% state sequence %d", i);
+ print_vector(w_state, states, s, "%.0f,");
+ }
+
+ //printf("---END---");
+ return true;
+}
+
+bool generate_gaussian() {
+ if (!CLI::HasParam("hmm/profile")) {
+ Log::Fatal << "--hmm/profile must be defined." << std::endl;
+ return false;
+ }
+ const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
+ const int seqlen = CLI::GetParam<int>("hmm/length");
+ const int seqlmax = CLI::GetParam<int>("hmm/lenmax");
+ const int numseq = CLI::GetParam<int>("hmm/numseq");
+ const char* seqout = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str();
+
+ Log::Assert(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
+ Log::Assert(numseq > 0, "NUMSEQ must be positive");
+
+ double step = (double) (seqlmax - seqlen) / numseq;
+
+ GaussianHMM hmm;
+ hmm.InitFromFile(profile);
+
+ TextWriter w_seq, w_state;
+ if (!(w_seq.Open(seqout))) {
+ Log::Warn << "Couldn't open '" << seqout << "' for writing." << std::endl;
+ return false;
+ }
+
+ if (!(w_state.Open(stateout))) {
+ Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
+ return false;
+ }
+
+ double L = seqlen;
+ for (int i = 0; i < numseq; i++, L+=step) {
+ arma::mat seq;
+ arma::vec states;
+ char s[100];
+
+ hmm.GenerateSequence((int) L, seq, states);
+
+ sprintf(s, "%% sequence %d", i);
+ print_matrix(w_seq, seq, s, "%E,");
+ sprintf(s, "%% state sequence %d", i);
+ print_vector(w_state, states, s, "%.0f,");
+ }
+ return true;
+}
+
+bool generate_discrete() {
+ if (!CLI::HasParam("hmm/profile")) {
+ Log::Fatal << "--hmm/profile must be defined." << std::endl;
+ return false;
+ }
+ const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
+ const int seqlen = CLI::GetParam<int>("hmm/length");
+ const int seqlmax = CLI::GetParam<int>("hmm/lenmax");
+ const int numseq = CLI::GetParam<int>("hmm/numseq");
+ const char* seqout = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str();
+
+ Log::Assert(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
+ Log::Assert(numseq > 0, "NUMSEQ must be positive");
+
+ double step = (double) (seqlmax - seqlen) / numseq;
+
+ DiscreteHMM hmm;
+ hmm.InitFromFile(profile);
+
+ TextWriter w_seq, w_state;
+ if (!(w_seq.Open(seqout))) {
+ Log::Warn << "Couldn't open '" << seqout << "' for writing." << std::endl;
+ return false;
+ }
+
+ if (!(w_state.Open(stateout))) {
+ Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
+ return false;
+ }
+
+ double L = seqlen;
+ for (int i = 0; i < numseq; i++, L+=step) {
+ arma::vec seq, states;
+ char s[100];
+
+ hmm.GenerateSequence((int) L, seq, states);
+
+ sprintf(s, "%% sequence %d", i);
+ print_vector(w_seq, seq, s, "%.0f,");
+ sprintf(s, "%% state sequence %d", i);
+ print_vector(w_state, states, s, "%.0f,");
+ }
+ return true;
+}
Deleted: mlpack/trunk/src/mlpack/methods/hmm/loglik.cc
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/loglik.cc 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/loglik.cc 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,181 +0,0 @@
-/**
- * @file loglik.cc
- *
- * This file contains the program to compute log-likelihood of sequences
- * according to a Hidden Markov Model.
- *
- * Usage:
- * loglik --type=TYPE --profile=PROFILE [OPTCLINS]
- * See the usage() function for complete option list
- */
-#include <mlpack/core.h>
-
-#include "support.h"
-#include "discreteHMM.h"
-#include "gaussianHMM.h"
-#include "mixgaussHMM.h"
-#include "mixtureDST.h"
-
-using namespace mlpack;
-using namespace hmm;
-using namespace hmm_support;
-
-bool loglik_discrete();
-bool loglik_gaussian();
-bool loglik_mixture();
-void usage();
-
-/*const fx_entry_doc hmm_loglik_main_entries[] = {
- {"type", FX_REQUIRED, FX_STR, NULL,
- " HMM type : discrete | gaussian | mixture.\n"},
- {"profile", FX_REQUIRED, FX_STR, NULL,
- " A file containing HMM profile.\n"},
- {"seqfile", FX_PARAM, FX_STR, NULL,
- " Output file for the data sequences.\n"},
- {"logfile", FX_PARAM, FX_STR, NULL,
- " Output file for the computed log-likelihood of the sequences.\n"},
- FX_ENTRY_DOC_DONE
-};*/
-
-PARAM_STRING_REQ("type", "HMM type : discrete | gaussian | mixture.", "hmm");
-PARAM_STRING_REQ("profile", "A file containing HMM profile.", "hmm");
-PARAM_STRING("seqfile", "Outputfile for the datasequences.",
- "hmm", "seq.mix.out");
-PARAM_STRING("logfile",
- "Output file for the computed log-likelihood of the sequences.",
- "hmm", "log.mix.out");
-
-PARAM_MODULE("hmm", "This is a program computing log-likelihood of data \nsequences from HMM models.");
-/* const fx_submodule_doc hmm_loglik_main_submodules[] = {
- FX_SUBMODULE_DOC_DONE
-}; */
-
-/* const fx_module_doc hmm_loglik_main_doc = {
- hmm_loglik_main_entries, hmm_loglik_main_submodules,
- "This is a program computing log-likelihood of data sequences \n"
- "from HMM models.\n"
-}; */
-
-int main(int argc, char* argv[]) {
- CLI::ParseCommandLine(argc, argv);
-
- bool s = true;
- if (CLI::HasParam("hmm/type")) {
- const char* type = CLI::GetParam<std::string>("hmm/type").c_str();
- if (strcmp(type, "discrete")==0)
- s = loglik_discrete();
- else if (strcmp(type, "gaussian")==0)
- s = loglik_gaussian();
- else if (strcmp(type, "mixture")==0)
- s = loglik_mixture();
- else {
- Log::Info << "Unrecognized type: must be: discrete | gaussian | mixture !!!";
- s = false;
- }
- }
- else {
- Log::Info << "Unrecognized type: must be: discrete | gaussian | mixture !!!";
- s = false;
- }
- if (!(s)) usage();
-}
-
-void usage() {
- Log::Warn << "\n" << std::endl;
- Log::Warn << "Usage:\n" << std::endl;
- Log::Warn << " loglik --type=={discrete|gaussian|mixture} OPTCLINS" << std::endl;
- Log::Warn << "[OPTCLINS]" << std::endl;
- Log::Warn << " --profile==file : file contains HMM profile" << std::endl;
- Log::Warn << " --seqfile==file : file contains input sequences" << std::endl;
- Log::Warn << " --logfile==file : output file for log-likelihood of the sequences" << std::endl;
-}
-
-bool loglik_mixture() {
- if (!CLI::HasParam("hmm/profile")) {
- Log::Warn << "--profile must be defined." << std::endl;
- return false;
- }
- const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* logout = CLI::GetParam<std::string>("hmm/logfile").c_str();
-
- MixtureofGaussianHMM hmm;
- hmm.InitFromFile(profile);
-
- std::vector<arma::mat> seqs;
- load_matrix_list(seqin, seqs);
-
- TextWriter w_log;
- if (!(w_log.Open(logout))) {
- Log::Warn << "Couldn't open '" << logout << "' for writing." << std::endl;
- return false;
- }
-
- std::vector<double> list_loglik;
- hmm.ComputeLogLikelihood(seqs, list_loglik);
-
- for (size_t i = 0; i < seqs.size(); i++)
- w_log.Printf("%f\n", list_loglik[i]);
-
- return true;
-}
-
-bool loglik_gaussian() {
- if (!CLI::HasParam("hmm/profile")) {
- Log::Warn << "--profile must be defined." << std::endl;
- return false;
- }
- const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* logout = CLI::GetParam<std::string>("hmm/logfile").c_str();
-
- GaussianHMM hmm;
- hmm.InitFromFile(profile);
-
- std::vector<arma::mat> seqs;
- load_matrix_list(seqin, seqs);
-
- TextWriter w_log;
- if (!(w_log.Open(logout))) {
- Log::Warn << "Couldn't open '"<< logout <<"' for writing." << std::endl;
- return false;
- }
-
- std::vector<double> list_loglik;
- hmm.ComputeLogLikelihood(seqs, list_loglik);
-
- for (size_t i = 0; i < seqs.size(); i++)
- w_log.Printf("%f\n", list_loglik[i]);
-
- return true;
-}
-
-bool loglik_discrete() {
- if (!CLI::HasParam("hmm/profile")) {
- Log::Warn << "--profile must be defined." << std::endl;
- return false;
- }
- const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* logout = CLI::GetParam<std::string>("hmm/logfile").c_str();
-
- DiscreteHMM hmm;
- hmm.InitFromFile(profile);
-
- std::vector<arma::vec> seqs;
- load_vector_list(seqin, seqs);
-
- TextWriter w_log;
- if (!(w_log.Open(logout))) {
- Log::Warn << "Couldn't open '"<< logout <<"' for writing." << std::endl;
- return false;
- }
-
- std::vector<double> list_loglik;
- hmm.ComputeLogLikelihood(seqs, list_loglik);
-
- for (size_t i = 0; i < seqs.size(); i++)
- w_log.Printf("%f\n", list_loglik[i]);
-
- return true;
-}
Copied: mlpack/trunk/src/mlpack/methods/hmm/loglik.cpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/loglik.cc)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/loglik.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/loglik.cpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,181 @@
+/**
+ * @file loglik.cc
+ *
+ * This file contains the program to compute log-likelihood of sequences
+ * according to a Hidden Markov Model.
+ *
+ * Usage:
+ * loglik --type=TYPE --profile=PROFILE [OPTCLINS]
+ * See the usage() function for complete option list
+ */
+#include <mlpack/core.h>
+
+#include "support.hpp"
+#include "discreteHMM.hpp"
+#include "gaussianHMM.hpp"
+#include "mixgaussHMM.hpp"
+#include "mixtureDST.hpp"
+
+using namespace mlpack;
+using namespace hmm;
+using namespace hmm_support;
+
+bool loglik_discrete();
+bool loglik_gaussian();
+bool loglik_mixture();
+void usage();
+
+/*const fx_entry_doc hmm_loglik_main_entries[] = {
+ {"type", FX_REQUIRED, FX_STR, NULL,
+ " HMM type : discrete | gaussian | mixture.\n"},
+ {"profile", FX_REQUIRED, FX_STR, NULL,
+ " A file containing HMM profile.\n"},
+ {"seqfile", FX_PARAM, FX_STR, NULL,
+ " Output file for the data sequences.\n"},
+ {"logfile", FX_PARAM, FX_STR, NULL,
+ " Output file for the computed log-likelihood of the sequences.\n"},
+ FX_ENTRY_DOC_DONE
+};*/
+
+PARAM_STRING_REQ("type", "HMM type : discrete | gaussian | mixture.", "hmm");
+PARAM_STRING_REQ("profile", "A file containing HMM profile.", "hmm");
+PARAM_STRING("seqfile", "Outputfile for the datasequences.",
+ "hmm", "seq.mix.out");
+PARAM_STRING("logfile",
+ "Output file for the computed log-likelihood of the sequences.",
+ "hmm", "log.mix.out");
+
+PARAM_MODULE("hmm", "This is a program computing log-likelihood of data \nsequences from HMM models.");
+/* const fx_submodule_doc hmm_loglik_main_submodules[] = {
+ FX_SUBMODULE_DOC_DONE
+}; */
+
+/* const fx_module_doc hmm_loglik_main_doc = {
+ hmm_loglik_main_entries, hmm_loglik_main_submodules,
+ "This is a program computing log-likelihood of data sequences \n"
+ "from HMM models.\n"
+}; */
+
+int main(int argc, char* argv[]) {
+ CLI::ParseCommandLine(argc, argv);
+
+ bool s = true;
+ if (CLI::HasParam("hmm/type")) {
+ const char* type = CLI::GetParam<std::string>("hmm/type").c_str();
+ if (strcmp(type, "discrete")==0)
+ s = loglik_discrete();
+ else if (strcmp(type, "gaussian")==0)
+ s = loglik_gaussian();
+ else if (strcmp(type, "mixture")==0)
+ s = loglik_mixture();
+ else {
+ Log::Info << "Unrecognized type: must be: discrete | gaussian | mixture !!!";
+ s = false;
+ }
+ }
+ else {
+ Log::Info << "Unrecognized type: must be: discrete | gaussian | mixture !!!";
+ s = false;
+ }
+ if (!(s)) usage();
+}
+
+void usage() {
+ Log::Warn << "\n" << std::endl;
+ Log::Warn << "Usage:\n" << std::endl;
+ Log::Warn << " loglik --type=={discrete|gaussian|mixture} OPTCLINS" << std::endl;
+ Log::Warn << "[OPTCLINS]" << std::endl;
+ Log::Warn << " --profile==file : file contains HMM profile" << std::endl;
+ Log::Warn << " --seqfile==file : file contains input sequences" << std::endl;
+ Log::Warn << " --logfile==file : output file for log-likelihood of the sequences" << std::endl;
+}
+
+bool loglik_mixture() {
+ if (!CLI::HasParam("hmm/profile")) {
+ Log::Warn << "--profile must be defined." << std::endl;
+ return false;
+ }
+ const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* logout = CLI::GetParam<std::string>("hmm/logfile").c_str();
+
+ MixtureofGaussianHMM hmm;
+ hmm.InitFromFile(profile);
+
+ std::vector<arma::mat> seqs;
+ load_matrix_list(seqin, seqs);
+
+ TextWriter w_log;
+ if (!(w_log.Open(logout))) {
+ Log::Warn << "Couldn't open '" << logout << "' for writing." << std::endl;
+ return false;
+ }
+
+ std::vector<double> list_loglik;
+ hmm.ComputeLogLikelihood(seqs, list_loglik);
+
+ for (size_t i = 0; i < seqs.size(); i++)
+ w_log.Printf("%f\n", list_loglik[i]);
+
+ return true;
+}
+
+bool loglik_gaussian() {
+ if (!CLI::HasParam("hmm/profile")) {
+ Log::Warn << "--profile must be defined." << std::endl;
+ return false;
+ }
+ const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* logout = CLI::GetParam<std::string>("hmm/logfile").c_str();
+
+ GaussianHMM hmm;
+ hmm.InitFromFile(profile);
+
+ std::vector<arma::mat> seqs;
+ load_matrix_list(seqin, seqs);
+
+ TextWriter w_log;
+ if (!(w_log.Open(logout))) {
+ Log::Warn << "Couldn't open '"<< logout <<"' for writing." << std::endl;
+ return false;
+ }
+
+ std::vector<double> list_loglik;
+ hmm.ComputeLogLikelihood(seqs, list_loglik);
+
+ for (size_t i = 0; i < seqs.size(); i++)
+ w_log.Printf("%f\n", list_loglik[i]);
+
+ return true;
+}
+
+bool loglik_discrete() {
+ if (!CLI::HasParam("hmm/profile")) {
+ Log::Warn << "--profile must be defined." << std::endl;
+ return false;
+ }
+ const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* logout = CLI::GetParam<std::string>("hmm/logfile").c_str();
+
+ DiscreteHMM hmm;
+ hmm.InitFromFile(profile);
+
+ std::vector<arma::vec> seqs;
+ load_vector_list(seqin, seqs);
+
+ TextWriter w_log;
+ if (!(w_log.Open(logout))) {
+ Log::Warn << "Couldn't open '"<< logout <<"' for writing." << std::endl;
+ return false;
+ }
+
+ std::vector<double> list_loglik;
+ hmm.ComputeLogLikelihood(seqs, list_loglik);
+
+ for (size_t i = 0; i < seqs.size(); i++)
+ w_log.Printf("%f\n", list_loglik[i]);
+
+ return true;
+}
Deleted: mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.cc
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.cc 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.cc 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,547 +0,0 @@
-/**
- * @file mixgaussHMM.cc
- *
- * This file contains the implementation of functions in mixgaussHMM.h
- */
-#include <mlpack/core.h>
-
-#include "support.h"
-#include "mixgaussHMM.h"
-#include "gaussianHMM.h"
-
-using namespace mlpack;
-using namespace hmm;
-using namespace hmm_support;
-
-void MixtureofGaussianHMM::setModel(const arma::mat& transmission,
- const std::vector<MixtureGauss>& list_mixture_gauss) {
- mlpack::Log::Assert(transmission.n_rows == transmission.n_cols);
- mlpack::Log::Assert(transmission.n_rows == list_mixture_gauss.size());
-
- transmission_ = transmission;
- list_mixture_gauss_ = list_mixture_gauss;
-}
-
-void MixtureofGaussianHMM::InitFromFile(const char* profile) {
- if (!(MixtureofGaussianHMM::LoadProfile(profile, transmission_, list_mixture_gauss_)))
- mlpack::Log::Fatal << "Couldn't open " << profile << " for reading." <<
- std::endl;
-}
-
-void MixtureofGaussianHMM::LoadProfile(const char* profile) {
- InitFromFile(profile);
-}
-
-void MixtureofGaussianHMM::SaveProfile(const char* profile) const {
- MixtureofGaussianHMM::SaveProfile(profile, transmission_, list_mixture_gauss_);
-}
-
-void MixtureofGaussianHMM::GenerateSequence(size_t L, arma::mat& data_seq, arma::vec& state_seq) const {
- MixtureofGaussianHMM::GenerateInit(L, transmission_, list_mixture_gauss_, data_seq, state_seq);
-}
-
-void MixtureofGaussianHMM::EstimateModel(size_t numcluster, const arma::mat& data_seq,
- const arma::vec& state_seq) {
- MixtureofGaussianHMM::EstimateInit(numcluster, data_seq, state_seq, transmission_,
- list_mixture_gauss_);
-}
-
-void MixtureofGaussianHMM::EstimateModel(size_t numstate, size_t numcluster,
- const arma::mat& data_seq, const arma::vec& state_seq) {
- MixtureofGaussianHMM::EstimateInit(numstate, numcluster, data_seq, state_seq,
- transmission_, list_mixture_gauss_);
-}
-
-void MixtureofGaussianHMM::DecodeOverwrite(const arma::mat& data_seq, arma::mat& state_prob_mat,
- arma::mat& forward_prob_mat,
- arma::mat& backward_prob_mat, arma::vec& scale_vec) const {
- size_t M = transmission_.n_rows;
- size_t L = data_seq.n_cols;
-
- arma::mat emission_prob_mat(M, L);
- MixtureofGaussianHMM::CalculateEmissionProb(data_seq, list_mixture_gauss_,
- emission_prob_mat);
- MixtureofGaussianHMM::Decode(transmission_, emission_prob_mat, state_prob_mat,
- forward_prob_mat, backward_prob_mat, scale_vec);
-}
-
-void MixtureofGaussianHMM::DecodeInit(const arma::mat& data_seq, arma::mat& state_prob_mat,
- arma::mat& forward_prob_mat, arma::mat& backward_prob_mat,
- arma::vec& scale_vec) const {
- size_t M = transmission_.n_rows;
- size_t L = data_seq.n_cols;
- state_prob_mat.set_size(M, L);
- forward_prob_mat.set_size(M, L);
- backward_prob_mat.set_size(M, L);
- scale_vec.set_size(L);
-
- arma::mat emission_prob_mat(M, L);
-
- MixtureofGaussianHMM::CalculateEmissionProb(data_seq, list_mixture_gauss_,
- emission_prob_mat);
- MixtureofGaussianHMM::Decode(transmission_, emission_prob_mat, state_prob_mat,
- forward_prob_mat, backward_prob_mat, scale_vec);
-}
-
-double MixtureofGaussianHMM::ComputeLogLikelihood(const arma::mat& data_seq) const {
- size_t L = data_seq.n_cols;
- size_t M = transmission_.n_rows;
- arma::mat fs(M, L), emis_prob(M, L);
- arma::vec sc(L);
-
- MixtureofGaussianHMM::CalculateEmissionProb(data_seq, list_mixture_gauss_, emis_prob);
- MixtureofGaussianHMM::ForwardProcedure(L, transmission_, emis_prob, sc, fs);
-
- double loglik = 0;
- for (size_t t = 0; t < L; t++)
- loglik += log(sc[t]);
- return loglik;
-}
-
-void MixtureofGaussianHMM::ComputeLogLikelihood(const std::vector<arma::mat>& list_data_seq, std::vector<double>& list_likelihood) const {
- size_t L = 0;
- for (size_t i = 0; i < list_data_seq.size(); i++) {
- if (list_data_seq[i].n_cols > L)
- L = list_data_seq[i].n_cols;
- }
- size_t M = transmission_.n_rows;
- arma::mat fs(M, L), emis_prob(M, L);
- arma::vec sc(L);
-
- for (size_t i = 0; i < list_data_seq.size(); i++) {
- size_t L = list_data_seq[i].n_cols;
-
- MixtureofGaussianHMM::CalculateEmissionProb(list_data_seq[i], list_mixture_gauss_, emis_prob);
- MixtureofGaussianHMM::ForwardProcedure(L, transmission_, emis_prob, sc, fs);
-
- double loglik = 0;
- for (size_t t = 0; t < L; t++)
- loglik += log(sc[t]);
- list_likelihood.push_back(loglik);
- }
-}
-
-void MixtureofGaussianHMM::ComputeViterbiStateSequence(const arma::mat& data_seq, arma::vec& state_seq) const {
- size_t M = transmission_.n_rows;
- size_t L = data_seq.n_cols;
- arma::mat emis_prob(M, L);
- MixtureofGaussianHMM::CalculateEmissionProb(data_seq, list_mixture_gauss_, emis_prob);
- MixtureofGaussianHMM::ViterbiInit(transmission_, emis_prob, state_seq);
-}
-
-void MixtureofGaussianHMM::TrainBaumWelch(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance) {
- MixtureofGaussianHMM::Train(list_data_seq, transmission_, list_mixture_gauss_, max_iteration, tolerance);
-}
-
-void MixtureofGaussianHMM::TrainViterbi(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance) {
- MixtureofGaussianHMM::TrainViterbi(list_data_seq, transmission_, list_mixture_gauss_, max_iteration, tolerance);
-}
-
-bool MixtureofGaussianHMM::LoadProfile(const char* profile, arma::mat& trans, std::vector<MixtureGauss>& mixs) {
- std::vector<arma::mat> matlst;
- if (!(load_matrix_list(profile, matlst))) {
- mlpack::Log::Warn << "Couldn't open " << profile << " for reading." <<
- std::endl;
- return false;
- }
- mlpack::Log::Assert(matlst.size() >= 4); // at least 1 trans, 1 prior, 1 mean, 1 cov
- trans = matlst[0];
- size_t M = trans.n_rows; // num of states
- size_t N = matlst[2].n_rows; // dimension
- size_t p = 1;
- for (size_t i = 0; i < M; i++) {
- size_t K = matlst[p].n_rows; // num of clusters
- //DEBUG: printf("load p=%d K=%d\n", p, K);
- mlpack::Log::Assert(matlst.size() > p + 2 * K);
- MixtureGauss mix;
- mix.InitFromProfile(matlst, p, N);
- mixs.push_back(mix);
- p += (2 * K + 1);
- }
-
- return true;
-}
-
-bool MixtureofGaussianHMM::SaveProfile(const char* profile, const arma::mat& trans, const std::vector<MixtureGauss>& mixs) {
- TextWriter w_pro;
- if (!(w_pro.Open(profile))) {
- mlpack::Log::Warn << "Couldn't open " << profile << " for writing." <<
- std::endl;
- return false;
- }
- size_t M = trans.n_rows; // num of states
- print_matrix(w_pro, trans, "% transmission", "%E,");
- for (size_t i = 0; i < M; i++) {
- size_t K = mixs[i].n_clusters(); // num of clusters
- char s[100];
- sprintf(s, "%% prior - state %zu", i);
- print_vector(w_pro, mixs[i].get_prior(), s, "%E,");
- for (size_t k = 0; k < K; k++) {
- sprintf(s, "%% mean %zu - state %zu", k, i);
- print_vector(w_pro, mixs[i].get_mean(k), s, "%E,");
- sprintf(s, "%% covariance %zu - state %zu", k, i);
- print_matrix(w_pro, mixs[i].get_cov(k), s, "%E,");
- }
- }
-
- return true;
-}
-
-void MixtureofGaussianHMM::GenerateInit(size_t L, const arma::mat& trans, const std::vector<MixtureGauss>& mixs, arma::mat& seq, arma::vec& states){
- mlpack::Log::Assert((trans.n_rows == trans.n_cols && trans.n_rows == mixs.size()),
- "MixtureOfGaussianHMM::GenerateInit(): matrices sizes do not match");
-
- arma::mat trsum;
- size_t M, N;
- size_t cur_state;
-
- M = trans.n_rows;
- N = mixs[0].v_length(); // emission vector length
-
- trsum = trans;
-
- for (size_t i = 0; i < M; i++) {
- for (size_t j = 1; j < M; j++)
- trsum(i, j) += trsum(i, j - 1);
- }
-
- seq.set_size(N, L);
- states.set_size(L);
-
- cur_state = 0; // starting state is 0
-
- for (size_t i = 0; i < L; i++) {
- size_t j;
-
- // next state
- double r = RAND_UNIFORM_01();
- for (j = 0; j < M; j++) {
- if (r <= trsum(cur_state, j))
- break;
- }
- cur_state = j;
-
- // emission
- arma::vec e;
- mixs[cur_state].generate(e);
- for (j = 0; j < N; j++)
- seq(j, i) = e[j];
- states[i] = cur_state;
- }
-}
-
-void MixtureofGaussianHMM::EstimateInit(size_t numStates, size_t numClusters, const arma::mat& seq, const arma::vec& states, arma::mat& trans, std::vector<MixtureGauss>& mixs) {
- mlpack::Log::Assert((seq.n_cols == states.n_elem),
- "MixtureOfGaussianHMM::EstimateInit(): sequence and states length must be the same");
-
- size_t N = seq.n_rows; // emission vector length
- size_t M = numStates; // number of states
- size_t L = seq.n_cols; // sequence length
- size_t K = numClusters;
-
- trans.zeros(M, M);
- arma::vec stateSum(M);
- stateSum.zeros();
-
- for (size_t i = 0; i < L - 1; i++) {
- size_t state = (size_t) states[i];
- size_t next_state = (size_t) states[i + 1];
- stateSum[state]++;
- trans(state, next_state)++;
- }
-
- for (size_t i = 0; i < M; i++) {
- if (stateSum[i] == 0)
- stateSum[i] = -INFINITY;
-
- for (size_t j = 0; j < M; j++)
- trans(i, j) /= stateSum[i];
- }
-
- std::vector<arma::mat> data;
- arma::vec n_data(M);
- n_data.zeros();
-
- for (size_t i = 0; i < L; i++) {
- size_t state = (size_t) states[i];
- n_data[state]++;
- }
-
- for (size_t i = 0; i < M; i++) {
- arma::mat m(N, (size_t) n_data[i]);
- //printf("n[%d]=%8.0f\n", i, n_data[i]);
- data.push_back(m);
- }
-
- n_data.zeros();
- for (size_t i = 0; i < L; i++) {
- size_t state = (size_t) states[i];
- for (size_t j = 0; j < N; j++)
- data[state](j, (size_t) n_data[state]) = seq(j, i);
-
- n_data[state]++;
- //printf("%d %d %8.0f\n", i, state, n_data[state]);
- }
-
- for (size_t i = 0; i < M; i++) {
- std::vector<size_t> labels;
- std::vector<arma::vec> means;
- kmeans(data[i], K, labels, means, 500, 1e-3);
-
- //printf("STATE #%d %d\n", i, K);
- MixtureGauss m;
- m.Init(K, data[i], labels);
- mixs.push_back(m);
- }
-}
-
-void MixtureofGaussianHMM::EstimateInit(size_t NumClusters, const arma::mat& seq, const arma::vec& states, arma::mat& trans, std::vector<MixtureGauss>& mixs) {
- mlpack::Log::Assert((seq.n_cols == states.n_elem),
- "MixtureofGaussianHMM::EstimateInit(): sequence and states length must be the same");
-
- size_t M = 0;
- for (size_t i = 0; i < seq.n_cols; i++) {
- if (states[i] > M)
- M = (size_t) states[i];
- }
- M++;
- MixtureofGaussianHMM::EstimateInit(M, NumClusters, seq, states, trans, mixs);
-}
-
-void MixtureofGaussianHMM::ForwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& scales, arma::mat& fs) {
- GaussianHMM::ForwardProcedure(L, trans, emis_prob, scales, fs);
-}
-
-void MixtureofGaussianHMM::BackwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob, const arma::vec& scales, arma::mat& bs) {
- GaussianHMM::BackwardProcedure(L, trans, emis_prob, scales, bs);
-}
-
-double MixtureofGaussianHMM::Decode(const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales) {
- return GaussianHMM::Decode(trans, emis_prob, pstates, fs, bs, scales);
-}
-
-double MixtureofGaussianHMM::Decode(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales) {
- return GaussianHMM::Decode(L, trans, emis_prob, pstates, fs, bs, scales);
-}
-
-double MixtureofGaussianHMM::ViterbiInit(const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states) {
- return GaussianHMM::ViterbiInit(trans, emis_prob, states);
-}
-
-double MixtureofGaussianHMM::ViterbiInit(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states) {
- return GaussianHMM::ViterbiInit(L, trans, emis_prob, states);
-}
-
-void MixtureofGaussianHMM::CalculateEmissionProb(const arma::mat& seq, const std::vector<MixtureGauss>& mixs, arma::mat& emis_prob) {
- size_t M = mixs.size();
- size_t L = seq.n_cols;
- for (size_t t = 0; t < L; t++) {
- arma::vec e = seq.unsafe_col(t);
-
- for (size_t i = 0; i < M; i++)
- emis_prob(i, t) = mixs[i].getPDF(e);
- }
-}
-
-void MixtureofGaussianHMM::Train(const std::vector<arma::mat>& seqs, arma::mat& guessTR, std::vector<MixtureGauss>& guessMG, size_t max_iter, double tol) {
- size_t L = -1;
- size_t M = guessTR.n_rows;
-
- mlpack::Log::Assert((M == guessTR.n_cols && M == guessMG.size()),
- "MixtureofGaussianHMM::Train(): sizes do not match");
-
- for (size_t i = 0; i < seqs.size(); i++) {
- if (seqs[i].n_cols > L)
- L = seqs[i].n_cols;
- }
-
- arma::mat TR(M, M); // guess transition and emission matrix
-
- arma::mat ps(M, L), fs(M, L), bs(M, L), emis_prob(M, L); // to hold hmm_decodeG results
- std::vector<arma::mat> emis_prob_cluster;
- arma::vec s(L); // scaling factors
- arma::vec sumState(M); // the denominator for each state
-
- for (size_t i = 0; i < M; i++) {
- arma::mat m;
- size_t K = guessMG[i].n_clusters();
- m.set_size(K, L);
- emis_prob_cluster.push_back(m);
- }
-
- double loglik = 0, oldlog;
- for (size_t iter = 0; iter < max_iter; iter++) {
- oldlog = loglik;
- loglik = 0;
-
- // set the accumulating values to zeros and compute the inverse matrices and determinant constants
- TR.zeros();
- for (size_t i = 0; i < M; i++)
- guessMG[i].start_accumulate();
-
- sumState.zeros();
-
- // for each sequence, we will use forward-backward procedure and then accumulate
- for (size_t idx = 0; idx < seqs.size(); idx++) {
- // first calculate the emission probabilities of the sequence
- L = seqs[idx].n_cols;
- for (size_t t = 0; t < L; t++) {
- arma::vec e = seqs[idx].unsafe_col(t);
- for (size_t i = 0; i < M; i++) {
- double s = 0;
- size_t K = guessMG[i].n_clusters();
- for (size_t j = 0; j < K; j++) {
- emis_prob_cluster[i](j, t) = guessMG[i].getPDF(j, e);
- s += emis_prob_cluster[i](j, t);
- }
- emis_prob(i, t) = s;
- }
- }
-
- loglik += MixtureofGaussianHMM::Decode(L, guessTR, emis_prob, ps, fs, bs, s); // forward - backward procedure
-
- // accumulate expected transition & gaussian mixture parameters
- for (size_t t = 0; t < L-1; t++) {
- for (size_t i = 0; i < M; i++)
- for (size_t j = 0; j < M; j++)
- TR(i, j) += fs(i, t) * guessTR(i, j) * emis_prob(j, t + 1) * bs(j, t + 1) / s[t + 1];
- }
-
- for (size_t t = 0; t < L; t++) {
- arma::vec e = seqs[idx].unsafe_col(t);
- for (size_t i = 0; i < M; i++) {
- double v = ps(i, t);
- size_t K = guessMG[i].n_clusters();
- for (size_t j = 0; j < K; j++)
- guessMG[i].accumulate(v * emis_prob_cluster[i](j, t) / emis_prob(i, t), j, e);
- }
- }
- // end accumulate
- }
-
- // after accumulate all sequences: re-estimate transition & mean & covariance for the next iteration
- for (size_t i = 0; i < M; i++) {
- double s = 0;
- for (size_t j = 0; j < M; j++)
- s += TR(i, j);
-
- if (s == 0) {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = 0;
- guessTR(i, i) = 1;
- } else {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = TR(i, j) / s;
- }
-
- guessMG[i].end_accumulate();
- }
- // end re-estimate
-
- printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
- if (fabs(oldlog - loglik) < tol) {
- printf("\nConverged after %zu iterations\n", iter);
- break;
- }
- oldlog = loglik;
- }
-}
-
-void MixtureofGaussianHMM::TrainViterbi(const std::vector<arma::mat>& seqs, arma::mat& guessTR, std::vector<MixtureGauss>& guessMG, size_t max_iter, double tol) {
- size_t L = -1;
- size_t M = guessTR.n_rows;
- mlpack::Log::Assert((M == guessTR.n_cols && M == guessMG.size()),
- "MixtureofGaussianHMM::TrainViterbi(): sizes do not match");
-
- for (size_t i = 0; i < seqs.size(); i++)
- if (seqs[i].n_cols > L)
- L = seqs[i].n_cols;
-
- arma::mat TR(M, M); // guess transition and emission matrix
-
- arma::mat emis_prob(M, L); // to hold hmm_decodeG results
- std::vector<arma::mat> emis_prob_cluster;
-
- for (size_t i = 0; i < M; i++) {
- arma::mat m;
- size_t K = guessMG[i].n_clusters();
- m.set_size(K, L);
- emis_prob_cluster.push_back(m);
- }
-
- double loglik = 0, oldlog;
- for (size_t iter = 0; iter < max_iter; iter++) {
- oldlog = loglik;
- loglik = 0;
-
- // set the accumulating values to zeros and compute the inverse matrices and determinant constants
- TR.zeros();
- for (size_t i = 0; i < M; i++)
- guessMG[i].start_accumulate();
-
- // for each sequence, we will use viterbi procedure to find the most probable state sequence and then accumulate
- for (size_t idx = 0; idx < seqs.size(); idx++) {
- arma::vec states;
- // first calculate the emission probabilities of the sequence
- L = seqs[idx].n_cols;
- for (size_t t = 0; t < L; t++) {
- arma::vec e = seqs[idx].unsafe_col(t);
- for (size_t i = 0; i < M; i++) {
- double s = 0;
- size_t K = guessMG[i].n_clusters();
- for (size_t j = 0; j < K; j++) {
- emis_prob_cluster[i](j, t) = guessMG[i].getPDF(j, e);
- s += emis_prob_cluster[i](j, t);
- }
- emis_prob(i, t) = s;
- }
- }
-
- loglik += GaussianHMM::ViterbiInit(L, guessTR, emis_prob, states); // viterbi procedure
-
- // accumulate expected transition & gaussian mixture parameters
- for (size_t t = 0; t < L-1; t++) {
- size_t i = (size_t) states[t];
- size_t j = (size_t) states[t + 1];
- TR(i, j)++;
- }
-
- for (size_t t = 0; t < L; t++) {
- arma::vec e = seqs[idx].unsafe_col(t);
- size_t i = (size_t) states[t];
- size_t K = guessMG[i].n_clusters();
- for (size_t j = 0; j < K; j++)
- guessMG[i].accumulate(emis_prob_cluster[i](j, t) / emis_prob(i, t), j, e);
- }
- // end accumulate
- }
-
- // after accumulate all sequences: re-estimate transition & mean & covariance for the next iteration
- for (size_t i = 0; i < M; i++) {
- double s = 0;
- for (size_t j = 0; j < M; j++)
- s += TR(i, j);
-
- if (s == 0) {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = 0;
- guessTR(i, i) = 1;
- }
- else {
- for (size_t j = 0; j < M; j++)
- guessTR(i, j) = TR(i, j) / s;
- }
-
- guessMG[i].end_accumulate();
- }
- // end re-estimate
-
- printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
- if (fabs(oldlog - loglik) < tol) {
- printf("\nConverged after %zu iterations\n", iter);
- break;
- }
- oldlog = loglik;
- }
-}
-
Copied: mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.cpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.cc)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.cpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,547 @@
+/**
+ * @file mixgaussHMM.cc
+ *
+ * This file contains the implementation of functions in mixgaussHMM.h
+ */
+#include <mlpack/core.h>
+
+#include "support.hpp"
+#include "mixgaussHMM.hpp"
+#include "gaussianHMM.hpp"
+
+using namespace mlpack;
+using namespace hmm;
+using namespace hmm_support;
+
+void MixtureofGaussianHMM::setModel(const arma::mat& transmission,
+ const std::vector<MixtureGauss>& list_mixture_gauss) {
+ mlpack::Log::Assert(transmission.n_rows == transmission.n_cols);
+ mlpack::Log::Assert(transmission.n_rows == list_mixture_gauss.size());
+
+ transmission_ = transmission;
+ list_mixture_gauss_ = list_mixture_gauss;
+}
+
+void MixtureofGaussianHMM::InitFromFile(const char* profile) {
+ if (!(MixtureofGaussianHMM::LoadProfile(profile, transmission_, list_mixture_gauss_)))
+ mlpack::Log::Fatal << "Couldn't open " << profile << " for reading." <<
+ std::endl;
+}
+
+void MixtureofGaussianHMM::LoadProfile(const char* profile) {
+ InitFromFile(profile);
+}
+
+void MixtureofGaussianHMM::SaveProfile(const char* profile) const {
+ MixtureofGaussianHMM::SaveProfile(profile, transmission_, list_mixture_gauss_);
+}
+
+void MixtureofGaussianHMM::GenerateSequence(size_t L, arma::mat& data_seq, arma::vec& state_seq) const {
+ MixtureofGaussianHMM::GenerateInit(L, transmission_, list_mixture_gauss_, data_seq, state_seq);
+}
+
+void MixtureofGaussianHMM::EstimateModel(size_t numcluster, const arma::mat& data_seq,
+ const arma::vec& state_seq) {
+ MixtureofGaussianHMM::EstimateInit(numcluster, data_seq, state_seq, transmission_,
+ list_mixture_gauss_);
+}
+
+void MixtureofGaussianHMM::EstimateModel(size_t numstate, size_t numcluster,
+ const arma::mat& data_seq, const arma::vec& state_seq) {
+ MixtureofGaussianHMM::EstimateInit(numstate, numcluster, data_seq, state_seq,
+ transmission_, list_mixture_gauss_);
+}
+
+void MixtureofGaussianHMM::DecodeOverwrite(const arma::mat& data_seq, arma::mat& state_prob_mat,
+ arma::mat& forward_prob_mat,
+ arma::mat& backward_prob_mat, arma::vec& scale_vec) const {
+ size_t M = transmission_.n_rows;
+ size_t L = data_seq.n_cols;
+
+ arma::mat emission_prob_mat(M, L);
+ MixtureofGaussianHMM::CalculateEmissionProb(data_seq, list_mixture_gauss_,
+ emission_prob_mat);
+ MixtureofGaussianHMM::Decode(transmission_, emission_prob_mat, state_prob_mat,
+ forward_prob_mat, backward_prob_mat, scale_vec);
+}
+
+void MixtureofGaussianHMM::DecodeInit(const arma::mat& data_seq, arma::mat& state_prob_mat,
+ arma::mat& forward_prob_mat, arma::mat& backward_prob_mat,
+ arma::vec& scale_vec) const {
+ size_t M = transmission_.n_rows;
+ size_t L = data_seq.n_cols;
+ state_prob_mat.set_size(M, L);
+ forward_prob_mat.set_size(M, L);
+ backward_prob_mat.set_size(M, L);
+ scale_vec.set_size(L);
+
+ arma::mat emission_prob_mat(M, L);
+
+ MixtureofGaussianHMM::CalculateEmissionProb(data_seq, list_mixture_gauss_,
+ emission_prob_mat);
+ MixtureofGaussianHMM::Decode(transmission_, emission_prob_mat, state_prob_mat,
+ forward_prob_mat, backward_prob_mat, scale_vec);
+}
+
+double MixtureofGaussianHMM::ComputeLogLikelihood(const arma::mat& data_seq) const {
+ size_t L = data_seq.n_cols;
+ size_t M = transmission_.n_rows;
+ arma::mat fs(M, L), emis_prob(M, L);
+ arma::vec sc(L);
+
+ MixtureofGaussianHMM::CalculateEmissionProb(data_seq, list_mixture_gauss_, emis_prob);
+ MixtureofGaussianHMM::ForwardProcedure(L, transmission_, emis_prob, sc, fs);
+
+ double loglik = 0;
+ for (size_t t = 0; t < L; t++)
+ loglik += log(sc[t]);
+ return loglik;
+}
+
+void MixtureofGaussianHMM::ComputeLogLikelihood(const std::vector<arma::mat>& list_data_seq, std::vector<double>& list_likelihood) const {
+ size_t L = 0;
+ for (size_t i = 0; i < list_data_seq.size(); i++) {
+ if (list_data_seq[i].n_cols > L)
+ L = list_data_seq[i].n_cols;
+ }
+ size_t M = transmission_.n_rows;
+ arma::mat fs(M, L), emis_prob(M, L);
+ arma::vec sc(L);
+
+ for (size_t i = 0; i < list_data_seq.size(); i++) {
+ size_t L = list_data_seq[i].n_cols;
+
+ MixtureofGaussianHMM::CalculateEmissionProb(list_data_seq[i], list_mixture_gauss_, emis_prob);
+ MixtureofGaussianHMM::ForwardProcedure(L, transmission_, emis_prob, sc, fs);
+
+ double loglik = 0;
+ for (size_t t = 0; t < L; t++)
+ loglik += log(sc[t]);
+ list_likelihood.push_back(loglik);
+ }
+}
+
+void MixtureofGaussianHMM::ComputeViterbiStateSequence(const arma::mat& data_seq, arma::vec& state_seq) const {
+ size_t M = transmission_.n_rows;
+ size_t L = data_seq.n_cols;
+ arma::mat emis_prob(M, L);
+ MixtureofGaussianHMM::CalculateEmissionProb(data_seq, list_mixture_gauss_, emis_prob);
+ MixtureofGaussianHMM::ViterbiInit(transmission_, emis_prob, state_seq);
+}
+
+void MixtureofGaussianHMM::TrainBaumWelch(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance) {
+ MixtureofGaussianHMM::Train(list_data_seq, transmission_, list_mixture_gauss_, max_iteration, tolerance);
+}
+
+void MixtureofGaussianHMM::TrainViterbi(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance) {
+ MixtureofGaussianHMM::TrainViterbi(list_data_seq, transmission_, list_mixture_gauss_, max_iteration, tolerance);
+}
+
+bool MixtureofGaussianHMM::LoadProfile(const char* profile, arma::mat& trans, std::vector<MixtureGauss>& mixs) {
+ std::vector<arma::mat> matlst;
+ if (!(load_matrix_list(profile, matlst))) {
+ mlpack::Log::Warn << "Couldn't open " << profile << " for reading." <<
+ std::endl;
+ return false;
+ }
+ mlpack::Log::Assert(matlst.size() >= 4); // at least 1 trans, 1 prior, 1 mean, 1 cov
+ trans = matlst[0];
+ size_t M = trans.n_rows; // num of states
+ size_t N = matlst[2].n_rows; // dimension
+ size_t p = 1;
+ for (size_t i = 0; i < M; i++) {
+ size_t K = matlst[p].n_rows; // num of clusters
+ //DEBUG: printf("load p=%d K=%d\n", p, K);
+ mlpack::Log::Assert(matlst.size() > p + 2 * K);
+ MixtureGauss mix;
+ mix.InitFromProfile(matlst, p, N);
+ mixs.push_back(mix);
+ p += (2 * K + 1);
+ }
+
+ return true;
+}
+
+bool MixtureofGaussianHMM::SaveProfile(const char* profile, const arma::mat& trans, const std::vector<MixtureGauss>& mixs) {
+ TextWriter w_pro;
+ if (!(w_pro.Open(profile))) {
+ mlpack::Log::Warn << "Couldn't open " << profile << " for writing." <<
+ std::endl;
+ return false;
+ }
+ size_t M = trans.n_rows; // num of states
+ print_matrix(w_pro, trans, "% transmission", "%E,");
+ for (size_t i = 0; i < M; i++) {
+ size_t K = mixs[i].n_clusters(); // num of clusters
+ char s[100];
+ sprintf(s, "%% prior - state %zu", i);
+ print_vector(w_pro, mixs[i].get_prior(), s, "%E,");
+ for (size_t k = 0; k < K; k++) {
+ sprintf(s, "%% mean %zu - state %zu", k, i);
+ print_vector(w_pro, mixs[i].get_mean(k), s, "%E,");
+ sprintf(s, "%% covariance %zu - state %zu", k, i);
+ print_matrix(w_pro, mixs[i].get_cov(k), s, "%E,");
+ }
+ }
+
+ return true;
+}
+
+void MixtureofGaussianHMM::GenerateInit(size_t L, const arma::mat& trans, const std::vector<MixtureGauss>& mixs, arma::mat& seq, arma::vec& states){
+ mlpack::Log::Assert((trans.n_rows == trans.n_cols && trans.n_rows == mixs.size()),
+ "MixtureOfGaussianHMM::GenerateInit(): matrices sizes do not match");
+
+ arma::mat trsum;
+ size_t M, N;
+ size_t cur_state;
+
+ M = trans.n_rows;
+ N = mixs[0].v_length(); // emission vector length
+
+ trsum = trans;
+
+ for (size_t i = 0; i < M; i++) {
+ for (size_t j = 1; j < M; j++)
+ trsum(i, j) += trsum(i, j - 1);
+ }
+
+ seq.set_size(N, L);
+ states.set_size(L);
+
+ cur_state = 0; // starting state is 0
+
+ for (size_t i = 0; i < L; i++) {
+ size_t j;
+
+ // next state
+ double r = RAND_UNIFORM_01();
+ for (j = 0; j < M; j++) {
+ if (r <= trsum(cur_state, j))
+ break;
+ }
+ cur_state = j;
+
+ // emission
+ arma::vec e;
+ mixs[cur_state].generate(e);
+ for (j = 0; j < N; j++)
+ seq(j, i) = e[j];
+ states[i] = cur_state;
+ }
+}
+
+void MixtureofGaussianHMM::EstimateInit(size_t numStates, size_t numClusters, const arma::mat& seq, const arma::vec& states, arma::mat& trans, std::vector<MixtureGauss>& mixs) {
+ mlpack::Log::Assert((seq.n_cols == states.n_elem),
+ "MixtureOfGaussianHMM::EstimateInit(): sequence and states length must be the same");
+
+ size_t N = seq.n_rows; // emission vector length
+ size_t M = numStates; // number of states
+ size_t L = seq.n_cols; // sequence length
+ size_t K = numClusters;
+
+ trans.zeros(M, M);
+ arma::vec stateSum(M);
+ stateSum.zeros();
+
+ for (size_t i = 0; i < L - 1; i++) {
+ size_t state = (size_t) states[i];
+ size_t next_state = (size_t) states[i + 1];
+ stateSum[state]++;
+ trans(state, next_state)++;
+ }
+
+ for (size_t i = 0; i < M; i++) {
+ if (stateSum[i] == 0)
+ stateSum[i] = -INFINITY;
+
+ for (size_t j = 0; j < M; j++)
+ trans(i, j) /= stateSum[i];
+ }
+
+ std::vector<arma::mat> data;
+ arma::vec n_data(M);
+ n_data.zeros();
+
+ for (size_t i = 0; i < L; i++) {
+ size_t state = (size_t) states[i];
+ n_data[state]++;
+ }
+
+ for (size_t i = 0; i < M; i++) {
+ arma::mat m(N, (size_t) n_data[i]);
+ //printf("n[%d]=%8.0f\n", i, n_data[i]);
+ data.push_back(m);
+ }
+
+ n_data.zeros();
+ for (size_t i = 0; i < L; i++) {
+ size_t state = (size_t) states[i];
+ for (size_t j = 0; j < N; j++)
+ data[state](j, (size_t) n_data[state]) = seq(j, i);
+
+ n_data[state]++;
+ //printf("%d %d %8.0f\n", i, state, n_data[state]);
+ }
+
+ for (size_t i = 0; i < M; i++) {
+ std::vector<size_t> labels;
+ std::vector<arma::vec> means;
+ kmeans(data[i], K, labels, means, 500, 1e-3);
+
+ //printf("STATE #%d %d\n", i, K);
+ MixtureGauss m;
+ m.Init(K, data[i], labels);
+ mixs.push_back(m);
+ }
+}
+
+void MixtureofGaussianHMM::EstimateInit(size_t NumClusters, const arma::mat& seq, const arma::vec& states, arma::mat& trans, std::vector<MixtureGauss>& mixs) {
+ mlpack::Log::Assert((seq.n_cols == states.n_elem),
+ "MixtureofGaussianHMM::EstimateInit(): sequence and states length must be the same");
+
+ size_t M = 0;
+ for (size_t i = 0; i < seq.n_cols; i++) {
+ if (states[i] > M)
+ M = (size_t) states[i];
+ }
+ M++;
+ MixtureofGaussianHMM::EstimateInit(M, NumClusters, seq, states, trans, mixs);
+}
+
+void MixtureofGaussianHMM::ForwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& scales, arma::mat& fs) {
+ GaussianHMM::ForwardProcedure(L, trans, emis_prob, scales, fs);
+}
+
+void MixtureofGaussianHMM::BackwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob, const arma::vec& scales, arma::mat& bs) {
+ GaussianHMM::BackwardProcedure(L, trans, emis_prob, scales, bs);
+}
+
+double MixtureofGaussianHMM::Decode(const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales) {
+ return GaussianHMM::Decode(trans, emis_prob, pstates, fs, bs, scales);
+}
+
+double MixtureofGaussianHMM::Decode(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales) {
+ return GaussianHMM::Decode(L, trans, emis_prob, pstates, fs, bs, scales);
+}
+
+double MixtureofGaussianHMM::ViterbiInit(const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states) {
+ return GaussianHMM::ViterbiInit(trans, emis_prob, states);
+}
+
+double MixtureofGaussianHMM::ViterbiInit(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states) {
+ return GaussianHMM::ViterbiInit(L, trans, emis_prob, states);
+}
+
+void MixtureofGaussianHMM::CalculateEmissionProb(const arma::mat& seq, const std::vector<MixtureGauss>& mixs, arma::mat& emis_prob) {
+ size_t M = mixs.size();
+ size_t L = seq.n_cols;
+ for (size_t t = 0; t < L; t++) {
+ arma::vec e = seq.unsafe_col(t);
+
+ for (size_t i = 0; i < M; i++)
+ emis_prob(i, t) = mixs[i].getPDF(e);
+ }
+}
+
+void MixtureofGaussianHMM::Train(const std::vector<arma::mat>& seqs, arma::mat& guessTR, std::vector<MixtureGauss>& guessMG, size_t max_iter, double tol) {
+ size_t L = -1;
+ size_t M = guessTR.n_rows;
+
+ mlpack::Log::Assert((M == guessTR.n_cols && M == guessMG.size()),
+ "MixtureofGaussianHMM::Train(): sizes do not match");
+
+ for (size_t i = 0; i < seqs.size(); i++) {
+ if (seqs[i].n_cols > L)
+ L = seqs[i].n_cols;
+ }
+
+ arma::mat TR(M, M); // guess transition and emission matrix
+
+ arma::mat ps(M, L), fs(M, L), bs(M, L), emis_prob(M, L); // to hold hmm_decodeG results
+ std::vector<arma::mat> emis_prob_cluster;
+ arma::vec s(L); // scaling factors
+ arma::vec sumState(M); // the denominator for each state
+
+ for (size_t i = 0; i < M; i++) {
+ arma::mat m;
+ size_t K = guessMG[i].n_clusters();
+ m.set_size(K, L);
+ emis_prob_cluster.push_back(m);
+ }
+
+ double loglik = 0, oldlog;
+ for (size_t iter = 0; iter < max_iter; iter++) {
+ oldlog = loglik;
+ loglik = 0;
+
+ // set the accumulating values to zeros and compute the inverse matrices and determinant constants
+ TR.zeros();
+ for (size_t i = 0; i < M; i++)
+ guessMG[i].start_accumulate();
+
+ sumState.zeros();
+
+ // for each sequence, we will use forward-backward procedure and then accumulate
+ for (size_t idx = 0; idx < seqs.size(); idx++) {
+ // first calculate the emission probabilities of the sequence
+ L = seqs[idx].n_cols;
+ for (size_t t = 0; t < L; t++) {
+ arma::vec e = seqs[idx].unsafe_col(t);
+ for (size_t i = 0; i < M; i++) {
+ double s = 0;
+ size_t K = guessMG[i].n_clusters();
+ for (size_t j = 0; j < K; j++) {
+ emis_prob_cluster[i](j, t) = guessMG[i].getPDF(j, e);
+ s += emis_prob_cluster[i](j, t);
+ }
+ emis_prob(i, t) = s;
+ }
+ }
+
+ loglik += MixtureofGaussianHMM::Decode(L, guessTR, emis_prob, ps, fs, bs, s); // forward - backward procedure
+
+ // accumulate expected transition & gaussian mixture parameters
+ for (size_t t = 0; t < L-1; t++) {
+ for (size_t i = 0; i < M; i++)
+ for (size_t j = 0; j < M; j++)
+ TR(i, j) += fs(i, t) * guessTR(i, j) * emis_prob(j, t + 1) * bs(j, t + 1) / s[t + 1];
+ }
+
+ for (size_t t = 0; t < L; t++) {
+ arma::vec e = seqs[idx].unsafe_col(t);
+ for (size_t i = 0; i < M; i++) {
+ double v = ps(i, t);
+ size_t K = guessMG[i].n_clusters();
+ for (size_t j = 0; j < K; j++)
+ guessMG[i].accumulate(v * emis_prob_cluster[i](j, t) / emis_prob(i, t), j, e);
+ }
+ }
+ // end accumulate
+ }
+
+ // after accumulate all sequences: re-estimate transition & mean & covariance for the next iteration
+ for (size_t i = 0; i < M; i++) {
+ double s = 0;
+ for (size_t j = 0; j < M; j++)
+ s += TR(i, j);
+
+ if (s == 0) {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = 0;
+ guessTR(i, i) = 1;
+ } else {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = TR(i, j) / s;
+ }
+
+ guessMG[i].end_accumulate();
+ }
+ // end re-estimate
+
+ printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
+ if (fabs(oldlog - loglik) < tol) {
+ printf("\nConverged after %zu iterations\n", iter);
+ break;
+ }
+ oldlog = loglik;
+ }
+}
+
+void MixtureofGaussianHMM::TrainViterbi(const std::vector<arma::mat>& seqs, arma::mat& guessTR, std::vector<MixtureGauss>& guessMG, size_t max_iter, double tol) {
+ size_t L = -1;
+ size_t M = guessTR.n_rows;
+ mlpack::Log::Assert((M == guessTR.n_cols && M == guessMG.size()),
+ "MixtureofGaussianHMM::TrainViterbi(): sizes do not match");
+
+ for (size_t i = 0; i < seqs.size(); i++)
+ if (seqs[i].n_cols > L)
+ L = seqs[i].n_cols;
+
+ arma::mat TR(M, M); // guess transition and emission matrix
+
+ arma::mat emis_prob(M, L); // to hold hmm_decodeG results
+ std::vector<arma::mat> emis_prob_cluster;
+
+ for (size_t i = 0; i < M; i++) {
+ arma::mat m;
+ size_t K = guessMG[i].n_clusters();
+ m.set_size(K, L);
+ emis_prob_cluster.push_back(m);
+ }
+
+ double loglik = 0, oldlog;
+ for (size_t iter = 0; iter < max_iter; iter++) {
+ oldlog = loglik;
+ loglik = 0;
+
+ // set the accumulating values to zeros and compute the inverse matrices and determinant constants
+ TR.zeros();
+ for (size_t i = 0; i < M; i++)
+ guessMG[i].start_accumulate();
+
+ // for each sequence, we will use viterbi procedure to find the most probable state sequence and then accumulate
+ for (size_t idx = 0; idx < seqs.size(); idx++) {
+ arma::vec states;
+ // first calculate the emission probabilities of the sequence
+ L = seqs[idx].n_cols;
+ for (size_t t = 0; t < L; t++) {
+ arma::vec e = seqs[idx].unsafe_col(t);
+ for (size_t i = 0; i < M; i++) {
+ double s = 0;
+ size_t K = guessMG[i].n_clusters();
+ for (size_t j = 0; j < K; j++) {
+ emis_prob_cluster[i](j, t) = guessMG[i].getPDF(j, e);
+ s += emis_prob_cluster[i](j, t);
+ }
+ emis_prob(i, t) = s;
+ }
+ }
+
+ loglik += GaussianHMM::ViterbiInit(L, guessTR, emis_prob, states); // viterbi procedure
+
+ // accumulate expected transition & gaussian mixture parameters
+ for (size_t t = 0; t < L-1; t++) {
+ size_t i = (size_t) states[t];
+ size_t j = (size_t) states[t + 1];
+ TR(i, j)++;
+ }
+
+ for (size_t t = 0; t < L; t++) {
+ arma::vec e = seqs[idx].unsafe_col(t);
+ size_t i = (size_t) states[t];
+ size_t K = guessMG[i].n_clusters();
+ for (size_t j = 0; j < K; j++)
+ guessMG[i].accumulate(emis_prob_cluster[i](j, t) / emis_prob(i, t), j, e);
+ }
+ // end accumulate
+ }
+
+ // after accumulate all sequences: re-estimate transition & mean & covariance for the next iteration
+ for (size_t i = 0; i < M; i++) {
+ double s = 0;
+ for (size_t j = 0; j < M; j++)
+ s += TR(i, j);
+
+ if (s == 0) {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = 0;
+ guessTR(i, i) = 1;
+ }
+ else {
+ for (size_t j = 0; j < M; j++)
+ guessTR(i, j) = TR(i, j) / s;
+ }
+
+ guessMG[i].end_accumulate();
+ }
+ // end re-estimate
+
+ printf("Iter = %zu Loglik = %8.4f\n", iter, loglik);
+ if (fabs(oldlog - loglik) < tol) {
+ printf("\nConverged after %zu iterations\n", iter);
+ break;
+ }
+ oldlog = loglik;
+ }
+}
+
Deleted: mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.h
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.h 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.h 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,173 +0,0 @@
-/**
- * @file mixgaussHMM.h
- *
- * This file contains functions of a Mixture of Gaussians Hidden Markov Models.
- * It implements log-likelihood computation, viterbi algorithm for the most
- * probable sequence, Baum-Welch algorithm and Viterbi-like algorithm for parameter
- * estimation. It can also generate sequences from a Hidden Markov Model.
- */
-#ifndef __MLPACK_METHODS_HMM_MIXGAUSS_HMM_H
-#define __MLPACK_METHODS_HMM_MIXGAUSS_HMM_H
-
-#include <mlpack/core.h>
-#include "mixtureDST.h"
-
-namespace mlpack {
-namespace hmm {
-
-/**
- * A wrapper class for HMM functionals in Mixture of Gaussion case
- *
- * This class maintains transition probabilities and Mixture of Gaussian parameters
- * (mean and covariance) for each state, more details below.
- */
-class MixtureofGaussianHMM {
- /////////////// Member variables /////////////////////////////////////////////////
- private:
- /** Transmission probabilities matrix between states */
- arma::mat transmission_;
-
- /** List of Mixture of Gaussian objects corresponding to each state */
- std::vector<MixtureGauss> list_mixture_gauss_;
-
- public:
- /** Getters */
- const arma::mat& transmission() const { return transmission_; }
- const std::vector<MixtureGauss>& list_mixture_gauss() const { return list_mixture_gauss_; }
-
- /** Setter used when already initialized */
- void setModel(const arma::mat& transmission,
- const std::vector<MixtureGauss>& list_mixture_gauss);
-
- /** Initializes from computed transmission and Mixture of Gaussian parameters */
- void Init(const arma::mat& transmission, const std::vector<MixtureGauss>& list_mixture_gauss);
-
- /** Initializes by loading from a file */
- void InitFromFile(const char* profile);
-
- /** Initializes empty object */
- void Init() {
- transmission_.set_size(0, 0);
- }
-
- /** Load from file, used when already initialized */
- void LoadProfile(const char* profile);
-
- /** Save matrices to file */
- void SaveProfile(const char* profile) const;
-
- /** Generate a random data sequence of a given length */
- void GenerateSequence(size_t L, arma::mat& data_seq, arma::vec& state_seq) const;
-
- /**
- * Estimate the matrices by a data sequence and a state sequence
- * Must be already initialized
- */
-
- void EstimateModel(size_t numcluster, const arma::mat& data_seq, const arma::vec& state_seq);
- void EstimateModel(size_t numstate, size_t numcluster,
- const arma::mat& data_seq, const arma::vec& state_seq);
-
- /**
- * Decode a sequence into probabilities of each state at each time step
- * using scaled forward-backward algorithm.
- * Also return forward, backward probabilities and scale factors
- */
- void DecodeOverwrite(const arma::mat& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat,
- arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
-
- /** A decode version that initialized the output matrices */
- void DecodeInit(const arma::mat& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat,
- arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
-
- /** Compute the log-likelihood of a sequence */
- double ComputeLogLikelihood(const arma::mat& data_seq) const;
-
- /** Compute the log-likelihood of a list of sequences */
- void ComputeLogLikelihood(const std::vector<arma::mat>& list_data_seq, std::vector<double>& list_likelihood) const;
-
- /** Compute the most probable sequence (Viterbi) */
- void ComputeViterbiStateSequence(const arma::mat& data_seq, arma::vec& state_seq) const;
-
- /**
- * Train the model with a list of sequences, must be already initialized
- * using Baum-Welch EM algorithm
- */
- void TrainBaumWelch(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance);
-
- /**
- * Train the model with a list of sequences, must be already initialized
- * using Viterbi algorithm to determine the state sequence of each sequence
- */
- void TrainViterbi(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance);
-
-
- ////////// Static helper functions ///////////////////////////////////////
- static bool LoadProfile(const char* profile, arma::mat& trans, std::vector<MixtureGauss>& mixs);
- static bool SaveProfile(const char* profile, const arma::mat& trans, const std::vector<MixtureGauss>& mixs);
-
- /**
- * Generating a sequence and states using transition and emission probabilities.
- * L: sequence length
- * trans: Matrix M x M (M states)
- * means: list of mean vectors length N (emission vector of length N)
- * covs: list of square root of covariance matrices size N x N
- * seq: generated sequence, uninitialized matrix, will have size N x L
- * states: generated states, uninitialized vector, will have length L
- */
- static void GenerateInit(size_t L, const arma::mat& trans, const std::vector<MixtureGauss>& mixs, arma::mat& seq, arma::vec& states);
-
- /** Estimate transition and emission distribution from sequence and states */
- static void EstimateInit(size_t NumClusters, const arma::mat& seq, const arma::vec& states,
- arma::mat& trans, std::vector<MixtureGauss>& mixs);
- static void EstimateInit(size_t numStates, size_t NumClusters, const arma::mat& seq,
- const arma::vec& states, arma::mat& trans, std::vector<MixtureGauss>& mixs);
-
- /**
- * Calculate posteriori probabilities of states at each steps
- * Scaled Forward - Backward procedure
- * trans: Transition probabilities, size M x M
- * emis_prob: Emission probabilities along the sequence,
- * size M x L (L is the sequence length)
- * pstates: size M x L
- * fs: scaled forward probabities, size M x L
- * bs: scaled backward probabities, size M x L
- * scales: scale factors, length L
- * RETURN: log probabilities of sequence
- */
- static void ForwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
- arma::vec& scales, arma::mat& fs);
- static void BackwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
- const arma::vec& scales, arma::mat& bs);
- static double Decode(const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates,
- arma::mat& fs, arma::mat& bs, arma::vec& scales);
- static double Decode(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
- arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales);
-
- static void CalculateEmissionProb(const arma::mat& seq, const std::vector<MixtureGauss>& mixs, arma::mat& emis_prob);
-
- /**
- * Calculate the most probable states for a sequence
- * Viterbi algorithm
- * trans: Transition probabilities, size M x M
- * emis_prob: Emission probabilities, size M x L
- * states: Unitialized, will have length L
- * RETURN: log probability of the most probable sequence
- */
- static double ViterbiInit(const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states);
- static double ViterbiInit(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states);
-
- /**
- * Baum-Welch and Viterbi estimation of transition and emission
- * distribution (Gaussian)
- */
- static void Train(const std::vector<arma::mat>& seqs, arma::mat& guessTR,
- std::vector<MixtureGauss>& guessMG, size_t max_iter, double tol);
- static void TrainViterbi(const std::vector<arma::mat>& seqs, arma::mat& guessTR,
- std::vector<MixtureGauss>& guessMG, size_t max_iter, double tol);
-};
-
-}; // namespace hmm
-}; // namespace mlpack
-
-#endif
Copied: mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.hpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.h)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/mixgaussHMM.hpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,173 @@
+/**
+ * @file mixgaussHMM.h
+ *
+ * This file contains functions of a Mixture of Gaussians Hidden Markov Models.
+ * It implements log-likelihood computation, viterbi algorithm for the most
+ * probable sequence, Baum-Welch algorithm and Viterbi-like algorithm for parameter
+ * estimation. It can also generate sequences from a Hidden Markov Model.
+ */
+#ifndef __MLPACK_METHODS_HMM_MIXGAUSS_HMM_H
+#define __MLPACK_METHODS_HMM_MIXGAUSS_HMM_H
+
+#include <mlpack/core.h>
+#include "mixtureDST.hpp"
+
+namespace mlpack {
+namespace hmm {
+
+/**
+ * A wrapper class for HMM functionals in Mixture of Gaussion case
+ *
+ * This class maintains transition probabilities and Mixture of Gaussian parameters
+ * (mean and covariance) for each state, more details below.
+ */
+class MixtureofGaussianHMM {
+ /////////////// Member variables /////////////////////////////////////////////////
+ private:
+ /** Transmission probabilities matrix between states */
+ arma::mat transmission_;
+
+ /** List of Mixture of Gaussian objects corresponding to each state */
+ std::vector<MixtureGauss> list_mixture_gauss_;
+
+ public:
+ /** Getters */
+ const arma::mat& transmission() const { return transmission_; }
+ const std::vector<MixtureGauss>& list_mixture_gauss() const { return list_mixture_gauss_; }
+
+ /** Setter used when already initialized */
+ void setModel(const arma::mat& transmission,
+ const std::vector<MixtureGauss>& list_mixture_gauss);
+
+ /** Initializes from computed transmission and Mixture of Gaussian parameters */
+ void Init(const arma::mat& transmission, const std::vector<MixtureGauss>& list_mixture_gauss);
+
+ /** Initializes by loading from a file */
+ void InitFromFile(const char* profile);
+
+ /** Initializes empty object */
+ void Init() {
+ transmission_.set_size(0, 0);
+ }
+
+ /** Load from file, used when already initialized */
+ void LoadProfile(const char* profile);
+
+ /** Save matrices to file */
+ void SaveProfile(const char* profile) const;
+
+ /** Generate a random data sequence of a given length */
+ void GenerateSequence(size_t L, arma::mat& data_seq, arma::vec& state_seq) const;
+
+ /**
+ * Estimate the matrices by a data sequence and a state sequence
+ * Must be already initialized
+ */
+
+ void EstimateModel(size_t numcluster, const arma::mat& data_seq, const arma::vec& state_seq);
+ void EstimateModel(size_t numstate, size_t numcluster,
+ const arma::mat& data_seq, const arma::vec& state_seq);
+
+ /**
+ * Decode a sequence into probabilities of each state at each time step
+ * using scaled forward-backward algorithm.
+ * Also return forward, backward probabilities and scale factors
+ */
+ void DecodeOverwrite(const arma::mat& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat,
+ arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
+
+ /** A decode version that initialized the output matrices */
+ void DecodeInit(const arma::mat& data_seq, arma::mat& state_prob_mat, arma::mat& forward_prob_mat,
+ arma::mat& backward_prob_mat, arma::vec& scale_vec) const;
+
+ /** Compute the log-likelihood of a sequence */
+ double ComputeLogLikelihood(const arma::mat& data_seq) const;
+
+ /** Compute the log-likelihood of a list of sequences */
+ void ComputeLogLikelihood(const std::vector<arma::mat>& list_data_seq, std::vector<double>& list_likelihood) const;
+
+ /** Compute the most probable sequence (Viterbi) */
+ void ComputeViterbiStateSequence(const arma::mat& data_seq, arma::vec& state_seq) const;
+
+ /**
+ * Train the model with a list of sequences, must be already initialized
+ * using Baum-Welch EM algorithm
+ */
+ void TrainBaumWelch(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance);
+
+ /**
+ * Train the model with a list of sequences, must be already initialized
+ * using Viterbi algorithm to determine the state sequence of each sequence
+ */
+ void TrainViterbi(const std::vector<arma::mat>& list_data_seq, size_t max_iteration, double tolerance);
+
+
+ ////////// Static helper functions ///////////////////////////////////////
+ static bool LoadProfile(const char* profile, arma::mat& trans, std::vector<MixtureGauss>& mixs);
+ static bool SaveProfile(const char* profile, const arma::mat& trans, const std::vector<MixtureGauss>& mixs);
+
+ /**
+ * Generating a sequence and states using transition and emission probabilities.
+ * L: sequence length
+ * trans: Matrix M x M (M states)
+ * means: list of mean vectors length N (emission vector of length N)
+ * covs: list of square root of covariance matrices size N x N
+ * seq: generated sequence, uninitialized matrix, will have size N x L
+ * states: generated states, uninitialized vector, will have length L
+ */
+ static void GenerateInit(size_t L, const arma::mat& trans, const std::vector<MixtureGauss>& mixs, arma::mat& seq, arma::vec& states);
+
+ /** Estimate transition and emission distribution from sequence and states */
+ static void EstimateInit(size_t NumClusters, const arma::mat& seq, const arma::vec& states,
+ arma::mat& trans, std::vector<MixtureGauss>& mixs);
+ static void EstimateInit(size_t numStates, size_t NumClusters, const arma::mat& seq,
+ const arma::vec& states, arma::mat& trans, std::vector<MixtureGauss>& mixs);
+
+ /**
+ * Calculate posteriori probabilities of states at each steps
+ * Scaled Forward - Backward procedure
+ * trans: Transition probabilities, size M x M
+ * emis_prob: Emission probabilities along the sequence,
+ * size M x L (L is the sequence length)
+ * pstates: size M x L
+ * fs: scaled forward probabities, size M x L
+ * bs: scaled backward probabities, size M x L
+ * scales: scale factors, length L
+ * RETURN: log probabilities of sequence
+ */
+ static void ForwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
+ arma::vec& scales, arma::mat& fs);
+ static void BackwardProcedure(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
+ const arma::vec& scales, arma::mat& bs);
+ static double Decode(const arma::mat& trans, const arma::mat& emis_prob, arma::mat& pstates,
+ arma::mat& fs, arma::mat& bs, arma::vec& scales);
+ static double Decode(size_t L, const arma::mat& trans, const arma::mat& emis_prob,
+ arma::mat& pstates, arma::mat& fs, arma::mat& bs, arma::vec& scales);
+
+ static void CalculateEmissionProb(const arma::mat& seq, const std::vector<MixtureGauss>& mixs, arma::mat& emis_prob);
+
+ /**
+ * Calculate the most probable states for a sequence
+ * Viterbi algorithm
+ * trans: Transition probabilities, size M x M
+ * emis_prob: Emission probabilities, size M x L
+ * states: Unitialized, will have length L
+ * RETURN: log probability of the most probable sequence
+ */
+ static double ViterbiInit(const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states);
+ static double ViterbiInit(size_t L, const arma::mat& trans, const arma::mat& emis_prob, arma::vec& states);
+
+ /**
+ * Baum-Welch and Viterbi estimation of transition and emission
+ * distribution (Gaussian)
+ */
+ static void Train(const std::vector<arma::mat>& seqs, arma::mat& guessTR,
+ std::vector<MixtureGauss>& guessMG, size_t max_iter, double tol);
+ static void TrainViterbi(const std::vector<arma::mat>& seqs, arma::mat& guessTR,
+ std::vector<MixtureGauss>& guessMG, size_t max_iter, double tol);
+};
+
+}; // namespace hmm
+}; // namespace mlpack
+
+#endif
Deleted: mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.cc
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.cc 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.cc 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,197 +0,0 @@
-/**
- * @file mixtureDST.cc
- *
- * This file contains implementation of functions in mixtureDST.h
- */
-#include <mlpack/core.h>
-
-#include "support.h"
-#include "mixtureDST.h"
-
-namespace mlpack {
-namespace hmm {
-using namespace hmm_support;
-
-void MixtureGauss::Init(size_t K, size_t N) {
- for (size_t i = 0; i < K; i++) {
- arma::vec v;
- RAND_NORMAL_01_INIT(N, v);
- means.push_back(v);
- }
-
- for (size_t i = 0; i < means.size(); i++) {
- arma::mat m(N, N);
- m.zeros();
- for (size_t j = 0; j < N; j++)
- m(j, j) = 1.0;
-
- covs.push_back(m);
- }
-
- prior.set_size(means.size());
- prior.fill(1.0 / K);
-
- ACC_means = means;
- ACC_covs = covs;
- ACC_prior.set_size(K);
- inv_covs = covs;
- det_covs.set_size(covs.size());
-
- for (size_t i = 0; i < K; i++) {
- inv_covs[i] = inv(covs[i]);
- det_covs[i] = pow(2.0 * M_PI, -N/2.0) * pow(det(covs[i]), -0.5);
- }
-}
-
-void MixtureGauss::Init(size_t K, const arma::mat& data, const std::vector<size_t>& labels) {
- size_t N = data.n_rows;
- for (size_t i = 0; i < K; i++) {
- arma::vec v(N);
- means.push_back(v);
- }
-
- for (size_t i = 0; i < means.size(); i++) {
- arma::mat m(N, N);
- covs.push_back(m);
- }
-
- prior.set_size(means.size());
-
- ACC_means = means;
- ACC_covs = covs;
- ACC_prior.set_size(K);
- inv_covs = covs;
- det_covs.set_size(covs.size());
-
- start_accumulate();
- //printf("cols = %d rows = %d\n", data.n_cols(), data.n_rows());
- for (size_t i = 0; i < data.n_cols; i++) {
- arma::vec v = data.unsafe_col(i);
- //printf("%d\n", i);
- accumulate_cluster(labels[i], v);
- }
- end_accumulate_cluster();
-}
-
-void MixtureGauss::InitFromFile(const char* mean_fn, const char* covs_fn, const char* prior_fn) {
- arma::mat meansmat;
- meansmat.load(mean_fn, arma::auto_detect, false, true);
-
- mat2arrlst(meansmat, means);
- size_t N = means[0].n_elem;
- size_t K = means.size();
- if (covs_fn != NULL) {
- arma::mat covsmat;
- covsmat.load(covs_fn, arma::auto_detect, false, true);
-
- mat2arrlstmat(N, covsmat, covs);
- mlpack::Log::Assert(K == covs.size(), "MixtureGauss::InitFromFile(): sizes do not match!");
- } else {
- for (size_t i = 0; i < means.size(); i++) {
- arma::mat m(N, N);
- m.zeros();
-
- for (size_t j = 0; j < N; j++)
- m(j, j) = 1.0;
-
- covs.push_back(m);
- }
- }
-
- if (prior_fn != NULL) {
- arma::mat priormat;
- priormat.load(prior_fn, arma::auto_detect, false, true);
-
- mlpack::Log::Assert(K == priormat.n_cols, "MixtureGauss::InitFromFile(): sizes do not match!");
-
- prior.set_size(K);
-
- for (size_t i = 0; i < K; i++)
- prior[i] = priormat(0, i);
- } else {
- prior.set_size(means.size());
- prior.fill(1.0 / K);
- }
-
- ACC_means = means;
- ACC_covs = covs;
- ACC_prior.set_size(K);
- inv_covs = covs;
- det_covs.set_size(covs.size());
-
- for (size_t i = 0; i < K; i++) {
- inv_covs[i] = inv(covs[i]);
- det_covs[i] = pow(2.0 * M_PI, -N / 2.0) * pow(det(covs[i]), -0.5);
- }
-}
-
-void MixtureGauss::InitFromProfile(const std::vector<arma::mat>& matlst, size_t start, size_t N) {
- mlpack::Log::Assert(matlst[start].n_cols == 1);
- arma::vec tmp = matlst[start].unsafe_col(0);
- prior = tmp;
-
- // DEBUG: print_vector(prior, " prior = ");
-
- size_t K = prior.n_elem;
- for (size_t i = start + 1; i < start + (2 * K + 1); i += 2) {
- mlpack::Log::Assert(matlst[i].n_rows == N && matlst[i].n_cols == 1);
- mlpack::Log::Assert(matlst[i + 1].n_rows == N && matlst[i + 1].n_cols == N);
-
- arma::vec m = matlst[i].unsafe_col(0);
- means.push_back(m);
- covs.push_back(matlst[i + 1]);
- }
-
- ACC_means = means;
- ACC_covs = covs;
- ACC_prior.set_size(K);
- inv_covs = covs;
- det_covs.set_size(covs.size());
-
- for (size_t i = 0; i < K; i++) {
- inv_covs[i] = inv(covs[i]);
- det_covs[i] = pow(2.0 * M_PI, -N / 2.0) * pow(det(covs[i]), -0.5);
- }
-}
-
-void MixtureGauss::print_mixture(const char* s) const {
- size_t K = means.size();
- printf("%s - Mixture (%zu)\n", s, K);
- print_vector(prior, " PRCLIR");
- for (size_t i = 0; i < K; i++) {
- printf(" CLUSTER %zu:\n", i);
- print_vector(means[i], " MEANS");
- print_matrix(covs[i], " COVS");
- }
-}
-
-void MixtureGauss::generate(arma::vec& v) const {
- size_t K = means.size();
- double r = RAND_UNIFORM_01();
- size_t cluster = K - 1;
- double s = 0;
- for (size_t i = 0; i < K; i++) {
- s += prior[i];
- if (s >= r) {
- cluster = i;
- break;
- }
- }
- RAND_NORMAL_INIT(means[cluster], covs[cluster], v);
-}
-
-double MixtureGauss::getPDF(const arma::vec& v) const {
- size_t K = means.size();
- double s = 0;
- for (size_t i = 0; i < K; i++)
- s += getPDF(i, v);
-
- return s;
-}
-
-double MixtureGauss::getPDF(size_t cluster, const arma::vec& v) const {
- return prior[cluster] * NORMAL_DENSITY(v, means[cluster], inv_covs[cluster], det_covs[cluster]);
-}
-
-}; // namespace hmm
-}; // namespace mlpack
Copied: mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.cpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.cc)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.cpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,197 @@
+/**
+ * @file mixtureDST.cc
+ *
+ * This file contains implementation of functions in mixtureDST.h
+ */
+#include <mlpack/core.h>
+
+#include "support.hpp"
+#include "mixtureDST.hpp"
+
+namespace mlpack {
+namespace hmm {
+using namespace hmm_support;
+
+void MixtureGauss::Init(size_t K, size_t N) {
+ for (size_t i = 0; i < K; i++) {
+ arma::vec v;
+ RAND_NORMAL_01_INIT(N, v);
+ means.push_back(v);
+ }
+
+ for (size_t i = 0; i < means.size(); i++) {
+ arma::mat m(N, N);
+ m.zeros();
+ for (size_t j = 0; j < N; j++)
+ m(j, j) = 1.0;
+
+ covs.push_back(m);
+ }
+
+ prior.set_size(means.size());
+ prior.fill(1.0 / K);
+
+ ACC_means = means;
+ ACC_covs = covs;
+ ACC_prior.set_size(K);
+ inv_covs = covs;
+ det_covs.set_size(covs.size());
+
+ for (size_t i = 0; i < K; i++) {
+ inv_covs[i] = inv(covs[i]);
+ det_covs[i] = pow(2.0 * M_PI, -N/2.0) * pow(det(covs[i]), -0.5);
+ }
+}
+
+void MixtureGauss::Init(size_t K, const arma::mat& data, const std::vector<size_t>& labels) {
+ size_t N = data.n_rows;
+ for (size_t i = 0; i < K; i++) {
+ arma::vec v(N);
+ means.push_back(v);
+ }
+
+ for (size_t i = 0; i < means.size(); i++) {
+ arma::mat m(N, N);
+ covs.push_back(m);
+ }
+
+ prior.set_size(means.size());
+
+ ACC_means = means;
+ ACC_covs = covs;
+ ACC_prior.set_size(K);
+ inv_covs = covs;
+ det_covs.set_size(covs.size());
+
+ start_accumulate();
+ //printf("cols = %d rows = %d\n", data.n_cols(), data.n_rows());
+ for (size_t i = 0; i < data.n_cols; i++) {
+ arma::vec v = data.unsafe_col(i);
+ //printf("%d\n", i);
+ accumulate_cluster(labels[i], v);
+ }
+ end_accumulate_cluster();
+}
+
+void MixtureGauss::InitFromFile(const char* mean_fn, const char* covs_fn, const char* prior_fn) {
+ arma::mat meansmat;
+ meansmat.load(mean_fn, arma::auto_detect, false, true);
+
+ mat2arrlst(meansmat, means);
+ size_t N = means[0].n_elem;
+ size_t K = means.size();
+ if (covs_fn != NULL) {
+ arma::mat covsmat;
+ covsmat.load(covs_fn, arma::auto_detect, false, true);
+
+ mat2arrlstmat(N, covsmat, covs);
+ mlpack::Log::Assert(K == covs.size(), "MixtureGauss::InitFromFile(): sizes do not match!");
+ } else {
+ for (size_t i = 0; i < means.size(); i++) {
+ arma::mat m(N, N);
+ m.zeros();
+
+ for (size_t j = 0; j < N; j++)
+ m(j, j) = 1.0;
+
+ covs.push_back(m);
+ }
+ }
+
+ if (prior_fn != NULL) {
+ arma::mat priormat;
+ priormat.load(prior_fn, arma::auto_detect, false, true);
+
+ mlpack::Log::Assert(K == priormat.n_cols, "MixtureGauss::InitFromFile(): sizes do not match!");
+
+ prior.set_size(K);
+
+ for (size_t i = 0; i < K; i++)
+ prior[i] = priormat(0, i);
+ } else {
+ prior.set_size(means.size());
+ prior.fill(1.0 / K);
+ }
+
+ ACC_means = means;
+ ACC_covs = covs;
+ ACC_prior.set_size(K);
+ inv_covs = covs;
+ det_covs.set_size(covs.size());
+
+ for (size_t i = 0; i < K; i++) {
+ inv_covs[i] = inv(covs[i]);
+ det_covs[i] = pow(2.0 * M_PI, -N / 2.0) * pow(det(covs[i]), -0.5);
+ }
+}
+
+void MixtureGauss::InitFromProfile(const std::vector<arma::mat>& matlst, size_t start, size_t N) {
+ mlpack::Log::Assert(matlst[start].n_cols == 1);
+ arma::vec tmp = matlst[start].unsafe_col(0);
+ prior = tmp;
+
+ // DEBUG: print_vector(prior, " prior = ");
+
+ size_t K = prior.n_elem;
+ for (size_t i = start + 1; i < start + (2 * K + 1); i += 2) {
+ mlpack::Log::Assert(matlst[i].n_rows == N && matlst[i].n_cols == 1);
+ mlpack::Log::Assert(matlst[i + 1].n_rows == N && matlst[i + 1].n_cols == N);
+
+ arma::vec m = matlst[i].unsafe_col(0);
+ means.push_back(m);
+ covs.push_back(matlst[i + 1]);
+ }
+
+ ACC_means = means;
+ ACC_covs = covs;
+ ACC_prior.set_size(K);
+ inv_covs = covs;
+ det_covs.set_size(covs.size());
+
+ for (size_t i = 0; i < K; i++) {
+ inv_covs[i] = inv(covs[i]);
+ det_covs[i] = pow(2.0 * M_PI, -N / 2.0) * pow(det(covs[i]), -0.5);
+ }
+}
+
+void MixtureGauss::print_mixture(const char* s) const {
+ size_t K = means.size();
+ printf("%s - Mixture (%zu)\n", s, K);
+ print_vector(prior, " PRCLIR");
+ for (size_t i = 0; i < K; i++) {
+ printf(" CLUSTER %zu:\n", i);
+ print_vector(means[i], " MEANS");
+ print_matrix(covs[i], " COVS");
+ }
+}
+
+void MixtureGauss::generate(arma::vec& v) const {
+ size_t K = means.size();
+ double r = RAND_UNIFORM_01();
+ size_t cluster = K - 1;
+ double s = 0;
+ for (size_t i = 0; i < K; i++) {
+ s += prior[i];
+ if (s >= r) {
+ cluster = i;
+ break;
+ }
+ }
+ RAND_NORMAL_INIT(means[cluster], covs[cluster], v);
+}
+
+double MixtureGauss::getPDF(const arma::vec& v) const {
+ size_t K = means.size();
+ double s = 0;
+ for (size_t i = 0; i < K; i++)
+ s += getPDF(i, v);
+
+ return s;
+}
+
+double MixtureGauss::getPDF(size_t cluster, const arma::vec& v) const {
+ return prior[cluster] * NORMAL_DENSITY(v, means[cluster], inv_covs[cluster], det_covs[cluster]);
+}
+
+}; // namespace hmm
+}; // namespace mlpack
Deleted: mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.h
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.h 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.h 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,171 +0,0 @@
-/**
- * @file mixtureDST.h
- *
- * This file contains the class represent a mixture of Gaussian distribution
- * It maintains the priori cluster probabilities, means and covariance matrices
- * of Gaussian cluters in the mixture.
- *
- * It also provides accumulate mechanism tho update the clusters
- * parameters (i.e mean and covariance) and the prior probabilities.
- *
- */
-#ifndef __MLPACK_METHODS_HMM_MIXTURE_GAUSSIAN_H
-#define __MLPACK_METHODS_HMM_MIXTURE_GAUSSIAN_H
-
-#include <mlpack/core.h>
-
-#include "support.h"
-
-namespace mlpack {
-namespace hmm {
-
-class MixtureGauss {
- ////////////// Member variables //////////////////////////////////////
- private:
- /** List of means of clusters */
- std::vector<arma::vec> means;
-
- /** List of covariance matrices of clusters */
- std::vector<arma::mat> covs;
-
- /** Prior probabilities of the clusters */
- arma::vec prior;
-
- /** Inverse of covariance matrices */
- std::vector<arma::mat> inv_covs;
-
- /** Vector of constant in normal density formula */
- arma::vec det_covs;
-
- /** Accumulating means */
- std::vector<arma::vec> ACC_means;
-
- /** Accumulating covariance */
- std::vector<arma::mat> ACC_covs;
-
- /** Accumulating prior probability */
- arma::vec ACC_prior;
-
- /** The total denominator to divide by after accumulating complete*/
- double total;
-
- public:
- /** Init the mixture from file */
- void InitFromFile(const char* mean_fn, const char* covs_fn = NULL, const char* prior_fn = NULL);
-
- /**
- * Init the mixture from profile, read from a list of matrices
- * start with the prior vector, follows by the mean and covariance
- * of each cluster.
- */
- void InitFromProfile(const std::vector<arma::mat>& matlst, size_t start, size_t N);
-
- /** Init with K clusters and dimension N */
- void Init(size_t K, size_t N);
-
- /** Init with K clusters and the data with label */
- void Init(size_t K, const arma::mat& data, const std::vector<size_t>& labels);
-
- /** Print the mixture to stdout for debugging */
- void print_mixture(const char* s) const;
-
- /** Generate a random vector from the mixture distribution */
- void generate(arma::vec& v) const;
-
- /** Get the value of density funtion at a given point */
- double getPDF(const arma::vec& v) const;
-
- /** Get the value of density funtion of each cluster at a given point */
- double getPDF(size_t cluster, const arma::vec& v) const;
-
- /** Get the prior probabilities vector */
- const arma::vec& get_prior() const { return prior; }
-
- /** Get the mean of certain cluster */
- const arma::vec& get_mean(size_t k) const { return means[k]; }
-
- /** Get the covariance of certain cluster */
- const arma::mat& get_cov(size_t k) const { return covs[k]; }
-
- /** Get the number of cluster */
- size_t n_clusters() const { return means.size(); }
-
- /** Get the dimension */
- size_t v_length() const { return means[0].n_elem; }
-
-
- /** Start accumulate by setting accumulate variable to zero */
- void start_accumulate() {
- total = 0;
- for (size_t i = 0; i < means.size(); i++) {
- ACC_means[i].zeros();
- ACC_covs[i].zeros();
- ACC_prior.zeros();
- }
- }
-
- /** Accumulate a vector v */
- void accumulate(const arma::vec& v) {
- double s = getPDF(v);
- for (size_t i = 0; i < means.size(); i++) {
- double p = getPDF(i, v) / s;
- ACC_prior[i] += p;
- ACC_means[i] += p * v;
- arma::vec d = means[i] - v;
- ACC_covs[i] += p * (d * trans(d));
- }
- total++;
- }
-
- /** Accumulate a vector into certain cluster */
- void accumulate_cluster(size_t i, const arma::vec& v) {
- ACC_means[i] += v;
- ACC_covs[i] += v * trans(v);
- ACC_prior[i]++;
- total++;
- }
-
- /** Accumulate a vector into certain cluster with weight */
- void accumulate(double p, size_t i, const arma::vec& v) {
- ACC_means[i] += p * v;
- ACC_covs[i] += p * (v * trans(v));
- ACC_prior[i] += p;
- total += p;
- }
-
- /** End the accumulate, calculate the new mean, covariance and prior */
- void end_accumulate_cluster() {
- for (size_t i = 0; i < means.size(); i++)
- if (ACC_prior[i] != 0) {
- means[i] = ACC_means[i] / ACC_prior[i];
-
- ACC_covs[i] /= ACC_prior[i];
- ACC_covs[i] -= means[i] * trans(means[i]);
- covs[i] = ACC_covs[i];
- prior[i] = ACC_prior[i] / total;
-
- inv_covs[i] = inv(covs[i]);
- det_covs[i] = pow(2.0 * M_PI, -means[i].n_elem / 2.0) * pow(det(covs[i]), -0.5);
- }
- }
-
- /** End the accumulate, calculate the new mean, covariance and prior */
- void end_accumulate() {
- for (size_t i = 0; i < means.size(); i++) {
- if (ACC_prior[i] != 0) {
- ACC_covs[i] /= ACC_prior[i];
- ACC_covs[i] -= means[i] * trans(means[i]);
- covs[i] = ACC_covs[i];
- prior[i] = ACC_prior[i] / total;
-
- inv_covs[i] = covs[i];
- det_covs[i] = pow(2.0 * M_PI, -means[i].n_elem / 2.0) * pow(det(covs[i]), -0.5);
- }
- }
- }
-};
-
-}; // namespace hmm
-}; // namespace mlpack
-
-#endif
Copied: mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.hpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.h)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/mixtureDST.hpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,171 @@
+/**
+ * @file mixtureDST.h
+ *
+ * This file contains the class represent a mixture of Gaussian distribution
+ * It maintains the priori cluster probabilities, means and covariance matrices
+ * of Gaussian cluters in the mixture.
+ *
+ * It also provides accumulate mechanism tho update the clusters
+ * parameters (i.e mean and covariance) and the prior probabilities.
+ *
+ */
+#ifndef __MLPACK_METHODS_HMM_MIXTURE_GAUSSIAN_H
+#define __MLPACK_METHODS_HMM_MIXTURE_GAUSSIAN_H
+
+#include <mlpack/core.h>
+
+#include "support.hpp"
+
+namespace mlpack {
+namespace hmm {
+
+class MixtureGauss {
+ ////////////// Member variables //////////////////////////////////////
+ private:
+ /** List of means of clusters */
+ std::vector<arma::vec> means;
+
+ /** List of covariance matrices of clusters */
+ std::vector<arma::mat> covs;
+
+ /** Prior probabilities of the clusters */
+ arma::vec prior;
+
+ /** Inverse of covariance matrices */
+ std::vector<arma::mat> inv_covs;
+
+ /** Vector of constant in normal density formula */
+ arma::vec det_covs;
+
+ /** Accumulating means */
+ std::vector<arma::vec> ACC_means;
+
+ /** Accumulating covariance */
+ std::vector<arma::mat> ACC_covs;
+
+ /** Accumulating prior probability */
+ arma::vec ACC_prior;
+
+ /** The total denominator to divide by after accumulating complete*/
+ double total;
+
+ public:
+ /** Init the mixture from file */
+ void InitFromFile(const char* mean_fn, const char* covs_fn = NULL, const char* prior_fn = NULL);
+
+ /**
+ * Init the mixture from profile, read from a list of matrices
+ * start with the prior vector, follows by the mean and covariance
+ * of each cluster.
+ */
+ void InitFromProfile(const std::vector<arma::mat>& matlst, size_t start, size_t N);
+
+ /** Init with K clusters and dimension N */
+ void Init(size_t K, size_t N);
+
+ /** Init with K clusters and the data with label */
+ void Init(size_t K, const arma::mat& data, const std::vector<size_t>& labels);
+
+ /** Print the mixture to stdout for debugging */
+ void print_mixture(const char* s) const;
+
+ /** Generate a random vector from the mixture distribution */
+ void generate(arma::vec& v) const;
+
+ /** Get the value of density funtion at a given point */
+ double getPDF(const arma::vec& v) const;
+
+ /** Get the value of density funtion of each cluster at a given point */
+ double getPDF(size_t cluster, const arma::vec& v) const;
+
+ /** Get the prior probabilities vector */
+ const arma::vec& get_prior() const { return prior; }
+
+ /** Get the mean of certain cluster */
+ const arma::vec& get_mean(size_t k) const { return means[k]; }
+
+ /** Get the covariance of certain cluster */
+ const arma::mat& get_cov(size_t k) const { return covs[k]; }
+
+ /** Get the number of cluster */
+ size_t n_clusters() const { return means.size(); }
+
+ /** Get the dimension */
+ size_t v_length() const { return means[0].n_elem; }
+
+
+ /** Start accumulate by setting accumulate variable to zero */
+ void start_accumulate() {
+ total = 0;
+ for (size_t i = 0; i < means.size(); i++) {
+ ACC_means[i].zeros();
+ ACC_covs[i].zeros();
+ ACC_prior.zeros();
+ }
+ }
+
+ /** Accumulate a vector v */
+ void accumulate(const arma::vec& v) {
+ double s = getPDF(v);
+ for (size_t i = 0; i < means.size(); i++) {
+ double p = getPDF(i, v) / s;
+ ACC_prior[i] += p;
+ ACC_means[i] += p * v;
+ arma::vec d = means[i] - v;
+ ACC_covs[i] += p * (d * trans(d));
+ }
+ total++;
+ }
+
+ /** Accumulate a vector into certain cluster */
+ void accumulate_cluster(size_t i, const arma::vec& v) {
+ ACC_means[i] += v;
+ ACC_covs[i] += v * trans(v);
+ ACC_prior[i]++;
+ total++;
+ }
+
+ /** Accumulate a vector into certain cluster with weight */
+ void accumulate(double p, size_t i, const arma::vec& v) {
+ ACC_means[i] += p * v;
+ ACC_covs[i] += p * (v * trans(v));
+ ACC_prior[i] += p;
+ total += p;
+ }
+
+ /** End the accumulate, calculate the new mean, covariance and prior */
+ void end_accumulate_cluster() {
+ for (size_t i = 0; i < means.size(); i++)
+ if (ACC_prior[i] != 0) {
+ means[i] = ACC_means[i] / ACC_prior[i];
+
+ ACC_covs[i] /= ACC_prior[i];
+ ACC_covs[i] -= means[i] * trans(means[i]);
+ covs[i] = ACC_covs[i];
+ prior[i] = ACC_prior[i] / total;
+
+ inv_covs[i] = inv(covs[i]);
+ det_covs[i] = pow(2.0 * M_PI, -means[i].n_elem / 2.0) * pow(det(covs[i]), -0.5);
+ }
+ }
+
+ /** End the accumulate, calculate the new mean, covariance and prior */
+ void end_accumulate() {
+ for (size_t i = 0; i < means.size(); i++) {
+ if (ACC_prior[i] != 0) {
+ ACC_covs[i] /= ACC_prior[i];
+ ACC_covs[i] -= means[i] * trans(means[i]);
+ covs[i] = ACC_covs[i];
+ prior[i] = ACC_prior[i] / total;
+
+ inv_covs[i] = covs[i];
+ det_covs[i] = pow(2.0 * M_PI, -means[i].n_elem / 2.0) * pow(det(covs[i]), -0.5);
+ }
+ }
+ }
+};
+
+}; // namespace hmm
+}; // namespace mlpack
+
+#endif
Deleted: mlpack/trunk/src/mlpack/methods/hmm/support.cc
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/support.cc 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/support.cc 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,449 +0,0 @@
-#include <mlpack/core.h>
-#include <mlpack/core/kernels/lmetric.hpp>
-#include "../../core/col/tokenizer.h"
-
-#include "support.h"
-
-namespace mlpack {
-namespace hmm {
-namespace hmm_support {
-
- double RAND_UNIFORM_01() {
- return (double) rand() / (double) RAND_MAX;
- }
-
- double RAND_UNIFORM(double a, double b) {
- return RAND_UNIFORM_01() * (b - a) + a;
- }
-
- void print_matrix(const arma::mat& a, const char* msg) {
- printf("%s - Matrix (%d x %d) = \n", msg, a.n_rows, a.n_cols);
- for (size_t i = 0; i < a.n_rows; i++) {
- for (size_t j = 0; j < a.n_cols; j++)
- printf("%8.4f", a(i, j));
-
- printf("\n");
- }
- }
-
- void print_matrix(TextWriter& writer, const arma::mat& a, const char* msg, const char* format) {
- writer.Printf("%s - Matrix (%d x %d) = \n", msg, a.n_rows, a.n_cols);
- for (size_t j = 0; j < a.n_cols; j++) {
- for (size_t i = 0; i < a.n_rows; i++)
- writer.Printf(format, a(i, j));
-
- writer.Printf("\n");
- }
- }
-
- void print_vector(const arma::vec& a, const char* msg) {
- printf("%s - Vector (%d) = \n", msg, a.n_elem);
- for (size_t i = 0; i < a.n_elem; i++)
- printf("%8.4f", a[i]);
- printf("\n");
- }
-
- void print_vector(TextWriter& writer, const arma::vec& a, const char* msg, const char* format) {
- writer.Printf("%s - Vector (%d) = \n", msg, a.n_elem);
- for (size_t i = 0; i < a.n_elem; i++)
- writer.Printf(format, a[i]);
- writer.Printf("\n");
- }
-
- double RAND_NORMAL_01() {
- double r = 2, u, v;
- while (r > 1) {
- u = RAND_UNIFORM(-1, 1);
- v = RAND_UNIFORM(-1, 1);
- r = (u * u) + (v * v);
- }
- return sqrt(-2 * log(r) / r) * u;
- }
-
- void RAND_NORMAL_01_INIT(size_t N, arma::vec& v) {
- double r, u, t;
- v.set_size(N);
- for (size_t i = 0; i < N; i+=2) {
- r = 2;
- while (r > 1) {
- u = RAND_UNIFORM(-1, 1);
- t = RAND_UNIFORM(-1, 1);
- r = (u * u) + (t * t);
- }
- v[i] = sqrt(-2 * log(r) / r) * u;
- if (i + 1 < N)
- v[i + 1] = sqrt(-2 * log(r) / r) * t;
- }
- }
-
- void RAND_NORMAL_INIT(const arma::vec& mean, const arma::mat& cov, arma::vec& v) {
- size_t N = mean.n_elem;
- arma::vec v01;
- RAND_NORMAL_01_INIT(N, v01);
- v = cov * v01;
- v += mean;
- }
-
- // return x'Ay
- double MyMulExpert(const arma::vec& x, const arma::mat& A, const arma::vec& y) {
- size_t M = A.n_rows;
- size_t N = A.n_cols;
- mlpack::Log::Assert((M == x.n_elem && N == y.n_elem), "MyMulExpert: sizes do not match");
-
- double s = 0;
- for (size_t i = 0; i < M; i++)
- for (size_t j = 0; j < N; j++)
- s += x[i] * A(i, j) * y[j];
-
- return s;
- }
-
- double NORMAL_DENSITY(const arma::vec& x, const arma::vec& mean, const arma::mat& inv_cov, double det_cov) {
- arma::vec d = mean - x;
- return det_cov * exp(-0.5 * MyMulExpert(d, inv_cov, d));
- }
-
- bool kmeans(const std::vector<arma::mat>& data, size_t num_clusters,
- std::vector<size_t>& labels_, std::vector<arma::vec>& centroids_,
- size_t max_iter, double error_thresh)
- {
- std::vector<size_t> counts; //number of points in each cluster
- std::vector<arma::vec> tmp_centroids;
- size_t num_points, num_dims;
- size_t i, j, num_iter = 0;
- double error, old_error;
-
- num_points = 0;
- for (size_t i = 0; i < data.size(); i++)
- num_points += data[i].n_cols;
-
- if (num_points < num_clusters)
- return false;
-
- num_dims = data[0].n_rows;
-
- centroids_.reserve(num_clusters);
- tmp_centroids.reserve(num_clusters);
-
- counts.reserve(num_clusters);
- labels_.reserve(num_points);
-
- //Initialize the clusters to k points
- for (j = 0; j < num_clusters; j++) {
- size_t i = (size_t) math::Random(0, data.size() - 0.5);
- size_t k = (size_t) math::Random(0, data[i].n_cols - 0.5);
-
- arma::vec temp_vector = data[i].unsafe_col(k);
- centroids_[j] = temp_vector;
- tmp_centroids[j].set_size(num_dims);
- }
-
- error = DBL_MAX;
-
- do {
- old_error = error; error = 0;
-
- for (i = 0; i < num_clusters; i++) {
- tmp_centroids[i].zeros();
- counts[i] = 0;
- }
- i = 0;
- for (size_t t = 0, i = 0; t < data.size(); t++) {
- for (size_t k = 0; k < data[t].n_cols; k++, i++) {
- // Find the cluster closest to this point and update its label
- double min_distance = DBL_MAX;
- arma::vec data_i_Vec = data[t].unsafe_col(k);
-
- for (j = 0; j < num_clusters; j++) {
- double distance =
- mlpack::kernel::LMetric<2>::Evaluate(data_i_Vec, centroids_[j]);
-
- if (distance < min_distance) {
- labels_[i] = j;
- min_distance = distance;
- }
- }
-
- // Accumulate the stats for the new centroid of the target cluster
- tmp_centroids[labels_[i]] += data_i_Vec;
- counts[labels_[i]]++;
- error += min_distance;
- }
-
- // Now update all the centroids
- for (size_t j = 0; j < num_clusters; j++) {
- if (counts[j] > 0)
- centroids_[j] = tmp_centroids[j] / (double) counts[j];
- }
- num_iter++;
- }
-
- } while ((fabs(error - old_error) > error_thresh)
- && (num_iter < max_iter));
-
- return true;
- }
-
- bool kmeans(const arma::mat& data, size_t num_clusters,
- std::vector<size_t>& labels_, std::vector<arma::vec>& centroids_,
- size_t max_iter, double error_thresh)
- {
- std::vector<size_t> counts; //number of points in each cluster
- std::vector<arma::vec> tmp_centroids;
- size_t num_points, num_dims;
- size_t i, j, num_iter = 0;
- double error, old_error;
-
- if (data.n_cols < num_clusters)
- return false;
-
- num_points = data.n_cols;
- num_dims = data.n_rows;
-
- centroids_.reserve(num_clusters);
- tmp_centroids.reserve(num_clusters);
-
- counts.reserve(num_clusters);
- labels_.reserve(num_points);
-
- // Initialize the clusters to k points
- for (i = 0, j = 0; j < num_clusters; i += num_points / num_clusters, j++) {
- arma::vec temp_vector = data.unsafe_col(i);
- centroids_[j] = temp_vector;
- tmp_centroids[j].set_size(num_dims);
- }
-
- error = DBL_MAX;
-
- do {
- old_error = error; error = 0;
-
- for (i = 0; i < num_clusters; i++) {
- tmp_centroids[i].zeros();
- counts[i] = 0;
- }
-
- for (i = 0; i<num_points; i++) {
- // Find the cluster closest to this point and update its label
- double min_distance = DBL_MAX;
- arma::vec data_i_Vec = data.unsafe_col(i);
-
- for (j = 0; j < num_clusters; j++) {
- double distance =
- mlpack::kernel::LMetric<2>::Evaluate(data_i_Vec, centroids_[j]);
- if (distance < min_distance) {
- labels_[i] = j;
- min_distance = distance;
- }
- }
-
- // Accumulate the stats for the new centroid of the target cluster
- tmp_centroids[labels_[i]] += data_i_Vec;
- counts[labels_[i]]++;
- error += min_distance;
- }
-
- // Now update all the centroids
- for (size_t j = 0; j < num_clusters; j++) {
- if (counts[j] > 0)
- centroids_[j] = tmp_centroids[j] / (double) counts[j];
- }
- num_iter++;
-
- } while ((fabs(error - old_error) > error_thresh)
- && (num_iter < max_iter));
-
- return true;
- }
-
- void mat2arrlst(arma::mat& a, std::vector<arma::vec>& seqs) {
- size_t n = a.n_cols;
- for (size_t i = 0; i < n; i++) {
- arma::vec seq = a.unsafe_col(i);
- seqs.push_back(seq);
- }
- }
-
- void mat2arrlstmat(size_t N, arma::mat& a, std::vector<arma::mat>& seqs) {
- size_t n = a.n_cols;
- for (size_t i = 0; i < n; i+=N) {
- arma::mat b = a.cols(i, i + N);
- seqs.push_back(b);
- }
- }
-
- bool skip_blank(TextLineReader& reader) {
- for (;;) {
- if (!reader.MoreLines())
- return false;
-// char* pos = reader.Peek().begin();
-
- std::string::iterator pos = reader.Peek().begin();
- while (*pos == ' ' || *pos == ',' || *pos == '\t')
- pos++;
-
- if (*pos == '\0' || *pos == '%')
- reader.Gobble();
- else
- break;
- }
- return true;
- }
-
- bool read_matrix(TextLineReader& reader, arma::mat& matrix) {
- if (!skip_blank(reader)) { // EOF ?
- matrix.set_size(0, 0);
- return false;
- } else {
- size_t n_rows = 0;
- size_t n_cols = 0;
- bool is_done;
-
- // How many columns ?
- std::vector<std::string> num_str;
- tokenizeString(reader.Peek(), ", \t", num_str);
- n_cols = num_str.size();
-
- std::vector<double> num_double;
-
- for(;;) { // read each rows
- n_rows++;
- // deprecated: double* point = num_double.AddBack(n_cols);
- std::vector<std::string> num_str;
- tokenizeString(reader.Peek(), ", \t", num_str);
-// reader.Peek().Split(", \t", &num_str);
-
- mlpack::Log::Assert(num_str.size() == n_cols);
-
- std::istringstream is;
- for (size_t i = 0; i < n_cols; i++) {
- double d;
- is.str(num_str[i]);
- if( !(is >> d ) )
- abort();
- num_double.push_back(d);
- }
-
- is_done = false;
-
- reader.Gobble();
-
- for (;;) {
- if (!reader.MoreLines()) {
- is_done = true;
- break;
- }
- std::string::iterator pos = reader.Peek().begin();
-
- while (*pos == ' ' || *pos == '\t')
- pos++;
-
- if (*pos == '\0')
- reader.Gobble();
- else if (*pos == '%') {
- is_done = true;
- break;
- }
- else
- break;
- }
-
- if (is_done) {
- std::vector<double> empty;
- num_double.swap(empty);
-
- //FIXME
-// matrix->Own(num_double.ReleasePtr(), n_cols, n_rows);
-
- return true;
- }
- }
- }
- }
-
- bool read_vector(TextLineReader& reader, arma::vec& vec) {
- if (!skip_blank(reader)) { // EOF ?
- vec.set_size(0);
- return false;
- }
-
- std::vector<double> num_double;
-
- for(;;) { // read each rows
- bool is_done = false;
-
- std::vector<std::string> num_str;
- tokenizeString(reader.Peek(), ", \t", num_str);
-
- // deprecated: double* point = num_double.AddBack(num_str.size());
-
- std::istringstream is;
- for (size_t i = 0; i < num_str.size(); i++) {
- double d;
- is.str(num_str[i]);
- if( !(is >> d) )
- abort();
- }
-
- reader.Gobble();
-
- for (;;) {
- if (!reader.MoreLines()) {
- is_done = true;
- break;
- }
- std::string::iterator pos = reader.Peek().begin();
- while (*pos == ' ' || *pos == '\t')
- pos++;
- if (*pos == '\0')
- reader.Gobble();
- else if (*pos == '%') {
- is_done = true;
- break;
- } else
- break;
- }
-
- if (is_done) {
- //num_double.Trim();
- // FIXME
-// vec->Own(num_double.ReleasePtr(), length);
-
- return true;
- }
- }
- }
-
- bool load_matrix_list(const char* filename, std::vector<arma::mat>& matlst) {
- TextLineReader reader;
- if (!(reader.Open(filename)))
- return false;
- do {
- arma::mat tmp;
- if (read_matrix(reader, tmp) == true)
- matlst.push_back(tmp);
- else
- break;
- } while (1);
-
- return true;
- }
-
- bool load_vector_list(const char* filename, std::vector<arma::vec>& veclst) {
- TextLineReader reader;
- if (!(reader.Open(filename)))
- return false;
-
- do {
- arma::vec vec;
- if (read_vector(reader, vec) == true)
- veclst.push_back(vec);
- else
- break;
- } while (1);
-
- return true;
- }
-}; // namespace hmm_support
-}; // namespace hmm
-}; // namespace mlpack
Copied: mlpack/trunk/src/mlpack/methods/hmm/support.cpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/support.cc)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/support.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/support.cpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,449 @@
+#include <mlpack/core.h>
+#include <mlpack/core/kernels/lmetric.hpp>
+#include "../../core/col/tokenizer.h"
+
+#include "support.hpp"
+
+namespace mlpack {
+namespace hmm {
+namespace hmm_support {
+
+ double RAND_UNIFORM_01() {
+ return (double) rand() / (double) RAND_MAX;
+ }
+
+ double RAND_UNIFORM(double a, double b) {
+ return RAND_UNIFORM_01() * (b - a) + a;
+ }
+
+ void print_matrix(const arma::mat& a, const char* msg) {
+ printf("%s - Matrix (%d x %d) = \n", msg, a.n_rows, a.n_cols);
+ for (size_t i = 0; i < a.n_rows; i++) {
+ for (size_t j = 0; j < a.n_cols; j++)
+ printf("%8.4f", a(i, j));
+
+ printf("\n");
+ }
+ }
+
+ void print_matrix(TextWriter& writer, const arma::mat& a, const char* msg, const char* format) {
+ writer.Printf("%s - Matrix (%d x %d) = \n", msg, a.n_rows, a.n_cols);
+ for (size_t j = 0; j < a.n_cols; j++) {
+ for (size_t i = 0; i < a.n_rows; i++)
+ writer.Printf(format, a(i, j));
+
+ writer.Printf("\n");
+ }
+ }
+
+ void print_vector(const arma::vec& a, const char* msg) {
+ printf("%s - Vector (%d) = \n", msg, a.n_elem);
+ for (size_t i = 0; i < a.n_elem; i++)
+ printf("%8.4f", a[i]);
+ printf("\n");
+ }
+
+ void print_vector(TextWriter& writer, const arma::vec& a, const char* msg, const char* format) {
+ writer.Printf("%s - Vector (%d) = \n", msg, a.n_elem);
+ for (size_t i = 0; i < a.n_elem; i++)
+ writer.Printf(format, a[i]);
+ writer.Printf("\n");
+ }
+
+ double RAND_NORMAL_01() {
+ double r = 2, u, v;
+ while (r > 1) {
+ u = RAND_UNIFORM(-1, 1);
+ v = RAND_UNIFORM(-1, 1);
+ r = (u * u) + (v * v);
+ }
+ return sqrt(-2 * log(r) / r) * u;
+ }
+
+ void RAND_NORMAL_01_INIT(size_t N, arma::vec& v) {
+ double r, u, t;
+ v.set_size(N);
+ for (size_t i = 0; i < N; i+=2) {
+ r = 2;
+ while (r > 1) {
+ u = RAND_UNIFORM(-1, 1);
+ t = RAND_UNIFORM(-1, 1);
+ r = (u * u) + (t * t);
+ }
+ v[i] = sqrt(-2 * log(r) / r) * u;
+ if (i + 1 < N)
+ v[i + 1] = sqrt(-2 * log(r) / r) * t;
+ }
+ }
+
+ void RAND_NORMAL_INIT(const arma::vec& mean, const arma::mat& cov, arma::vec& v) {
+ size_t N = mean.n_elem;
+ arma::vec v01;
+ RAND_NORMAL_01_INIT(N, v01);
+ v = cov * v01;
+ v += mean;
+ }
+
+ // return x'Ay
+ double MyMulExpert(const arma::vec& x, const arma::mat& A, const arma::vec& y) {
+ size_t M = A.n_rows;
+ size_t N = A.n_cols;
+ mlpack::Log::Assert((M == x.n_elem && N == y.n_elem), "MyMulExpert: sizes do not match");
+
+ double s = 0;
+ for (size_t i = 0; i < M; i++)
+ for (size_t j = 0; j < N; j++)
+ s += x[i] * A(i, j) * y[j];
+
+ return s;
+ }
+
+ double NORMAL_DENSITY(const arma::vec& x, const arma::vec& mean, const arma::mat& inv_cov, double det_cov) {
+ arma::vec d = mean - x;
+ return det_cov * exp(-0.5 * MyMulExpert(d, inv_cov, d));
+ }
+
+ bool kmeans(const std::vector<arma::mat>& data, size_t num_clusters,
+ std::vector<size_t>& labels_, std::vector<arma::vec>& centroids_,
+ size_t max_iter, double error_thresh)
+ {
+ std::vector<size_t> counts; //number of points in each cluster
+ std::vector<arma::vec> tmp_centroids;
+ size_t num_points, num_dims;
+ size_t i, j, num_iter = 0;
+ double error, old_error;
+
+ num_points = 0;
+ for (size_t i = 0; i < data.size(); i++)
+ num_points += data[i].n_cols;
+
+ if (num_points < num_clusters)
+ return false;
+
+ num_dims = data[0].n_rows;
+
+ centroids_.reserve(num_clusters);
+ tmp_centroids.reserve(num_clusters);
+
+ counts.reserve(num_clusters);
+ labels_.reserve(num_points);
+
+ //Initialize the clusters to k points
+ for (j = 0; j < num_clusters; j++) {
+ size_t i = (size_t) math::Random(0, data.size() - 0.5);
+ size_t k = (size_t) math::Random(0, data[i].n_cols - 0.5);
+
+ arma::vec temp_vector = data[i].unsafe_col(k);
+ centroids_[j] = temp_vector;
+ tmp_centroids[j].set_size(num_dims);
+ }
+
+ error = DBL_MAX;
+
+ do {
+ old_error = error; error = 0;
+
+ for (i = 0; i < num_clusters; i++) {
+ tmp_centroids[i].zeros();
+ counts[i] = 0;
+ }
+ i = 0;
+ for (size_t t = 0, i = 0; t < data.size(); t++) {
+ for (size_t k = 0; k < data[t].n_cols; k++, i++) {
+ // Find the cluster closest to this point and update its label
+ double min_distance = DBL_MAX;
+ arma::vec data_i_Vec = data[t].unsafe_col(k);
+
+ for (j = 0; j < num_clusters; j++) {
+ double distance =
+ mlpack::kernel::LMetric<2>::Evaluate(data_i_Vec, centroids_[j]);
+
+ if (distance < min_distance) {
+ labels_[i] = j;
+ min_distance = distance;
+ }
+ }
+
+ // Accumulate the stats for the new centroid of the target cluster
+ tmp_centroids[labels_[i]] += data_i_Vec;
+ counts[labels_[i]]++;
+ error += min_distance;
+ }
+
+ // Now update all the centroids
+ for (size_t j = 0; j < num_clusters; j++) {
+ if (counts[j] > 0)
+ centroids_[j] = tmp_centroids[j] / (double) counts[j];
+ }
+ num_iter++;
+ }
+
+ } while ((fabs(error - old_error) > error_thresh)
+ && (num_iter < max_iter));
+
+ return true;
+ }
+
+ bool kmeans(const arma::mat& data, size_t num_clusters,
+ std::vector<size_t>& labels_, std::vector<arma::vec>& centroids_,
+ size_t max_iter, double error_thresh)
+ {
+ std::vector<size_t> counts; //number of points in each cluster
+ std::vector<arma::vec> tmp_centroids;
+ size_t num_points, num_dims;
+ size_t i, j, num_iter = 0;
+ double error, old_error;
+
+ if (data.n_cols < num_clusters)
+ return false;
+
+ num_points = data.n_cols;
+ num_dims = data.n_rows;
+
+ centroids_.reserve(num_clusters);
+ tmp_centroids.reserve(num_clusters);
+
+ counts.reserve(num_clusters);
+ labels_.reserve(num_points);
+
+ // Initialize the clusters to k points
+ for (i = 0, j = 0; j < num_clusters; i += num_points / num_clusters, j++) {
+ arma::vec temp_vector = data.unsafe_col(i);
+ centroids_[j] = temp_vector;
+ tmp_centroids[j].set_size(num_dims);
+ }
+
+ error = DBL_MAX;
+
+ do {
+ old_error = error; error = 0;
+
+ for (i = 0; i < num_clusters; i++) {
+ tmp_centroids[i].zeros();
+ counts[i] = 0;
+ }
+
+ for (i = 0; i<num_points; i++) {
+ // Find the cluster closest to this point and update its label
+ double min_distance = DBL_MAX;
+ arma::vec data_i_Vec = data.unsafe_col(i);
+
+ for (j = 0; j < num_clusters; j++) {
+ double distance =
+ mlpack::kernel::LMetric<2>::Evaluate(data_i_Vec, centroids_[j]);
+ if (distance < min_distance) {
+ labels_[i] = j;
+ min_distance = distance;
+ }
+ }
+
+ // Accumulate the stats for the new centroid of the target cluster
+ tmp_centroids[labels_[i]] += data_i_Vec;
+ counts[labels_[i]]++;
+ error += min_distance;
+ }
+
+ // Now update all the centroids
+ for (size_t j = 0; j < num_clusters; j++) {
+ if (counts[j] > 0)
+ centroids_[j] = tmp_centroids[j] / (double) counts[j];
+ }
+ num_iter++;
+
+ } while ((fabs(error - old_error) > error_thresh)
+ && (num_iter < max_iter));
+
+ return true;
+ }
+
+ void mat2arrlst(arma::mat& a, std::vector<arma::vec>& seqs) {
+ size_t n = a.n_cols;
+ for (size_t i = 0; i < n; i++) {
+ arma::vec seq = a.unsafe_col(i);
+ seqs.push_back(seq);
+ }
+ }
+
+ void mat2arrlstmat(size_t N, arma::mat& a, std::vector<arma::mat>& seqs) {
+ size_t n = a.n_cols;
+ for (size_t i = 0; i < n; i+=N) {
+ arma::mat b = a.cols(i, i + N);
+ seqs.push_back(b);
+ }
+ }
+
+ bool skip_blank(TextLineReader& reader) {
+ for (;;) {
+ if (!reader.MoreLines())
+ return false;
+// char* pos = reader.Peek().begin();
+
+ std::string::iterator pos = reader.Peek().begin();
+ while (*pos == ' ' || *pos == ',' || *pos == '\t')
+ pos++;
+
+ if (*pos == '\0' || *pos == '%')
+ reader.Gobble();
+ else
+ break;
+ }
+ return true;
+ }
+
+ bool read_matrix(TextLineReader& reader, arma::mat& matrix) {
+ if (!skip_blank(reader)) { // EOF ?
+ matrix.set_size(0, 0);
+ return false;
+ } else {
+ size_t n_rows = 0;
+ size_t n_cols = 0;
+ bool is_done;
+
+ // How many columns ?
+ std::vector<std::string> num_str;
+ tokenizeString(reader.Peek(), ", \t", num_str);
+ n_cols = num_str.size();
+
+ std::vector<double> num_double;
+
+ for(;;) { // read each rows
+ n_rows++;
+ // deprecated: double* point = num_double.AddBack(n_cols);
+ std::vector<std::string> num_str;
+ tokenizeString(reader.Peek(), ", \t", num_str);
+// reader.Peek().Split(", \t", &num_str);
+
+ mlpack::Log::Assert(num_str.size() == n_cols);
+
+ std::istringstream is;
+ for (size_t i = 0; i < n_cols; i++) {
+ double d;
+ is.str(num_str[i]);
+ if( !(is >> d ) )
+ abort();
+ num_double.push_back(d);
+ }
+
+ is_done = false;
+
+ reader.Gobble();
+
+ for (;;) {
+ if (!reader.MoreLines()) {
+ is_done = true;
+ break;
+ }
+ std::string::iterator pos = reader.Peek().begin();
+
+ while (*pos == ' ' || *pos == '\t')
+ pos++;
+
+ if (*pos == '\0')
+ reader.Gobble();
+ else if (*pos == '%') {
+ is_done = true;
+ break;
+ }
+ else
+ break;
+ }
+
+ if (is_done) {
+ std::vector<double> empty;
+ num_double.swap(empty);
+
+ //FIXME
+// matrix->Own(num_double.ReleasePtr(), n_cols, n_rows);
+
+ return true;
+ }
+ }
+ }
+ }
+
+ bool read_vector(TextLineReader& reader, arma::vec& vec) {
+ if (!skip_blank(reader)) { // EOF ?
+ vec.set_size(0);
+ return false;
+ }
+
+ std::vector<double> num_double;
+
+ for(;;) { // read each rows
+ bool is_done = false;
+
+ std::vector<std::string> num_str;
+ tokenizeString(reader.Peek(), ", \t", num_str);
+
+ // deprecated: double* point = num_double.AddBack(num_str.size());
+
+ std::istringstream is;
+ for (size_t i = 0; i < num_str.size(); i++) {
+ double d;
+ is.str(num_str[i]);
+ if( !(is >> d) )
+ abort();
+ }
+
+ reader.Gobble();
+
+ for (;;) {
+ if (!reader.MoreLines()) {
+ is_done = true;
+ break;
+ }
+ std::string::iterator pos = reader.Peek().begin();
+ while (*pos == ' ' || *pos == '\t')
+ pos++;
+ if (*pos == '\0')
+ reader.Gobble();
+ else if (*pos == '%') {
+ is_done = true;
+ break;
+ } else
+ break;
+ }
+
+ if (is_done) {
+ //num_double.Trim();
+ // FIXME
+// vec->Own(num_double.ReleasePtr(), length);
+
+ return true;
+ }
+ }
+ }
+
+ bool load_matrix_list(const char* filename, std::vector<arma::mat>& matlst) {
+ TextLineReader reader;
+ if (!(reader.Open(filename)))
+ return false;
+ do {
+ arma::mat tmp;
+ if (read_matrix(reader, tmp) == true)
+ matlst.push_back(tmp);
+ else
+ break;
+ } while (1);
+
+ return true;
+ }
+
+ bool load_vector_list(const char* filename, std::vector<arma::vec>& veclst) {
+ TextLineReader reader;
+ if (!(reader.Open(filename)))
+ return false;
+
+ do {
+ arma::vec vec;
+ if (read_vector(reader, vec) == true)
+ veclst.push_back(vec);
+ else
+ break;
+ } while (1);
+
+ return true;
+ }
+}; // namespace hmm_support
+}; // namespace hmm
+}; // namespace mlpack
Deleted: mlpack/trunk/src/mlpack/methods/hmm/support.h
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/support.h 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/support.h 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,75 +0,0 @@
-#ifndef __MLPACK_METHODS_HMM_SUPPORT_H
-#define __MLPACK_METHODS_HMM_SUPPORT_H
-
-#include <mlpack/core.h>
-
-namespace mlpack {
-namespace hmm {
-
-namespace hmm_support {
- /** Generate uniform random value in [0, 1] */
- double RAND_UNIFORM_01();
-
- /** Generate uniform random value in [a, b] */
- double RAND_UNIFORM(double a, double b);
-
- /** Generate normal (Gaussian) random value N(0, 1) */
- double RAND_NORMAL_01();
-
- /** Generate normal (Gaussian) random vector N(0, 1) */
- void RAND_NORMAL_01_INIT(size_t N, arma::vec& v);
-
- /** Generate normal (Gaussian) random vector N(mean, cov^2) */
- void RAND_NORMAL_INIT(const arma::vec& mean, const arma::mat& cov, arma::vec& v);
-
- /** Calculate quadratic form x'Ay */
- double MyMulExpert(const arma::vec& x, const arma::mat& A, const arma::vec& y);
-
- /** Compute normal density function */
- double NORMAL_DENSITY(const arma::vec& x, const arma::vec& mean, const arma::mat& inv_cov, double det_cov);
-
- /** Print a matrix to stdout */
- void print_matrix(const arma::mat& a, const char* msg);
-
- /** Print a matrix to a TextWriter object */
- void print_matrix(TextWriter& writer, const arma::mat& a, const char* msg, const char* format = "%f,");
-
- /** Print a vector to stdout */
- void print_vector(const arma::vec& a, const char* msg);
-
- /** Print a vector to a TextWriter object */
- void print_vector(TextWriter& writer, const arma::vec& a, const char* msg, const char* format = "%f,");
-
- /** Compute the centroids and label the samples by K-means algorithm */
- bool kmeans(const std::vector<arma::mat>& data, size_t num_clusters,
- std::vector<size_t>& labels_, std::vector<arma::vec>& centroids_,
- size_t max_iter = 1000, double error_thresh = 1e-3);
-
- bool kmeans(const arma::mat &data, size_t num_clusters,
- std::vector<size_t>& labels_, std::vector<arma::vec>& centroids_,
- size_t max_iter = 1000, double error_thresh = 1e-04);
-
- /** Convert a matrix in to an array list of vectors of its column */
- void mat2arrlst(arma::mat& a, std::vector<arma::vec>& seqs);
-
- /** Convert a matrix in to an array list of matrices of slice of its columns */
- void mat2arrlstmat(size_t N, arma::mat& a, std::vector<arma::mat>& seqs);
-
- /**
- * Load an array list of matrices from file where the matrices
- * are seperated by a line start with %
- */
- bool load_matrix_list(const char* filename, std::vector<arma::mat>& matlst);
-
- /**
- * Load an array list of vectors from file where the vectors
- * are seperated by a line start with %
- */
- bool load_vector_list(const char* filename, std::vector<arma::vec>& veclst);
-}; // namespace hmm_support
-
-}; // namespace hmm
-}; // namespace mlpack
-
-#endif
-
Copied: mlpack/trunk/src/mlpack/methods/hmm/support.hpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/support.h)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/support.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/support.hpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,75 @@
+#ifndef __MLPACK_METHODS_HMM_SUPPORT_H
+#define __MLPACK_METHODS_HMM_SUPPORT_H
+
+#include <mlpack/core.h>
+
+namespace mlpack {
+namespace hmm {
+
+namespace hmm_support {
+ /** Generate uniform random value in [0, 1] */
+ double RAND_UNIFORM_01();
+
+ /** Generate uniform random value in [a, b] */
+ double RAND_UNIFORM(double a, double b);
+
+ /** Generate normal (Gaussian) random value N(0, 1) */
+ double RAND_NORMAL_01();
+
+ /** Generate normal (Gaussian) random vector N(0, 1) */
+ void RAND_NORMAL_01_INIT(size_t N, arma::vec& v);
+
+ /** Generate normal (Gaussian) random vector N(mean, cov^2) */
+ void RAND_NORMAL_INIT(const arma::vec& mean, const arma::mat& cov, arma::vec& v);
+
+ /** Calculate quadratic form x'Ay */
+ double MyMulExpert(const arma::vec& x, const arma::mat& A, const arma::vec& y);
+
+ /** Compute normal density function */
+ double NORMAL_DENSITY(const arma::vec& x, const arma::vec& mean, const arma::mat& inv_cov, double det_cov);
+
+ /** Print a matrix to stdout */
+ void print_matrix(const arma::mat& a, const char* msg);
+
+ /** Print a matrix to a TextWriter object */
+ void print_matrix(TextWriter& writer, const arma::mat& a, const char* msg, const char* format = "%f,");
+
+ /** Print a vector to stdout */
+ void print_vector(const arma::vec& a, const char* msg);
+
+ /** Print a vector to a TextWriter object */
+ void print_vector(TextWriter& writer, const arma::vec& a, const char* msg, const char* format = "%f,");
+
+ /** Compute the centroids and label the samples by K-means algorithm */
+ bool kmeans(const std::vector<arma::mat>& data, size_t num_clusters,
+ std::vector<size_t>& labels_, std::vector<arma::vec>& centroids_,
+ size_t max_iter = 1000, double error_thresh = 1e-3);
+
+ bool kmeans(const arma::mat &data, size_t num_clusters,
+ std::vector<size_t>& labels_, std::vector<arma::vec>& centroids_,
+ size_t max_iter = 1000, double error_thresh = 1e-04);
+
+ /** Convert a matrix in to an array list of vectors of its column */
+ void mat2arrlst(arma::mat& a, std::vector<arma::vec>& seqs);
+
+ /** Convert a matrix in to an array list of matrices of slice of its columns */
+ void mat2arrlstmat(size_t N, arma::mat& a, std::vector<arma::mat>& seqs);
+
+ /**
+ * Load an array list of matrices from file where the matrices
+ * are seperated by a line start with %
+ */
+ bool load_matrix_list(const char* filename, std::vector<arma::mat>& matlst);
+
+ /**
+ * Load an array list of vectors from file where the vectors
+ * are seperated by a line start with %
+ */
+ bool load_vector_list(const char* filename, std::vector<arma::vec>& veclst);
+}; // namespace hmm_support
+
+}; // namespace hmm
+}; // namespace mlpack
+
+#endif
+
Deleted: mlpack/trunk/src/mlpack/methods/hmm/train.cc
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/train.cc 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/train.cc 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,357 +0,0 @@
-/**
- * @file train.cc
- *
- * This file contains the program to estimate Hidden Markov Model parameter
- * using training sequences.
- *
- * It use two algorithm: Baum-Welch (EM) and Viterbi
- *
- * Usage:
- * train --type=TYPE --profile=PROFILE --seqfile=FILE [OPTCLINS]
- * See the usage() function for complete option list
- */
-#include <mlpack/core.h>
-
-#include "support.h"
-#include "discreteHMM.h"
-#include "gaussianHMM.h"
-#include "mixgaussHMM.h"
-#include "mixtureDST.h"
-
-using namespace mlpack;
-using namespace hmm;
-using namespace hmm_support;
-
-bool train_baumwelch();
-bool train_viterbi();
-void usage();
-
-/* const fx_entry_doc hmm_train_main_entries[] = {
- {"type", FX_REQUIRED, FX_STR, NULL,
- " HMM type : discrete | gaussian | mixture.\n"},
- {"algorithm", FX_PARAM, FX_STR, NULL,
- " Training algoritm: baumwelch | viterbi.\n"},
- {"seqfile", FX_REQUIRED, FX_STR, NULL,
- " Output file for the data sequences.\n"},
- {"guess", FX_PARAM, FX_STR, NULL,
- " File containing guessing HMM model profile.\n"},
- {"numstate", FX_PARAM, FX_INT, NULL,
- " If no guessing profile specified, at least provide the number of states.\n"},
- {"profile", FX_REQUIRED, FX_STR, NULL,
- " Output file containing trained HMM profile.\n"},
- {"maxiter", FX_PARAM, FX_INT, NULL,
- " Maximum number of iterations, default = 500.\n"},
- {"tolerance", FX_PARAM, FX_DOUBLE, NULL,
- " Error tolerance on log-likelihood as a stopping criteria.\n"},
- FX_ENTRY_DOC_DONE
-}; */
-
-PARAM_STRING_REQ("type", "HMM type : discrete | gaussian | mixture.", "hmm");
-PARAM_STRING_REQ("profile", "A file containing HMM profile.", "hmm");
-PARAM_STRING_REQ("seqfile", "Output file for the generated sequences.", "hmm");
-PARAM_STRING("algorithm", "Training algorithm: baumwelch | viterbi.", "hmm",
- "baumwelch");
-PARAM_STRING("guess", "File containing guessing HMM model profile.", "hmm",
- "");
-
-
-PARAM(double, "tolerance",
- "Error tolerance on log-likelihood as a stopping criteria.", "hmm", 1e-3, false);
-PARAM_INT("maxiter", "Maximum number of iterations, default = 500.", "hmm", 500);
-PARAM_INT("numstate", "If no guessing profile specified, at least provide the number of states.", "hmm", 10);
-
-PARAM_MODULE("hmm", "This is a program generating sequences from HMM models.");
-
-/*
-const fx_submodule_doc hmm_train_main_submodules[] = {
- FX_SUBMODULE_DOC_DONE
-}; */
-
-/*
-const fx_module_doc hmm_train_main_doc = {
- hmm_train_main_entries, hmm_train_main_submodules,
- "This is a program training HMM models from data sequences. \n"
-};*/
-
-void usage() {
- Log::Warn << "Usage:" << std::endl;
- Log::Warn << " train --type=={discrete|gaussian|mixture} OPTCLIN" << std::endl;
- Log::Warn << "[OPTCLINS]" << std::endl;
- Log::Warn << " --algorithm={baumwelch|viterbi} : algorithm used for training, default Baum-Welch" << std::endl;
- Log::Warn << " --seqfile=file : file contains input sequences" << std::endl;
- Log::Warn << " --guess=file : file contains guess HMM profile" << std::endl;
- Log::Warn << " --numstate=NUM : if no guess profile is specified, at least specify the number of state" << std::endl;
- Log::Warn << " --profile=file : output file for estimated HMM profile" << std::endl;
- Log::Warn << " --maxiter=NUM : maximum number of iteration, default=500" << std::endl;
- Log::Warn << " --tolerance=NUM : error tolerance on log-likelihood, default=1e-3" << std::endl;
-}
-
-int main(int argc, char* argv[]) {
- CLI::ParseCommandLine(argc, argv);
-
- bool s = true;
- if (CLI::HasParam("hmm/type")) {
- const char* algorithm = CLI::GetParam<std::string>("hmm/algorithm").c_str();
- if (strcmp(algorithm,"baumwelch") == 0)
- s = train_baumwelch();
- else if (strcmp(algorithm,"viterbi") == 0)
- s = train_viterbi();
- else {
- Log::Fatal << "Unrecognized algorithm: must be baumwelch or viterbi!";
- s = false;
- }
- }
- else {
- Log::Fatal << "Unrecognized type: must be: discrete | gaussian | mixture!";
- s = false;
- }
- if (!(s)) usage();
-}
-
-bool train_baumwelch_discrete();
-bool train_baumwelch_gaussian();
-bool train_baumwelch_mixture();
-
-bool train_baumwelch() {
- const char* type = CLI::GetParam<std::string>("hmm/type").c_str();
- if (strcmp(type, "discrete")==0)
- return train_baumwelch_discrete();
- else if (strcmp(type, "gaussian")==0)
- return train_baumwelch_gaussian();
- else if (strcmp(type, "mixture")==0)
- return train_baumwelch_mixture();
- else {
- printf("Unrecognized type: must be: discrete | gaussian | mixture!\n");
- return false;
- }
-}
-
-bool train_viterbi_discrete();
-bool train_viterbi_gaussian();
-bool train_viterbi_mixture();
-
-bool train_viterbi() {
- const char* type = CLI::GetParam<std::string>("hmm/type").c_str();
- if (strcmp(type, "discrete")==0)
- return train_viterbi_discrete();
- else if (strcmp(type, "gaussian")==0)
- return train_viterbi_gaussian();
- else if (strcmp(type, "mixture")==0)
- return train_viterbi_mixture();
- else {
- printf("Unrecognized type: must be: discrete | gaussian | mixture !!!\n");
- return false;
- }
-}
-
-bool train_baumwelch_mixture() {
- if (!CLI::HasParam("hmm/seqfile")) {
- printf("--seqfile must be defined.\n");
- return false;
- }
-
- MixtureofGaussianHMM hmm;
- std::vector<arma::mat> seqs;
-
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.mix.out"
-
- load_matrix_list(seqin, seqs);
-
- if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
- const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
- Log::Info << "Load parameters from file " << guess << std::endl;
- hmm.InitFromFile(guess);
- }
- else {
- hmm.Init();
- Log::Fatal <<"Automatic initialization not supported !!!";
- return false;
- }
-
- int maxiter = CLI::GetParam<int>("hmm/maxiter");
- double tol = CLI::GetParam<double>("hmm/tolerance");
-
- hmm.TrainBaumWelch(seqs, maxiter, tol);
-
- hmm.SaveProfile(proout);
-
- return true;
-}
-
-bool train_baumwelch_gaussian() {
- if (!CLI::HasParam("hmm/seqfile")) {
- printf("--seqfile must be defined.\n");
- return false;
- }
- GaussianHMM hmm;
- std::vector<arma::mat> seqs;
-
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.gauss.out");
-
- load_matrix_list(seqin, seqs);
-
- if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
- const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
- Log::Info << "Load parameters from file " << guess << std::endl;
- hmm.InitFromFile(guess);
- }
- else { // otherwise initialized using information from the data
- int numstate = CLI::GetParam<int>("hmm/numstate");
- Log::Info << "Generate HMM parameters: NUMSTATE = " << numstate << std::endl;
- hmm.InitFromData(seqs, numstate);
- Log::Info << "Done." << std::endl;
- }
-
- int maxiter = CLI::GetParam<int>("hmm/maxiter");
- double tol = CLI::GetParam<double>("hmm/tolerance");
-
- printf("Training ...\n");
- hmm.TrainBaumWelch(seqs, maxiter, tol);
- printf("Done.\n");
-
- hmm.SaveProfile(proout);
-
- return true;
-}
-
-bool train_baumwelch_discrete() {
- if (!CLI::HasParam("hmm/seqfile")) {
- printf("--seqfile must be defined.\n");
- return false;
- }
-
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.dis.out");
-
- std::vector<arma::vec> seqs;
- load_vector_list(seqin, seqs);
-
- DiscreteHMM hmm;
-
- if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
- const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
- Log::Info << "Load HMM parameters from file " << guess << std::endl;
- hmm.InitFromFile(guess);
- }
- else { // otherwise randomly initialized using information from the data
- int numstate = CLI::GetParam<int>("hmm/numstate");
- Log::Info << "Randomly generate parameters: NUMSTATE = " << numstate << std::endl;
- hmm.InitFromData(seqs, numstate);
- }
-
- int maxiter = CLI::GetParam<int>("hmm/maxiter");
- double tol = CLI::GetParam<double>("hmm/tolerance");
-
- hmm.TrainBaumWelch(seqs, maxiter, tol);
-
- hmm.SaveProfile(proout);
-
- return true;
-}
-
-bool train_viterbi_mixture() {
- if (!CLI::HasParam("hmm/seqfile")) {
- printf("--seqfile must be defined.\n");
- return false;
- }
-
- MixtureofGaussianHMM hmm;
- std::vector<arma::mat> seqs;
-
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.mix.out");
-
- load_matrix_list(seqin, seqs);
-
- if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
- const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
- Log::Info << "Load parameters from file " << guess << std::endl;
- hmm.InitFromFile(guess);
- } else {
- hmm.Init();
- Log::Info << "Automatic initialization not supported !!!" << std::endl;
- return false;
- }
-
- int maxiter = CLI::GetParam<int>("hmm/maxiter");
- double tol = CLI::GetParam<double>("hmm/tolerance");
-
- hmm.TrainViterbi(seqs, maxiter, tol);
-
- hmm.SaveProfile(proout);
-
- return true;
-}
-
-bool train_viterbi_gaussian() {
- if (!CLI::HasParam("hmm/seqfile")) {
- Log::Fatal << "--seqfile must be defined." << std::endl;
- return false;
- }
-
- GaussianHMM hmm;
- std::vector<arma::mat> seqs;
-
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.gauss.viterbi.out");
-
- load_matrix_list(seqin, seqs);
-
- if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
- const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
- Log::Info << "Load parameters from file " << guess << std::endl;
- hmm.InitFromFile(guess);
- }
- else { // otherwise initialized using information from the data
- int numstate = CLI::GetParam<int>("hmm/numstate");
- Log::Info << "Generate parameters: NUMSTATE = " << numstate << std::endl;
- hmm.InitFromData(seqs, numstate);
- }
-
- int maxiter = CLI::GetParam<int>("hmm/maxiter");
- double tol = CLI::GetParam<double>("hmm/tolerance");
-
- hmm.TrainViterbi(seqs, maxiter, tol);
-
- hmm.SaveProfile(proout);
-
- return true;
-}
-
-bool train_viterbi_discrete() {
- if (!CLI::HasParam("hmm/seqfile")) {
- printf("--seqfile must be defined.\n");
- return false;
- }
-
- DiscreteHMM hmm;
- std::vector<arma::vec> seqs;
-
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
- const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.dis.viterbi.out");
-
- load_vector_list(seqin, seqs);
-
- if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
- std::vector<arma::mat> matlst;
- const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
- Log::Info << "Load parameters from file " << guess << std::endl;
- hmm.InitFromFile(guess);
- }
- else { // otherwise randomly initialized using information from the data
- int numstate = CLI::GetParam<int>("hmm/numstate");
- printf("Generate parameters with NUMSTATE = %d\n", numstate);
- hmm.InitFromData(seqs, numstate);
- }
-
- int maxiter = CLI::GetParam<int>("hmm/maxiter");
- double tol = CLI::GetParam<double>("hmm/tolerance");
-
- hmm.TrainViterbi(seqs, maxiter, tol);
-
- hmm.SaveProfile(proout);
-
- return true;
-}
Copied: mlpack/trunk/src/mlpack/methods/hmm/train.cpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/train.cc)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/train.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/train.cpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,357 @@
+/**
+ * @file train.cc
+ *
+ * This file contains the program to estimate Hidden Markov Model parameter
+ * using training sequences.
+ *
+ * It use two algorithm: Baum-Welch (EM) and Viterbi
+ *
+ * Usage:
+ * train --type=TYPE --profile=PROFILE --seqfile=FILE [OPTCLINS]
+ * See the usage() function for complete option list
+ */
+#include <mlpack/core.h>
+
+#include "support.hpp"
+#include "discreteHMM.hpp"
+#include "gaussianHMM.hpp"
+#include "mixgaussHMM.hpp"
+#include "mixtureDST.hpp"
+
+using namespace mlpack;
+using namespace hmm;
+using namespace hmm_support;
+
+bool train_baumwelch();
+bool train_viterbi();
+void usage();
+
+/* const fx_entry_doc hmm_train_main_entries[] = {
+ {"type", FX_REQUIRED, FX_STR, NULL,
+ " HMM type : discrete | gaussian | mixture.\n"},
+ {"algorithm", FX_PARAM, FX_STR, NULL,
+ " Training algoritm: baumwelch | viterbi.\n"},
+ {"seqfile", FX_REQUIRED, FX_STR, NULL,
+ " Output file for the data sequences.\n"},
+ {"guess", FX_PARAM, FX_STR, NULL,
+ " File containing guessing HMM model profile.\n"},
+ {"numstate", FX_PARAM, FX_INT, NULL,
+ " If no guessing profile specified, at least provide the number of states.\n"},
+ {"profile", FX_REQUIRED, FX_STR, NULL,
+ " Output file containing trained HMM profile.\n"},
+ {"maxiter", FX_PARAM, FX_INT, NULL,
+ " Maximum number of iterations, default = 500.\n"},
+ {"tolerance", FX_PARAM, FX_DOUBLE, NULL,
+ " Error tolerance on log-likelihood as a stopping criteria.\n"},
+ FX_ENTRY_DOC_DONE
+}; */
+
+PARAM_STRING_REQ("type", "HMM type : discrete | gaussian | mixture.", "hmm");
+PARAM_STRING_REQ("profile", "A file containing HMM profile.", "hmm");
+PARAM_STRING_REQ("seqfile", "Output file for the generated sequences.", "hmm");
+PARAM_STRING("algorithm", "Training algorithm: baumwelch | viterbi.", "hmm",
+ "baumwelch");
+PARAM_STRING("guess", "File containing guessing HMM model profile.", "hmm",
+ "");
+
+
+PARAM(double, "tolerance",
+ "Error tolerance on log-likelihood as a stopping criteria.", "hmm", 1e-3, false);
+PARAM_INT("maxiter", "Maximum number of iterations, default = 500.", "hmm", 500);
+PARAM_INT("numstate", "If no guessing profile specified, at least provide the number of states.", "hmm", 10);
+
+PARAM_MODULE("hmm", "This is a program generating sequences from HMM models.");
+
+/*
+const fx_submodule_doc hmm_train_main_submodules[] = {
+ FX_SUBMODULE_DOC_DONE
+}; */
+
+/*
+const fx_module_doc hmm_train_main_doc = {
+ hmm_train_main_entries, hmm_train_main_submodules,
+ "This is a program training HMM models from data sequences. \n"
+};*/
+
+void usage() {
+ Log::Warn << "Usage:" << std::endl;
+ Log::Warn << " train --type=={discrete|gaussian|mixture} OPTCLIN" << std::endl;
+ Log::Warn << "[OPTCLINS]" << std::endl;
+ Log::Warn << " --algorithm={baumwelch|viterbi} : algorithm used for training, default Baum-Welch" << std::endl;
+ Log::Warn << " --seqfile=file : file contains input sequences" << std::endl;
+ Log::Warn << " --guess=file : file contains guess HMM profile" << std::endl;
+ Log::Warn << " --numstate=NUM : if no guess profile is specified, at least specify the number of state" << std::endl;
+ Log::Warn << " --profile=file : output file for estimated HMM profile" << std::endl;
+ Log::Warn << " --maxiter=NUM : maximum number of iteration, default=500" << std::endl;
+ Log::Warn << " --tolerance=NUM : error tolerance on log-likelihood, default=1e-3" << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ CLI::ParseCommandLine(argc, argv);
+
+ bool s = true;
+ if (CLI::HasParam("hmm/type")) {
+ const char* algorithm = CLI::GetParam<std::string>("hmm/algorithm").c_str();
+ if (strcmp(algorithm,"baumwelch") == 0)
+ s = train_baumwelch();
+ else if (strcmp(algorithm,"viterbi") == 0)
+ s = train_viterbi();
+ else {
+ Log::Fatal << "Unrecognized algorithm: must be baumwelch or viterbi!";
+ s = false;
+ }
+ }
+ else {
+ Log::Fatal << "Unrecognized type: must be: discrete | gaussian | mixture!";
+ s = false;
+ }
+ if (!(s)) usage();
+}
+
+bool train_baumwelch_discrete();
+bool train_baumwelch_gaussian();
+bool train_baumwelch_mixture();
+
+bool train_baumwelch() {
+ const char* type = CLI::GetParam<std::string>("hmm/type").c_str();
+ if (strcmp(type, "discrete")==0)
+ return train_baumwelch_discrete();
+ else if (strcmp(type, "gaussian")==0)
+ return train_baumwelch_gaussian();
+ else if (strcmp(type, "mixture")==0)
+ return train_baumwelch_mixture();
+ else {
+ printf("Unrecognized type: must be: discrete | gaussian | mixture!\n");
+ return false;
+ }
+}
+
+bool train_viterbi_discrete();
+bool train_viterbi_gaussian();
+bool train_viterbi_mixture();
+
+bool train_viterbi() {
+ const char* type = CLI::GetParam<std::string>("hmm/type").c_str();
+ if (strcmp(type, "discrete")==0)
+ return train_viterbi_discrete();
+ else if (strcmp(type, "gaussian")==0)
+ return train_viterbi_gaussian();
+ else if (strcmp(type, "mixture")==0)
+ return train_viterbi_mixture();
+ else {
+ printf("Unrecognized type: must be: discrete | gaussian | mixture !!!\n");
+ return false;
+ }
+}
+
+bool train_baumwelch_mixture() {
+ if (!CLI::HasParam("hmm/seqfile")) {
+ printf("--seqfile must be defined.\n");
+ return false;
+ }
+
+ MixtureofGaussianHMM hmm;
+ std::vector<arma::mat> seqs;
+
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.mix.out"
+
+ load_matrix_list(seqin, seqs);
+
+ if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
+ const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
+ Log::Info << "Load parameters from file " << guess << std::endl;
+ hmm.InitFromFile(guess);
+ }
+ else {
+ hmm.Init();
+ Log::Fatal <<"Automatic initialization not supported !!!";
+ return false;
+ }
+
+ int maxiter = CLI::GetParam<int>("hmm/maxiter");
+ double tol = CLI::GetParam<double>("hmm/tolerance");
+
+ hmm.TrainBaumWelch(seqs, maxiter, tol);
+
+ hmm.SaveProfile(proout);
+
+ return true;
+}
+
+bool train_baumwelch_gaussian() {
+ if (!CLI::HasParam("hmm/seqfile")) {
+ printf("--seqfile must be defined.\n");
+ return false;
+ }
+ GaussianHMM hmm;
+ std::vector<arma::mat> seqs;
+
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.gauss.out");
+
+ load_matrix_list(seqin, seqs);
+
+ if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
+ const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
+ Log::Info << "Load parameters from file " << guess << std::endl;
+ hmm.InitFromFile(guess);
+ }
+ else { // otherwise initialized using information from the data
+ int numstate = CLI::GetParam<int>("hmm/numstate");
+ Log::Info << "Generate HMM parameters: NUMSTATE = " << numstate << std::endl;
+ hmm.InitFromData(seqs, numstate);
+ Log::Info << "Done." << std::endl;
+ }
+
+ int maxiter = CLI::GetParam<int>("hmm/maxiter");
+ double tol = CLI::GetParam<double>("hmm/tolerance");
+
+ printf("Training ...\n");
+ hmm.TrainBaumWelch(seqs, maxiter, tol);
+ printf("Done.\n");
+
+ hmm.SaveProfile(proout);
+
+ return true;
+}
+
+bool train_baumwelch_discrete() {
+ if (!CLI::HasParam("hmm/seqfile")) {
+ printf("--seqfile must be defined.\n");
+ return false;
+ }
+
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.dis.out");
+
+ std::vector<arma::vec> seqs;
+ load_vector_list(seqin, seqs);
+
+ DiscreteHMM hmm;
+
+ if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
+ const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
+ Log::Info << "Load HMM parameters from file " << guess << std::endl;
+ hmm.InitFromFile(guess);
+ }
+ else { // otherwise randomly initialized using information from the data
+ int numstate = CLI::GetParam<int>("hmm/numstate");
+ Log::Info << "Randomly generate parameters: NUMSTATE = " << numstate << std::endl;
+ hmm.InitFromData(seqs, numstate);
+ }
+
+ int maxiter = CLI::GetParam<int>("hmm/maxiter");
+ double tol = CLI::GetParam<double>("hmm/tolerance");
+
+ hmm.TrainBaumWelch(seqs, maxiter, tol);
+
+ hmm.SaveProfile(proout);
+
+ return true;
+}
+
+bool train_viterbi_mixture() {
+ if (!CLI::HasParam("hmm/seqfile")) {
+ printf("--seqfile must be defined.\n");
+ return false;
+ }
+
+ MixtureofGaussianHMM hmm;
+ std::vector<arma::mat> seqs;
+
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.mix.out");
+
+ load_matrix_list(seqin, seqs);
+
+ if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
+ const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
+ Log::Info << "Load parameters from file " << guess << std::endl;
+ hmm.InitFromFile(guess);
+ } else {
+ hmm.Init();
+ Log::Info << "Automatic initialization not supported !!!" << std::endl;
+ return false;
+ }
+
+ int maxiter = CLI::GetParam<int>("hmm/maxiter");
+ double tol = CLI::GetParam<double>("hmm/tolerance");
+
+ hmm.TrainViterbi(seqs, maxiter, tol);
+
+ hmm.SaveProfile(proout);
+
+ return true;
+}
+
+bool train_viterbi_gaussian() {
+ if (!CLI::HasParam("hmm/seqfile")) {
+ Log::Fatal << "--seqfile must be defined." << std::endl;
+ return false;
+ }
+
+ GaussianHMM hmm;
+ std::vector<arma::mat> seqs;
+
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.gauss.viterbi.out");
+
+ load_matrix_list(seqin, seqs);
+
+ if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
+ const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
+ Log::Info << "Load parameters from file " << guess << std::endl;
+ hmm.InitFromFile(guess);
+ }
+ else { // otherwise initialized using information from the data
+ int numstate = CLI::GetParam<int>("hmm/numstate");
+ Log::Info << "Generate parameters: NUMSTATE = " << numstate << std::endl;
+ hmm.InitFromData(seqs, numstate);
+ }
+
+ int maxiter = CLI::GetParam<int>("hmm/maxiter");
+ double tol = CLI::GetParam<double>("hmm/tolerance");
+
+ hmm.TrainViterbi(seqs, maxiter, tol);
+
+ hmm.SaveProfile(proout);
+
+ return true;
+}
+
+bool train_viterbi_discrete() {
+ if (!CLI::HasParam("hmm/seqfile")) {
+ printf("--seqfile must be defined.\n");
+ return false;
+ }
+
+ DiscreteHMM hmm;
+ std::vector<arma::vec> seqs;
+
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str();
+ const char* proout = CLI::GetParam<std::string>("hmm/profile").c_str(); //"pro.dis.viterbi.out");
+
+ load_vector_list(seqin, seqs);
+
+ if (CLI::HasParam("hmm/guess")) { // guessed parameters in a file
+ std::vector<arma::mat> matlst;
+ const char* guess = CLI::GetParam<std::string>("hmm/guess").c_str();
+ Log::Info << "Load parameters from file " << guess << std::endl;
+ hmm.InitFromFile(guess);
+ }
+ else { // otherwise randomly initialized using information from the data
+ int numstate = CLI::GetParam<int>("hmm/numstate");
+ printf("Generate parameters with NUMSTATE = %d\n", numstate);
+ hmm.InitFromData(seqs, numstate);
+ }
+
+ int maxiter = CLI::GetParam<int>("hmm/maxiter");
+ double tol = CLI::GetParam<double>("hmm/tolerance");
+
+ hmm.TrainViterbi(seqs, maxiter, tol);
+
+ hmm.SaveProfile(proout);
+
+ return true;
+}
Deleted: mlpack/trunk/src/mlpack/methods/hmm/viterbi.cc
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/viterbi.cc 2011-10-31 03:47:24 UTC (rev 10082)
+++ mlpack/trunk/src/mlpack/methods/hmm/viterbi.cc 2011-10-31 04:12:31 UTC (rev 10083)
@@ -1,190 +0,0 @@
-/**
- * @file viterbi.cc
- *
- * This file contains the program to compute the most probable state sequences
- * in a Hidden Markov Model of given sequences
- * Model.
- *
- * Usage:
- * viterbi --type=TYPE --profile=PROFILE --seqfile=FILE [OPTCLINS]
- * See the usage() function for complete option list
- */
-#include <mlpack/core.h>
-
-#include "support.h"
-#include "discreteHMM.h"
-#include "gaussianHMM.h"
-#include "mixgaussHMM.h"
-#include "mixtureDST.h"
-
-using namespace mlpack;
-using namespace hmm;
-using namespace hmm_support;
-
-bool viterbi_discrete();
-bool viterbi_gaussian();
-bool viterbi_mixture();
-void usage();
-
-/* const fx_entry_doc hmm_viterbi_main_entries[] = {
- {"type", FX_REQUIRED, FX_STR, NULL,
- " HMM type : discrete | gaussian | mixture.\n"},
- {"profile", FX_REQUIRED, FX_STR, NULL,
- " A file containing HMM profile.\n"},
- {"seqfile", FX_PARAM, FX_STR, NULL,
- " Output file for the data sequences.\n"},
- {"statefile", FX_PARAM, FX_STR, NULL,
- " Output file for the most probable state sequences.\n"},
- FX_ENTRY_DOC_DONE
-}; */
-
-PARAM_STRING_REQ("type", "HMM type : discrete | gaussian | mistruxe.", "hmm");
-PARAM_STRING_REQ("profile", "A file containing HMM profile", "hmm");
-PARAM_STRING_REQ("seqfile", "Output file for the data sequences.", "hmm");
-PARAM_STRING_REQ("statefile", "Output file for the most probable state sequences.", "hmm");
-
-PARAM_MODULE("hmm", "This is a program computing th emost probable state\n sequences of data sequences from HMM models.\n");
-/* const fx_submodule_doc hmm_viterbi_main_submodules[] = {
- FX_SUBMODULE_DOC_DONE
-}; */
-
-/* const fx_module_doc hmm_viterbi_main_doc = {
- hmm_viterbi_main_entries, hmm_viterbi_main_submodules,
- "This is a program computing the most probable state sequences \n"
- "of data sequences from HMM models.\n"
-}; */
-
-int main(int argc, char* argv[]) {
- CLI::ParseCommandLine(argc, argv);
-
- bool s = true;
- if (CLI::HasParam("hmm/type")) {
- const char* type = CLI::GetParam<std::string>("hmm/type").c_str();
- if (strcmp(type, "discrete") == 0)
- s = viterbi_discrete();
- else if (strcmp(type, "gaussian") == 0)
- s = viterbi_gaussian();
- else if (strcmp(type, "mixture") == 0)
- s = viterbi_mixture();
- else {
- Log::Warn << "Unrecognized type: must be: discrete | gaussian | mixture!" << std::endl;
- s = false;
- }
- }
- else {
- Log::Warn << "Unrecognized type: must be: discrete | gaussian | mixture!";
- s = false;
- }
- if (!(s)) usage();
-}
-
-void usage() {
- Log::Warn << "Usage:" << std::endl;
- Log::Warn << " viterbi --type=={discrete|gaussian|mixture} OPTCLINS" << std::endl;
- Log::Warn << "[OPTCLINS]" << std::endl;
- Log::Warn << " --profile=file : file contains HMM profile" << std::endl;
- Log::Warn << " --seqfile=file : file contains input sequences" << std::endl;
- Log::Warn << " --statefile=file : output file for state sequences" << std::endl;
-}
-
-bool viterbi_mixture() {
- if (!CLI::HasParam("hmm/profile")) {
- Log::Fatal << "--profile must be defined." << std::endl;
- return false;
- }
- const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str(); //"seq.mix.out");
- const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str(); //"state.viterbi.mix.out");
-
- MixtureofGaussianHMM hmm;
- hmm.InitFromFile(profile);
-
- std::vector<arma::mat> seqs;
- load_matrix_list(seqin, seqs);
-
- TextWriter w_state;
- if (!(w_state.Open(stateout))) {
- Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
- return false;
- }
-
- for (size_t i = 0; i < seqs.size(); i++) {
- arma::vec states;
- char s[100];
-
- hmm.ComputeViterbiStateSequence(seqs[i], states);
-
- sprintf(s, "%% viterbi state sequence %zu", i);
- print_vector(w_state, states, s, "%.0f,");
- }
-
- return true;
-}
-
-bool viterbi_gaussian() {
- if (!CLI::HasParam("hmm/profile")) {
- Log::Fatal << "--profile must be defined." << std::endl;
- return false;
- }
- const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str(); //"seq.gauss.out");
- const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str(); //"state.viterbi.gauss.out");
-
- GaussianHMM hmm;
- hmm.InitFromFile(profile);
-
- std::vector<arma::mat> seqs;
- load_matrix_list(seqin, seqs);
-
- TextWriter w_state;
- if (!(w_state.Open(stateout))) {
- Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
- return false;
- }
-
- for (size_t i = 0; i < seqs.size(); i++) {
- arma::vec states;
- char s[100];
- hmm.ComputeViterbiStateSequence(seqs[i], states);
-
- sprintf(s, "%% viterbi state sequence %zu", i);
- print_vector(w_state, states, s, "%.0f,");
- }
-
- return true;
-}
-
-bool viterbi_discrete() {
- if (!CLI::HasParam("hmm/profile")) {
- Log::Fatal << "--profile must be defined." << std::endl;
- return false;
- }
- const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
- const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str(); //"seq.out");
- const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str(); //"state.viterbi.out");
-
- DiscreteHMM hmm;
-
- hmm.InitFromFile(profile);
-
- std::vector<arma::vec> seqs;
- load_vector_list(seqin, seqs);
-
- TextWriter w_state;
- if (!(w_state.Open(stateout))) {
- Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
- return false;
- }
-
- for (size_t i = 0; i < seqs.size(); i++) {
- arma::vec states;
- char s[100];
-
- hmm.ComputeViterbiStateSequence(seqs[i], states);
-
- sprintf(s, "%% viterbi state sequence %zu", i);
- print_vector(w_state, states, s, "%.0f,");
- }
-
- return true;
-}
Copied: mlpack/trunk/src/mlpack/methods/hmm/viterbi.cpp (from rev 10030, mlpack/trunk/src/mlpack/methods/hmm/viterbi.cc)
===================================================================
--- mlpack/trunk/src/mlpack/methods/hmm/viterbi.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/hmm/viterbi.cpp 2011-10-31 04:12:31 UTC (rev 10083)
@@ -0,0 +1,190 @@
+/**
+ * @file viterbi.cc
+ *
+ * This file contains the program to compute the most probable state sequences
+ * in a Hidden Markov Model of given sequences
+ * Model.
+ *
+ * Usage:
+ * viterbi --type=TYPE --profile=PROFILE --seqfile=FILE [OPTCLINS]
+ * See the usage() function for complete option list
+ */
+#include <mlpack/core.h>
+
+#include "support.hpp"
+#include "discreteHMM.hpp"
+#include "gaussianHMM.hpp"
+#include "mixgaussHMM.hpp"
+#include "mixtureDST.hpp"
+
+using namespace mlpack;
+using namespace hmm;
+using namespace hmm_support;
+
+bool viterbi_discrete();
+bool viterbi_gaussian();
+bool viterbi_mixture();
+void usage();
+
+/* const fx_entry_doc hmm_viterbi_main_entries[] = {
+ {"type", FX_REQUIRED, FX_STR, NULL,
+ " HMM type : discrete | gaussian | mixture.\n"},
+ {"profile", FX_REQUIRED, FX_STR, NULL,
+ " A file containing HMM profile.\n"},
+ {"seqfile", FX_PARAM, FX_STR, NULL,
+ " Output file for the data sequences.\n"},
+ {"statefile", FX_PARAM, FX_STR, NULL,
+ " Output file for the most probable state sequences.\n"},
+ FX_ENTRY_DOC_DONE
+}; */
+
+PARAM_STRING_REQ("type", "HMM type : discrete | gaussian | mistruxe.", "hmm");
+PARAM_STRING_REQ("profile", "A file containing HMM profile", "hmm");
+PARAM_STRING_REQ("seqfile", "Output file for the data sequences.", "hmm");
+PARAM_STRING_REQ("statefile", "Output file for the most probable state sequences.", "hmm");
+
+PARAM_MODULE("hmm", "This is a program computing th emost probable state\n sequences of data sequences from HMM models.\n");
+/* const fx_submodule_doc hmm_viterbi_main_submodules[] = {
+ FX_SUBMODULE_DOC_DONE
+}; */
+
+/* const fx_module_doc hmm_viterbi_main_doc = {
+ hmm_viterbi_main_entries, hmm_viterbi_main_submodules,
+ "This is a program computing the most probable state sequences \n"
+ "of data sequences from HMM models.\n"
+}; */
+
+int main(int argc, char* argv[]) {
+ CLI::ParseCommandLine(argc, argv);
+
+ bool s = true;
+ if (CLI::HasParam("hmm/type")) {
+ const char* type = CLI::GetParam<std::string>("hmm/type").c_str();
+ if (strcmp(type, "discrete") == 0)
+ s = viterbi_discrete();
+ else if (strcmp(type, "gaussian") == 0)
+ s = viterbi_gaussian();
+ else if (strcmp(type, "mixture") == 0)
+ s = viterbi_mixture();
+ else {
+ Log::Warn << "Unrecognized type: must be: discrete | gaussian | mixture!" << std::endl;
+ s = false;
+ }
+ }
+ else {
+ Log::Warn << "Unrecognized type: must be: discrete | gaussian | mixture!";
+ s = false;
+ }
+ if (!(s)) usage();
+}
+
+void usage() {
+ Log::Warn << "Usage:" << std::endl;
+ Log::Warn << " viterbi --type=={discrete|gaussian|mixture} OPTCLINS" << std::endl;
+ Log::Warn << "[OPTCLINS]" << std::endl;
+ Log::Warn << " --profile=file : file contains HMM profile" << std::endl;
+ Log::Warn << " --seqfile=file : file contains input sequences" << std::endl;
+ Log::Warn << " --statefile=file : output file for state sequences" << std::endl;
+}
+
+bool viterbi_mixture() {
+ if (!CLI::HasParam("hmm/profile")) {
+ Log::Fatal << "--profile must be defined." << std::endl;
+ return false;
+ }
+ const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str(); //"seq.mix.out");
+ const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str(); //"state.viterbi.mix.out");
+
+ MixtureofGaussianHMM hmm;
+ hmm.InitFromFile(profile);
+
+ std::vector<arma::mat> seqs;
+ load_matrix_list(seqin, seqs);
+
+ TextWriter w_state;
+ if (!(w_state.Open(stateout))) {
+ Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
+ return false;
+ }
+
+ for (size_t i = 0; i < seqs.size(); i++) {
+ arma::vec states;
+ char s[100];
+
+ hmm.ComputeViterbiStateSequence(seqs[i], states);
+
+ sprintf(s, "%% viterbi state sequence %zu", i);
+ print_vector(w_state, states, s, "%.0f,");
+ }
+
+ return true;
+}
+
+bool viterbi_gaussian() {
+ if (!CLI::HasParam("hmm/profile")) {
+ Log::Fatal << "--profile must be defined." << std::endl;
+ return false;
+ }
+ const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str(); //"seq.gauss.out");
+ const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str(); //"state.viterbi.gauss.out");
+
+ GaussianHMM hmm;
+ hmm.InitFromFile(profile);
+
+ std::vector<arma::mat> seqs;
+ load_matrix_list(seqin, seqs);
+
+ TextWriter w_state;
+ if (!(w_state.Open(stateout))) {
+ Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
+ return false;
+ }
+
+ for (size_t i = 0; i < seqs.size(); i++) {
+ arma::vec states;
+ char s[100];
+ hmm.ComputeViterbiStateSequence(seqs[i], states);
+
+ sprintf(s, "%% viterbi state sequence %zu", i);
+ print_vector(w_state, states, s, "%.0f,");
+ }
+
+ return true;
+}
+
+bool viterbi_discrete() {
+ if (!CLI::HasParam("hmm/profile")) {
+ Log::Fatal << "--profile must be defined." << std::endl;
+ return false;
+ }
+ const char* profile = CLI::GetParam<std::string>("hmm/profile").c_str();
+ const char* seqin = CLI::GetParam<std::string>("hmm/seqfile").c_str(); //"seq.out");
+ const char* stateout = CLI::GetParam<std::string>("hmm/statefile").c_str(); //"state.viterbi.out");
+
+ DiscreteHMM hmm;
+
+ hmm.InitFromFile(profile);
+
+ std::vector<arma::vec> seqs;
+ load_vector_list(seqin, seqs);
+
+ TextWriter w_state;
+ if (!(w_state.Open(stateout))) {
+ Log::Warn << "Couldn't open '" << stateout << "' for writing." << std::endl;
+ return false;
+ }
+
+ for (size_t i = 0; i < seqs.size(); i++) {
+ arma::vec states;
+ char s[100];
+
+ hmm.ComputeViterbiStateSequence(seqs[i], states);
+
+ sprintf(s, "%% viterbi state sequence %zu", i);
+ print_vector(w_state, states, s, "%.0f,");
+ }
+
+ return true;
+}
More information about the mlpack-svn
mailing list