[mlpack-svn] r10309 - mlpack/trunk/src/mlpack/methods/gmm
fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Nov 17 00:15:20 EST 2011
Author: rcurtin
Date: 2011-11-17 00:15:19 -0500 (Thu, 17 Nov 2011)
New Revision: 10309
Modified:
mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp
mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp
Log:
Rename MoGEM to GMM. Make cond_prob calculation a lot faster (but there are
still some speed issues to work on).
Modified: mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp 2011-11-17 05:14:47 UTC (rev 10308)
+++ mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp 2011-11-17 05:15:19 UTC (rev 10309)
@@ -1,19 +1,19 @@
/**
+ * @file gmm.cpp
* @author Parikshit Ram (pram at cc.gatech.edu)
- * @file mog_em.cpp
+ * @author Ryan Curtin
*
* Implementation for the loglikelihood function, the EM algorithm
* and also computes the K-means for getting an initial point
- *
*/
-#include "mog_em.hpp"
+#include "gmm.hpp"
#include "phi.hpp"
#include "kmeans.hpp"
using namespace mlpack;
using namespace gmm;
-void MoGEM::ExpectationMaximization(const arma::mat& data)
+void GMM::ExpectationMaximization(const arma::mat& data)
{
// Create temporary models and set to the right size.
std::vector<arma::vec> means_trial(gaussians, arma::vec(dimension));
@@ -21,64 +21,54 @@
arma::mat(dimension, dimension));
arma::vec weights_trial(gaussians);
- arma::mat cond_prob(gaussians, data.n_cols);
+ arma::mat cond_prob(data.n_cols, gaussians);
- long double l, l_old, best_l, TINY = 1.0e-10;
+ long double l, l_old, best_l, TINY = 1.0e-4;
best_l = -DBL_MAX;
- // We will perform five trials, and then save the trial with the best result
+ // We will perform ten trials, and then save the trial with the best result
// as our trained model.
- for (size_t iteration = 0; iteration < 5; iteration++)
+ for (size_t iter = 0; iter < 10; iter++)
{
// Use k-means to find initial values for the parameters.
KMeans(data, gaussians, means_trial, covariances_trial, weights_trial);
- Log::Warn << "K-Means results:" << std::endl;
- for (size_t i = 0; i < gaussians; i++)
- {
- Log::Warn << "Mean " << i << ":" << std::endl;
- Log::Warn << means_trial[i] << std::endl;
- Log::Warn << "Covariance " << i << ":" << std::endl;
- Log::Warn << covariances_trial[i] << std::endl;
- }
- Log::Warn << "Weights: " << std::endl << weights_trial << std::endl;
-
// Calculate the log likelihood of the model.
l = Loglikelihood(data, means_trial, covariances_trial, weights_trial);
l_old = -DBL_MAX;
// Iterate to update the model until no more improvement is found.
- size_t max_iterations = 1000;
+ size_t max_iterations = 300;
size_t iteration = 0;
while (std::abs(l - l_old) > TINY && iteration < max_iterations)
{
- Log::Warn << "Iteration " << iteration << std::endl;
// Calculate the conditional probabilities of choosing a particular
// Gaussian given the data and the present theta value.
- for (size_t j = 0; j < data.n_cols; j++)
+ for (size_t i = 0; i < gaussians; i++)
{
- for (size_t i = 0; i < gaussians; i++)
- {
- cond_prob(i, j) = phi(data.unsafe_col(j), means_trial[i],
- covariances_trial[i]) * weights_trial[i];
- }
-
- // Normalize column to have sum probability of one.
- cond_prob.col(j) /= arma::sum(cond_prob.col(j));
+ // Store conditional probabilities into cond_prob vector for each
+ // Gaussian. First we make an alias of the cond_prob vector.
+ arma::vec cond_prob_alias = cond_prob.unsafe_col(i);
+ phi(data, means_trial[i], covariances_trial[i], cond_prob_alias);
+ cond_prob_alias *= weights_trial[i];
}
- // Store the sums of each row because they are used multiple times.
- arma::vec prob_row_sums = arma::sum(cond_prob, 1 /* row-wise */);
+ // Normalize row-wise.
+ for (size_t i = 0; i < cond_prob.n_rows; i++)
+ cond_prob.row(i) /= accu(cond_prob.row(i));
+ // Store the sum of the probability of each state over all the data.
+ arma::vec prob_row_sums = arma::sum(cond_prob, 0 /* column-wise */);
+
// Calculate the new value of the means using the updated conditional
// probabilities.
for (size_t i = 0; i < gaussians; i++)
{
means_trial[i].zeros();
for (size_t j = 0; j < data.n_cols; j++)
- means_trial[i] += cond_prob(i, j) * data.col(j);
+ means_trial[i] += cond_prob(j, i) * data.col(j);
means_trial[i] /= prob_row_sums[i];
}
@@ -91,7 +81,7 @@
for (size_t j = 0; j < data.n_cols; j++)
{
arma::vec tmp = data.col(j) - means_trial[i];
- covariances_trial[i] += cond_prob(i, j) * (tmp * trans(tmp));
+ covariances_trial[i] += cond_prob(j, i) * (tmp * trans(tmp));
}
covariances_trial[i] /= prob_row_sums[i];
@@ -100,28 +90,17 @@
// Calculate the new values for omega using the updated conditional
// probabilities.
weights_trial = prob_row_sums / data.n_cols;
-/*
- Log::Warn << "Estimated weights:" << std::endl << weights_trial
- << std::endl;
- for (size_t i = 0; i < gaussians; i++)
- {
-// Log::Warn << "Estimated mean " << i << ":" << std::endl;
-// Log::Warn << means_trial[i] << std::endl;
- Log::Warn << "Estimated covariance " << i << ":" << std::endl;
- Log::Warn << covariances_trial[i] << std::endl;
- }
-*/
-
// Update values of l; calculate new log-likelihood.
l_old = l;
l = Loglikelihood(data, means_trial, covariances_trial, weights_trial);
- Log::Warn << "Improved log likelihood to " << l << std::endl;
-
iteration++;
}
+ Log::Warn << "Likelihood of iteration " << iter << " (total " << iteration
+ << " iterations): " << l << std::endl;
+
// The trial model is trained. Is it better than our existing model?
if (l > best_l)
{
@@ -138,10 +117,10 @@
return;
}
-long double MoGEM::Loglikelihood(const arma::mat& data,
- const std::vector<arma::vec>& means_l,
- const std::vector<arma::mat>& covariances_l,
- const arma::vec& weights_l) const
+long double GMM::Loglikelihood(const arma::mat& data,
+ const std::vector<arma::vec>& means_l,
+ const std::vector<arma::mat>& covariances_l,
+ const arma::vec& weights_l) const
{
long double loglikelihood = 0;
long double likelihood;
Modified: mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp 2011-11-17 05:14:47 UTC (rev 10308)
+++ mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp 2011-11-17 05:15:19 UTC (rev 10309)
@@ -10,12 +10,12 @@
#include <mlpack/core.h>
-PARAM_MODULE("mog", "Parameters for the Gaussian mixture model.");
+PARAM_MODULE("gmm", "Parameters for the Gaussian mixture model.");
-PARAM_INT("k", "The number of Gaussians in the mixture model (defaults to 1).",
- "mog", 1);
-PARAM_INT("d", "The number of dimensions of the data on which the mixture "
- "model is to be fit.", "mog", 0);
+PARAM_INT("gaussians", "The number of Gaussians in the mixture model (default "
+ "1).", "gmm", 1);
+PARAM_INT("dimension", "The number of dimensions of the data on which the "
+ "mixture model is to be fit.", "gmm", 0);
namespace mlpack {
namespace gmm {
@@ -31,14 +31,14 @@
* Example use:
*
* @code
- * MoGEM mog;
+ * GMM mog;
* ArrayList<double> results;
*
* mog.Init(number_of_gaussians, dimension);
* mog.ExpectationMaximization(data, &results, optim_flag);
* @endcode
*/
-class MoGEM {
+class GMM {
private:
//! The number of Gaussians in the model.
size_t gaussians;
@@ -59,7 +59,7 @@
* @param gaussians Number of Gaussians in this GMM.
* @param dimension Dimensionality of each Gaussian.
*/
- MoGEM(size_t gaussians, size_t dimension) :
+ GMM(size_t gaussians, size_t dimension) :
gaussians(gaussians),
dimension(dimension),
means(gaussians),
More information about the mlpack-svn
mailing list