[mlpack-svn] r10322 - mlpack/trunk/src/mlpack/methods/gmm
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Nov 18 15:42:36 EST 2011
Author: rcurtin
Date: 2011-11-18 15:42:36 -0500 (Fri, 18 Nov 2011)
New Revision: 10322
Modified:
mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp
mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp
Log:
Update GMM code. Training should be a little faster, but it is still too
slow for my preferences. I am not sure what is making it so slow.
Modified: mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp 2011-11-18 20:32:44 UTC (rev 10321)
+++ mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp 2011-11-18 20:42:36 UTC (rev 10322)
@@ -31,12 +31,12 @@
// as our trained model.
for (size_t iter = 0; iter < 10; iter++)
{
- // Use k-means to find initial values for the parameters.
KMeans(data, gaussians, means_trial, covariances_trial, weights_trial);
- // Calculate the log likelihood of the model.
l = Loglikelihood(data, means_trial, covariances_trial, weights_trial);
+ Log::Info << "K-means log-likelihood: " << l << std::endl;
+
l_old = -DBL_MAX;
// Iterate to update the model until no more improvement is found.
@@ -60,31 +60,22 @@
cond_prob.row(i) /= accu(cond_prob.row(i));
// Store the sum of the probability of each state over all the data.
- arma::vec prob_row_sums = arma::sum(cond_prob, 0 /* column-wise */);
+ arma::vec prob_row_sums = trans(arma::sum(cond_prob, 0 /* columnwise */));
// Calculate the new value of the means using the updated conditional
// probabilities.
for (size_t i = 0; i < gaussians; i++)
{
- means_trial[i].zeros();
- for (size_t j = 0; j < data.n_cols; j++)
- means_trial[i] += cond_prob(j, i) * data.col(j);
+ means_trial[i] = (data * cond_prob.col(i)) / prob_row_sums[i];
- means_trial[i] /= prob_row_sums[i];
- }
+ // Calculate the new value of the covariances using the updated
+ // conditional probabilities and the updated means.
+ arma::mat tmp = data - (means_trial[i] *
+ arma::ones<arma::rowvec>(data.n_cols));
+ arma::mat tmp_b = tmp % (arma::ones<arma::vec>(data.n_rows) *
+ trans(cond_prob.col(i)));
- // Calculate the new value of the covariances using the updated
- // conditional probabilities and the updated means.
- for (size_t i = 0; i < gaussians; i++)
- {
- covariances_trial[i].zeros();
- for (size_t j = 0; j < data.n_cols; j++)
- {
- arma::vec tmp = data.col(j) - means_trial[i];
- covariances_trial[i] += cond_prob(j, i) * (tmp * trans(tmp));
- }
-
- covariances_trial[i] /= prob_row_sums[i];
+ covariances_trial[i] = (tmp * trans(tmp_b)) / prob_row_sums[i];
}
// Calculate the new values for omega using the updated conditional
@@ -98,7 +89,7 @@
iteration++;
}
- Log::Warn << "Likelihood of iteration " << iter << " (total " << iteration
+ Log::Info << "Likelihood of iteration " << iter << " (total " << iteration
<< " iterations): " << l << std::endl;
// The trial model is trained. Is it better than our existing model?
@@ -123,17 +114,18 @@
const arma::vec& weights_l) const
{
long double loglikelihood = 0;
- long double likelihood;
- for (size_t j = 0; j < data.n_cols; j++)
+ arma::vec phis;
+ arma::mat likelihoods(gaussians, data.n_cols);
+ for (size_t i = 0; i < gaussians; i++)
{
- likelihood = 0;
- for(size_t i = 0; i < gaussians; i++)
- likelihood += weights_l(i) * phi(data.unsafe_col(j), means_l[i],
- covariances_l[i]);
-
- loglikelihood += log(likelihood);
+ phi(data, means_l[i], covariances_l[i], phis);
+ likelihoods.row(i) = weights_l(i) * trans(phis);
}
+ // Now sum over every point.
+ for (size_t j = 0; j < data.n_cols; j++)
+ loglikelihood += log(accu(likelihoods.col(j)));
+
return loglikelihood;
}
Modified: mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp 2011-11-18 20:32:44 UTC (rev 10321)
+++ mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp 2011-11-18 20:42:36 UTC (rev 10322)
@@ -14,8 +14,6 @@
PARAM_INT("gaussians", "The number of Gaussians in the mixture model (default "
"1).", "gmm", 1);
-PARAM_INT("dimension", "The number of dimensions of the data on which the "
- "mixture model is to be fit.", "gmm", 0);
namespace mlpack {
namespace gmm {
More information about the mlpack-svn
mailing list