[mlpack-svn] r10322 - mlpack/trunk/src/mlpack/methods/gmm

fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Nov 18 15:42:36 EST 2011


Author: rcurtin
Date: 2011-11-18 15:42:36 -0500 (Fri, 18 Nov 2011)
New Revision: 10322

Modified:
   mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp
   mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp
Log:
Update GMM code.  Training should be a little faster, but it is still too
slow for my liking.  I am not sure what is making it so slow.
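
For context, the speedup comes from replacing the per-point loops in the
M-step with Armadillo matrix expressions.  Below is a minimal standalone
sketch of that idea; the function name MStepSketch and the argument layout
are assumptions for illustration, not the committed mlpack code.

#include <armadillo>
#include <cstddef>
#include <vector>

// Sketch of the vectorized M-step (illustration only).
//   data      : d x n matrix, one observation per column
//   cond_prob : n x k matrix of conditional probabilities (responsibilities)
//   means     : k mean vectors, each d x 1
//   covs      : k covariance matrices, each d x d
void MStepSketch(const arma::mat& data,
                 const arma::mat& cond_prob,
                 std::vector<arma::vec>& means,
                 std::vector<arma::mat>& covs)
{
  // Total responsibility assigned to each Gaussian (column-wise sum).
  arma::vec prob_row_sums = arma::trans(arma::sum(cond_prob, 0));

  for (size_t i = 0; i < means.size(); ++i)
  {
    // Weighted mean as one matrix-vector product instead of a loop over
    // all points.
    means[i] = (data * cond_prob.col(i)) / prob_row_sums[i];

    // Center the data on the new mean, scale each centered point by its
    // responsibility, and form the covariance with one matrix product.
    arma::mat tmp = data - (means[i] * arma::ones<arma::rowvec>(data.n_cols));
    arma::mat tmp_b = tmp % (arma::ones<arma::vec>(data.n_rows) *
        arma::trans(cond_prob.col(i)));

    covs[i] = (tmp * arma::trans(tmp_b)) / prob_row_sums[i];
  }
}

The per-point outer products and accumulations in the old code become a
handful of BLAS-backed matrix operations per Gaussian, which is where the
training speedup should come from.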


Modified: mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp	2011-11-18 20:32:44 UTC (rev 10321)
+++ mlpack/trunk/src/mlpack/methods/gmm/gmm.cpp	2011-11-18 20:42:36 UTC (rev 10322)
@@ -31,12 +31,12 @@
   // as our trained model.
   for (size_t iter = 0; iter < 10; iter++)
   {
-    // Use k-means to find initial values for the parameters.
     KMeans(data, gaussians, means_trial, covariances_trial, weights_trial);
 
-    // Calculate the log likelihood of the model.
     l = Loglikelihood(data, means_trial, covariances_trial, weights_trial);
 
+    Log::Info << "K-means log-likelihood: " << l << std::endl;
+
     l_old = -DBL_MAX;
 
     // Iterate to update the model until no more improvement is found.
@@ -60,31 +60,22 @@
         cond_prob.row(i) /= accu(cond_prob.row(i));
 
       // Store the sum of the probability of each state over all the data.
-      arma::vec prob_row_sums = arma::sum(cond_prob, 0 /* column-wise */);
+      arma::vec prob_row_sums = trans(arma::sum(cond_prob, 0 /* columnwise */));
 
       // Calculate the new value of the means using the updated conditional
       // probabilities.
       for (size_t i = 0; i < gaussians; i++)
       {
-        means_trial[i].zeros();
-        for (size_t j = 0; j < data.n_cols; j++)
-          means_trial[i] += cond_prob(j, i) * data.col(j);
+        means_trial[i] = (data * cond_prob.col(i)) / prob_row_sums[i];
 
-        means_trial[i] /= prob_row_sums[i];
-      }
+        // Calculate the new value of the covariances using the updated
+        // conditional probabilities and the updated means.
+        arma::mat tmp = data - (means_trial[i] *
+            arma::ones<arma::rowvec>(data.n_cols));
+        arma::mat tmp_b = tmp % (arma::ones<arma::vec>(data.n_rows) *
+            trans(cond_prob.col(i)));
 
-      // Calculate the new value of the covariances using the updated
-      // conditional probabilities and the updated means.
-      for (size_t i = 0; i < gaussians; i++)
-      {
-        covariances_trial[i].zeros();
-        for (size_t j = 0; j < data.n_cols; j++)
-        {
-          arma::vec tmp = data.col(j) - means_trial[i];
-          covariances_trial[i] += cond_prob(j, i) * (tmp * trans(tmp));
-        }
-
-        covariances_trial[i] /= prob_row_sums[i];
+        covariances_trial[i] = (tmp * trans(tmp_b)) / prob_row_sums[i];
       }
 
       // Calculate the new values for omega using the updated conditional
@@ -98,7 +89,7 @@
       iteration++;
     }
 
-    Log::Warn << "Likelihood of iteration " << iter << " (total " << iteration
+    Log::Info << "Likelihood of iteration " << iter << " (total " << iteration
         << " iterations): " << l << std::endl;
 
     // The trial model is trained.  Is it better than our existing model?
@@ -123,17 +114,18 @@
                                const arma::vec& weights_l) const
 {
   long double loglikelihood = 0;
-  long double likelihood;
 
-  for (size_t j = 0; j < data.n_cols; j++)
+  arma::vec phis;
+  arma::mat likelihoods(gaussians, data.n_cols);
+  for (size_t i = 0; i < gaussians; i++)
   {
-    likelihood = 0;
-    for(size_t i = 0; i < gaussians; i++)
-      likelihood += weights_l(i) * phi(data.unsafe_col(j), means_l[i],
-          covariances_l[i]);
-
-    loglikelihood += log(likelihood);
+    phi(data, means_l[i], covariances_l[i], phis);
+    likelihoods.row(i) = weights_l(i) * trans(phis);
   }
 
+  // Now sum over every point.
+  for (size_t j = 0; j < data.n_cols; j++)
+    loglikelihood += log(accu(likelihoods.col(j)));
+
   return loglikelihood;
 }
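
The Loglikelihood() change leans on a batch overload of phi() that evaluates
a Gaussian density at every column of the data in one call.  The sketch below
mirrors the new loop structure in a self-contained form; GaussianDensities()
is a hypothetical stand-in for that overload, not mlpack's actual API.

#include <armadillo>
#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for the batch phi() overload: fills 'phis' with the
// multivariate Gaussian density of every column of 'data'.
void GaussianDensities(const arma::mat& data, const arma::vec& mean,
                       const arma::mat& cov, arma::vec& phis)
{
  const double d = data.n_rows;
  const arma::mat cov_inv = arma::inv(cov);
  const double norm_const = 1.0 /
      std::sqrt(std::pow(2.0 * arma::datum::pi, d) * arma::det(cov));

  phis.set_size(data.n_cols);
  for (size_t j = 0; j < data.n_cols; ++j)
  {
    const arma::vec diff = data.col(j) - mean;
    phis[j] = norm_const *
        std::exp(-0.5 * arma::as_scalar(arma::trans(diff) * cov_inv * diff));
  }
}

// Batched log-likelihood: one density pass per Gaussian instead of one
// scalar phi() call per (point, Gaussian) pair.
long double LoglikelihoodSketch(const arma::mat& data,
                                const std::vector<arma::vec>& means,
                                const std::vector<arma::mat>& covs,
                                const arma::vec& weights)
{
  const size_t gaussians = means.size();
  arma::mat likelihoods(gaussians, data.n_cols);

  arma::vec phis;
  for (size_t i = 0; i < gaussians; ++i)
  {
    GaussianDensities(data, means[i], covs[i], phis);
    likelihoods.row(i) = weights(i) * arma::trans(phis);
  }

  // Sum the weighted densities over components, then take the log per point.
  long double loglikelihood = 0;
  for (size_t j = 0; j < data.n_cols; ++j)
    loglikelihood += std::log(arma::accu(likelihoods.col(j)));

  return loglikelihood;
}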

Modified: mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp	2011-11-18 20:32:44 UTC (rev 10321)
+++ mlpack/trunk/src/mlpack/methods/gmm/gmm.hpp	2011-11-18 20:42:36 UTC (rev 10322)
@@ -14,8 +14,6 @@
 
 PARAM_INT("gaussians", "The number of Gaussians in the mixture model (default "
     "1).", "gmm", 1);
-PARAM_INT("dimension", "The number of dimensions of the data on which the "
-    "mixture model is to be fit.", "gmm", 0);
 
 namespace mlpack {
 namespace gmm {



