[mlpack-svn] r14893 - mlpack/trunk/src/mlpack/methods/gmm

Fri Apr 12 13:22:50 EDT 2013

Author: rcurtin
Date: 2013-04-12 13:22:49 -0400 (Fri, 12 Apr 2013)
New Revision: 14893

Modified:
   mlpack/trunk/src/mlpack/methods/gmm/em_fit.hpp
   mlpack/trunk/src/mlpack/methods/gmm/em_fit_impl.hpp
Log:
Don't use a specified perturbation but instead a simple (but rather slow)
algorithm to detect positive definiteness of covariance matrices and force them
to be positive definite if necessary.  All of this is "singularity avoidance",
to keep covariance matrices from becoming extremely small and then introducing
NaNs and infs everywhere.


Modified: mlpack/trunk/src/mlpack/methods/gmm/em_fit.hpp
===================================================================

--- mlpack/trunk/src/mlpack/methods/gmm/em_fit.hpp	2013-04-11 22:37:15 UTC (rev 14892)
+++ mlpack/trunk/src/mlpack/methods/gmm/em_fit.hpp	2013-04-12 17:22:49 UTC (rev 14893)
@@ -39,15 +39,20 @@
    * of iterations to 0 means that the EM algorithm will iterate until
    * convergence (with the given tolerance).
    *
-   * @param clusterer Object which will perform the initial clustering.
+   * The parameter forcePositive controls whether or not the covariance matrices
+   * are checked for positive definiteness at each iteration.  This could be a
+   * time-consuming task, so, if you know your data is well-behaved, you can set
+   * it to false and save some runtime.
+   *
    * @param maxIterations Maximum number of iterations for EM.
    * @param tolerance Log-likelihood tolerance required for convergence.
-   * @param perturbation Value to add to zero-valued diagonal covariance entries
-   *     to ensure positive-definiteness.
+   * @param forcePositive Check for positive-definiteness of each covariance
+   *     matrix at each iteration.
+   * @param clusterer Object which will perform the initial clustering.
    */
   EMFit(const size_t maxIterations = 300,
         const double tolerance = 1e-10,
-        const double perturbation = 1e-30,
+        const bool forcePositive = true,
         InitialClusteringType clusterer = InitialClusteringType());
 
   /**
@@ -98,10 +103,12 @@
   //! Modify the tolerance for the convergence of the EM algorithm.
   double& Tolerance() { return tolerance; }
 
-  //! Get the perturbation added to zero diagonal covariance elements.
-  double Perturbation() const { return perturbation; }
-  //! Modify the perturbation added to zero diagonal covariance elements.
-  double& Perturbation() { return perturbation; }
+  //! Get whether or not the covariance matrices are forced to be positive
+  //! definite.
+  bool ForcePositive() const { return forcePositive; }
+  //! Modify whether or not the covariance matrices are forced to be positive
+  //! definite.
+  bool& ForcePositive() { return forcePositive; }
 
  private:
   /**
@@ -138,8 +145,8 @@
   size_t maxIterations;
   //! Tolerance for convergence of EM.
   double tolerance;
-  //! Perturbation to add to zero-valued diagonal covariance values.
-  double perturbation;
+  //! Whether or not to force positive definiteness of covariance matrices.
+  bool forcePositive;
   //! Object which will perform the clustering.
   InitialClusteringType clusterer;
 };

Modified: mlpack/trunk/src/mlpack/methods/gmm/em_fit_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/gmm/em_fit_impl.hpp	2013-04-11 22:37:15 UTC (rev 14892)
+++ mlpack/trunk/src/mlpack/methods/gmm/em_fit_impl.hpp	2013-04-12 17:22:49 UTC (rev 14893)
@@ -20,11 +20,11 @@
 template<typename InitialClusteringType>
 EMFit<InitialClusteringType>::EMFit(const size_t maxIterations,
                                     const double tolerance,
-                                    const double perturbation,
+                                    const bool forcePositive,
                                     InitialClusteringType clusterer) :
     maxIterations(maxIterations),
     tolerance(tolerance),
-    perturbation(perturbation),
+    forcePositive(forcePositive),
     clusterer(clusterer)
 { /* Nothing to do. */ }
 
@@ -48,6 +48,9 @@
   size_t iteration = 1;
   while (std::abs(l - lOld) > tolerance && iteration != maxIterations)
   {
+    Log::Info << "EMFit::Estimate(): iteration " << iteration << ", "
+        << "log-likelihood " << l << "." << std::endl;
+
     // Calculate the conditional probabilities of choosing a particular
     // Gaussian given the observations and the present theta value.
     for (size_t i = 0; i < means.size(); i++)
@@ -74,9 +77,12 @@
 
     // Calculate the new value of the means using the updated conditional
     // probabilities.
+    size_t nonPositive = 0; // Count the number of non-positive covariances.
     for (size_t i = 0; i < means.size(); i++)
     {
-      means[i] = (observations * condProb.col(i)) / probRowSums[i];
+      // Don't update if there's no probability of the Gaussian having points.
+      if (probRowSums[i] != 0)
+        means[i] = (observations * condProb.col(i)) / probRowSums[i];
 
       // Calculate the new value of the covariances using the updated
       // conditional probabilities and the updated means.
@@ -85,19 +91,30 @@
       arma::mat tmpB = tmp % (arma::ones<arma::vec>(observations.n_rows) *
           trans(condProb.col(i)));
 
-      covariances[i] = (tmp * trans(tmpB)) / probRowSums[i];
+      // Don't update if there's no probability of the Gaussian having points.
+      if (probRowSums[i] != 0.0)
+        covariances[i] = (tmp * trans(tmpB)) / probRowSums[i];
 
-      for (size_t d = 0; d < covariances[i].n_rows; ++d)
+      // Ensure positive-definiteness.  TODO: make this more efficient.
+      if (forcePositive && det(covariances[i]) <= 1e-50)
       {
-        if (covariances[i](d, d) == 0.0)
+        ++nonPositive;
+        Log::Debug << "Covariance matrix " << i << " is not positive definite. "
+            << "Adding perturbation." << std::endl;
+
+        double perturbation = 1e-30;
+        while (det(covariances[i]) <= 1e-50)
         {
-          Log::Debug << "Covariance " << i << " has zero in diagonal element "
-              << d << "!  Adding perturbation." << std::endl;
-          covariances[i](d, d) += perturbation;
+          covariances[i].diag() += perturbation;
+          perturbation *= 10; // Slow, but we don't want to add too much.
         }
       }
     }
 
+    if (nonPositive > 0)
+      Log::Warn << nonPositive << " covariance matrices are very small. "
+          << "Consider reducing the number of Gaussians." << std::endl;
+
     // Calculate the new values for omega using the updated conditional
     // probabilities.
     weights = probRowSums / observations.n_cols;
@@ -178,13 +195,17 @@
 
       covariances[i] = (tmp * trans(tmpB)) / probRowSums[i];
 
-      for (size_t d = 0; d < covariances[i].n_rows; ++d)
+      // Ensure positive-definiteness.  TODO: make this more efficient.
+      if (forcePositive && det(covariances[i]) <= 1e-50)
       {
-        if (covariances[i](d, d) == 0.0)
+        Log::Debug << "Covariance matrix " << i << " is not positive definite. "
+            << "Adding perturbation." << std::endl;
+
+        double perturbation = 1e-30;
+        while (det(covariances[i]) <= 1e-50)
         {
-          Log::Debug << "Covariance " << i << " has zero in diagonal element "
-            << d << "!  Adding perturbation." << std::endl;
-          covariances[i](d, d) += perturbation;
+          covariances[i].diag() += perturbation;
+          perturbation *= 10; // Slow, but we don't want to add too much.
         }
       }
     }
@@ -257,13 +278,17 @@
   {
     covariances[i] /= (weights[i] > 1) ? weights[i] : 1;
 
-    for (size_t d = 0; d < covariances[i].n_rows; ++d)
+    // Ensure positive-definiteness.  TODO: make this more efficient.
+    if (forcePositive && det(covariances[i]) <= 1e-50)
     {
-      if (covariances[i](d, d) == 0.0)
+      Log::Debug << "Covariance matrix " << i << " is not positive definite. "
+          << "Adding perturbation." << std::endl;
+
+      double perturbation = 1e-50;
+      while (det(covariances[i]) <= 1e-50)
       {
-        Log::Debug << "Covariance " << i << " has zero in diagonal element "
-          << d << "!  Adding perturbation." << std::endl;
-        covariances[i](d, d) += perturbation;
+        covariances[i].diag() += perturbation;
+        perturbation *= 10; // Slow, but we don't want to add too much.
       }
     }
   }