[mlpack-git] master: distributions now offer LogProbability() in addition to Probability() (cb4df6f)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Fri Jan 16 21:04:35 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/196c3f05730874f418e0c113695c88ee790aedc6...cb4df6fd89c9b2111d4d05633ac8b2c8be4dbeda

>---------------------------------------------------------------

commit cb4df6fd89c9b2111d4d05633ac8b2c8be4dbeda
Author: Stephen Tu <tu.stephenl at gmail.com>
Date:   Fri Jan 16 18:03:56 2015 -0800

    distributions now offer LogProbability() in addition to Probability()
    
    this will encourage consumers (e.g. HMM) to work in log space.


>---------------------------------------------------------------

cb4df6fd89c9b2111d4d05633ac8b2c8be4dbeda
 src/mlpack/core/dists/discrete_distribution.hpp   | 18 ++++++++-
 src/mlpack/core/dists/gaussian_distribution.cpp   | 29 ++++----------
 src/mlpack/core/dists/gaussian_distribution.hpp   | 46 +++++++++++++++++------
 src/mlpack/core/dists/laplace_distribution.cpp    |  8 ++--
 src/mlpack/core/dists/laplace_distribution.hpp    | 14 +++++--
 src/mlpack/core/dists/regression_distribution.hpp | 15 ++++++--
 6 files changed, 85 insertions(+), 45 deletions(-)

diff --git a/src/mlpack/core/dists/discrete_distribution.hpp b/src/mlpack/core/dists/discrete_distribution.hpp
index 1f01058..fb3c954 100644
--- a/src/mlpack/core/dists/discrete_distribution.hpp
+++ b/src/mlpack/core/dists/discrete_distribution.hpp
@@ -5,8 +5,8 @@
  * Implementation of the discrete distribution, where each discrete observation
  * has a given probability.
  */
-#ifndef __MLPACK_METHODS_HMM_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
-#define __MLPACK_METHODS_HMM_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
+#ifndef __MLPACK_CORE_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
+#define __MLPACK_CORE_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
 
 #include <mlpack/core.hpp>
 
@@ -105,6 +105,20 @@ class DiscreteDistribution
   }
 
   /**
+   * Return the log probability of the given observation.  If the observation is
+   * greater than the number of possible observations, then a crash will
+   * probably occur -- bounds checking is not performed.
+   *
+   * @param observation Observation to return the log probability of.
+   * @return Log probability of the given observation.
+   */
+  double LogProbability(const arma::vec& observation) const
+  {
+    // TODO: consider storing log_probabilities instead
+    return log(Probability(observation));
+  }
+
+  /**
    * Return a randomly generated observation (one-dimensional vector; one
    * observation) according to the probability distribution defined by this
    * object.
diff --git a/src/mlpack/core/dists/gaussian_distribution.cpp b/src/mlpack/core/dists/gaussian_distribution.cpp
index 35c2d12..cb37e79 100644
--- a/src/mlpack/core/dists/gaussian_distribution.cpp
+++ b/src/mlpack/core/dists/gaussian_distribution.cpp
@@ -10,30 +10,17 @@
 using namespace mlpack;
 using namespace mlpack::distribution;
 
-/**
- * Calculates the multivariate Gaussian probability density function for each
- * data point (column) in the given matrix, with respect to the given mean and
- * variance.
- *
- * @param x List of observations.
- * @param mean Mean of multivariate Gaussian.
- * @param cov Covariance of multivariate Gaussian.
- * @param probabilities Output probabilities for each input observation.
- */
-
-double GaussianDistribution::Probability(const arma::vec& observation) const
+double GaussianDistribution::LogProbability(const arma::vec& observation) const
 {
-  arma::vec diff = mean - observation;
-
-  arma::vec exponent = -0.5 * (trans(diff) * inv(covariance) * diff);
-
-  // TODO: What if det(cov) < 0?
-  return pow(2 * M_PI, (double) observation.n_elem / -2.0) *
-      pow(det(covariance), -0.5) * exp(exponent[0]);
+  const size_t k = observation.n_elem;
+  double logdetsigma = 0;
+  double sign = 0.;
+  arma::log_det(logdetsigma, sign, covariance);
+  const arma::vec diff = mean - observation;
+  const arma::vec v = (diff.t() * arma::inv(covariance) * diff);
+  return -0.5 * k * log2pi - 0.5 * logdetsigma - 0.5 * v(0);
 }
 
-
-
 arma::vec GaussianDistribution::Random() const
 {
   // Should we store chol(covariance) for easier calculation later?
diff --git a/src/mlpack/core/dists/gaussian_distribution.hpp b/src/mlpack/core/dists/gaussian_distribution.hpp
index 7b8716b..0e7ea24 100644
--- a/src/mlpack/core/dists/gaussian_distribution.hpp
+++ b/src/mlpack/core/dists/gaussian_distribution.hpp
@@ -5,8 +5,8 @@
  *
  * Implementation of the Gaussian distribution.
  */
-#ifndef __MLPACK_METHODS_HMM_DISTRIBUTIONS_GAUSSIAN_DISTRIBUTION_HPP
-#define __MLPACK_METHODS_HMM_DISTRIBUTIONS_GAUSSIAN_DISTRIBUTION_HPP
+#ifndef __MLPACK_CORE_DISTRIBUTIONS_GAUSSIAN_DISTRIBUTION_HPP
+#define __MLPACK_CORE_DISTRIBUTIONS_GAUSSIAN_DISTRIBUTION_HPP
 
 #include <mlpack/core.hpp>
 
@@ -24,6 +24,9 @@ class GaussianDistribution
   //! Covariance of the distribution.
   arma::mat covariance;
 
+  //! log(2pi)
+  static const constexpr double log2pi = 1.83787706640934533908193770912475883;
+
  public:
   /**
    * Default constructor, which creates a Gaussian with zero dimension.
@@ -51,7 +54,15 @@ class GaussianDistribution
   /**
    * Return the probability of the given observation.
    */
-  double Probability(const arma::vec& observation) const;
+  double Probability(const arma::vec& observation) const
+  {
+    return exp(LogProbability(observation));
+  }
+
+  /**
+   * Return the log probability of the given observation.
+   */
+  double LogProbability(const arma::vec& observation) const;
 
   /**
    * Calculates the multivariate Gaussian probability density function for each
@@ -60,7 +71,14 @@ class GaussianDistribution
    * @param x List of observations.
    * @param probabilities Output probabilities for each input observation.
    */
-  void Probability(const arma::mat& x, arma::vec& probabilities) const;
+  void Probability(const arma::mat& x, arma::vec& probabilities) const
+  {
+    arma::vec logProbabilities;
+    LogProbability(x, logProbabilities);
+    probabilities = arma::exp(logProbabilities);
+  }
+
+  void LogProbability(const arma::mat& x, arma::vec& logProbabilities) const;
 
   /**
    * Return a randomly generated observation according to the probability
@@ -122,14 +140,14 @@ class GaussianDistribution
 };
 
 /**
-* Calculates the multivariate Gaussian probability density function for each
+* Calculates the multivariate Gaussian log probability density function for each
 * data point (column) in the given matrix
 *
 * @param x List of observations.
-* @param probabilities Output probabilities for each input observation.
+* @param probabilities Output log probabilities for each input observation.
 */
-inline void GaussianDistribution::Probability(const arma::mat& x,
-                                              arma::vec& probabilities) const
+inline void GaussianDistribution::LogProbability(const arma::mat& x,
+                                                 arma::vec& logProbabilities) const
 {
   // Column i of 'diffs' is the difference between x.col(i) and the mean.
   arma::mat diffs = x - (mean * arma::ones<arma::rowvec>(x.n_cols));
@@ -139,12 +157,16 @@ inline void GaussianDistribution::Probability(const arma::mat& x,
   // the right hand part of the equation (instead of the left side) so that
   // later we are referencing columns, not rows -- that is faster.
   arma::mat rhs = -0.5 * inv(covariance) * diffs;
-  arma::vec exponents(diffs.n_cols); // We will now fill this.
+  arma::vec logExponents(diffs.n_cols); // We will now fill this.
   for (size_t i = 0; i < diffs.n_cols; i++)
-    exponents(i) = exp(accu(diffs.unsafe_col(i) % rhs.unsafe_col(i)));
+    logExponents(i) = accu(diffs.unsafe_col(i) % rhs.unsafe_col(i));
+
+  double logdetsigma = 0;
+  double sign = 0.;
+  arma::log_det(logdetsigma, sign, covariance);
+  const size_t k = x.n_rows;
 
-  probabilities = pow(2 * M_PI, (double) mean.n_elem / -2.0) *
-  pow(arma::det(covariance), -0.5) * exponents;
+  logProbabilities = -0.5 * k * log2pi - 0.5 * logdetsigma + logExponents;
 }
 
 
diff --git a/src/mlpack/core/dists/laplace_distribution.cpp b/src/mlpack/core/dists/laplace_distribution.cpp
index 992b00f..e6a5e02 100644
--- a/src/mlpack/core/dists/laplace_distribution.cpp
+++ b/src/mlpack/core/dists/laplace_distribution.cpp
@@ -12,12 +12,12 @@ using namespace mlpack;
 using namespace mlpack::distribution;
 
 /**
- * Return the probability of the given observation.
+ * Return the log probability of the given observation.
  */
-double LaplaceDistribution::Probability(const arma::vec& observation) const
+double LaplaceDistribution::LogProbability(const arma::vec& observation) const
 {
-  // Evaluate the PDF of the Laplace distribution to determine the probability.
-  return (0.5 / scale) * std::exp(arma::norm(observation - mean, 2) / scale);
+  // Evaluate the PDF of the Laplace distribution to determine the log probability.
+  return -log(2. * scale) - arma::norm(observation - mean, 2) / scale;
 }
 
 /**
diff --git a/src/mlpack/core/dists/laplace_distribution.hpp b/src/mlpack/core/dists/laplace_distribution.hpp
index 83e1980..019fa4d 100644
--- a/src/mlpack/core/dists/laplace_distribution.hpp
+++ b/src/mlpack/core/dists/laplace_distribution.hpp
@@ -5,8 +5,8 @@
  * Laplace (double exponential) distribution used in SA.
  */
 
-#ifndef __MLPACK_CORE_OPTIMIZER_SA_LAPLACE_DISTRIBUTION_HPP
-#define __MLPACK_CORE_OPTIMIZER_SA_LAPLACE_DISTRIBUTION_HPP
+#ifndef __MLPACK_CORE_DISTRIBUTIONS_LAPLACE_DISTRIBUTION_HPP
+#define __MLPACK_CORE_DISTRIBUTIONS_LAPLACE_DISTRIBUTION_HPP
 
 namespace mlpack {
 namespace distribution {
@@ -75,7 +75,15 @@ class LaplaceDistribution
   /**
    * Return the probability of the given observation.
    */
-  double Probability(const arma::vec& observation) const;
+  double Probability(const arma::vec& observation) const
+  {
+    return exp(LogProbability(observation));
+  }
+
+  /**
+   * Return the log probability of the given observation.
+   */
+  double LogProbability(const arma::vec& observation) const;
 
   /**
    * Return a randomly generated observation according to the probability
diff --git a/src/mlpack/core/dists/regression_distribution.hpp b/src/mlpack/core/dists/regression_distribution.hpp
index bddeb1f..6cda681 100644
--- a/src/mlpack/core/dists/regression_distribution.hpp
+++ b/src/mlpack/core/dists/regression_distribution.hpp
@@ -4,8 +4,8 @@
  *
  * Implementation of conditional Gaussian distribution for HMM regression (HMMR)
  */
-#ifndef __MLPACK_METHODS_HMM_DISTRIBUTIONS_REGRESSION_DISTRIBUTION_HPP
-#define __MLPACK_METHODS_HMM_DISTRIBUTIONS_REGRESSION_DISTRIBUTION_HPP
+#ifndef __MLPACK_CORE_DISTRIBUTIONS_REGRESSION_DISTRIBUTION_HPP
+#define __MLPACK_CORE_DISTRIBUTIONS_REGRESSION_DISTRIBUTION_HPP
 
 #include <mlpack/core.hpp>
 #include <mlpack/core/dists/gaussian_distribution.hpp>
@@ -81,6 +81,15 @@ class RegressionDistribution
   double Probability(const arma::vec& observation) const;
 
   /**
+  * Evaluate log probability density function of given observation
+  *
+  * @param observation point to evaluate log probability at
+  */
+  double LogProbability(const arma::vec& observation) const {
+    return log(Probability(observation));
+  }
+
+  /**
    * Calculate y_i for each data point in points.
    *
    * @param points the data points to calculate with.
@@ -92,7 +101,7 @@ class RegressionDistribution
   const arma::vec& Parameters() const { return rf.Parameters(); }
 
   //! Return the dimensionality
-    size_t Dimensionality() const { return rf.Parameters().n_elem; }
+  size_t Dimensionality() const { return rf.Parameters().n_elem; }
 };
 
 



More information about the mlpack-git mailing list