[mlpack-git] master: Adds functionality to solve #749 (9cb117f)

Fri Aug 5 13:58:10 EDT 2016

Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/4185aa07bbdef80ccdc78a3a3e479499058933db...931ec74894dd2fe0218d0d7dd77fc0ad9be0954d

>---------------------------------------------------------------

commit 9cb117f671f55186baddf38ce71107a2a3ae027f
Author: Yannis Mentekidis <mentekid at gmail.com>
Date:   Fri Aug 5 18:58:10 2016 +0100

    Adds functionality to solve #749


>---------------------------------------------------------------

9cb117f671f55186baddf38ce71107a2a3ae027f
 src/mlpack/core/dists/gamma_distribution.cpp | 99 ++++++++++++++++++++++++++++
 src/mlpack/core/dists/gamma_distribution.hpp | 73 ++++++++++++++++++++
 src/mlpack/tests/distribution_test.cpp       | 70 ++++++++++++++++++++
 3 files changed, 242 insertions(+)

diff --git a/src/mlpack/core/dists/gamma_distribution.cpp b/src/mlpack/core/dists/gamma_distribution.cpp
index 7b3e000..664aa36 100644
--- a/src/mlpack/core/dists/gamma_distribution.cpp
+++ b/src/mlpack/core/dists/gamma_distribution.cpp
@@ -23,6 +23,16 @@ GammaDistribution::GammaDistribution(const arma::mat& data,
   Train(data, tol);
 }
 
+GammaDistribution::GammaDistribution(const arma::vec& alpha, 
+                                     const arma::vec& beta)
+{
+  if (beta.n_elem != alpha.n_elem)
+    throw std::runtime_error("Alpha and beta vector dimensions mismatch.");
+
+  this->alpha = alpha;
+  this->beta = beta;
+}
+
 // Returns true if computation converged.
 inline bool GammaDistribution::Converged(const double aOld,
                                          const double aNew,
@@ -31,6 +41,7 @@ inline bool GammaDistribution::Converged(const double aOld,
   return (std::abs(aNew - aOld) / aNew) < tol;
 }
 
+
 // Fits an alpha and beta parameter to each dimension of the data.
 void GammaDistribution::Train(const arma::mat& rdata, const double tol)
 {
@@ -53,6 +64,13 @@ void GammaDistribution::Train(const arma::mat& rdata, const double tol)
   Train(logMeanxVec, meanLogxVec, meanxVec, tol);
 }
 
+// Fits an alpha and beta parameter according to observation probabilities.
+void GammaDistribution::Train(const arma::mat& observations, 
+                              const arma::vec& probabilities,
+                              const double tol)
+{
+}
+
 // Fits an alpha and beta parameter to each dimension of the data.
 void GammaDistribution::Train(const arma::vec& logMeanxVec, 
                               const arma::vec& meanLogxVec,
@@ -117,3 +135,84 @@ void GammaDistribution::Train(const arma::vec& logMeanxVec,
     beta(row) = meanx / aEst;
   }
 }
+
+// Returns the probability of the provided observations.
+void GammaDistribution::Probability(const arma::mat& observations, 
+                                    arma::vec& probabilities) const
+{
+  size_t numObs = observations.n_cols;
+
+  // Set all equal to 1 (multiplication neutral).
+  probabilities.ones(numObs);
+
+  // Compute denominator only once for each dimension.
+  arma::vec denominators(alpha.n_elem);
+  for (size_t d = 0; d < alpha.n_elem; ++d)
+    denominators(d) = std::tgamma(alpha(d)) * std::pow(beta(d), alpha(d));
+
+  // Compute probability of each observation.
+  for (size_t i = 0; i < numObs; ++i)
+  {
+    for (size_t d = 0; d < observations.n_rows; ++d)
+    {
+      // Compute probability using Multiplication Law.
+      probabilities(i) *= 
+        std::pow(observations(d, i), alpha(d) - 1) *
+        std::exp(-observations(d, i) / beta(d)) /
+        denominators(d);
+    }
+  }
+}
+
+// Returns the probability of one observation (x) for one of the Gamma's
+// dimensions.
+double GammaDistribution::Probability(double x, size_t dim) const
+{
+  return 
+    std::pow(x, alpha(dim) - 1) * std::exp(-x / beta(dim)) / 
+    (std::tgamma(alpha(dim)) * std::pow(beta(dim), alpha(dim)));
+}
+
+// Returns the log probability of the provided observations.
+void GammaDistribution::LogProbability(const arma::mat& observations, 
+                                       arma::vec& LogProbabilities) const
+{
+  size_t numObs = observations.n_cols;
+
+  // Set all equal to 0 (addition neutral).
+  LogProbabilities.zeros(numObs);
+
+  // Compute denominator only once for each dimension.
+  arma::vec denominators(alpha.n_elem);
+  for (size_t d = 0; d < alpha.n_elem; ++d)
+    denominators(d) = std::tgamma(alpha(d)) * std::pow(beta(d), alpha(d));
+
+  // Compute probability of each observation.
+  for (size_t i = 0; i < numObs; ++i)
+  {
+    for (size_t d = 0; d < observations.n_rows; ++d)
+    {
+      // Compute probability using Multiplication Law and Logarithm addition
+      // property.
+      LogProbabilities(i) += std::log( 
+        std::pow(observations(d, i), alpha(d) - 1) 
+        * std::exp(-observations(d, i) / beta(d)) 
+        / denominators(d));
+    }
+  }
+}
+
+// Returns a gamma-random d-dimensional vector.
+arma::vec GammaDistribution::Random() const
+{
+  arma::vec randVec(alpha.n_elem);
+
+  std::default_random_engine generator;
+  for (size_t d = 0; d < alpha.n_elem; ++d)
+  {
+    std::gamma_distribution<double> dist(alpha(d), beta(d));
+    randVec(d) = dist(generator);
+  }
+
+  return randVec;
+}
diff --git a/src/mlpack/core/dists/gamma_distribution.hpp b/src/mlpack/core/dists/gamma_distribution.hpp
index 2570289..e10d3e8 100644
--- a/src/mlpack/core/dists/gamma_distribution.hpp
+++ b/src/mlpack/core/dists/gamma_distribution.hpp
@@ -63,6 +63,14 @@ class GammaDistribution
     GammaDistribution(const arma::mat& data, const double tol = 1e-8);
 
     /**
+     * Construct the Gamma distribution given two vectors alpha and beta.
+     *
+     * @param alpha The vector of alphas, one per dimension.
+     * @param beta The vector of betas, one per dimension.
+     */
+    GammaDistribution(const arma::vec& alpha, const arma::vec& beta);
+
+    /**
      * Destructor.
      */
     ~GammaDistribution() {};
@@ -78,6 +86,19 @@ class GammaDistribution
      */
     void Train(const arma::mat& rdata, const double tol = 1e-8);
     
+    /** Fits an alpha and beta parameter according to observation probabilities.
+     * 
+     * @param observations The reference data, one observation per column
+     * @param probabilities The probability of each observation. One value per
+     *     column of the observations matrix.
+     * @param tol Convergence tolerance. This is *not* an absolute measure:
+     *    It will stop the approximation once the *change* in the value is 
+     *    smaller than tol.
+     */
+    void Train(const arma::mat& observations, 
+                                  const arma::vec& probabilities,
+                                  const double tol = 1e-8);
+    
     /**
      * This function trains (fits distribution parameters) to a dataset with
      * pre-computed statistics logMeanx, meanLogx, meanx for each dimension.
@@ -94,6 +115,58 @@ class GammaDistribution
                const arma::vec& meanxVec,
                const double tol = 1e-8);
 
+
+    /**
+     * This function returns the probability of a group of observations.
+     *
+     * The probability of the value x is
+     *
+     * \frac{x^(\alpha - 1)}{\Gamma(\alpha) * \beta^\alpha} * e ^
+     * {-\frac{x}{\beta}}
+     *
+     * for one dimension. This implementation assumes each dimension is
+     * independent, so the product rule is used.
+     *
+     * @param observations Matrix of observations, one per column.
+     * @param probabilities column vector of probabilities, one per observation.
+     */
+    void Probability(const arma::mat& observations, 
+                     arma::vec& Probabilities) const;
+
+    /*
+     * This is a shortcut to the Probability(arma::mat&, arma::vec&) function
+     * for when we want to evaluate only the probability of one dimension of the
+     * gamma.
+     *
+     * @param x The 1-dimensional observation.
+     * @param dim The dimension for which to calculate the probability
+     */
+    double Probability(double x, size_t dim) const;
+
+    /**
+     * This function returns the logarithm of the probability of a group of
+     * observations.
+     *
+     * The logarithm of the probability of a value x is
+     *
+     * log(\frac{x^(\alpha - 1)}{\Gamma(\alpha) * \beta^\alpha} * e ^
+     * {-\frac{x}{\beta}})
+     *
+     * for one dimension. This implementation assumes each dimension is
+     * independent, so the product rule is used.
+     *
+     * @param observations Matrix of observations, one per column.
+     * @param logProbabilities column vector of log probabilities, one per 
+     *     observation.
+     */
+    void LogProbability(const arma::mat& observations, 
+                     arma::vec& LogProbabilities) const;
+
+    /**
+     * This function returns an observation of this distribution
+     */
+    arma::vec Random() const;
+
     // Access to Gamma distribution parameters.
 
     //! Get the alpha parameter of the given dimension.
diff --git a/src/mlpack/tests/distribution_test.cpp b/src/mlpack/tests/distribution_test.cpp
index 44c1156..7a411ee 100644
--- a/src/mlpack/tests/distribution_test.cpp
+++ b/src/mlpack/tests/distribution_test.cpp
@@ -539,4 +539,74 @@ BOOST_AUTO_TEST_CASE(GammaDistributionTrainStatisticsTest)
   BOOST_REQUIRE_CLOSE(d1.Beta(0), d2.Beta(0), 1e-5);
 }
 
+BOOST_AUTO_TEST_CASE(GammaDistributionProbabilityTest)
+{
+  // Train two 1-dimensional distributions.
+  const arma::vec a1("2.0"), b1("0.9"), a2("3.1"), b2("1.4");
+  arma::mat x1("2.0"), x2("2.94");
+  arma::vec prob1, prob2;
+
+  // Evaluated at wolfram|alpha
+  GammaDistribution d1(a1, b1);
+  d1.Probability(x1, prob1);
+  BOOST_REQUIRE_CLOSE(prob1(0), 0.267575, 1e-3);
+
+  // Evaluated at wolfram|alpha
+  GammaDistribution d2(a2, b2);
+  d2.Probability(x2, prob2);
+  BOOST_REQUIRE_CLOSE(prob2(0), 0.189043, 1e-3);
+
+  // Check that the overload that returns the probability for 1 dimension
+  // agrees.
+  BOOST_REQUIRE_CLOSE(prob2(0), d2.Probability(2.94, 0), 1e-5);
+
+  // Combine into one 2-dimensional distribution.
+  const arma::vec a3("2.0 3.1"), b3("0.9 1.4");
+  arma::mat x3(2, 2);
+  x3
+    << 2.0 << 2.94 << arma::endr
+    << 2.0 << 2.94;
+  arma::vec prob3;
+
+  // Expect that the 2-dimensional distribution returns the product of the
+  // 1-dimensional distributions (evaluated at wolfram|alpha).
+  GammaDistribution d3(a3, b3);
+  d3.Probability(x3, prob3);
+  BOOST_REQUIRE_CLOSE(prob3(0), 0.04408, 1e-2);
+  BOOST_REQUIRE_CLOSE(prob3(1), 0.026165, 1e-2);
+}
+
+BOOST_AUTO_TEST_CASE(GammaDistributionLogProbabilityTest)
+{
+  // Train two 1-dimensional distributions.
+  const arma::vec a1("2.0"), b1("0.9"), a2("3.1"), b2("1.4");
+  arma::mat x1("2.0"), x2("2.94");
+  arma::vec prob1, prob2;
+
+  // Evaluated at wolfram|alpha
+  GammaDistribution d1(a1, b1);
+  d1.LogProbability(x1, prob1);
+  BOOST_REQUIRE_CLOSE(prob1(0), std::log(0.267575), 1e-3);
+
+  // Evaluated at wolfram|alpha
+  GammaDistribution d2(a2, b2);
+  d2.LogProbability(x2, prob2);
+  BOOST_REQUIRE_CLOSE(prob2(0), std::log(0.189043), 1e-3);
+
+  // Combine into one 2-dimensional distribution.
+  const arma::vec a3("2.0 3.1"), b3("0.9 1.4");
+  arma::mat x3(2, 2);
+  x3
+    << 2.0 << 2.94 << arma::endr
+    << 2.0 << 2.94;
+  arma::vec prob3;
+
+  // Expect that the 2-dimensional distribution returns the product of the
+  // 1-dimensional distributions (evaluated at wolfram|alpha).
+  GammaDistribution d3(a3, b3);
+  d3.LogProbability(x3, prob3);
+  BOOST_REQUIRE_CLOSE(prob3(0), std::log(0.04408), 1e-3);
+  BOOST_REQUIRE_CLOSE(prob3(1), std::log(0.026165), 1e-3);
+}
+
 BOOST_AUTO_TEST_SUITE_END();