[mlpack-git] master: Adds functionality to solve #749 (9cb117f)
gitdub at mlpack.org
gitdub at mlpack.org
Fri Aug 5 13:58:10 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/4185aa07bbdef80ccdc78a3a3e479499058933db...931ec74894dd2fe0218d0d7dd77fc0ad9be0954d
>---------------------------------------------------------------
commit 9cb117f671f55186baddf38ce71107a2a3ae027f
Author: Yannis Mentekidis <mentekid at gmail.com>
Date: Fri Aug 5 18:58:10 2016 +0100
Adds functionality to solve #749
>---------------------------------------------------------------
9cb117f671f55186baddf38ce71107a2a3ae027f
src/mlpack/core/dists/gamma_distribution.cpp | 99 ++++++++++++++++++++++++++++
src/mlpack/core/dists/gamma_distribution.hpp | 73 ++++++++++++++++++++
src/mlpack/tests/distribution_test.cpp | 70 ++++++++++++++++++++
3 files changed, 242 insertions(+)
diff --git a/src/mlpack/core/dists/gamma_distribution.cpp b/src/mlpack/core/dists/gamma_distribution.cpp
index 7b3e000..664aa36 100644
--- a/src/mlpack/core/dists/gamma_distribution.cpp
+++ b/src/mlpack/core/dists/gamma_distribution.cpp
@@ -23,6 +23,16 @@ GammaDistribution::GammaDistribution(const arma::mat& data,
Train(data, tol);
}
+GammaDistribution::GammaDistribution(const arma::vec& alpha,
+ const arma::vec& beta)
+{
+ if (beta.n_elem != alpha.n_elem)
+ throw std::runtime_error("Alpha and beta vector dimensions mismatch.");
+
+ this->alpha = alpha;
+ this->beta = beta;
+}
+
// Returns true if computation converged.
inline bool GammaDistribution::Converged(const double aOld,
const double aNew,
@@ -31,6 +41,7 @@ inline bool GammaDistribution::Converged(const double aOld,
return (std::abs(aNew - aOld) / aNew) < tol;
}
+
// Fits an alpha and beta parameter to each dimension of the data.
void GammaDistribution::Train(const arma::mat& rdata, const double tol)
{
@@ -53,6 +64,13 @@ void GammaDistribution::Train(const arma::mat& rdata, const double tol)
Train(logMeanxVec, meanLogxVec, meanxVec, tol);
}
+// Fits an alpha and beta parameter according to observation probabilities.
+void GammaDistribution::Train(const arma::mat& observations,
+ const arma::vec& probabilities,
+ const double tol)
+{
+}
+
// Fits an alpha and beta parameter to each dimension of the data.
void GammaDistribution::Train(const arma::vec& logMeanxVec,
const arma::vec& meanLogxVec,
@@ -117,3 +135,84 @@ void GammaDistribution::Train(const arma::vec& logMeanxVec,
beta(row) = meanx / aEst;
}
}
+
+// Returns the probability of the provided observations.
+void GammaDistribution::Probability(const arma::mat& observations,
+ arma::vec& probabilities) const
+{
+ size_t numObs = observations.n_cols;
+
+ // Set all equal to 1 (multiplication neutral).
+ probabilities.ones(numObs);
+
+ // Compute denominator only once for each dimension.
+ arma::vec denominators(alpha.n_elem);
+ for (size_t d = 0; d < alpha.n_elem; ++d)
+ denominators(d) = std::tgamma(alpha(d)) * std::pow(beta(d), alpha(d));
+
+ // Compute probability of each observation.
+ for (size_t i = 0; i < numObs; ++i)
+ {
+ for (size_t d = 0; d < observations.n_rows; ++d)
+ {
+ // Compute probability using Multiplication Law.
+ probabilities(i) *=
+ std::pow(observations(d, i), alpha(d) - 1) *
+ std::exp(-observations(d, i) / beta(d)) /
+ denominators(d);
+ }
+ }
+}
+
+// Returns the probability of one observation (x) for one of the Gamma's
+// dimensions.
+double GammaDistribution::Probability(double x, size_t dim) const
+{
+ return
+ std::pow(x, alpha(dim) - 1) * std::exp(-x / beta(dim)) /
+ (std::tgamma(alpha(dim)) * std::pow(beta(dim), alpha(dim)));
+}
+
+// Returns the log probability of the provided observations.
+void GammaDistribution::LogProbability(const arma::mat& observations,
+ arma::vec& LogProbabilities) const
+{
+ size_t numObs = observations.n_cols;
+
+ // Set all equal to 0 (addition neutral).
+ LogProbabilities.zeros(numObs);
+
+ // Compute denominator only once for each dimension.
+ arma::vec denominators(alpha.n_elem);
+ for (size_t d = 0; d < alpha.n_elem; ++d)
+ denominators(d) = std::tgamma(alpha(d)) * std::pow(beta(d), alpha(d));
+
+ // Compute probability of each observation.
+ for (size_t i = 0; i < numObs; ++i)
+ {
+ for (size_t d = 0; d < observations.n_rows; ++d)
+ {
+ // Compute probability using Multiplication Law and Logarithm addition
+ // property.
+ LogProbabilities(i) += std::log(
+ std::pow(observations(d, i), alpha(d) - 1)
+ * std::exp(-observations(d, i) / beta(d))
+ / denominators(d));
+ }
+ }
+}
+
+// Returns a gamma-random d-dimensional vector.
+arma::vec GammaDistribution::Random() const
+{
+ arma::vec randVec(alpha.n_elem);
+
+ std::default_random_engine generator;
+ for (size_t d = 0; d < alpha.n_elem; ++d)
+ {
+ std::gamma_distribution<double> dist(alpha(d), beta(d));
+ randVec(d) = dist(generator);
+ }
+
+ return randVec;
+}
diff --git a/src/mlpack/core/dists/gamma_distribution.hpp b/src/mlpack/core/dists/gamma_distribution.hpp
index 2570289..e10d3e8 100644
--- a/src/mlpack/core/dists/gamma_distribution.hpp
+++ b/src/mlpack/core/dists/gamma_distribution.hpp
@@ -63,6 +63,14 @@ class GammaDistribution
GammaDistribution(const arma::mat& data, const double tol = 1e-8);
/**
+ * Construct the Gamma distribution given two vectors alpha and beta.
+ *
+ * @param alpha The vector of alphas, one per dimension.
+ * @param beta The vector of betas, one per dimension.
+ */
+ GammaDistribution(const arma::vec& alpha, const arma::vec& beta);
+
+ /**
* Destructor.
*/
~GammaDistribution() {};
@@ -78,6 +86,19 @@ class GammaDistribution
*/
void Train(const arma::mat& rdata, const double tol = 1e-8);
+ /** Fits an alpha and beta parameter according to observation probabilities.
+ *
+ * @param observations The reference data, one observation per column
+ * @param probabilities The probability of each observation. One value per
+ * column of the observations matrix.
+ * @param tol Convergence tolerance. This is *not* an absolute measure:
+ * It will stop the approximation once the *change* in the value is
+ * smaller than tol.
+ */
+ void Train(const arma::mat& observations,
+ const arma::vec& probabilities,
+ const double tol = 1e-8);
+
/**
* This function trains (fits distribution parameters) to a dataset with
* pre-computed statistics logMeanx, meanLogx, meanx for each dimension.
@@ -94,6 +115,58 @@ class GammaDistribution
const arma::vec& meanxVec,
const double tol = 1e-8);
+
+ /**
+ * This function returns the probability of a group of observations.
+ *
+ * The probability of the value x is
+ *
+ * \frac{x^(\alpha - 1)}{\Gamma(\alpha) * \beta^\alpha} * e ^
+ * {-\frac{x}{\beta}}
+ *
+ * for one dimension. This implementation assumes each dimension is
+ * independent, so the product rule is used.
+ *
+ * @param observations Matrix of observations, one per column.
+ * @param probabilities column vector of probabilities, one per observation.
+ */
+ void Probability(const arma::mat& observations,
+ arma::vec& Probabilities) const;
+
+ /*
+ * This is a shortcut to the Probability(arma::mat&, arma::vec&) function
+ * for when we want to evaluate only the probability of one dimension of the
+ * gamma.
+ *
+ * @param x The 1-dimensional observation.
+ * @param dim The dimension for which to calculate the probability
+ */
+ double Probability(double x, size_t dim) const;
+
+ /**
+ * This function returns the logarithm of the probability of a group of
+ * observations.
+ *
+ * The logarithm of the probability of a value x is
+ *
+ * log(\frac{x^(\alpha - 1)}{\Gamma(\alpha) * \beta^\alpha} * e ^
+ * {-\frac{x}{\beta}})
+ *
+ * for one dimension. This implementation assumes each dimension is
+ * independent, so the product rule is used.
+ *
+ * @param observations Matrix of observations, one per column.
+ * @param logProbabilities column vector of log probabilities, one per
+ * observation.
+ */
+ void LogProbability(const arma::mat& observations,
+ arma::vec& LogProbabilities) const;
+
+ /**
+ * This function returns an observation of this distribution
+ */
+ arma::vec Random() const;
+
// Access to Gamma distribution parameters.
//! Get the alpha parameter of the given dimension.
diff --git a/src/mlpack/tests/distribution_test.cpp b/src/mlpack/tests/distribution_test.cpp
index 44c1156..7a411ee 100644
--- a/src/mlpack/tests/distribution_test.cpp
+++ b/src/mlpack/tests/distribution_test.cpp
@@ -539,4 +539,74 @@ BOOST_AUTO_TEST_CASE(GammaDistributionTrainStatisticsTest)
BOOST_REQUIRE_CLOSE(d1.Beta(0), d2.Beta(0), 1e-5);
}
+BOOST_AUTO_TEST_CASE(GammaDistributionProbabilityTest)
+{
+ // Train two 1-dimensional distributions.
+ const arma::vec a1("2.0"), b1("0.9"), a2("3.1"), b2("1.4");
+ arma::mat x1("2.0"), x2("2.94");
+ arma::vec prob1, prob2;
+
+ // Evaluated at wolfram|alpha
+ GammaDistribution d1(a1, b1);
+ d1.Probability(x1, prob1);
+ BOOST_REQUIRE_CLOSE(prob1(0), 0.267575, 1e-3);
+
+ // Evaluated at wolfram|alpha
+ GammaDistribution d2(a2, b2);
+ d2.Probability(x2, prob2);
+ BOOST_REQUIRE_CLOSE(prob2(0), 0.189043, 1e-3);
+
+ // Check that the overload that returns the probability for 1 dimension
+ // agrees.
+ BOOST_REQUIRE_CLOSE(prob2(0), d2.Probability(2.94, 0), 1e-5);
+
+ // Combine into one 2-dimensional distribution.
+ const arma::vec a3("2.0 3.1"), b3("0.9 1.4");
+ arma::mat x3(2, 2);
+ x3
+ << 2.0 << 2.94 << arma::endr
+ << 2.0 << 2.94;
+ arma::vec prob3;
+
+ // Expect that the 2-dimensional distribution returns the product of the
+ // 1-dimensional distributions (evaluated at wolfram|alpha).
+ GammaDistribution d3(a3, b3);
+ d3.Probability(x3, prob3);
+ BOOST_REQUIRE_CLOSE(prob3(0), 0.04408, 1e-2);
+ BOOST_REQUIRE_CLOSE(prob3(1), 0.026165, 1e-2);
+}
+
+BOOST_AUTO_TEST_CASE(GammaDistributionLogProbabilityTest)
+{
+ // Train two 1-dimensional distributions.
+ const arma::vec a1("2.0"), b1("0.9"), a2("3.1"), b2("1.4");
+ arma::mat x1("2.0"), x2("2.94");
+ arma::vec prob1, prob2;
+
+ // Evaluated at wolfram|alpha
+ GammaDistribution d1(a1, b1);
+ d1.LogProbability(x1, prob1);
+ BOOST_REQUIRE_CLOSE(prob1(0), std::log(0.267575), 1e-3);
+
+ // Evaluated at wolfram|alpha
+ GammaDistribution d2(a2, b2);
+ d2.LogProbability(x2, prob2);
+ BOOST_REQUIRE_CLOSE(prob2(0), std::log(0.189043), 1e-3);
+
+ // Combine into one 2-dimensional distribution.
+ const arma::vec a3("2.0 3.1"), b3("0.9 1.4");
+ arma::mat x3(2, 2);
+ x3
+ << 2.0 << 2.94 << arma::endr
+ << 2.0 << 2.94;
+ arma::vec prob3;
+
+ // Expect that the 2-dimensional distribution returns the product of the
+ // 1-dimensional distributions (evaluated at wolfram|alpha).
+ GammaDistribution d3(a3, b3);
+ d3.LogProbability(x3, prob3);
+ BOOST_REQUIRE_CLOSE(prob3(0), std::log(0.04408), 1e-3);
+ BOOST_REQUIRE_CLOSE(prob3(1), std::log(0.026165), 1e-3);
+}
+
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-git
mailing list