[mlpack-git] master: Minor style cleanups and changes: (f364cba)
gitdub at mlpack.org
gitdub at mlpack.org
Mon Jul 25 10:11:12 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/5528947e1b32c9411d584986866cb3650a3a5303...f364cba65c4e7a608662f6a48afe103c7e492ec7
>---------------------------------------------------------------
commit f364cba65c4e7a608662f6a48afe103c7e492ec7
Author: Ryan Curtin <ryan at ratml.org>
Date: Mon Jul 25 10:11:12 2016 -0400
Minor style cleanups and changes:
- Change Alpha() to Alpha(i) and Beta() to Beta(i) (i.e. hide underlying
representation in case we ever change it later).
- Add constructor that can train.
- Modify default constructor to accept a dimensionality.
- Add Dimensionality() for accessing the number of dimensions in the
distribution.
>---------------------------------------------------------------
f364cba65c4e7a608662f6a48afe103c7e492ec7
src/mlpack/core/dists/gamma_distribution.cpp | 26 ++++++----
src/mlpack/core/dists/gamma_distribution.hpp | 72 ++++++++++++++++++++++------
src/mlpack/tests/distribution_test.cpp | 35 ++++++++++----
3 files changed, 103 insertions(+), 30 deletions(-)
diff --git a/src/mlpack/core/dists/gamma_distribution.cpp b/src/mlpack/core/dists/gamma_distribution.cpp
index c014077..8cfb040 100644
--- a/src/mlpack/core/dists/gamma_distribution.cpp
+++ b/src/mlpack/core/dists/gamma_distribution.cpp
@@ -6,13 +6,25 @@
*/
#include "gamma_distribution.hpp"
#include <boost/math/special_functions/digamma.hpp>
-//#include <boost/math/special_functions/trigamma.hpp> // Moved to prereqs.hpp
using namespace mlpack;
using namespace mlpack::distribution;
+GammaDistribution::GammaDistribution(const size_t dimensionality)
+{
+ // Initialize distribution.
+ alpha.zeros(dimensionality);
+ beta.zeros(dimensionality);
+}
+
+GammaDistribution::GammaDistribution(const arma::mat& data,
+ const double tol)
+{
+ Train(data, tol);
+}
+
// Returns true if computation converged.
-inline bool GammaDistribution::converged(const double aOld,
+inline bool GammaDistribution::Converged(const double aOld,
const double aNew,
const double tol)
{
@@ -22,7 +34,6 @@ inline bool GammaDistribution::converged(const double aOld,
// Fits an alpha and beta parameter to each dimension of the data.
void GammaDistribution::Train(const arma::mat& rdata, const double tol)
{
-
// If fittingSet is empty, nothing to do.
if (arma::size(rdata) == arma::size(arma::mat()))
return;
@@ -44,7 +55,6 @@ void GammaDistribution::Train(const arma::mat& rdata, const double tol)
// Treat each dimension (i.e. row) independently.
for (size_t row = 0; row < rdata.n_rows; ++row)
{
-
// Statistics for this row.
const double meanLogx = meanLogxVec(row);
const double meanx = meanxVec(row);
@@ -69,12 +79,12 @@ void GammaDistribution::Train(const arma::mat& rdata, const double tol)
// Protect against nan values (aEst will be passed to logarithm).
if (aEst <= 0)
- throw std::logic_error("GammaDistribution parameter alpha will be <=0");
+ throw std::logic_error("GammaDistribution::Train(): estimated invalid "
+ "negative value for parameter alpha!");
- } while (! converged(aEst, aOld, tol) );
+ } while (!Converged(aEst, aOld, tol));
alpha(row) = aEst;
- beta(row) = meanx/aEst;
+ beta(row) = meanx / aEst;
}
- return;
}
diff --git a/src/mlpack/core/dists/gamma_distribution.hpp b/src/mlpack/core/dists/gamma_distribution.hpp
index 5763180..698e7b4 100644
--- a/src/mlpack/core/dists/gamma_distribution.hpp
+++ b/src/mlpack/core/dists/gamma_distribution.hpp
@@ -10,24 +10,57 @@
* Based on "Estimating a Gamma Distribution" by Thomas P. Minka:
* research.microsoft.com/~minka/papers/minka-gamma.pdf
*/
-
#ifndef _MLPACK_CORE_DISTRIBUTIONS_GAMMA_DISTRIBUTION_HPP
#define _MLPACK_CORE_DISTRIBUTIONS_GAMMA_DISTRIBUTION_HPP
#include <mlpack/core.hpp>
-namespace mlpack{
-namespace distribution{
+
+namespace mlpack {
+namespace distribution {
/**
- * Class for fitting the Gamma Distribution to a dataset.
+ * This class represents the Gamma distribution. It supports training a Gamma
+ * distribution on a given dataset and accessing the fitted alpha and beta
+ * parameters.
+ *
+ * This class supports multidimensional Gamma distributions; however, it is
+ * assumed that each dimension is independent; therefore, a multidimensional
+ * Gamma distribution here may be seen as a set of independent
+ * single-dimensional Gamma distributions---and the parameters are estimated
+ * under this assumption.
+ *
+ * The estimation algorithm used can be found in the following paper:
+ *
+ * @code
+ * @techreport{minka2002estimating,
+ * title={Estimating a {G}amma distribution},
+ * author={Minka, Thomas P.},
+ * institution={Microsoft Research},
+ * address={Cambridge, U.K.},
+ * year={2002}
+ * }
+ * @endcode
*/
class GammaDistribution
{
public:
/**
- * Empty constructor.
+ * Construct the Gamma distribution with the given number of dimensions
+ * (default 0); each parameter will be initialized to 0.
+ *
+ * @param dimensionality Number of dimensions.
+ */
+ GammaDistribution(const size_t dimensionality = 0);
+
+ /**
+ * Construct the Gamma distribution, training on the given data.
+ *
+ * @param data Data to train the distribution on.
+ * @param tol Convergence tolerance. This is *not* an absolute measure:
+ * It will stop the approximation once the *change* in the value is
+ * smaller than tol.
*/
- GammaDistribution() { /* Nothing to do. */ };
+ GammaDistribution(const arma::mat& data, const double tol = 1e-8);
/**
* Destructor.
@@ -45,15 +78,26 @@ class GammaDistribution
*/
void Train(const arma::mat& rdata, const double tol = 1e-8);
- // Access to Gamma Distribution Parameters.
- /* Get alpha parameters of each dimension */
- arma::Col<double>& Alpha(void) { return alpha; };
- /* Get beta parameters of each dimension */
- arma::Col<double>& Beta(void) { return beta; };
+ // Access to Gamma distribution parameters.
+
+ //! Get the alpha parameter of the given dimension.
+ double Alpha(const size_t dim) const { return alpha[dim]; }
+ //! Modify the alpha parameter of the given dimension.
+ double& Alpha(const size_t dim) { return alpha[dim]; }
+
+ //! Get the beta parameter of the given dimension.
+ double Beta(const size_t dim) const { return beta[dim]; }
+ //! Modify the beta parameter of the given dimension.
+ double& Beta(const size_t dim) { return beta[dim]; }
+
+ //! Get the dimensionality of the distribution.
+ size_t Dimensionality() const { return alpha.n_elem; }
private:
- arma::Col<double> alpha; // Array of fitted alphas.
- arma::Col<double> beta; // Array of fitted betas.
+ //! Array of fitted alphas.
+ arma::vec alpha;
+ //! Array of fitted betas.
+ arma::vec beta;
/**
* This is a small function that returns true if the update of alpha is smaller
@@ -64,7 +108,7 @@ class GammaDistribution
* @param tol Convergence tolerance. Relative measure (see documentation of
* GammaDistribution::Train)
*/
- inline bool converged(const double aOld,
+ inline bool Converged(const double aOld,
const double aNew,
const double tol);
};
diff --git a/src/mlpack/tests/distribution_test.cpp b/src/mlpack/tests/distribution_test.cpp
index 3862548..68c5754 100644
--- a/src/mlpack/tests/distribution_test.cpp
+++ b/src/mlpack/tests/distribution_test.cpp
@@ -416,8 +416,8 @@ BOOST_AUTO_TEST_CASE(GammaDistributionTrainTest)
gDist.Train(rdata);
// Training must estimate d pairs of alpha and beta parameters.
- BOOST_REQUIRE_EQUAL(gDist.Alpha().n_elem, d);
- BOOST_REQUIRE_EQUAL(gDist.Beta().n_elem, d);
+ BOOST_REQUIRE_EQUAL(gDist.Dimensionality(), d);
+ BOOST_REQUIRE_EQUAL(gDist.Dimensionality(), d);
// Create a N' x d' gamma distribution, fit results without new object.
size_t N2 = 350;
@@ -433,8 +433,8 @@ BOOST_AUTO_TEST_CASE(GammaDistributionTrainTest)
gDist.Train(rdata2);
// Training must estimate d' pairs of alpha and beta parameters.
- BOOST_REQUIRE_EQUAL(gDist.Alpha().n_elem, d2);
- BOOST_REQUIRE_EQUAL(gDist.Beta().n_elem, d2);
+ BOOST_REQUIRE_EQUAL(gDist.Dimensionality(), d2);
+ BOOST_REQUIRE_EQUAL(gDist.Dimensionality(), d2);
}
/**
@@ -472,8 +472,8 @@ BOOST_AUTO_TEST_CASE(GammaDistributionFittingTest)
gDist.Train(rdata);
// Estimated parameter must be close to real.
- BOOST_REQUIRE_CLOSE(gDist.Alpha()[0], alphaReal, errorTolerance);
- BOOST_REQUIRE_CLOSE(gDist.Beta()[0], betaReal, errorTolerance);
+ BOOST_REQUIRE_CLOSE(gDist.Alpha(0), alphaReal, errorTolerance);
+ BOOST_REQUIRE_CLOSE(gDist.Beta(0), betaReal, errorTolerance);
/** Iteration 2 (different parameter set) **/
@@ -494,8 +494,27 @@ BOOST_AUTO_TEST_CASE(GammaDistributionFittingTest)
gDist2.Train(rdata2);
// Estimated parameter must be close to real.
- BOOST_REQUIRE_CLOSE(gDist2.Alpha()[0], alphaReal2, errorTolerance);
- BOOST_REQUIRE_CLOSE(gDist2.Beta()[0], betaReal2, errorTolerance);
+ BOOST_REQUIRE_CLOSE(gDist2.Alpha(0), alphaReal2, errorTolerance);
+ BOOST_REQUIRE_CLOSE(gDist2.Beta(0), betaReal2, errorTolerance);
+}
+
+/**
+ * Test that Train() and the constructor that takes data give the same resulting
+ * distribution.
+ */
+BOOST_AUTO_TEST_CASE(GammaDistributionTrainConstructorTest)
+{
+ const arma::mat data = arma::randu<arma::mat>(10, 500);
+
+ GammaDistribution d1(data);
+ GammaDistribution d2;
+ d2.Train(data);
+
+ for (size_t i = 0; i < 10; ++i)
+ {
+ BOOST_REQUIRE_CLOSE(d1.Alpha(i), d2.Alpha(i), 1e-5);
+ BOOST_REQUIRE_CLOSE(d1.Beta(i), d2.Beta(i), 1e-5);
+ }
}
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-git
mailing list