[mlpack-git] master: Minor style cleanups and changes: (f364cba)

Mon Jul 25 10:11:12 EDT 2016

Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/5528947e1b32c9411d584986866cb3650a3a5303...f364cba65c4e7a608662f6a48afe103c7e492ec7

>---------------------------------------------------------------

commit f364cba65c4e7a608662f6a48afe103c7e492ec7
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon Jul 25 10:11:12 2016 -0400

    Minor style cleanups and changes:
    
     - Change Alpha() to Alpha(i) and Beta() to Beta(i) (i.e. hide underlying
       representation in case we ever change it later).
     - Add constructor that can train.
     - Modify default constructor to accept a dimensionality.
     - Add Dimensionality() for accessing the number of dimensions in the
       distribution.


>---------------------------------------------------------------

f364cba65c4e7a608662f6a48afe103c7e492ec7
 src/mlpack/core/dists/gamma_distribution.cpp | 26 ++++++----
 src/mlpack/core/dists/gamma_distribution.hpp | 72 ++++++++++++++++++++++------
 src/mlpack/tests/distribution_test.cpp       | 35 ++++++++++----
 3 files changed, 103 insertions(+), 30 deletions(-)

diff --git a/src/mlpack/core/dists/gamma_distribution.cpp b/src/mlpack/core/dists/gamma_distribution.cpp
index c014077..8cfb040 100644
--- a/src/mlpack/core/dists/gamma_distribution.cpp
+++ b/src/mlpack/core/dists/gamma_distribution.cpp
@@ -6,13 +6,25 @@
  */
 #include "gamma_distribution.hpp"
 #include <boost/math/special_functions/digamma.hpp>
-//#include <boost/math/special_functions/trigamma.hpp> // Moved to prereqs.hpp
 
 using namespace mlpack;
 using namespace mlpack::distribution;
 
+GammaDistribution::GammaDistribution(const size_t dimensionality)
+{
+  // Initialize distribution.
+  alpha.zeros(dimensionality);
+  beta.zeros(dimensionality);
+}
+
+GammaDistribution::GammaDistribution(const arma::mat& data,
+                                     const double tol)
+{
+  Train(data, tol);
+}
+
 // Returns true if computation converged.
-inline bool GammaDistribution::converged(const double aOld, 
+inline bool GammaDistribution::Converged(const double aOld,
                                          const double aNew,
                                          const double tol)
 {
@@ -22,7 +34,6 @@ inline bool GammaDistribution::converged(const double aOld,
 // Fits an alpha and beta parameter to each dimension of the data.
 void GammaDistribution::Train(const arma::mat& rdata, const double tol)
 {
-
   // If fittingSet is empty, nothing to do.
   if (arma::size(rdata) == arma::size(arma::mat()))
     return;
@@ -44,7 +55,6 @@ void GammaDistribution::Train(const arma::mat& rdata, const double tol)
   // Treat each dimension (i.e. row) independently.
   for (size_t row = 0; row < rdata.n_rows; ++row)
   {
-
     // Statistics for this row.
     const double meanLogx = meanLogxVec(row);
     const double meanx = meanxVec(row);
@@ -69,12 +79,12 @@ void GammaDistribution::Train(const arma::mat& rdata, const double tol)
 
       // Protect against nan values (aEst will be passed to logarithm).
       if (aEst <= 0)
-        throw std::logic_error("GammaDistribution parameter alpha will be <=0");
+        throw std::logic_error("GammaDistribution::Train(): estimated invalid "
+            "negative value for parameter alpha!");
 
-    } while (! converged(aEst, aOld, tol) );
+    } while (!Converged(aEst, aOld, tol));
 
     alpha(row) = aEst;
-    beta(row) = meanx/aEst;
+    beta(row) = meanx / aEst;
   }
-  return;
 }
diff --git a/src/mlpack/core/dists/gamma_distribution.hpp b/src/mlpack/core/dists/gamma_distribution.hpp
index 5763180..698e7b4 100644
--- a/src/mlpack/core/dists/gamma_distribution.hpp
+++ b/src/mlpack/core/dists/gamma_distribution.hpp
@@ -10,24 +10,57 @@
  * Based on "Estimating a Gamma Distribution" by Thomas P. Minka:
  * research.microsoft.com/~minka/papers/minka-gamma.pdf
  */
-
 #ifndef _MLPACK_CORE_DISTRIBUTIONS_GAMMA_DISTRIBUTION_HPP
 #define _MLPACK_CORE_DISTRIBUTIONS_GAMMA_DISTRIBUTION_HPP
 
 #include <mlpack/core.hpp>
-namespace mlpack{
-namespace distribution{
+
+namespace mlpack {
+namespace distribution {
 
 /**
- * Class for fitting the Gamma Distribution to a dataset.
+ * This class represents the Gamma distribution.  It supports training a Gamma
+ * distribution on a given dataset and accessing the fitted alpha and beta
+ * parameters.
+ *
+ * This class supports multidimensional Gamma distributions; however, it is
+ * assumed that each dimension is independent; therefore, a multidimensional
+ * Gamma distribution here may be seen as a set of independent
+ * single-dimensional Gamma distributions---and the parameters are estimated
+ * under this assumption.
+ *
+ * The estimation algorithm used can be found in the following paper:
+ *
+ * @code
+ * @techreport{minka2002estimating,
+ *   title={Estimating a {G}amma distribution},
+ *   author={Minka, Thomas P.},
+ *   institution={Microsoft Research},
+ *   address={Cambridge, U.K.},
+ *   year={2002}
+ * }
+ * @endcode
  */
 class GammaDistribution
 {
   public:
     /**
-     * Empty constructor.
+     * Construct the Gamma distribution with the given number of dimensions
+     * (default 0); each parameter will be initialized to 0.
+     *
+     * @param dimensionality Number of dimensions.
+     */
+    GammaDistribution(const size_t dimensionality = 0);
+
+    /**
+     * Construct the Gamma distribution, training on the given data.
+     *
+     * @param data Data to train the distribution on.
+     * @param tol Convergence tolerance. This is *not* an absolute measure:
+     *    It will stop the approximation once the *change* in the value is
+     *    smaller than tol.
      */
-    GammaDistribution() { /* Nothing to do. */ };
+    GammaDistribution(const arma::mat& data, const double tol = 1e-8);
 
     /**
      * Destructor.
@@ -45,15 +78,26 @@ class GammaDistribution
      */
     void Train(const arma::mat& rdata, const double tol = 1e-8);
 
-    // Access to Gamma Distribution Parameters.
-    /* Get alpha parameters of each dimension */
-    arma::Col<double>& Alpha(void) { return alpha; };
-    /* Get beta parameters of each dimension */
-    arma::Col<double>& Beta(void) { return beta; };
+    // Access to Gamma distribution parameters.
+
+    //! Get the alpha parameter of the given dimension.
+    double Alpha(const size_t dim) const { return alpha[dim]; }
+    //! Modify the alpha parameter of the given dimension.
+    double& Alpha(const size_t dim) { return alpha[dim]; }
+
+    //! Get the beta parameter of the given dimension.
+    double Beta(const size_t dim) const { return beta[dim]; }
+    //! Modify the beta parameter of the given dimension.
+    double& Beta(const size_t dim) { return beta[dim]; }
+
+    //! Get the dimensionality of the distribution.
+    size_t Dimensionality() const { return alpha.n_elem; }
 
   private:
-    arma::Col<double> alpha; // Array of fitted alphas.
-    arma::Col<double> beta; // Array of fitted betas.
+    //! Array of fitted alphas.
+    arma::vec alpha;
+    //! Array of fitted betas.
+    arma::vec beta;
 
     /**
      * This is a small function that returns true if the update of alpha is smaller
@@ -64,7 +108,7 @@ class GammaDistribution
      * @param tol Convergence tolerance. Relative measure (see documentation of
      * GammaDistribution::Train)
      */
-    inline bool converged(const double aOld, 
+    inline bool Converged(const double aOld,
                           const double aNew,
                           const double tol);
 };
diff --git a/src/mlpack/tests/distribution_test.cpp b/src/mlpack/tests/distribution_test.cpp
index 3862548..68c5754 100644
--- a/src/mlpack/tests/distribution_test.cpp
+++ b/src/mlpack/tests/distribution_test.cpp
@@ -416,8 +416,8 @@ BOOST_AUTO_TEST_CASE(GammaDistributionTrainTest)
   gDist.Train(rdata);
 
   // Training must estimate d pairs of alpha and beta parameters.
-  BOOST_REQUIRE_EQUAL(gDist.Alpha().n_elem, d);
-  BOOST_REQUIRE_EQUAL(gDist.Beta().n_elem, d);
+  BOOST_REQUIRE_EQUAL(gDist.Dimensionality(), d);
+  BOOST_REQUIRE_EQUAL(gDist.Dimensionality(), d);
 
   // Create a N' x d' gamma distribution, fit results without new object.
   size_t N2 = 350;
@@ -433,8 +433,8 @@ BOOST_AUTO_TEST_CASE(GammaDistributionTrainTest)
   gDist.Train(rdata2);
 
   // Training must estimate d' pairs of alpha and beta parameters.
-  BOOST_REQUIRE_EQUAL(gDist.Alpha().n_elem, d2);
-  BOOST_REQUIRE_EQUAL(gDist.Beta().n_elem, d2);
+  BOOST_REQUIRE_EQUAL(gDist.Dimensionality(), d2);
+  BOOST_REQUIRE_EQUAL(gDist.Dimensionality(), d2);
 }
 
 /**
@@ -472,8 +472,8 @@ BOOST_AUTO_TEST_CASE(GammaDistributionFittingTest)
   gDist.Train(rdata);
 
   // Estimated parameter must be close to real.
-  BOOST_REQUIRE_CLOSE(gDist.Alpha()[0], alphaReal, errorTolerance);
-  BOOST_REQUIRE_CLOSE(gDist.Beta()[0], betaReal, errorTolerance);
+  BOOST_REQUIRE_CLOSE(gDist.Alpha(0), alphaReal, errorTolerance);
+  BOOST_REQUIRE_CLOSE(gDist.Beta(0), betaReal, errorTolerance);
 
   /** Iteration 2 (different parameter set) **/
 
@@ -494,8 +494,27 @@ BOOST_AUTO_TEST_CASE(GammaDistributionFittingTest)
   gDist2.Train(rdata2);
 
   // Estimated parameter must be close to real.
-  BOOST_REQUIRE_CLOSE(gDist2.Alpha()[0], alphaReal2, errorTolerance);
-  BOOST_REQUIRE_CLOSE(gDist2.Beta()[0], betaReal2, errorTolerance);
+  BOOST_REQUIRE_CLOSE(gDist2.Alpha(0), alphaReal2, errorTolerance);
+  BOOST_REQUIRE_CLOSE(gDist2.Beta(0), betaReal2, errorTolerance);
+}
+
+/**
+ * Test that Train() and the constructor that takes data give the same resulting
+ * distribution.
+ */
+BOOST_AUTO_TEST_CASE(GammaDistributionTrainConstructorTest)
+{
+  const arma::mat data = arma::randu<arma::mat>(10, 500);
+
+  GammaDistribution d1(data);
+  GammaDistribution d2;
+  d2.Train(data);
+
+  for (size_t i = 0; i < 10; ++i)
+  {
+    BOOST_REQUIRE_CLOSE(d1.Alpha(i), d2.Alpha(i), 1e-5);
+    BOOST_REQUIRE_CLOSE(d1.Beta(i), d2.Beta(i), 1e-5);
+  }
 }
 
 BOOST_AUTO_TEST_SUITE_END();