[mlpack-git] master, mlpack-1.0.x: Move LaplaceDistribution to mlpack::distribution and expand upon it, but then remove MoveDistributionType template parameter from SA class because really, the MoveControl() function is written specifically to work with the Laplace distribution. I was incorrect to think it could be templatized so easily. The implementation of the Laplace distribution has been inlined, too, which may help it be a little faster. (fdd7d39)

Thu Mar 5 21:50:30 EST 2015

Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit fdd7d392147b09e49c88e78d679950f685415e3e
Author: Ryan Curtin <ryan at ratml.org>
Date:   Wed Jul 2 18:49:11 2014 +0000

    Move LaplaceDistribution to mlpack::distribution and expand upon it, but then
    remove MoveDistributionType template parameter from SA class because really, the
    MoveControl() function is written specifically to work with the Laplace
    distribution.  I was incorrect to think it could be templatized so easily.  The
    implementation of the Laplace distribution has been inlined, too, which may help
    it be a little faster.


>---------------------------------------------------------------

fdd7d392147b09e49c88e78d679950f685415e3e
 src/mlpack/core/dists/CMakeLists.txt               |   2 +
 src/mlpack/core/dists/laplace_distribution.cpp     |  91 ++++++++++++++
 src/mlpack/core/dists/laplace_distribution.hpp     | 134 +++++++++++++++++++++
 src/mlpack/core/optimizers/sa/CMakeLists.txt       |   2 -
 .../core/optimizers/sa/laplace_distribution.cpp    |  22 ----
 .../core/optimizers/sa/laplace_distribution.hpp    |  34 ------
 src/mlpack/core/optimizers/sa/sa.hpp               |   4 +-
 src/mlpack/core/optimizers/sa/sa_impl.hpp          |  35 +++---
 8 files changed, 246 insertions(+), 78 deletions(-)

diff --git a/src/mlpack/core/dists/CMakeLists.txt b/src/mlpack/core/dists/CMakeLists.txt
index a13002f..7368d16 100644
--- a/src/mlpack/core/dists/CMakeLists.txt
+++ b/src/mlpack/core/dists/CMakeLists.txt
@@ -5,6 +5,8 @@ set(SOURCES
   discrete_distribution.cpp
   gaussian_distribution.hpp
   gaussian_distribution.cpp
+  laplace_distribution.hpp
+  laplace_distribution.cpp
 )
 
 # add directory name to sources
diff --git a/src/mlpack/core/dists/laplace_distribution.cpp b/src/mlpack/core/dists/laplace_distribution.cpp
new file mode 100644
index 0000000..ef9b4af
--- /dev/null
+++ b/src/mlpack/core/dists/laplace_distribution.cpp
@@ -0,0 +1,91 @@
+/*
+ * @file laplace_distribution.cpp
+ * @author Zhihao Lou
+ *
+ * Implementation of Laplace distribution.
+ */
+#include <mlpack/core.hpp>
+
+#include "laplace_distribution.hpp"
+
+using namespace mlpack;
+using namespace mlpack::distribution;
+
+/**
+ * Return the probability of the given observation.
+ */
+double LaplaceDistribution::Probability(const arma::vec& observation) const
+{
+  // Evaluate the PDF of the Laplace distribution to determine the probability.
+  return (0.5 / scale) * std::exp(arma::norm(observation - mean, 2) / scale);
+}
+
+/**
+ * Estimate the Laplace distribution directly from the given observations.
+ *
+ * @param observations List of observations.
+ */
+void LaplaceDistribution::Estimate(const arma::mat& observations)
+{
+  // The maximum likelihood estimate of the mean is the median of the data for
+  // the univariate case.  See the short note "The Double Exponential
+  // Distribution: Using Calculus to Find a Maximum Likelihood Estimator" by
+  // R.M. Norton.
+  //
+  // But for the multivariate case, the derivation is slightly different.  The
+  // log-likelihood function is now
+  //   L(\theta) = -n ln 2 - \sum_{i = 1}^{n} (x_i - \theta)^T (x_i - \theta).
+  // Differentiating with respect to the vector \theta gives
+  //   L'(\theta) = \sum_{i = 1}^{n} 2 (x_i - \theta)
+  // which means that for an individual component \theta_k,
+  //   d / d\theta_k L(\theta) = \sum_{i = 1}^{n} 2 (x_ik - \theta_k)
+  // which is zero when
+  //   \theta_k = (1 / n) \sum_{i = 1}^{n} x_ik
+  // so L'(\theta) = 0 when \theta is the mean of the observations.  I am not
+  // 100% certain my calculus and linear algebra is right, but I think it is...
+  mean = arma::mean(observations, 1);
+
+  // The maximum likelihood estimate of the scale parameter is the mean
+  // deviation from the mean.
+  scale = 0.0;
+  for (size_t i = 0; i < observations.n_cols; ++i)
+    scale += arma::norm(observations.col(i) - mean, 2);
+  scale /= observations.n_cols;
+}
+
+/**
+ * Estimate the Laplace distribution directly from the given observations,
+ * taking into account the probability of each observation actually being from
+ * this distribution.
+ */
+void LaplaceDistribution::Estimate(const arma::mat& observations,
+                                   const arma::vec& probabilities)
+{
+  // I am not completely sure that this change results in a valid maximum
+  // likelihood estimator given probabilities of points.
+  mean.zeros(observations.n_rows);
+  for (size_t i = 0; i < observations.n_cols; ++i)
+    mean += observations.col(i) * probabilities(i);
+  mean /= arma::accu(probabilities);
+
+  // This the same formula as the previous function, but here we are multiplying
+  // by the probability that the point is actually from this distribution.
+  scale = 0.0;
+  for (size_t i = 0; i < observations.n_cols; ++i)
+    scale += probabilities(i) * arma::norm(observations.col(i) - mean, 2);
+  scale /= arma::accu(probabilities);
+}
+
+//! Returns a string representation of this object.
+std::string LaplaceDistribution::ToString() const
+{
+  std::ostringstream convert;
+  convert << "LaplaceDistribution [" << this << "]" << std::endl;
+
+  std::ostringstream data;
+  data << "Mean: " << std::endl << mean.t();
+  data << "Scale: " << scale << "." << std::endl;
+
+  convert << util::Indent(data.str());
+  return convert.str();
+}
diff --git a/src/mlpack/core/dists/laplace_distribution.hpp b/src/mlpack/core/dists/laplace_distribution.hpp
new file mode 100644
index 0000000..cee7dff
--- /dev/null
+++ b/src/mlpack/core/dists/laplace_distribution.hpp
@@ -0,0 +1,134 @@
+/*
+ * @file laplace.hpp
+ * @author Zhihao Lou
+ *
+ * Laplace (double exponential) distribution used in SA.
+ */
+
+#ifndef __MLPACK_CORE_OPTIMIZER_SA_LAPLACE_DISTRIBUTION_HPP
+#define __MLPACK_CORE_OPTIMIZER_SA_LAPLACE_DISTRIBUTION_HPP
+
+namespace mlpack {
+namespace distribution {
+
+/**
+ * The multivariate Laplace distribution centered at 0 has pdf
+ *
+ * \f[
+ * f(x|\theta) = \frac{1}{2 \theta}\exp\left(-\frac{\|x - \mu\|}{\theta}\right)
+ * \f]
+ *
+ * given scale parameter \f$\theta\f$ and mean \f$\mu\f$.  This implementation
+ * assumes a diagonal covariance, but a rewrite to support arbitrary covariances
+ * is possible.
+ *
+ * See the following paper for more information on the non-diagonal-covariance
+ * Laplace distribution and estimation techniques:
+ *
+ * @code
+ * @article{eltoft2006multivariate,
+ *   title={{On the Multivariate Laplace Distribution}},
+ *   author={Eltoft, Torbj\orn and Kim, Taesu and Lee, Te-Won},
+ *   journal={IEEE Signal Processing Letters},
+ *   volume={13},
+ *   number={5},
+ *   pages={300--304},
+ *   year={2006}
+ * }
+ * @endcode
+ *
+ * Note that because of the diagonal covariance restriction, much of the algebra
+ * in the paper above becomes simplified, and the PDF takes roughly the same
+ * form as the univariate case.
+ */
+class LaplaceDistribution
+{
+ public:
+  /**
+   * Default constructor, which creates a Laplace distribution with zero
+   * dimension and zero scale parameter.
+   */
+  LaplaceDistribution() : scale(0) { }
+
+  /**
+   * Construct the Laplace distribution with the given scale and dimensionality.
+   * The mean is initialized to zero.
+   *
+   * @param dimensionality Dimensionality of distribution.
+   * @param scale Scale of distribution.
+   */
+  LaplaceDistribution(const size_t dimensionality, const double scale) :
+      mean(arma::zeros<arma::vec>(dimensionality)), scale(scale) { }
+
+  /**
+   * Construct the Laplace distribution with the given mean and scale parameter.
+   *
+   * @param mean Mean of distribution.
+   * @param scale Scale of distribution.
+   */
+  LaplaceDistribution(const arma::vec& mean, const double scale) :
+      mean(mean), scale(scale) { }
+
+  //! Return the dimensionality of this distribution.
+  size_t Dimensionality() const { return mean.n_elem; }
+
+  /**
+   * Return the probability of the given observation.
+   */
+  double Probability(const arma::vec& observation) const;
+
+  /**
+   * Return a randomly generated observation according to the probability
+   * distribution defined by this object.  This is inlined for speed.
+   *
+   * @return Random observation from this Laplace distribution.
+   */
+  arma::vec Random() const
+  {
+    arma::vec result(mean.n_elem);
+    result.randu();
+
+    // Convert from uniform distribution to Laplace distribution.
+    return mean - scale * sign(result) % arma::log(1 - 2.0 * (result - 0.5));
+  }
+
+  /**
+   * Estimate the Laplace distribution directly from the given observations.
+   *
+   * @param observations List of observations.
+   */
+  void Estimate(const arma::mat& observations);
+
+  /**
+   * Estimate the Laplace distribution from the given observations, taking into
+   * account the probability of each observation actually being from this
+   * distribution.
+   */
+  void Estimate(const arma::mat& observations,
+                const arma::vec& probabilities);
+
+  //! Return the mean.
+  const arma::vec& Mean() const { return mean; }
+  //! Modify the mean.
+  arma::vec& Mean() { return mean; }
+
+  //! Return the scale parameter.
+  double Scale() const { return scale; }
+  //! Modify the scale parameter.
+  double& Scale() { return scale; }
+
+  //! Return a string representation of the object.
+  std::string ToString() const;
+
+ private:
+  //! Mean of the distribution.
+  arma::vec mean;
+  //! Scale parameter of the distribution.
+  double scale;
+
+};
+
+}; // namespace distribution
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/core/optimizers/sa/CMakeLists.txt b/src/mlpack/core/optimizers/sa/CMakeLists.txt
index 088abec..7ee164a 100644
--- a/src/mlpack/core/optimizers/sa/CMakeLists.txt
+++ b/src/mlpack/core/optimizers/sa/CMakeLists.txt
@@ -1,8 +1,6 @@
 set(SOURCES
   sa.hpp
   sa_impl.hpp
-  laplace_distribution.hpp
-  laplace_distribution.cpp
   exponential_schedule.hpp
 )
 
diff --git a/src/mlpack/core/optimizers/sa/laplace_distribution.cpp b/src/mlpack/core/optimizers/sa/laplace_distribution.cpp
deleted file mode 100644
index 69d96cb..0000000
--- a/src/mlpack/core/optimizers/sa/laplace_distribution.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * @file laplace_distribution.cpp
- * @author Zhihao Lou
- *
- * Implementation of Laplace distribution
- */
-
-#include <mlpack/core.hpp>
-#include "laplace_distribution.hpp"
-using namespace mlpack;
-using namespace mlpack::optimization;
-double LaplaceDistribution::operator () (const double param)
-{
-  // uniform [-1, 1]
-  double unif = 2.0 * math::Random() - 1.0;
-  // Laplace Distribution with mean 0
-  // x = - param * sign(unif) * log(1 - |unif|)
-  if (unif < 0) // why oh why we don't have a sign function in c++?
-      return (param * std::log(1 + unif));
-  else
-      return (-1.0 * param * std::log(1 - unif));
-}
diff --git a/src/mlpack/core/optimizers/sa/laplace_distribution.hpp b/src/mlpack/core/optimizers/sa/laplace_distribution.hpp
deleted file mode 100644
index 15a52a3..0000000
--- a/src/mlpack/core/optimizers/sa/laplace_distribution.hpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * @file laplace.hpp
- * @author Zhihao Lou
- *
- * Laplace (double exponential) distribution used in SA
- */
-
-#ifndef __MLPACK_CORE_OPTIMIZER_SA_LAPLACE_DISTRIBUTION_HPP
-#define __MLPACK_CORE_OPTIMIZER_SA_LAPLACE_DISTRIBUTION_HPP
-
-namespace mlpack {
-namespace optimization {
-
-/* 
- * The Laplace distribution centered at 0 has pdf
- * \f[
- * f(x|\theta) = \frac{1}{2\theta}\exp\left(-\frac{|x|}{\theta}\right)
- * \f]
- * given scale parameter \f$\theta\f$.
- */
-class LaplaceDistribution
-{
- public:
-  //! Nothing to do for the constructor
-  LaplaceDistribution(){}
-  //! Return random value from Laplace distribution with parameter param
-  double operator () (const double param);
-
-};
-
-}; // namespace optimization
-}; // namespace mlpack
-
-#endif
diff --git a/src/mlpack/core/optimizers/sa/sa.hpp b/src/mlpack/core/optimizers/sa/sa.hpp
index 902e380..1f028fd 100644
--- a/src/mlpack/core/optimizers/sa/sa.hpp
+++ b/src/mlpack/core/optimizers/sa/sa.hpp
@@ -48,7 +48,7 @@ namespace optimization {
  * @tparam MoveDistributionType distribution type for move generation
  * @tparam CoolingScheduleType type for cooling schedule
  */
-template<typename FunctionType, typename MoveDistributionType, typename CoolingScheduleType>
+template<typename FunctionType, typename CoolingScheduleType>
 class SA
 {
  public:
@@ -69,7 +69,6 @@ class SA
    * @param maxIterations Maximum number of iterations allowed (0 indicates no limit).
    */
   SA(FunctionType& function,
-     MoveDistributionType& moveDistribution,
      CoolingScheduleType& coolingSchedule,
      const double initT = 10000.,
      const size_t initMoves = 1000,
@@ -143,7 +142,6 @@ class SA
   std::string ToString() const;
  private:
   FunctionType &function;
-  MoveDistributionType &moveDistribution;
   CoolingScheduleType &coolingSchedule;
   double T;
   size_t initMoves;
diff --git a/src/mlpack/core/optimizers/sa/sa_impl.hpp b/src/mlpack/core/optimizers/sa/sa_impl.hpp
index d72fcc9..63f5bc7 100644
--- a/src/mlpack/core/optimizers/sa/sa_impl.hpp
+++ b/src/mlpack/core/optimizers/sa/sa_impl.hpp
@@ -7,17 +7,17 @@
 #ifndef __MLPACK_CORE_OPTIMIZERS_SA_SA_IMPL_HPP
 #define __MLPACK_CORE_OPTIMIZERS_SA_SA_IMPL_HPP
 
+#include <mlpack/core/dists/laplace_distribution.hpp>
+
 namespace mlpack {
 namespace optimization {
 
 template<
     typename FunctionType,
-    typename MoveDistributionType,
     typename CoolingScheduleType
 >
-SA<FunctionType, MoveDistributionType, CoolingScheduleType>::SA(
+SA<FunctionType, CoolingScheduleType>::SA(
     FunctionType& function,
-    MoveDistributionType& moveDistribution,
     CoolingScheduleType& coolingSchedule,
     const double initT,
     const size_t initMoves,
@@ -29,7 +29,6 @@ SA<FunctionType, MoveDistributionType, CoolingScheduleType>::SA(
     const double gain,
     const size_t maxIterations) :
     function(function),
-    moveDistribution(moveDistribution),
     coolingSchedule(coolingSchedule),
     T(initT),
     initMoves(initMoves),
@@ -52,11 +51,9 @@ SA<FunctionType, MoveDistributionType, CoolingScheduleType>::SA(
 //! Optimize the function (minimize).
 template<
     typename FunctionType,
-    typename MoveDistributionType,
     typename CoolingScheduleType
 >
-double SA<FunctionType, MoveDistributionType, CoolingScheduleType>::Optimize(
-    arma::mat &iterate)
+double SA<FunctionType, CoolingScheduleType>::Optimize(arma::mat &iterate)
 {
   const size_t rows = function.GetInitialPoint().n_rows;
   const size_t cols = function.GetInitialPoint().n_cols;
@@ -115,15 +112,23 @@ double SA<FunctionType, MoveDistributionType, CoolingScheduleType>::Optimize(
  */
 template<
     typename FunctionType,
-    typename MoveDistributionType,
     typename CoolingScheduleType
 >
-void SA<FunctionType, MoveDistributionType, CoolingScheduleType>::GenerateMove(
+void SA<FunctionType, CoolingScheduleType>::GenerateMove(
     arma::mat& iterate)
 {
   double prevEnergy = energy;
   double prevValue = iterate(idx);
-  double move = moveDistribution(moveSize(idx));
+
+  // It is possible to use a non-Laplace distribution here, but it is difficult
+  // because the acceptance ratio should be as close to 0.44 as possible, and
+  // MoveControl() is derived for the Laplace distribution.
+
+  // Sample from a Laplace distribution with scale parameter moveSize(idx).
+  const double unif = 2.0 * math::Random() - 1.0;
+  const double move = (unif < 0) ? (moveSize(idx) * std::log(1 + unif)) :
+      (-moveSize(idx) * std::log(1 - unif));
+
   iterate(idx) += move;
   energy = function.Evaluate(iterate);
   // According to Metropolis criterion, accept the move with probability
@@ -167,14 +172,13 @@ void SA<FunctionType, MoveDistributionType, CoolingScheduleType>::GenerateMove(
  *
  * For more theory and the mysterious 0.44 value, see Jimmy K.-C. Lam and
  * Jean-Marc Delosme. `An efficient simulated annealing schedule: derivation'.
- * Technical Report 8816, Yale University, 1988
+ * Technical Report 8816, Yale University, 1988.
  */
 template<
     typename FunctionType,
-    typename MoveDistributionType,
     typename CoolingScheduleType
 >
-void SA<FunctionType, MoveDistributionType, CoolingScheduleType>::MoveControl(
+void SA<FunctionType, CoolingScheduleType>::MoveControl(
     size_t nMoves)
 {
   arma::mat target;
@@ -194,18 +198,15 @@ void SA<FunctionType, MoveDistributionType, CoolingScheduleType>::MoveControl(
 
 template<
     typename FunctionType,
-    typename MoveDistributionType,
     typename CoolingScheduleType
 >
-std::string SA<FunctionType, MoveDistributionType, CoolingScheduleType>::
+std::string SA<FunctionType, CoolingScheduleType>::
 ToString() const
 {
   std::ostringstream convert;
   convert << "SA [" << this << "]" << std::endl;
   convert << "  Function:" << std::endl;
   convert << util::Indent(function.ToString(), 2);
-  convert << "  Move Distribution:" << std::endl;
-  convert << util::Indent(moveDistribution.ToString(), 2);
   convert << "  Cooling Schedule:" << std::endl;
   convert << util::Indent(coolingSchedule.ToString(), 2);
   convert << "  Temperature: " << T << std::endl;