[mlpack-git] master: WIP: gaussian distribution caches covariance factorizations (6c4ab7f)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Mon Jan 26 15:26:38 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/71a9f87ab29175554507f1592e564063f0452743...9b55e5e4a300972d01cf1cf2802df8bf392a1fd1
>---------------------------------------------------------------
commit 6c4ab7f43ed812f71958fc3d17521e03279e9416
Author: Stephen Tu <stephent at berkeley.edu>
Date: Tue Jan 20 12:16:40 2015 -0800
WIP: gaussian distribution caches covariance factorizations
compiles, not tested
>---------------------------------------------------------------
6c4ab7f43ed812f71958fc3d17521e03279e9416
.gitignore | 1 +
src/mlpack/core/dists/gaussian_distribution.cpp | 50 +++++++++++++++++++----
src/mlpack/core/dists/gaussian_distribution.hpp | 36 ++++++++++------
src/mlpack/core/dists/regression_distribution.hpp | 4 +-
src/mlpack/methods/gmm/em_fit_impl.hpp | 43 +++++++++++--------
src/mlpack/methods/gmm/gmm_convert_main.cpp | 4 +-
src/mlpack/methods/hmm/hmm_util_impl.hpp | 8 +++-
src/mlpack/tests/distribution_test.cpp | 32 ++++++++++-----
src/mlpack/tests/gmm_test.cpp | 5 ++-
src/mlpack/tests/hmm_test.cpp | 10 ++++-
10 files changed, 140 insertions(+), 53 deletions(-)
diff --git a/.gitignore b/.gitignore
index 1b2211d..726d153 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
build*
+*.swp
diff --git a/src/mlpack/core/dists/gaussian_distribution.cpp b/src/mlpack/core/dists/gaussian_distribution.cpp
index cb37e79..969c33e 100644
--- a/src/mlpack/core/dists/gaussian_distribution.cpp
+++ b/src/mlpack/core/dists/gaussian_distribution.cpp
@@ -10,21 +10,49 @@
using namespace mlpack;
using namespace mlpack::distribution;
+
+GaussianDistribution::GaussianDistribution(const arma::vec& mean,
+ const arma::mat& covariance)
+ : mean(mean)
+{
+ Covariance(covariance);
+}
+
+void GaussianDistribution::Covariance(const arma::mat& covariance)
+{
+ this->covariance = covariance;
+ FactorCovariance();
+}
+
+void GaussianDistribution::Covariance(arma::mat&& covariance)
+{
+ this->covariance = std::move(covariance);
+ FactorCovariance();
+}
+
+void GaussianDistribution::FactorCovariance()
+{
+ covLower = arma::chol(covariance, "lower");
+ // tell arma that this is lower triangular matrix (for faster inversion)
+ covLower = arma::trimatl(covLower);
+ const arma::mat invCovLower = covLower.i();
+ invCov = invCovLower.t() * invCovLower;
+ double sign = 0.;
+ arma::log_det(logDetCov, sign, covLower);
+ logDetCov *= 2;
+}
+
double GaussianDistribution::LogProbability(const arma::vec& observation) const
{
const size_t k = observation.n_elem;
- double logdetsigma = 0;
- double sign = 0.;
- arma::log_det(logdetsigma, sign, covariance);
const arma::vec diff = mean - observation;
- const arma::vec v = (diff.t() * arma::inv(covariance) * diff);
- return -0.5 * k * log2pi - 0.5 * logdetsigma - 0.5 * v(0);
+ const arma::vec v = (diff.t() * invCov * diff);
+ return -0.5 * k * log2pi - 0.5 * logDetCov - 0.5 * v(0);
}
arma::vec GaussianDistribution::Random() const
{
- // Should we store chol(covariance) for easier calculation later?
- return trans(chol(covariance)) * arma::randn<arma::vec>(mean.n_elem) + mean;
+ return covLower * arma::randn<arma::vec>(mean.n_elem) + mean;
}
/**
@@ -41,6 +69,7 @@ void GaussianDistribution::Estimate(const arma::mat& observations)
}
else // This will end up just being empty.
{
+ // TODO(stephentu): why do we allow this case? why not throw an error?
mean.zeros(0);
covariance.zeros(0);
return;
@@ -77,6 +106,8 @@ void GaussianDistribution::Estimate(const arma::mat& observations)
perturbation *= 10; // Slow, but we don't want to add too much.
}
}
+
+ FactorCovariance();
}
/**
@@ -94,6 +125,7 @@ void GaussianDistribution::Estimate(const arma::mat& observations,
}
else // This will end up just being empty.
{
+ // TODO(stephentu): same as above
mean.zeros(0);
covariance.zeros(0);
return;
@@ -114,6 +146,7 @@ void GaussianDistribution::Estimate(const arma::mat& observations,
// Nothing in this Gaussian! At least set the covariance so that it's
// invertible.
covariance.diag() += 1e-50;
+ // TODO(stephentu): why do we allow this case?
return;
}
@@ -143,6 +176,8 @@ void GaussianDistribution::Estimate(const arma::mat& observations,
perturbation *= 10; // Slow, but we don't want to add too much.
}
}
+
+ FactorCovariance();
}
/**
@@ -180,4 +215,5 @@ void GaussianDistribution::Load(const util::SaveRestoreUtility& sr)
{
sr.LoadParameter(mean, "mean");
sr.LoadParameter(covariance, "covariance");
+ FactorCovariance();
}
diff --git a/src/mlpack/core/dists/gaussian_distribution.hpp b/src/mlpack/core/dists/gaussian_distribution.hpp
index 0e7ea24..8462aaf 100644
--- a/src/mlpack/core/dists/gaussian_distribution.hpp
+++ b/src/mlpack/core/dists/gaussian_distribution.hpp
@@ -21,8 +21,14 @@ class GaussianDistribution
private:
//! Mean of the distribution.
arma::vec mean;
- //! Covariance of the distribution.
+ //! Positive definite covariance of the distribution.
arma::mat covariance;
+ //! Lower triangular factor of cov (e.g. cov = LL^T).
+ arma::mat covLower;
+ //! Cached inverse of covariance.
+ arma::mat invCov;
+ //! Cached logdet(cov).
+ double logDetCov;
//! log(2pi)
static const constexpr double log2pi = 1.83787706640934533908193770912475883;
@@ -39,14 +45,20 @@ class GaussianDistribution
*/
GaussianDistribution(const size_t dimension) :
mean(arma::zeros<arma::vec>(dimension)),
- covariance(arma::eye<arma::mat>(dimension, dimension))
+ covariance(arma::eye<arma::mat>(dimension, dimension)),
+ covLower(arma::eye<arma::mat>(dimension, dimension)),
+ invCov(arma::eye<arma::mat>(dimension, dimension)),
+ logDetCov(0)
{ /* Nothing to do. */ }
/**
* Create a Gaussian distribution with the given mean and covariance.
+ *
+ * covariance is expected to be positive definite.
*/
- GaussianDistribution(const arma::vec& mean, const arma::mat& covariance) :
- mean(mean), covariance(covariance) { /* Nothing to do. */ }
+ GaussianDistribution(const arma::vec& mean, const arma::mat& covariance);
+
+ // TODO(stephentu): do we want a (arma::vec&&, arma::mat&&) ctor?
//! Return the dimensionality of this distribution.
size_t Dimensionality() const { return mean.n_elem; }
@@ -119,9 +131,11 @@ class GaussianDistribution
const arma::mat& Covariance() const { return covariance; }
/**
- * Return a modifiable copy of the covariance.
+ * Set the covariance.
*/
- arma::mat& Covariance() { return covariance; }
+ void Covariance(const arma::mat& covariance);
+
+ void Covariance(arma::mat&& covariance);
/**
* Returns a string representation of this object.
@@ -135,7 +149,8 @@ class GaussianDistribution
void Load(const util::SaveRestoreUtility& n);
static std::string const Type() { return "GaussianDistribution"; }
-
+ private:
+ void FactorCovariance();
};
@@ -156,17 +171,14 @@ inline void GaussianDistribution::LogProbability(const arma::mat& x,
// diffs). We just don't need any of the other elements. We can calculate
// the right hand part of the equation (instead of the left side) so that
// later we are referencing columns, not rows -- that is faster.
- arma::mat rhs = -0.5 * inv(covariance) * diffs;
+ const arma::mat rhs = -0.5 * invCov * diffs;
arma::vec logExponents(diffs.n_cols); // We will now fill this.
for (size_t i = 0; i < diffs.n_cols; i++)
logExponents(i) = accu(diffs.unsafe_col(i) % rhs.unsafe_col(i));
- double logdetsigma = 0;
- double sign = 0.;
- arma::log_det(logdetsigma, sign, covariance);
const size_t k = x.n_rows;
- logProbabilities = -0.5 * k * log2pi - 0.5 * logdetsigma + logExponents;
+ logProbabilities = -0.5 * k * log2pi - 0.5 * logDetCov + logExponents;
}
diff --git a/src/mlpack/core/dists/regression_distribution.hpp b/src/mlpack/core/dists/regression_distribution.hpp
index 6cda681..b3186a8 100644
--- a/src/mlpack/core/dists/regression_distribution.hpp
+++ b/src/mlpack/core/dists/regression_distribution.hpp
@@ -48,7 +48,9 @@ class RegressionDistribution
rf(regression::LinearRegression(predictors, responses))
{
err = GaussianDistribution(1);
- err.Covariance() = rf.ComputeError(predictors, responses);
+ arma::mat cov(1, 1);
+ cov(0, 0) = rf.ComputeError(predictors, responses);
+ err.Covariance(std::move(cov));
}
/**
diff --git a/src/mlpack/methods/gmm/em_fit_impl.hpp b/src/mlpack/methods/gmm/em_fit_impl.hpp
index c1457dd..dead473 100644
--- a/src/mlpack/methods/gmm/em_fit_impl.hpp
+++ b/src/mlpack/methods/gmm/em_fit_impl.hpp
@@ -93,11 +93,12 @@ void EMFit<InitialClusteringType, CovarianceConstraintPolicy>::Estimate(
trans(condProb.col(i)));
// Don't update if there's no probability of the Gaussian having points.
- if (probRowSums[i] != 0.0)
- dists[i].Covariance() = (tmp * trans(tmpB)) / probRowSums[i];
-
- // Apply covariance constraint.
- constraint.ApplyConstraint(dists[i].Covariance());
+ if (probRowSums[i] != 0.0) {
+ arma::mat covariance = (tmp * trans(tmpB)) / probRowSums[i];
+ // Apply covariance constraint.
+ constraint.ApplyConstraint(covariance);
+ dists[i].Covariance(std::move(covariance));
+ }
}
// Calculate the new values for omega using the updated conditional
@@ -180,10 +181,12 @@ void EMFit<InitialClusteringType, CovarianceConstraintPolicy>::Estimate(
arma::mat tmpB = tmp % (arma::ones<arma::vec>(observations.n_rows) *
trans(condProb.col(i) % probabilities));
- dists[i].Covariance() = (tmp * trans(tmpB)) / probRowSums[i];
+ arma::mat cov = (tmp * trans(tmpB)) / probRowSums[i];
// Apply covariance constraint.
- constraint.ApplyConstraint(dists[i].Covariance());
+ constraint.ApplyConstraint(cov);
+
+ dists[i].Covariance(std::move(cov));
}
// Calculate the new values for omega using the updated conditional
@@ -210,12 +213,16 @@ InitialClustering(const arma::mat& observations,
// Run clustering algorithm.
clusterer.Cluster(observations, dists.size(), assignments);
+ std::vector<arma::vec> means(dists.size());
+ std::vector<arma::mat> covs(dists.size());
+
// Now calculate the means, covariances, and weights.
weights.zeros();
for (size_t i = 0; i < dists.size(); ++i)
{
- dists[i].Mean().zeros();
- dists[i].Covariance().zeros();
+ means[i].zeros(dists[i].Mean().n_elem);
+ covs[i].zeros(dists[i].Covariance().n_rows,
+ dists[i].Covariance().n_cols);
}
// From the assignments, generate our means, covariances, and weights.
@@ -224,11 +231,10 @@ InitialClustering(const arma::mat& observations,
const size_t cluster = assignments[i];
// Add this to the relevant mean.
- dists[cluster].Mean() += observations.col(i);
+ means[cluster] += observations.col(i);
// Add this to the relevant covariance.
- dists[cluster].Covariance() += observations.col(i) *
- trans(observations.col(i));
+ covs[cluster] += observations.col(i) * trans(observations.col(i));
// Now add one to the weights (we will normalize).
weights[cluster]++;
@@ -237,22 +243,25 @@ InitialClustering(const arma::mat& observations,
// Now normalize the mean and covariance.
for (size_t i = 0; i < dists.size(); ++i)
{
- dists[i].Mean() /= (weights[i] > 1) ? weights[i] : 1;
+ means[i] /= (weights[i] > 1) ? weights[i] : 1;
}
for (size_t i = 0; i < observations.n_cols; ++i)
{
const size_t cluster = assignments[i];
- const arma::vec normObs = observations.col(i) - dists[cluster].Mean();
- dists[cluster].Covariance() += normObs * normObs.t();
+ const arma::vec normObs = observations.col(i) - means[cluster];
+ covs[cluster] += normObs * normObs.t();
}
for (size_t i = 0; i < dists.size(); ++i)
{
- dists[i].Covariance() /= (weights[i] > 1) ? weights[i] : 1;
+ covs[i] /= (weights[i] > 1) ? weights[i] : 1;
// Apply constraints to covariance matrix.
- constraint.ApplyConstraint(dists[i].Covariance());
+ constraint.ApplyConstraint(covs[i]);
+
+ std::swap(dists[i].Mean(), means[i]);
+ dists[i].Covariance(std::move(covs[i]));
}
// Finally, normalize weights.
diff --git a/src/mlpack/methods/gmm/gmm_convert_main.cpp b/src/mlpack/methods/gmm/gmm_convert_main.cpp
index 7d80ebd..1f484f9 100644
--- a/src/mlpack/methods/gmm/gmm_convert_main.cpp
+++ b/src/mlpack/methods/gmm/gmm_convert_main.cpp
@@ -47,12 +47,14 @@ int main(int argc, char* argv[])
for (size_t i = 0; i < gaussians; ++i)
{
stringstream o;
+ arma::mat covariance;
o << i;
string meanName = "mean" + o.str();
string covName = "covariance" + o.str();
load.LoadParameter(gmm.Component(i).Mean(), meanName);
- load.LoadParameter(gmm.Component(i).Covariance(), covName);
+ load.LoadParameter(covariance, covName);
+ gmm.Component(i).Covariance(std::move(covariance));
}
gmm.Save(CLI::GetParam<string>("output_file"));
diff --git a/src/mlpack/methods/hmm/hmm_util_impl.hpp b/src/mlpack/methods/hmm/hmm_util_impl.hpp
index d3599ab..47fbd9d 100644
--- a/src/mlpack/methods/hmm/hmm_util_impl.hpp
+++ b/src/mlpack/methods/hmm/hmm_util_impl.hpp
@@ -115,8 +115,10 @@ void ConvertHMM(HMM<distribution::GaussianDistribution>& hmm,
sr.LoadParameter(hmm.Emission()[i].Mean(), s.str());
s.str("");
+ arma::mat covariance;
s << "hmm_emission_covariance_" << i;
- sr.LoadParameter(hmm.Emission()[i].Covariance(), s.str());
+ sr.LoadParameter(covariance, s.str());
+ hmm.Emission()[i].Covariance(std::move(covariance));
}
hmm.Dimensionality() = hmm.Emission()[0].Mean().n_elem;
@@ -168,7 +170,9 @@ void ConvertHMM(HMM<gmm::GMM<> >& hmm, const util::SaveRestoreUtility& sr)
s.str("");
s << "hmm_emission_" << i << "_gaussian_" << g << "_covariance";
- sr.LoadParameter(hmm.Emission()[i].Component(g).Covariance(), s.str());
+ arma::mat covariance;
+ sr.LoadParameter(covariance, s.str());
+ hmm.Emission()[i].Component(g).Covariance(std::move(covariance));
}
s.str("");
diff --git a/src/mlpack/tests/distribution_test.cpp b/src/mlpack/tests/distribution_test.cpp
index 64268fd..fc04c1a 100644
--- a/src/mlpack/tests/distribution_test.cpp
+++ b/src/mlpack/tests/distribution_test.cpp
@@ -187,16 +187,23 @@ BOOST_AUTO_TEST_CASE(GaussianUnivariateProbabilityTest)
1e-5);
// A few more cases...
- g.Covariance().fill(2.0);
+ arma::mat covariance;
+ covariance.set_size(g.Covariance().n_rows, g.Covariance().n_cols);
+
+ covariance.fill(2.0);
+ g.Covariance(std::move(covariance));
BOOST_REQUIRE_CLOSE(g.Probability(arma::vec("0.0")), 0.282094791773878, 1e-5);
BOOST_REQUIRE_CLOSE(g.Probability(arma::vec("1.0")), 0.219695644733861, 1e-5);
BOOST_REQUIRE_CLOSE(g.Probability(arma::vec("-1.0")), 0.219695644733861,
1e-5);
g.Mean().fill(1.0);
- g.Covariance().fill(1.0);
+ covariance.fill(1.0);
+ g.Covariance(std::move(covariance));
BOOST_REQUIRE_CLOSE(g.Probability(arma::vec("1.0")), 0.398942280401433, 1e-5);
- g.Covariance().fill(2.0);
+
+ covariance.fill(2.0);
+ g.Covariance(std::move(covariance));
BOOST_REQUIRE_CLOSE(g.Probability(arma::vec("-1.0")), 0.103776874355149,
1e-5);
}
@@ -215,7 +222,9 @@ BOOST_AUTO_TEST_CASE(GaussianMultivariateProbabilityTest)
BOOST_REQUIRE_CLOSE(g.Probability(x), 0.159154943091895, 1e-5);
- g.Covariance() = "2 0; 0 2";
+ arma::mat covariance;
+ covariance = "2 0; 0 2";
+ g.Covariance(std::move(covariance));
BOOST_REQUIRE_CLOSE(g.Probability(x), 0.0795774715459477, 1e-5);
@@ -230,7 +239,8 @@ BOOST_AUTO_TEST_CASE(GaussianMultivariateProbabilityTest)
BOOST_REQUIRE_CLOSE(g.Probability(-x), 0.0795774715459477, 1e-5);
g.Mean() = "1 1";
- g.Covariance() = "2 1.5; 1 4";
+ covariance = "2 1.5; 1 4";
+ g.Covariance(std::move(covariance));
BOOST_REQUIRE_CLOSE(g.Probability(x), 0.0624257046546403, 1e-5);
g.Mean() *= -1;
@@ -245,11 +255,13 @@ BOOST_AUTO_TEST_CASE(GaussianMultivariateProbabilityTest)
// Higher-dimensional case.
x = "0 1 2 3 4";
g.Mean() = "5 6 3 3 2";
- g.Covariance() = "6 1 1 0 2;"
- "0 7 1 0 1;"
- "1 1 4 1 1;"
- "1 0 1 7 0;"
- "2 0 1 1 6";
+
+ covariance = "6 1 1 0 2;"
+ "0 7 1 0 1;"
+ "1 1 4 1 1;"
+ "1 0 1 7 0;"
+ "2 0 1 1 6";
+ g.Covariance(std::move(covariance));
BOOST_REQUIRE_CLOSE(g.Probability(x), 1.02531207499358e-6, 1e-5);
BOOST_REQUIRE_CLOSE(g.Probability(-x), 1.06784794079363e-8, 1e-5);
diff --git a/src/mlpack/tests/gmm_test.cpp b/src/mlpack/tests/gmm_test.cpp
index e043376..d00a50e 100644
--- a/src/mlpack/tests/gmm_test.cpp
+++ b/src/mlpack/tests/gmm_test.cpp
@@ -490,7 +490,10 @@ BOOST_AUTO_TEST_CASE(GMMLoadSaveTest)
for (size_t i = 0; i < gmm.Gaussians(); ++i)
{
gmm.Component(i).Mean().randu();
- gmm.Component(i).Covariance().randu();
+ arma::mat covariance = arma::randu<arma::mat>(
+ gmm.Component(i).Covariance().n_rows,
+ gmm.Component(i).Covariance().n_cols);
+ gmm.Component(i).Covariance(std::move(covariance));
}
gmm.Save("test-gmm-save.xml");
diff --git a/src/mlpack/tests/hmm_test.cpp b/src/mlpack/tests/hmm_test.cpp
index 4368a1f..9ae7016 100644
--- a/src/mlpack/tests/hmm_test.cpp
+++ b/src/mlpack/tests/hmm_test.cpp
@@ -992,7 +992,10 @@ BOOST_AUTO_TEST_CASE(GMMHMMLoadSaveTest)
for (size_t i = 0; i < hmm.Emission()[j].Gaussians(); ++i)
{
hmm.Emission()[j].Component(i).Mean().randu();
- hmm.Emission()[j].Component(i).Covariance().randu();
+ arma::mat covariance = arma::randu<arma::mat>(
+ hmm.Emission()[j].Component(i).Covariance().n_rows,
+ hmm.Emission()[j].Component(i).Covariance().n_cols);
+ hmm.Emission()[j].Component(i).Covariance(std::move(covariance));
}
}
@@ -1048,7 +1051,10 @@ BOOST_AUTO_TEST_CASE(GaussianHMMLoadSaveTest)
for(size_t j = 0; j < hmm.Emission().size(); ++j)
{
hmm.Emission()[j].Mean().randu();
- hmm.Emission()[j].Covariance().randu();
+ arma::mat covariance = arma::randu<arma::mat>(
+ hmm.Emission()[j].Covariance().n_rows,
+ hmm.Emission()[j].Covariance().n_cols);
+ hmm.Emission()[j].Covariance(std::move(covariance));
}
util::SaveRestoreUtility sr;
More information about the mlpack-git
mailing list