[mlpack-git] master: Refactor so only the training is templatized. (a265ddb)
gitdub at big.cc.gt.atl.ga.us
Mon Dec 7 09:47:49 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/1efafa13b304c7821d858754b0d2dab9a05795a7...c2d22d60c3355e82b45e6596d721c3905351989c
>---------------------------------------------------------------
commit a265ddb024c44b2eb1e18024257c4f9609c84901
Author: Ryan Curtin <ryan at ratml.org>
Date: Sun Dec 6 21:26:00 2015 +0000
Refactor so only the training is templatized.
>---------------------------------------------------------------
a265ddb024c44b2eb1e18024257c4f9609c84901
src/mlpack/methods/sparse_coding/CMakeLists.txt | 1 +
src/mlpack/methods/sparse_coding/sparse_coding.hpp | 11 +-
.../methods/sparse_coding/sparse_coding_impl.hpp | 291 +--------------------
.../methods/sparse_coding/sparse_coding_main.cpp | 35 +--
src/mlpack/tests/sparse_coding_test.cpp | 6 +-
src/mlpack/tests/to_string_test.cpp | 2 +-
6 files changed, 35 insertions(+), 311 deletions(-)
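
This refactor moves the DictionaryInitializer template parameter off the
SparseCoding class itself and onto the data-taking constructor and Train(),
so the class is now a plain (non-template) type whose remaining members can
be compiled once in a translation unit instead of living entirely in the
header. A condensed sketch of the API change, based on the usage visible in
sparse_coding_main.cpp further down:

    // Before: the initializer was a class template parameter.
    SparseCoding<NothingInitializer> sc(atoms, lambda1);
    sc.Train(data);

    // After: SparseCoding is a plain class; the initializer is chosen per
    // call, either as an explicit template argument on Train()...
    SparseCoding sc(atoms, lambda1);
    sc.Train<NothingInitializer>(data);

    // ...or deduced from an initializer object passed to Train().
    sc.Train(data, NothingInitializer());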
diff --git a/src/mlpack/methods/sparse_coding/CMakeLists.txt b/src/mlpack/methods/sparse_coding/CMakeLists.txt
index 5c0cec3..79b6b07 100644
--- a/src/mlpack/methods/sparse_coding/CMakeLists.txt
+++ b/src/mlpack/methods/sparse_coding/CMakeLists.txt
@@ -5,6 +5,7 @@ set(SOURCES
nothing_initializer.hpp
random_initializer.hpp
sparse_coding.hpp
+ sparse_coding.cpp
sparse_coding_impl.hpp
)
diff --git a/src/mlpack/methods/sparse_coding/sparse_coding.hpp b/src/mlpack/methods/sparse_coding/sparse_coding.hpp
index f764f5d..89bc49a 100644
--- a/src/mlpack/methods/sparse_coding/sparse_coding.hpp
+++ b/src/mlpack/methods/sparse_coding/sparse_coding.hpp
@@ -104,7 +104,6 @@ namespace sparse_coding {
* dictionary; must have 'void Initialize(const arma::mat& data, arma::mat&
* dictionary)' function.
*/
-template<typename DictionaryInitializer = DataDependentRandomInitializer>
class SparseCoding
{
public:
@@ -126,13 +125,16 @@ class SparseCoding
* @param newtonTolerance Tolerance for the Newton's method dictionary
* optimization step.
*/
+ template<typename DictionaryInitializer = DataDependentRandomInitializer>
SparseCoding(const arma::mat& data,
const size_t atoms,
const double lambda1,
const double lambda2 = 0,
const size_t maxIterations = 0,
const double objTolerance = 0.01,
- const double newtonTolerance = 1e-6);
+ const double newtonTolerance = 1e-6,
+ const DictionaryInitializer& initializer =
+ DictionaryInitializer());
/**
* Set the parameters to SparseCoding. lambda2 defaults to 0. This
@@ -161,7 +163,10 @@ class SparseCoding
/**
* Train the sparse coding model on the given dataset.
*/
- void Train(const arma::mat& data);
+ template<typename DictionaryInitializer = DataDependentRandomInitializer>
+ void Train(const arma::mat& data,
+ const DictionaryInitializer& initializer =
+ DictionaryInitializer());
/**
* Sparse code each point via LARS.
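
Note that the documentation block quoted at the top of this file still
describes the initializer interface as 'void Initialize(const arma::mat&
data, arma::mat& dictionary)', while the call in the refactored Train()
(see the next diff) passes three arguments: (data, atoms, dictionary). A
minimal sketch of a custom initializer matching the three-argument call
actually made; RandomPointInitializer is hypothetical, not part of mlpack,
and mirrors the reinitialization scheme the dictionary step uses for
inactive atoms:

    // Hypothetical initializer satisfying the interface Train() calls:
    // initializer.Initialize(data, atoms, dictionary).
    class RandomPointInitializer
    {
     public:
      void Initialize(const arma::mat& data,
                      const size_t atoms,
                      arma::mat& dictionary) const
      {
        dictionary.set_size(data.n_rows, atoms);
        for (size_t i = 0; i < atoms; ++i)
        {
          // Seed each atom with a random data point, normalized to unit
          // L2 norm.
          dictionary.col(i) = data.col(math::RandInt(data.n_cols));
          dictionary.col(i) /= arma::norm(dictionary.col(i), 2);
        }
      }
    };

    // Usage: the Train() template parameter is deduced from the argument.
    // SparseCoding sc(atoms, lambda1);
    // sc.Train(data, RandomPointInitializer());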
diff --git a/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp b/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp
index 264deb9..810bb9c 100644
--- a/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp
+++ b/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp
@@ -15,32 +15,15 @@ namespace mlpack {
namespace sparse_coding {
template<typename DictionaryInitializer>
-SparseCoding<DictionaryInitializer>::SparseCoding(
+SparseCoding::SparseCoding(
const arma::mat& data,
const size_t atoms,
const double lambda1,
const double lambda2,
const size_t maxIterations,
const double objTolerance,
- const double newtonTolerance) :
- atoms(atoms),
- lambda1(lambda1),
- lambda2(lambda2),
- maxIterations(maxIterations),
- objTolerance(objTolerance),
- newtonTolerance(newtonTolerance)
-{
- Train(data);
-}
-
-template<typename DictionaryInitializer>
-SparseCoding<DictionaryInitializer>::SparseCoding(
- const size_t atoms,
- const double lambda1,
- const double lambda2,
- const size_t maxIterations,
- const double objTolerance,
- const double newtonTolerance) :
+ const double newtonTolerance,
+ const DictionaryInitializer& initializer) :
atoms(atoms),
lambda1(lambda1),
lambda2(lambda2),
@@ -48,17 +31,19 @@ SparseCoding<DictionaryInitializer>::SparseCoding(
objTolerance(objTolerance),
newtonTolerance(newtonTolerance)
{
- // Nothing to do.
+ Train(data, initializer);
}
template<typename DictionaryInitializer>
-void SparseCoding<DictionaryInitializer>::Train(const arma::mat& data)
+void SparseCoding::Train(
+ const arma::mat& data,
+ const DictionaryInitializer& initializer)
{
// Now, train.
Timer::Start("sparse_coding");
// Initialize the dictionary.
- DictionaryInitializer::Initialize(data, atoms, dictionary);
+ initializer.Initialize(data, atoms, dictionary);
double lastObjVal = DBL_MAX;
@@ -118,266 +103,6 @@ void SparseCoding<DictionaryInitializer>::Train(const arma::mat& data)
Timer::Stop("sparse_coding");
}
-template<typename DictionaryInitializer>
-void SparseCoding<DictionaryInitializer>::OptimizeCode(const arma::mat& data,
- arma::mat& codes)
-{
- // When using the Cholesky version of LARS, this is correct even if
- // lambda2 > 0.
- arma::mat matGram = trans(dictionary) * dictionary;
-
- codes.set_size(atoms, data.n_cols);
- for (size_t i = 0; i < data.n_cols; ++i)
- {
- // Report progress.
- if ((i % 100) == 0)
- Log::Debug << "Optimization at point " << i << "." << std::endl;
-
- bool useCholesky = true;
- regression::LARS lars(useCholesky, matGram, lambda1, lambda2);
-
- // Create an alias of the code (using the same memory), and then LARS will
- // place the result directly into that; then we will not need to have an
- // extra copy.
- arma::vec code = codes.unsafe_col(i);
- lars.Train(dictionary, data.unsafe_col(i), code, false);
- }
-}
-
-// Dictionary step for optimization.
-template<typename DictionaryInitializer>
-double SparseCoding<DictionaryInitializer>::OptimizeDictionary(
- const arma::mat& data,
- const arma::mat& codes,
- const arma::uvec& adjacencies,
- const double newtonTolerance,
- const size_t maxIterations)
-{
- // Count the number of atomic neighbors for each point x^i.
- arma::uvec neighborCounts = arma::zeros<arma::uvec>(data.n_cols, 1);
-
- if (adjacencies.n_elem > 0)
- {
- // This gets the column index. Intentional integer division.
- size_t curPointInd = (size_t) (adjacencies(0) / atoms);
-
- size_t nextColIndex = (curPointInd + 1) * atoms;
- for (size_t l = 1; l < adjacencies.n_elem; ++l)
- {
- // If l no longer refers to an element in this column, advance the column
- // number accordingly.
- if (adjacencies(l) >= nextColIndex)
- {
- curPointInd = (size_t) (adjacencies(l) / atoms);
- nextColIndex = (curPointInd + 1) * atoms;
- }
-
- ++neighborCounts(curPointInd);
- }
- }
-
- // Handle the case of inactive atoms (atoms not used in the given coding).
- std::vector<size_t> inactiveAtoms;
-
- for (size_t j = 0; j < atoms; ++j)
- {
- if (arma::accu(codes.row(j) != 0) == 0)
- inactiveAtoms.push_back(j);
- }
-
- const size_t nInactiveAtoms = inactiveAtoms.size();
- const size_t nActiveAtoms = atoms - nInactiveAtoms;
-
- // Efficient construction of Z restricted to active atoms.
- arma::mat matActiveZ;
- if (nInactiveAtoms > 0)
- {
- math::RemoveRows(codes, inactiveAtoms, matActiveZ);
- }
-
- if (nInactiveAtoms > 0)
- {
- Log::Warn << "There are " << nInactiveAtoms
- << " inactive atoms. They will be re-initialized randomly.\n";
- }
-
- Log::Debug << "Solving Dual via Newton's Method.\n";
-
- // Solve using Newton's method in the dual - note that the final dot
- // multiplication with inv(A) seems to be unavoidable. Although more
- // expensive, the code written this way (we use solve()) should be more
- // numerically stable than just using inv(A) for everything.
- arma::vec dualVars = arma::zeros<arma::vec>(nActiveAtoms);
-
- //vec dualVars = 1e-14 * ones<vec>(nActiveAtoms);
-
- // Method used by feature sign code - fails miserably here. Perhaps the
- // MATLAB optimizer fmincon does something clever?
- //vec dualVars = 10.0 * randu(nActiveAtoms, 1);
-
- //vec dualVars = diagvec(solve(dictionary, data * trans(codes))
- // - codes * trans(codes));
- //for (size_t i = 0; i < dualVars.n_elem; i++)
- // if (dualVars(i) < 0)
- // dualVars(i) = 0;
-
- bool converged = false;
-
- // If we have any inactive atoms, we must construct these differently.
- arma::mat codesXT;
- arma::mat codesZT;
-
- if (inactiveAtoms.empty())
- {
- codesXT = codes * trans(data);
- codesZT = codes * trans(codes);
- }
- else
- {
- codesXT = matActiveZ * trans(data);
- codesZT = matActiveZ * trans(matActiveZ);
- }
-
- double normGradient = 0;
- double improvement = 0;
- for (size_t t = 1; (t != maxIterations) && !converged; ++t)
- {
- arma::mat A = codesZT + diagmat(dualVars);
-
- arma::mat matAInvZXT = solve(A, codesXT);
-
- arma::vec gradient = -arma::sum(arma::square(matAInvZXT), 1);
- gradient += 1;
-
- arma::mat hessian = -(-2 * (matAInvZXT * trans(matAInvZXT)) % inv(A));
-
- arma::vec searchDirection = -solve(hessian, gradient);
- //printf("%e\n", norm(searchDirection, 2));
-
- // Armijo line search.
- const double c = 1e-4;
- double alpha = 1.0;
- const double rho = 0.9;
- double sufficientDecrease = c * dot(gradient, searchDirection);
-
- // A maxIterations parameter for the Armijo line search may be a good idea,
- // but it doesn't seem to be causing any problems for now.
- while (true)
- {
- // Calculate objective.
- double sumDualVars = arma::sum(dualVars);
- double fOld = -(-trace(trans(codesXT) * matAInvZXT) - sumDualVars);
- double fNew = -(-trace(trans(codesXT) * solve(codesZT +
- diagmat(dualVars + alpha * searchDirection), codesXT)) -
- (sumDualVars + alpha * arma::sum(searchDirection)));
-
- if (fNew <= fOld + alpha * sufficientDecrease)
- {
- searchDirection = alpha * searchDirection;
- improvement = fOld - fNew;
- break;
- }
-
- alpha *= rho;
- }
-
- // Take step and print useful information.
- dualVars += searchDirection;
- normGradient = arma::norm(gradient, 2);
- Log::Debug << "Newton Method iteration " << t << ":" << std::endl;
- Log::Debug << " Gradient norm: " << std::scientific << normGradient
- << "." << std::endl;
- Log::Debug << " Improvement: " << std::scientific << improvement << ".\n";
-
- if (normGradient < newtonTolerance)
- converged = true;
- }
-
- if (inactiveAtoms.empty())
- {
- // Directly update dictionary.
- dictionary = trans(solve(codesZT + diagmat(dualVars), codesXT));
- }
- else
- {
- arma::mat activeDictionary = trans(solve(codesZT +
- diagmat(dualVars), codesXT));
-
- // Update all atoms.
- size_t currentInactiveIndex = 0;
- for (size_t i = 0; i < atoms; ++i)
- {
- if (inactiveAtoms[currentInactiveIndex] == i)
- {
- // This atom is inactive. Reinitialize it randomly.
- dictionary.col(i) = (data.col(math::RandInt(data.n_cols)) +
- data.col(math::RandInt(data.n_cols)) +
- data.col(math::RandInt(data.n_cols)));
-
- dictionary.col(i) /= arma::norm(dictionary.col(i), 2);
-
- // Increment inactive index counter.
- ++currentInactiveIndex;
- }
- else
- {
- // Update estimate.
- dictionary.col(i) = activeDictionary.col(i - currentInactiveIndex);
- }
- }
- }
-
- return normGradient;
-}
-
-// Project each atom of the dictionary back into the unit ball (if necessary).
-template<typename DictionaryInitializer>
-void SparseCoding<DictionaryInitializer>::ProjectDictionary()
-{
- for (size_t j = 0; j < atoms; j++)
- {
- double atomNorm = arma::norm(dictionary.col(j), 2);
- if (atomNorm > 1)
- {
- Log::Info << "Norm of atom " << j << " exceeds 1 (" << std::scientific
- << atomNorm << "). Shrinking...\n";
- dictionary.col(j) /= atomNorm;
- }
- }
-}
-
-// Compute the objective function.
-template<typename DictionaryInitializer>
-double SparseCoding<DictionaryInitializer>::Objective(
- const arma::mat& data,
- const arma::mat& codes) const
-{
- double l11NormZ = arma::sum(arma::sum(arma::abs(codes)));
- double froNormResidual = arma::norm(data - (dictionary * codes), "fro");
-
- if (lambda2 > 0)
- {
- double froNormZ = arma::norm(codes, "fro");
- return 0.5 * (std::pow(froNormResidual, 2.0) + (lambda2 *
- std::pow(froNormZ, 2.0))) + (lambda1 * l11NormZ);
- }
- else // It can be simpler.
- {
- return 0.5 * std::pow(froNormResidual, 2.0) + lambda1 * l11NormZ;
- }
-}
-
-template<typename DictionaryInitializer>
-std::string SparseCoding<DictionaryInitializer>::ToString() const
-{
- std::ostringstream convert;
- convert << "Sparse Coding [" << this << "]" << std::endl;
- convert << " Atoms: " << atoms << std::endl;
- convert << " Lambda 1: " << lambda1 << std::endl;
- convert << " Lambda 2: " << lambda2 << std::endl;
- return convert.str();
-}
-
} // namespace sparse_coding
} // namespace mlpack
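
The bodies removed above are not deleted from the library: with the class
no longer a template, they can move out of the header into the
sparse_coding.cpp that this commit adds to CMakeLists.txt (the new file's
contents do not appear in this diff, so presumably it is introduced
elsewhere in this push). A sketch of what one moved definition would look
like without its template prefix, using the Objective() body removed above:

    // In sparse_coding.cpp: same body as before, minus the
    // template<typename DictionaryInitializer> boilerplate.
    double SparseCoding::Objective(const arma::mat& data,
                                   const arma::mat& codes) const
    {
      double l11NormZ = arma::sum(arma::sum(arma::abs(codes)));
      double froNormResidual = arma::norm(data - (dictionary * codes), "fro");

      if (lambda2 > 0)
      {
        double froNormZ = arma::norm(codes, "fro");
        return 0.5 * (std::pow(froNormResidual, 2.0) + (lambda2 *
            std::pow(froNormZ, 2.0))) + (lambda1 * l11NormZ);
      }
      else // It can be simpler.
      {
        return 0.5 * std::pow(froNormResidual, 2.0) + lambda1 * l11NormZ;
      }
    }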
diff --git a/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp b/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp
index 5c6ea25..7f43092 100644
--- a/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp
+++ b/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp
@@ -112,11 +112,10 @@ int main(int argc, char* argv[])
}
// If there is an initial dictionary, be sure we do not initialize one.
+ SparseCoding sc(atoms, lambda1, lambda2, maxIterations, objTolerance,
+ newtonTolerance);
if (initialDictionaryFile != "")
{
- SparseCoding<NothingInitializer> sc(atoms, lambda1, lambda2, maxIterations,
- objTolerance, newtonTolerance);
-
// Load initial dictionary directly into sparse coding object.
data::Load(initialDictionaryFile, sc.Dictionary(), true);
@@ -136,27 +135,21 @@ int main(int argc, char* argv[])
}
// Run sparse coding.
- sc.Train(matX);
-
- // Save the results.
- Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
- data::Save(dictionaryFile, sc.Dictionary());
- Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
- data::Save(codesFile, sc.Codes());
+ sc.Train<NothingInitializer>(matX);
}
else
{
- // No initial dictionary.
- SparseCoding<> sc(atoms, lambda1, lambda2, maxIterations, objTolerance,
- newtonTolerance);
-
- // Run sparse coding.
+ // Run sparse coding with the default initialization.
sc.Train(matX);
-
- // Save the results.
- Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
- data::Save(dictionaryFile, sc.Dictionary());
- Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
- data::Save(codesFile, sc.Codes());
}
+
+ // Encode the input matrix.
+ arma::mat codes;
+ sc.OptimizeCode(matX, codes);
+
+ // Save the results.
+ Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
+ data::Save(dictionaryFile, sc.Dictionary());
+ Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
+ data::Save(codesFile, codes);
}
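
The main program change consolidates the two branches: a single
SparseCoding object is now constructed up front, each branch only decides
how to train it, and encoding and saving happen once after the if/else.
The codes are also recomputed explicitly via OptimizeCode() rather than
read back from the model with sc.Codes(). A condensed sketch of the new
flow, taken from the diff above:

    SparseCoding sc(atoms, lambda1, lambda2, maxIterations, objTolerance,
        newtonTolerance);
    // ... optionally load an initial dictionary into sc.Dictionary() ...
    sc.Train<NothingInitializer>(matX); // or sc.Train(matX) for the default

    arma::mat codes;
    sc.OptimizeCode(matX, codes); // encode once, in either case
    data::Save(dictionaryFile, sc.Dictionary());
    data::Save(codesFile, codes);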
diff --git a/src/mlpack/tests/sparse_coding_test.cpp b/src/mlpack/tests/sparse_coding_test.cpp
index ed4d24e..0af0e30 100644
--- a/src/mlpack/tests/sparse_coding_test.cpp
+++ b/src/mlpack/tests/sparse_coding_test.cpp
@@ -58,7 +58,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingTestCodingStepLasso)
X.col(i) /= norm(X.col(i), 2);
}
- SparseCoding<> sc(nAtoms, lambda1);
+ SparseCoding sc(nAtoms, lambda1);
mat Z;
DataDependentRandomInitializer::Initialize(X, 25, sc.Dictionary());
sc.OptimizeCode(X, Z);
@@ -86,7 +86,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingTestCodingStepElasticNet)
for (uword i = 0; i < nPoints; ++i)
X.col(i) /= norm(X.col(i), 2);
- SparseCoding<> sc(nAtoms, lambda1, lambda2);
+ SparseCoding sc(nAtoms, lambda1, lambda2);
mat Z;
DataDependentRandomInitializer::Initialize(X, 25, sc.Dictionary());
sc.OptimizeCode(X, Z);
@@ -118,7 +118,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingTestDictionaryStep)
for (uword i = 0; i < nPoints; ++i)
X.col(i) /= norm(X.col(i), 2);
- SparseCoding<> sc(nAtoms, lambda1);
+ SparseCoding sc(nAtoms, lambda1);
mat Z;
DataDependentRandomInitializer::Initialize(X, 25, sc.Dictionary());
sc.OptimizeCode(X, Z);
diff --git a/src/mlpack/tests/to_string_test.cpp b/src/mlpack/tests/to_string_test.cpp
index ffd774d..9cc498a 100644
--- a/src/mlpack/tests/to_string_test.cpp
+++ b/src/mlpack/tests/to_string_test.cpp
@@ -522,7 +522,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingString)
c.randn();
const size_t b=3;
double a=0.1;
- mlpack::sparse_coding::SparseCoding<> d(c,b,a);
+ mlpack::sparse_coding::SparseCoding d(c,b,a);
Log::Debug << d;
testOstream << d;
std::string s = d.ToString();