[mlpack-git] master: Refactor so only the training is templatized. (a265ddb)

gitdub at big.cc.gt.atl.ga.us
Mon Dec 7 09:47:49 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/1efafa13b304c7821d858754b0d2dab9a05795a7...c2d22d60c3355e82b45e6596d721c3905351989c

>---------------------------------------------------------------

commit a265ddb024c44b2eb1e18024257c4f9609c84901
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sun Dec 6 21:26:00 2015 +0000

    Refactor so only the training is templatized.


>---------------------------------------------------------------

a265ddb024c44b2eb1e18024257c4f9609c84901
 src/mlpack/methods/sparse_coding/CMakeLists.txt    |   1 +
 src/mlpack/methods/sparse_coding/sparse_coding.hpp |  11 +-
 .../methods/sparse_coding/sparse_coding_impl.hpp   | 291 +--------------------
 .../methods/sparse_coding/sparse_coding_main.cpp   |  35 +--
 src/mlpack/tests/sparse_coding_test.cpp            |   6 +-
 src/mlpack/tests/to_string_test.cpp                |   2 +-
 6 files changed, 35 insertions(+), 311 deletions(-)
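
In short: the DictionaryInitializer template parameter moves off the
SparseCoding class and onto the constructor and Train(), so a trained model
is one concrete type no matter how its dictionary was initialized.  A
minimal before/after sketch of the call sites (condensed from the diff
below; `data' is a stand-in arma::mat):

    // Before this commit: the initializer was a class template parameter,
    // so each choice of initializer produced a distinct SparseCoding type.
    SparseCoding<NothingInitializer> scOld(atoms, lambda1);
    scOld.Train(data);

    // After this commit: SparseCoding is a plain class; only training is
    // templatized.  The default is still DataDependentRandomInitializer.
    SparseCoding scNew(atoms, lambda1);
    scNew.Train<NothingInitializer>(data);  // explicit initializer
    scNew.Train(data);                      // default initializer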

diff --git a/src/mlpack/methods/sparse_coding/CMakeLists.txt b/src/mlpack/methods/sparse_coding/CMakeLists.txt
index 5c0cec3..79b6b07 100644
--- a/src/mlpack/methods/sparse_coding/CMakeLists.txt
+++ b/src/mlpack/methods/sparse_coding/CMakeLists.txt
@@ -5,6 +5,7 @@ set(SOURCES
   nothing_initializer.hpp
   random_initializer.hpp
   sparse_coding.hpp
+  sparse_coding.cpp
   sparse_coding_impl.hpp
 )
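
(A new sparse_coding.cpp is registered here; presumably the now
non-templatized member definitions stripped out of sparse_coding_impl.hpp
below land in that translation unit.  The file's contents are not part of
this diff.)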
 
diff --git a/src/mlpack/methods/sparse_coding/sparse_coding.hpp b/src/mlpack/methods/sparse_coding/sparse_coding.hpp
index f764f5d..89bc49a 100644
--- a/src/mlpack/methods/sparse_coding/sparse_coding.hpp
+++ b/src/mlpack/methods/sparse_coding/sparse_coding.hpp
@@ -104,7 +104,6 @@ namespace sparse_coding {
  *     dictionary; must have 'void Initialize(const arma::mat& data, arma::mat&
  *     dictionary)' function.
  */
-template<typename DictionaryInitializer = DataDependentRandomInitializer>
 class SparseCoding
 {
  public:
@@ -126,13 +125,16 @@ class SparseCoding
    * @param newtonTolerance Tolerance for the Newton's method dictionary
    *     optimization step.
    */
+  template<typename DictionaryInitializer = DataDependentRandomInitializer>
   SparseCoding(const arma::mat& data,
                const size_t atoms,
                const double lambda1,
                const double lambda2 = 0,
                const size_t maxIterations = 0,
                const double objTolerance = 0.01,
-               const double newtonTolerance = 1e-6);
+               const double newtonTolerance = 1e-6,
+               const DictionaryInitializer& initializer =
+                   DictionaryInitializer());
 
   /**
    * Set the parameters to SparseCoding.  lambda2 defaults to 0.  This
@@ -161,7 +163,10 @@ class SparseCoding
   /**
    * Train the sparse coding model on the given dataset.
    */
-  void Train(const arma::mat& data);
+  template<typename DictionaryInitializer = DataDependentRandomInitializer>
+  void Train(const arma::mat& data,
+             const DictionaryInitializer& initializer =
+                 DictionaryInitializer());
 
   /**
    * Sparse code each point via LARS.
diff --git a/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp b/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp
index 264deb9..810bb9c 100644
--- a/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp
+++ b/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp
@@ -15,32 +15,15 @@ namespace mlpack {
 namespace sparse_coding {
 
 template<typename DictionaryInitializer>
-SparseCoding<DictionaryInitializer>::SparseCoding(
+SparseCoding::SparseCoding(
     const arma::mat& data,
     const size_t atoms,
     const double lambda1,
     const double lambda2,
     const size_t maxIterations,
     const double objTolerance,
-    const double newtonTolerance) :
-    atoms(atoms),
-    lambda1(lambda1),
-    lambda2(lambda2),
-    maxIterations(maxIterations),
-    objTolerance(objTolerance),
-    newtonTolerance(newtonTolerance)
-{
-  Train(data);
-}
-
-template<typename DictionaryInitializer>
-SparseCoding<DictionaryInitializer>::SparseCoding(
-    const size_t atoms,
-    const double lambda1,
-    const double lambda2,
-    const size_t maxIterations,
-    const double objTolerance,
-    const double newtonTolerance) :
+    const double newtonTolerance,
+    const DictionaryInitializer& initializer) :
     atoms(atoms),
     lambda1(lambda1),
     lambda2(lambda2),
@@ -48,17 +31,19 @@ SparseCoding<DictionaryInitializer>::SparseCoding(
     objTolerance(objTolerance),
     newtonTolerance(newtonTolerance)
 {
-  // Nothing to do.
+  Train(data, initializer);
 }
 
 template<typename DictionaryInitializer>
-void SparseCoding<DictionaryInitializer>::Train(const arma::mat& data)
+void SparseCoding::Train(
+    const arma::mat& data,
+    const DictionaryInitializer& initializer)
 {
   // Now, train.
   Timer::Start("sparse_coding");
 
   // Initialize the dictionary.
-  DictionaryInitializer::Initialize(data, atoms, dictionary);
+  initializer.Initialize(data, atoms, dictionary);
 
   double lastObjVal = DBL_MAX;
 
@@ -118,266 +103,6 @@ void SparseCoding<DictionaryInitializer>::Train(const arma::mat& data)
   Timer::Stop("sparse_coding");
 }
 
-template<typename DictionaryInitializer>
-void SparseCoding<DictionaryInitializer>::OptimizeCode(const arma::mat& data,
-                                                       arma::mat& codes)
-{
-  // When using the Cholesky version of LARS, this is correct even if
-  // lambda2 > 0.
-  arma::mat matGram = trans(dictionary) * dictionary;
-
-  codes.set_size(atoms, data.n_cols);
-  for (size_t i = 0; i < data.n_cols; ++i)
-  {
-    // Report progress.
-    if ((i % 100) == 0)
-      Log::Debug << "Optimization at point " << i << "." << std::endl;
-
-    bool useCholesky = true;
-    regression::LARS lars(useCholesky, matGram, lambda1, lambda2);
-
-    // Create an alias of the code (using the same memory), and then LARS will
-    // place the result directly into that; then we will not need to have an
-    // extra copy.
-    arma::vec code = codes.unsafe_col(i);
-    lars.Train(dictionary, data.unsafe_col(i), code, false);
-  }
-}
-
-// Dictionary step for optimization.
-template<typename DictionaryInitializer>
-double SparseCoding<DictionaryInitializer>::OptimizeDictionary(
-    const arma::mat& data,
-    const arma::mat& codes,
-    const arma::uvec& adjacencies,
-    const double newtonTolerance,
-    const size_t maxIterations)
-{
-  // Count the number of atomic neighbors for each point x^i.
-  arma::uvec neighborCounts = arma::zeros<arma::uvec>(data.n_cols, 1);
-
-  if (adjacencies.n_elem > 0)
-  {
-    // This gets the column index.  Intentional integer division.
-    size_t curPointInd = (size_t) (adjacencies(0) / atoms);
-
-    size_t nextColIndex = (curPointInd + 1) * atoms;
-    for (size_t l = 1; l < adjacencies.n_elem; ++l)
-    {
-      // If l no longer refers to an element in this column, advance the column
-      // number accordingly.
-      if (adjacencies(l) >= nextColIndex)
-      {
-        curPointInd = (size_t) (adjacencies(l) / atoms);
-        nextColIndex = (curPointInd + 1) * atoms;
-      }
-
-      ++neighborCounts(curPointInd);
-    }
-  }
-
-  // Handle the case of inactive atoms (atoms not used in the given coding).
-  std::vector<size_t> inactiveAtoms;
-
-  for (size_t j = 0; j < atoms; ++j)
-  {
-    if (arma::accu(codes.row(j) != 0) == 0)
-      inactiveAtoms.push_back(j);
-  }
-
-  const size_t nInactiveAtoms = inactiveAtoms.size();
-  const size_t nActiveAtoms = atoms - nInactiveAtoms;
-
-  // Efficient construction of Z restricted to active atoms.
-  arma::mat matActiveZ;
-  if (nInactiveAtoms > 0)
-  {
-    math::RemoveRows(codes, inactiveAtoms, matActiveZ);
-  }
-
-  if (nInactiveAtoms > 0)
-  {
-    Log::Warn << "There are " << nInactiveAtoms
-        << " inactive atoms. They will be re-initialized randomly.\n";
-  }
-
-  Log::Debug << "Solving Dual via Newton's Method.\n";
-
-  // Solve using Newton's method in the dual - note that the final dot
-  // multiplication with inv(A) seems to be unavoidable. Although more
-  // expensive, the code written this way (we use solve()) should be more
-  // numerically stable than just using inv(A) for everything.
-  arma::vec dualVars = arma::zeros<arma::vec>(nActiveAtoms);
-
-  //vec dualVars = 1e-14 * ones<vec>(nActiveAtoms);
-
-  // Method used by feature sign code - fails miserably here.  Perhaps the
-  // MATLAB optimizer fmincon does something clever?
-  //vec dualVars = 10.0 * randu(nActiveAtoms, 1);
-
-  //vec dualVars = diagvec(solve(dictionary, data * trans(codes))
-  //    - codes * trans(codes));
-  //for (size_t i = 0; i < dualVars.n_elem; i++)
-  //  if (dualVars(i) < 0)
-  //    dualVars(i) = 0;
-
-  bool converged = false;
-
-  // If we have any inactive atoms, we must construct these differently.
-  arma::mat codesXT;
-  arma::mat codesZT;
-
-  if (inactiveAtoms.empty())
-  {
-    codesXT = codes * trans(data);
-    codesZT = codes * trans(codes);
-  }
-  else
-  {
-    codesXT = matActiveZ * trans(data);
-    codesZT = matActiveZ * trans(matActiveZ);
-  }
-
-  double normGradient = 0;
-  double improvement = 0;
-  for (size_t t = 1; (t != maxIterations) && !converged; ++t)
-  {
-    arma::mat A = codesZT + diagmat(dualVars);
-
-    arma::mat matAInvZXT = solve(A, codesXT);
-
-    arma::vec gradient = -arma::sum(arma::square(matAInvZXT), 1);
-    gradient += 1;
-
-    arma::mat hessian = -(-2 * (matAInvZXT * trans(matAInvZXT)) % inv(A));
-
-    arma::vec searchDirection = -solve(hessian, gradient);
-    //printf("%e\n", norm(searchDirection, 2));
-
-    // Armijo line search.
-    const double c = 1e-4;
-    double alpha = 1.0;
-    const double rho = 0.9;
-    double sufficientDecrease = c * dot(gradient, searchDirection);
-
-    // A maxIterations parameter for the Armijo line search may be a good idea,
-    // but it doesn't seem to be causing any problems for now.
-    while (true)
-    {
-      // Calculate objective.
-      double sumDualVars = arma::sum(dualVars);
-      double fOld = -(-trace(trans(codesXT) * matAInvZXT) - sumDualVars);
-      double fNew = -(-trace(trans(codesXT) * solve(codesZT +
-          diagmat(dualVars + alpha * searchDirection), codesXT)) -
-          (sumDualVars + alpha * arma::sum(searchDirection)));
-
-      if (fNew <= fOld + alpha * sufficientDecrease)
-      {
-        searchDirection = alpha * searchDirection;
-        improvement = fOld - fNew;
-        break;
-      }
-
-      alpha *= rho;
-    }
-
-    // Take step and print useful information.
-    dualVars += searchDirection;
-    normGradient = arma::norm(gradient, 2);
-    Log::Debug << "Newton Method iteration " << t << ":" << std::endl;
-    Log::Debug << "  Gradient norm: " << std::scientific << normGradient
-        << "." << std::endl;
-    Log::Debug << "  Improvement: " << std::scientific << improvement << ".\n";
-
-    if (normGradient < newtonTolerance)
-      converged = true;
-  }
-
-  if (inactiveAtoms.empty())
-  {
-    // Directly update dictionary.
-    dictionary = trans(solve(codesZT + diagmat(dualVars), codesXT));
-  }
-  else
-  {
-    arma::mat activeDictionary = trans(solve(codesZT +
-        diagmat(dualVars), codesXT));
-
-    // Update all atoms.
-    size_t currentInactiveIndex = 0;
-    for (size_t i = 0; i < atoms; ++i)
-    {
-      if (inactiveAtoms[currentInactiveIndex] == i)
-      {
-        // This atom is inactive.  Reinitialize it randomly.
-        dictionary.col(i) = (data.col(math::RandInt(data.n_cols)) +
-                             data.col(math::RandInt(data.n_cols)) +
-                             data.col(math::RandInt(data.n_cols)));
-
-        dictionary.col(i) /= arma::norm(dictionary.col(i), 2);
-
-        // Increment inactive index counter.
-        ++currentInactiveIndex;
-      }
-      else
-      {
-        // Update estimate.
-        dictionary.col(i) = activeDictionary.col(i - currentInactiveIndex);
-      }
-    }
-  }
-
-  return normGradient;
-}
-
-// Project each atom of the dictionary back into the unit ball (if necessary).
-template<typename DictionaryInitializer>
-void SparseCoding<DictionaryInitializer>::ProjectDictionary()
-{
-  for (size_t j = 0; j < atoms; j++)
-  {
-    double atomNorm = arma::norm(dictionary.col(j), 2);
-    if (atomNorm > 1)
-    {
-      Log::Info << "Norm of atom " << j << " exceeds 1 (" << std::scientific
-          << atomNorm << ").  Shrinking...\n";
-      dictionary.col(j) /= atomNorm;
-    }
-  }
-}
-
-// Compute the objective function.
-template<typename DictionaryInitializer>
-double SparseCoding<DictionaryInitializer>::Objective(
-    const arma::mat& data,
-    const arma::mat& codes) const
-{
-  double l11NormZ = arma::sum(arma::sum(arma::abs(codes)));
-  double froNormResidual = arma::norm(data - (dictionary * codes), "fro");
-
-  if (lambda2 > 0)
-  {
-    double froNormZ = arma::norm(codes, "fro");
-    return 0.5 * (std::pow(froNormResidual, 2.0) + (lambda2 *
-        std::pow(froNormZ, 2.0))) + (lambda1 * l11NormZ);
-  }
-  else // It can be simpler.
-  {
-    return 0.5 * std::pow(froNormResidual, 2.0) + lambda1 * l11NormZ;
-  }
-}
-
-template<typename DictionaryInitializer>
-std::string SparseCoding<DictionaryInitializer>::ToString() const
-{
-  std::ostringstream convert;
-  convert << "Sparse Coding  [" << this << "]" << std::endl;
-  convert << "  Atoms: " << atoms << std::endl;
-  convert << "  Lambda 1: " << lambda1 << std::endl;
-  convert << "  Lambda 2: " << lambda2 << std::endl;
-  return convert.str();
-}
-
 } // namespace sparse_coding
 } // namespace mlpack
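
Note that the initialization call changes from the static
DictionaryInitializer::Initialize(...) to initializer.Initialize(...) on a
const reference, so an initializer type needs an Initialize(data, atoms,
dictionary) member that is either static or const-qualified.  A
hypothetical custom initializer under the new interface (illustrative
only; not part of this commit):

    // Hypothetical initializer: seed each atom with a random column of
    // the data, then hand it to the now-templatized Train().
    class RandomColumnInitializer
    {
     public:
      void Initialize(const arma::mat& data,
                      const size_t atoms,
                      arma::mat& dictionary) const
      {
        dictionary.set_size(data.n_rows, atoms);
        for (size_t i = 0; i < atoms; ++i)
          dictionary.col(i) = data.col(math::RandInt(data.n_cols));
      }
    };

    SparseCoding sc(atoms, lambda1);
    sc.Train(data, RandomColumnInitializer());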
 
diff --git a/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp b/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp
index 5c6ea25..7f43092 100644
--- a/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp
+++ b/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp
@@ -112,11 +112,10 @@ int main(int argc, char* argv[])
   }
 
   // If there is an initial dictionary, be sure we do not initialize one.
+  SparseCoding sc(atoms, lambda1, lambda2, maxIterations, objTolerance,
+      newtonTolerance);
   if (initialDictionaryFile != "")
   {
-    SparseCoding<NothingInitializer> sc(atoms, lambda1, lambda2, maxIterations,
-        objTolerance, newtonTolerance);
-
     // Load initial dictionary directly into sparse coding object.
     data::Load(initialDictionaryFile, sc.Dictionary(), true);
 
@@ -136,27 +135,21 @@ int main(int argc, char* argv[])
     }
 
     // Run sparse coding.
-    sc.Train(matX);
-
-    // Save the results.
-    Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
-    data::Save(dictionaryFile, sc.Dictionary());
-    Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
-    data::Save(codesFile, sc.Codes());
+    sc.Train<NothingInitializer>(matX);
   }
   else
   {
-    // No initial dictionary.
-    SparseCoding<> sc(atoms, lambda1, lambda2, maxIterations, objTolerance,
-        newtonTolerance);
-
-    // Run sparse coding.
+    // Run sparse coding with the default initialization.
     sc.Train(matX);
-
-    // Save the results.
-    Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
-    data::Save(dictionaryFile, sc.Dictionary());
-    Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
-    data::Save(codesFile, sc.Codes());
   }
+
+  // Encode the input matrix.
+  arma::mat codes;
+  sc.OptimizeCode(matX, codes);
+
+  // Save the results.
+  Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
+  data::Save(dictionaryFile, sc.Dictionary());
+  Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
+  data::Save(codesFile, codes);
 }
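
The net effect in the command-line program: one untemplatized SparseCoding
object is constructed up front, trained with NothingInitializer when an
initial dictionary was loaded (so the loaded dictionary is preserved) or
with the default initializer otherwise, and the codes are then computed
once via OptimizeCode() for both paths.  Condensed sketch of the new flow
(CLI parsing and error checks omitted):

    SparseCoding sc(atoms, lambda1, lambda2, maxIterations, objTolerance,
        newtonTolerance);
    if (initialDictionaryFile != "")
    {
      data::Load(initialDictionaryFile, sc.Dictionary(), true);
      sc.Train<NothingInitializer>(matX);  // keep the loaded dictionary
    }
    else
      sc.Train(matX);  // DataDependentRandomInitializer by default

    arma::mat codes;
    sc.OptimizeCode(matX, codes);

    data::Save(dictionaryFile, sc.Dictionary());
    data::Save(codesFile, codes);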
diff --git a/src/mlpack/tests/sparse_coding_test.cpp b/src/mlpack/tests/sparse_coding_test.cpp
index ed4d24e..0af0e30 100644
--- a/src/mlpack/tests/sparse_coding_test.cpp
+++ b/src/mlpack/tests/sparse_coding_test.cpp
@@ -58,7 +58,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingTestCodingStepLasso)
     X.col(i) /= norm(X.col(i), 2);
   }
 
-  SparseCoding<> sc(nAtoms, lambda1);
+  SparseCoding sc(nAtoms, lambda1);
   mat Z;
   DataDependentRandomInitializer::Initialize(X, 25, sc.Dictionary());
   sc.OptimizeCode(X, Z);
@@ -86,7 +86,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingTestCodingStepElasticNet)
   for (uword i = 0; i < nPoints; ++i)
     X.col(i) /= norm(X.col(i), 2);
 
-  SparseCoding<> sc(nAtoms, lambda1, lambda2);
+  SparseCoding sc(nAtoms, lambda1, lambda2);
   mat Z;
   DataDependentRandomInitializer::Initialize(X, 25, sc.Dictionary());
   sc.OptimizeCode(X, Z);
@@ -118,7 +118,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingTestDictionaryStep)
   for (uword i = 0; i < nPoints; ++i)
     X.col(i) /= norm(X.col(i), 2);
 
-  SparseCoding<> sc(nAtoms, lambda1);
+  SparseCoding sc(nAtoms, lambda1);
   mat Z;
   DataDependentRandomInitializer::Initialize(X, 25, sc.Dictionary());
   sc.OptimizeCode(X, Z);
diff --git a/src/mlpack/tests/to_string_test.cpp b/src/mlpack/tests/to_string_test.cpp
index ffd774d..9cc498a 100644
--- a/src/mlpack/tests/to_string_test.cpp
+++ b/src/mlpack/tests/to_string_test.cpp
@@ -522,7 +522,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingString)
   c.randn();
   const size_t b=3;
   double a=0.1;
-  mlpack::sparse_coding::SparseCoding<> d(c,b,a);
+  mlpack::sparse_coding::SparseCoding d(c,b,a);
   Log::Debug << d;
   testOstream << d;
   std::string s = d.ToString();


