[mlpack-git] master: Refactor LCC to same API as SparseCoding. (efb930e)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 16 14:12:32 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/cd5986e141b41781fdc13a9c89443f9be33b56bd...31c10fef76ac1d85c6415c92d2ccd429c430105f

>---------------------------------------------------------------

commit efb930e9b50851ea54c5905043a9038e14016c7b
Author: Ryan Curtin <ryan at ratml.org>
Date:   Tue Dec 15 01:37:28 2015 +0000

    Refactor LCC to same API as SparseCoding.


>---------------------------------------------------------------

efb930e9b50851ea54c5905043a9038e14016c7b
 .../methods/local_coordinate_coding/CMakeLists.txt |   7 +-
 .../{lcc_impl.hpp => lcc.cpp}                      | 112 ++++-----------------
 src/mlpack/methods/local_coordinate_coding/lcc.hpp | 100 ++++++++++++++----
 .../methods/local_coordinate_coding/lcc_main.cpp   |  22 ++--
 src/mlpack/tests/local_coordinate_coding_test.cpp  |  29 +++---
 5 files changed, 131 insertions(+), 139 deletions(-)

diff --git a/src/mlpack/methods/local_coordinate_coding/CMakeLists.txt b/src/mlpack/methods/local_coordinate_coding/CMakeLists.txt
index 6c7d302..99b58e8 100644
--- a/src/mlpack/methods/local_coordinate_coding/CMakeLists.txt
+++ b/src/mlpack/methods/local_coordinate_coding/CMakeLists.txt
@@ -4,8 +4,9 @@
 # In this library, these are specified twice, once here, and once for the individual library it belongs to, so make sure
 # that you have files in both sections
 set(SOURCES
-   lcc.hpp
-   lcc_impl.hpp
+  lcc.hpp
+  lcc.cpp
+  lcc_impl.hpp
 )
 
 # add directory name to sources
@@ -18,7 +19,7 @@ endforeach()
 set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
 
 add_executable(local_coordinate_coding
-    lcc_main.cpp
+  lcc_main.cpp
 )
 
 target_link_libraries(local_coordinate_coding
diff --git a/src/mlpack/methods/local_coordinate_coding/lcc_impl.hpp b/src/mlpack/methods/local_coordinate_coding/lcc.cpp
similarity index 69%
rename from src/mlpack/methods/local_coordinate_coding/lcc_impl.hpp
rename to src/mlpack/methods/local_coordinate_coding/lcc.cpp
index 4630d68..54c2fa6 100644
--- a/src/mlpack/methods/local_coordinate_coding/lcc_impl.hpp
+++ b/src/mlpack/methods/local_coordinate_coding/lcc.cpp
@@ -1,101 +1,28 @@
 /**
- * @file lcc_impl.hpp
+ * @file lcc.cpp
  * @author Nishant Mehta
  *
- * Implementation of Local Coordinate Coding
+ * Implementation of Local Coordinate Coding.
  */
-#ifndef __MLPACK_METHODS_LOCAL_COORDINATE_CODING_LCC_IMPL_HPP
-#define __MLPACK_METHODS_LOCAL_COORDINATE_CODING_LCC_IMPL_HPP
-
-// In case it hasn't been included yet.
 #include "lcc.hpp"
 
 namespace mlpack {
 namespace lcc {
 
-template<typename DictionaryInitializer>
-LocalCoordinateCoding<DictionaryInitializer>::LocalCoordinateCoding(
-    const arma::mat& data,
+LocalCoordinateCoding::LocalCoordinateCoding(
     const size_t atoms,
-    const double lambda) :
-    atoms(atoms),
-    data(data),
-    codes(atoms, data.n_cols),
-    lambda(lambda)
-{
-  // Initialize the dictionary.
-  DictionaryInitializer::Initialize(data, atoms, dictionary);
-}
-
-template<typename DictionaryInitializer>
-void LocalCoordinateCoding<DictionaryInitializer>::Encode(
+    const double lambda,
     const size_t maxIterations,
-    const double objTolerance)
+    const double tolerance) :
+    atoms(atoms),
+    lambda(lambda),
+    maxIterations(maxIterations),
+    tolerance(tolerance)
 {
-  Timer::Start("local_coordinate_coding");
-
-  double lastObjVal = DBL_MAX;
-
-  // Take the initial coding step, which has to happen before entering the main
-  // loop.
-  Log::Info << "Initial Coding Step." << std::endl;
-
-  OptimizeCode();
-  arma::uvec adjacencies = find(codes);
-
-  Log::Info << "  Sparsity level: " << 100.0 * ((double)(adjacencies.n_elem)) /
-      ((double)(atoms * data.n_cols)) << "%.\n";
-  Log::Info << "  Objective value: " << Objective(adjacencies) << "."
-      << std::endl;
-
-  for (size_t t = 1; t != maxIterations; t++)
-  {
-    Log::Info << "Iteration " << t << " of " << maxIterations << "."
-        << std::endl;
-
-    // First step: optimize the dictionary.
-    Log::Info << "Performing dictionary step..." << std::endl;
-    OptimizeDictionary(adjacencies);
-    double dsObjVal = Objective(adjacencies);
-    Log::Info << "  Objective value: " << Objective(adjacencies) << "."
-        << std::endl;
-
-    // Second step: perform the coding.
-    Log::Info << "Performing coding step..." << std::endl;
-    OptimizeCode();
-    adjacencies = find(codes);
-    Log::Info << "  Sparsity level: " << 100.0 * ((double) (adjacencies.n_elem))
-        / ((double)(atoms * data.n_cols)) << "%.\n";
-
-    // Terminate if the objective increased in the coding step.
-    double curObjVal = Objective(adjacencies);
-    if (curObjVal > dsObjVal)
-    {
-      Log::Warn << "Objective increased in coding step!  Terminating."
-          << std::endl;
-      break;
-    }
-
-    // Find the new objective value and improvement so we can check for
-    // convergence.
-    double improvement = lastObjVal - curObjVal;
-    Log::Info << "Objective value: " << curObjVal << " (improvement "
-        << std::scientific << improvement << ")." << std::endl;
-
-    if (improvement < objTolerance)
-    {
-      Log::Info << "Converged within tolerance " << objTolerance << ".\n";
-      break;
-    }
-
-    lastObjVal = curObjVal;
-  }
-
-  Timer::Stop("local_coordinate_coding");
+  // Nothing to do.
 }
 
-template<typename DictionaryInitializer>
-void LocalCoordinateCoding<DictionaryInitializer>::OptimizeCode()
+void LocalCoordinateCoding::Encode(const arma::mat& data, arma::mat& codes)
 {
   arma::mat invSqDists = 1.0 / (repmat(trans(sum(square(dictionary))), 1,
       data.n_cols) + repmat(sum(square(data)), atoms, 1) - 2 * trans(dictionary)
@@ -104,9 +31,10 @@ void LocalCoordinateCoding<DictionaryInitializer>::OptimizeCode()
   arma::mat dictGram = trans(dictionary) * dictionary;
   arma::mat dictGramTD(dictGram.n_rows, dictGram.n_cols);
 
+  codes.set_size(atoms, data.n_cols);
   for (size_t i = 0; i < data.n_cols; i++)
   {
-    // report progress
+    // Report progress.
     if ((i % 100) == 0)
     {
       Log::Debug << "Optimization at point " << i << "." << std::endl;
@@ -128,9 +56,9 @@ void LocalCoordinateCoding<DictionaryInitializer>::OptimizeCode()
   }
 }
 
-template<typename DictionaryInitializer>
-void LocalCoordinateCoding<DictionaryInitializer>::OptimizeDictionary(
-    arma::uvec adjacencies)
+void LocalCoordinateCoding::OptimizeDictionary(const arma::mat& data,
+                                               const arma::mat& codes,
+                                               const arma::uvec& adjacencies)
 {
   // Count number of atomic neighbors for each point x^i.
   arma::uvec neighborCounts = arma::zeros<arma::uvec>(data.n_cols, 1);
@@ -288,9 +216,9 @@ void LocalCoordinateCoding<DictionaryInitializer>::OptimizeDictionary(
   }
 }
 
-template<typename DictionaryInitializer>
-double LocalCoordinateCoding<DictionaryInitializer>::Objective(
-    arma::uvec adjacencies) const
+double LocalCoordinateCoding::Objective(const arma::mat& data,
+                                        const arma::mat& codes,
+                                        const arma::uvec& adjacencies) const
 {
   double weightedL1NormZ = 0;
 
@@ -310,5 +238,3 @@ double LocalCoordinateCoding<DictionaryInitializer>::Objective(
 
 } // namespace lcc
 } // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/local_coordinate_coding/lcc.hpp b/src/mlpack/methods/local_coordinate_coding/lcc.hpp
index 185e941..ec1e801 100644
--- a/src/mlpack/methods/local_coordinate_coding/lcc.hpp
+++ b/src/mlpack/methods/local_coordinate_coding/lcc.hpp
@@ -71,21 +71,56 @@ namespace lcc {
  * }
  * @endcode
  */
-template<typename DictionaryInitializer =
-    sparse_coding::DataDependentRandomInitializer>
 class LocalCoordinateCoding
 {
  public:
   /**
-   * Set the parameters to LocalCoordinateCoding.
+   * Set the parameters to LocalCoordinateCoding, and train the dictionary.
+   * This constructor will also initialize the dictionary using the given
+   * DictionaryInitializer before training.
+   *
+   * If you want to initialize the dictionary to a custom matrix, consider
+   * either writing your own DictionaryInitializer class (with void
+   * Initialize(const arma::mat& data, arma::mat& dictionary) function), or call
+   * the constructor that does not take a data matrix, then call Dictionary() to
+   * set the dictionary matrix to a matrix of your choosing, and then call
+   * Train() with sparse_coding::NothingInitializer (i.e.
+   * Train<sparse_coding::NothingInitializer>(data)).
    *
    * @param data Data matrix.
    * @param atoms Number of atoms in dictionary.
    * @param lambda Regularization parameter for weighted l1-norm penalty.
+   * @param maxIterations Maximum number of iterations for training (0 runs
+   *      until convergence).
+   * @param tolerance Tolerance for the objective function.
    */
+  template<
+      typename DictionaryInitializer =
+          sparse_coding::DataDependentRandomInitializer
+  >
   LocalCoordinateCoding(const arma::mat& data,
                         const size_t atoms,
-                        const double lambda);
+                        const double lambda,
+                        const size_t maxIterations = 0,
+                        const double tolerance = 0.01,
+                        const DictionaryInitializer& initializer =
+                            DictionaryInitializer());
+
+  /**
+   * Set the parameters to LocalCoordinateCoding.  This constructor will not
+   * train the model, and a subsequent call to Train() will be required before
+   * the model can encode points with Encode().
+   *
+   * @param atoms Number of atoms in dictionary.
+   * @param lambda Regularization parameter for weighted l1-norm penalty.
+   * @param maxIterations Maximum number of iterations for training (0 runs
+   *      until convergence).
+   * @param tolerance Tolerance for the objective function.
+   */
+  LocalCoordinateCoding(const size_t atoms,
+                        const double lambda,
+                        const size_t maxIterations = 0,
+                        const double tolerance = 0.01);
 
   /**
    * Run local coordinate coding.
@@ -95,13 +130,21 @@ class LocalCoordinateCoding
    *     function changes by a value lower than this tolerance, the optimization
    *     terminates.
    */
-  void Encode(const size_t maxIterations = 0,
-              const double objTolerance = 0.01);
+  template<
+      typename DictionaryInitializer =
+          sparse_coding::DataDependentRandomInitializer
+  >
+  void Train(const arma::mat& data,
+             const DictionaryInitializer& initializer =
+                 DictionaryInitializer());
 
   /**
    * Code each point via distance-weighted LARS.
+   *
+   * @param data Matrix containing points to encode.
+   * @param codes Output matrix to store codes in.
    */
-  void OptimizeCode();
+  void Encode(const arma::mat& data, arma::mat& codes);
 
   /**
    * Learn dictionary by solving linear system.
@@ -110,41 +153,56 @@ class LocalCoordinateCoding
    *    the coding matrix Z that are non-zero (the adjacency matrix for the
    *    bipartite graph of points and atoms)
    */
-  void OptimizeDictionary(arma::uvec adjacencies);
+  void OptimizeDictionary(const arma::mat& data,
+                          const arma::mat& codes,
+                          const arma::uvec& adjacencies);
 
   /**
    * Compute objective function given the list of adjacencies.
    */
-  double Objective(arma::uvec adjacencies) const;
+  double Objective(const arma::mat& data,
+                   const arma::mat& codes,
+                   const arma::uvec& adjacencies) const;
 
-  //! Access the data.
-  const arma::mat& Data() const { return data; }
+  //! Get the number of atoms.
+  size_t Atoms() const { return atoms; }
+  //! Modify the number of atoms.
+  size_t& Atoms() { return atoms; }
 
   //! Accessor for dictionary.
   const arma::mat& Dictionary() const { return dictionary; }
   //! Mutator for dictionary.
   arma::mat& Dictionary() { return dictionary; }
 
-  //! Accessor the codes.
-  const arma::mat& Codes() const { return codes; }
-  //! Modify the codes.
-  arma::mat& Codes() { return codes; }
+  //! Get the L1 regularization parameter.
+  double Lambda() const { return lambda; }
+  //! Modify the L1 regularization parameter.
+  double& Lambda() { return lambda; }
+
+  //! Get the maximum number of iterations.
+  size_t MaxIterations() const { return maxIterations; }
+  //! Modify the maximum number of iterations.
+  size_t& MaxIterations() { return maxIterations; }
+
+  //! Get the objective tolerance.
+  double Tolerance() const { return tolerance; }
+  //! Modify the objective tolerance.
+  double& Tolerance() { return tolerance; }
 
  private:
   //! Number of atoms in dictionary.
   size_t atoms;
 
-  //! Data matrix (columns are points).
-  const arma::mat& data;
-
   //! Dictionary (columns are atoms).
   arma::mat dictionary;
 
-  //! Codes (columns are points).
-  arma::mat codes;
-
   //! l1 regularization term.
   double lambda;
+
+  //! Maximum number of iterations during training.
+  size_t maxIterations;
+  //! Tolerance for main objective.
+  double tolerance;
 };
 
 } // namespace lcc
diff --git a/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp b/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp
index 7f47f81..26887e8 100644
--- a/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp
+++ b/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp
@@ -111,7 +111,7 @@ int main(int argc, char* argv[])
   // If there is an initial dictionary, be sure we do not initialize one.
   if (initialDictionaryFile != "")
   {
-    LocalCoordinateCoding<NothingInitializer> lcc(input, atoms, lambda);
+    LocalCoordinateCoding lcc(atoms, lambda, maxIterations, objTolerance);
 
     // Load initial dictionary directly into LCC object.
     data::Load(initialDictionaryFile, lcc.Dictionary(), true);
@@ -132,26 +132,32 @@ int main(int argc, char* argv[])
     }
 
     // Run LCC.
-    lcc.Encode(maxIterations, objTolerance);
+    lcc.Train<NothingInitializer>(input);
 
     // Save the results.
     Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
     data::Save(dictionaryFile, lcc.Dictionary());
+
+    mat codes;
+    lcc.Encode(input, codes);
+
     Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
-    data::Save(codesFile, lcc.Codes());
+    data::Save(codesFile, codes);
   }
   else
   {
     // No initial dictionary.
-    LocalCoordinateCoding<> lcc(input, atoms, lambda);
-
-    // Run LCC.
-    lcc.Encode(maxIterations, objTolerance);
+    LocalCoordinateCoding lcc(input, atoms, lambda, maxIterations,
+        objTolerance);
 
     // Save the results.
     Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
     data::Save(dictionaryFile, lcc.Dictionary());
+
+    mat codes;
+    lcc.Encode(input, codes);
+
     Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
-    data::Save(codesFile, lcc.Codes());
+    data::Save(codesFile, codes);
   }
 }
diff --git a/src/mlpack/tests/local_coordinate_coding_test.cpp b/src/mlpack/tests/local_coordinate_coding_test.cpp
index 7da242f..98fea7f 100644
--- a/src/mlpack/tests/local_coordinate_coding_test.cpp
+++ b/src/mlpack/tests/local_coordinate_coding_test.cpp
@@ -59,20 +59,22 @@ BOOST_AUTO_TEST_CASE(LocalCoordinateCodingTestCodingStep)
     X.col(i) /= norm(X.col(i), 2);
   }
 
-  LocalCoordinateCoding<> lcc(X, nAtoms, lambda1);
-  lcc.OptimizeCode();
+  mat Z;
+  LocalCoordinateCoding lcc(X, nAtoms, lambda1);
+  lcc.Encode(X, Z);
 
   mat D = lcc.Dictionary();
-  mat Z = lcc.Codes();
 
-  for(uword i = 0; i < nPoints; i++) {
-    vec sq_dists = vec(nAtoms);
-    for(uword j = 0; j < nAtoms; j++) {
+  for (uword i = 0; i < nPoints; i++)
+  {
+    vec sqDists = vec(nAtoms);
+    for (uword j = 0; j < nAtoms; j++)
+    {
       vec diff = D.unsafe_col(j) - X.unsafe_col(i);
-      sq_dists[j] = dot(diff, diff);
+      sqDists[j] = dot(diff, diff);
     }
-    mat Dprime = D * diagmat(1.0 / sq_dists);
-    mat zPrime = Z.unsafe_col(i) % sq_dists;
+    mat Dprime = D * diagmat(1.0 / sqDists);
+    mat zPrime = Z.unsafe_col(i) % sqDists;
 
     vec errCorr = trans(Dprime) * (Dprime * zPrime - X.unsafe_col(i));
     VerifyCorrectness(zPrime, errCorr, 0.5 * lambda1);
@@ -96,11 +98,11 @@ BOOST_AUTO_TEST_CASE(LocalCoordinateCodingTestDictionaryStep)
     X.col(i) /= norm(X.col(i), 2);
   }
 
-  LocalCoordinateCoding<> lcc(X, nAtoms, lambda);
-  lcc.OptimizeCode();
-  mat Z = lcc.Codes();
+  mat Z;
+  LocalCoordinateCoding lcc(X, nAtoms, lambda);
+  lcc.Encode(X, Z);
   uvec adjacencies = find(Z);
-  lcc.OptimizeDictionary(adjacencies);
+  lcc.OptimizeDictionary(X, Z, adjacencies);
 
   mat D = lcc.Dictionary();
 
@@ -113,7 +115,6 @@ BOOST_AUTO_TEST_CASE(LocalCoordinateCodingTestDictionaryStep)
   grad = lambda * grad + (D * Z - X) * trans(Z);
 
   BOOST_REQUIRE_SMALL(norm(grad, "fro"), tol);
-
 }
 
 BOOST_AUTO_TEST_SUITE_END();



More information about the mlpack-git mailing list