[mlpack-svn] r13024 - mlpack/trunk/src/mlpack/methods/sparse_coding

Mon Jun 11 16:10:21 EDT 2012

Author: rcurtin
Date: 2012-06-11 16:10:20 -0400 (Mon, 11 Jun 2012)
New Revision: 13024

Added:
   mlpack/trunk/src/mlpack/methods/sparse_coding/data_dependent_random_initializer.hpp
   mlpack/trunk/src/mlpack/methods/sparse_coding/nothing_initializer.hpp
   mlpack/trunk/src/mlpack/methods/sparse_coding/random_initializer.hpp
   mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp
Removed:
   mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding.cpp
Modified:
   mlpack/trunk/src/mlpack/methods/sparse_coding/CMakeLists.txt
   mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding.hpp
   mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp
Log:
Refactor SparseCoding to initialize the dictionary as a template parameter.


Modified: mlpack/trunk/src/mlpack/methods/sparse_coding/CMakeLists.txt
===================================================================

--- mlpack/trunk/src/mlpack/methods/sparse_coding/CMakeLists.txt	2012-06-11 19:23:47 UTC (rev 13023)
+++ mlpack/trunk/src/mlpack/methods/sparse_coding/CMakeLists.txt	2012-06-11 20:10:20 UTC (rev 13024)
@@ -3,8 +3,11 @@
 # Define the files we need to compile
 # Anything not in this list will not be compiled into the output library
 set(SOURCES
+  data_dependent_random_initializer.hpp
+  nothing_initializer.hpp
+  random_initializer.hpp
   sparse_coding.hpp
-  sparse_coding.cpp
+  sparse_coding_impl.hpp
 )
 
 # add directory name to sources

Added: mlpack/trunk/src/mlpack/methods/sparse_coding/data_dependent_random_initializer.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/sparse_coding/data_dependent_random_initializer.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/sparse_coding/data_dependent_random_initializer.hpp	2012-06-11 20:10:20 UTC (rev 13024)
@@ -0,0 +1,56 @@
+/**
+ * @file data_dependent_random_initializer.hpp
+ * @author Nishant Mehta
+ *
+ * A sensible heuristic for initializing dictionaries for sparse coding.
+ */
+#ifndef __MLPACK_METHODS_SPARSE_CODING_DATA_DEPENDENT_RANDOM_INITIALIZER_HPP
+#define __MLPACK_METHODS_SPARSE_CODING_DATA_DEPENDENT_RANDOM_INITIALIZER_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace sparse_coding {
+
+/**
+ * A data-dependent random dictionary initializer for SparseCoding.  This
+ * creates random dictionary atoms by adding three random observations from the
+ * data together, and then normalizing the atom.
+ */
+class DataDependentRandomInitializer
+{
+ public:
+  /**
+   * Initialize the dictionary by adding together three random observations from
+   * the data, and then normalizing the atom.  This implementation is simple
+   * enough to be included with the definition.
+   *
+   * @param data Dataset to initialize the dictionary with.
+   * @param atoms Number of atoms in dictionary.
+   * @param dictionary Dictionary to initialize.
+   */
+  static void Initialize(const arma::mat& data,
+                         const size_t atoms,
+                         arma::mat& dictionary)
+  {
+    // Set the size of the dictionary.
+    dictionary.set_size(data.n_rows, atoms);
+
+    // Create each atom.
+    for (size_t i = 0; i < atoms; ++i)
+    {
+      // Add three atoms together.
+      dictionary.col(i) = (data.col(math::RandInt(data.n_cols)) +
+          data.col(math::RandInt(data.n_cols)) +
+          data.col(math::RandInt(data.n_cols)));
+
+      // Now normalize the atom.
+      dictionary.col(i) /= norm(dictionary.col(i), 2);
+    }
+  }
+};
+
+}; // namespace sparse_coding
+}; // namespace mlpack
+
+#endif

Added: mlpack/trunk/src/mlpack/methods/sparse_coding/nothing_initializer.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/sparse_coding/nothing_initializer.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/sparse_coding/nothing_initializer.hpp	2012-06-11 20:10:20 UTC (rev 13024)
@@ -0,0 +1,41 @@
+/**
+ * @file nothing_initializer.hpp
+ * @author Ryan Curtin
+ *
+ * An initializer for SparseCoding which does precisely nothing.  It is useful
+ * for when you have an already defined dictionary and you plan on setting it
+ * with SparseCoding::Dictionary().
+ */
+#ifndef __MLPACK_METHODS_SPARSE_CODING_NOTHING_INITIALIZER_HPP
+#define __MLPACK_METHODS_SPARSE_CODING_NOTHING_INITIALIZER_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace sparse_coding {
+
+/**
+ * A DictionaryInitializer for SparseCoding which does not initialize anything;
+ * it is useful for when the dictionary is already known and will be set with
+ * SparseCoding::Dictionary().
+ */
+class NothingInitializer
+{
+ public:
+  /**
+   * This function does not initialize the dictionary.  This will cause problems
+   * for SparseCoding if the dictionary is not set manually before running the
+   * method.
+   */
+  static void Initialize(const arma::mat& /* data */,
+                         const size_t /* atoms */,
+                         arma::mat& /* dictionary */)
+  {
+    // Do nothing!
+  }
+};
+
+}; // namespace sparse_coding
+}; // namespace mlpack
+
+#endif

Added: mlpack/trunk/src/mlpack/methods/sparse_coding/random_initializer.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/sparse_coding/random_initializer.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/sparse_coding/random_initializer.hpp	2012-06-11 20:10:20 UTC (rev 13024)
@@ -0,0 +1,48 @@
+/**
+ * @file random_initializer.hpp
+ * @author Nishant Mehta
+ *
+ * A very simple random dictionary initializer for SparseCoding; it is probably
+ * not a very good choice.
+ */
+#ifndef __MLPACK_METHODS_SPARSE_CODING_RANDOM_INITIALIZER_HPP
+#define __MLPACK_METHODS_SPARSE_CODING_RANDOM_INITIALIZER_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace sparse_coding {
+
+/**
+ * A DictionaryInitializer for use with the SparseCoding class.  This provides a
+ * random, normally distributed dictionary, such that each atom has a norm of 1.
+ */
+class RandomInitializer
+{
+ public:
+  /**
+   * Initialize the dictionary randomly from a normal distribution, such that
+   * each atom has a norm of 1.  This is simple enough to be included with the
+   * definition.
+   *
+   * @param data Dataset to use for initialization.
+   * @param atoms Number of atoms (columns) in the dictionary.
+   * @param dictionary Dictionary to initialize.
+   */
+  static void Initialize(const arma::mat& data,
+                         const size_t atoms,
+                         arma::mat& dictionary)
+  {
+    // Create random dictionary.
+    dictionary.randn(data.n_rows, atoms);
+
+    // Normalize each atom.
+    for (size_t j = 0; j < atoms; ++j)
+      dictionary.col(j) /= norm(dictionary.col(j), 2);
+  }
+};
+
+}; // namespace sparse_coding
+}; // namespace mlpack
+
+#endif

Deleted: mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding.cpp	2012-06-11 19:23:47 UTC (rev 13023)
+++ mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding.cpp	2012-06-11 20:10:20 UTC (rev 13024)
@@ -1,401 +0,0 @@
-/**
- * @file sparse_coding.cpp
- * @author Nishant Mehta
- *
- * Implementation of Sparse Coding with Dictionary Learning using l1 (LASSO) or
- * l1+l2 (Elastic Net) regularization.
- */
-#include "sparse_coding.hpp"
-
-using namespace std;
-using namespace arma;
-using namespace mlpack;
-using namespace mlpack::regression;
-using namespace mlpack::sparse_coding;
-
-// TODO: parameterizable; options to methods?
-#define OBJ_TOL 1e-2 // 1E-9
-#define NEWTON_TOL 1e-6 // 1E-9
-
-SparseCoding::SparseCoding(const mat& data,
-                           const size_t atoms,
-                           const double lambda1,
-                           const double lambda2) :
-    atoms(atoms),
-    data(data),
-    codes(mat(atoms, data.n_cols)),
-    lambda1(lambda1),
-    lambda2(lambda2)
-{ /* Nothing left to do. */ }
-
-// Always a not good decision!
-void SparseCoding::RandomInitDictionary()
-{
-  dictionary = randn(data.n_rows, atoms);
-
-  for (size_t j = 0; j < atoms; ++j)
-    dictionary.col(j) /= norm(dictionary.col(j), 2);
-}
-
-// The sensible heuristic.
-void SparseCoding::DataDependentRandomInitDictionary()
-{
-  dictionary = mat(data.n_rows, atoms);
-  for (size_t j = 0; j < atoms; ++j)
-  {
-    vec vecD_j = dictionary.unsafe_col(j);
-    RandomAtom(vecD_j);
-  }
-}
-
-void SparseCoding::RandomAtom(vec& atom)
-{
-  atom.zeros();
-  const size_t nSeedAtoms = 3;
-  for (size_t i = 0; i < nSeedAtoms; i++)
-    atom += data.col(rand() % data.n_cols);
-
-  atom /= norm(atom, 2);
-}
-
-void SparseCoding::DoSparseCoding(const size_t maxIterations)
-{
-  double lastObjVal = DBL_MAX;
-
-  Log::Info << "Initial Coding Step." << endl;
-
-  OptimizeCode();
-  uvec adjacencies = find(codes);
-
-  Log::Info << "  Sparsity level: "
-      << 100.0 * ((double) (adjacencies.n_elem)) / ((double)
-      (atoms * data.n_cols)) << "%" << endl;
-  Log::Info << "  Objective value: " << Objective() << "." << endl;
-
-  for (size_t t = 1; t != maxIterations; ++t)
-  {
-    Log::Info << "Iteration " << t << " of " << maxIterations << "." << endl;
-
-    Log::Info << "Performing dictionary step... ";
-    OptimizeDictionary(adjacencies);
-    Log::Info << "objective value: " << Objective() << "." << endl;
-
-    Log::Info << "Performing coding step..." << endl;
-    OptimizeCode();
-    adjacencies = find(codes);
-    Log::Info << "  Sparsity level: "
-        << 100.0 *
-        ((double) (adjacencies.n_elem)) / ((double) (atoms * data.n_cols))
-        << "%" << endl;
-
-    double curObjVal = Objective();
-    Log::Info << "  Objective value: " << curObjVal << "." << endl;
-
-    double objValImprov = lastObjVal - curObjVal;
-    Log::Info << "  Improvement: " << scientific << objValImprov << "." << endl;
-
-    if (objValImprov < OBJ_TOL)
-    {
-      Log::Info << "Converged within tolerance " << OBJ_TOL << ".\n";
-      break;
-    }
-
-    lastObjVal = curObjVal;
-  }
-}
-
-void SparseCoding::OptimizeCode()
-{
-  // When using Cholesky version of LARS, this is correct even if lambda2 > 0.
-  mat matGram = trans(dictionary) * dictionary;
-  // mat matGram;
-  // if(lambda2 > 0) {
-  //   matGram = trans(dictionary) * dictionary + lambda2 * eye(atoms, atoms);
-  // }
-  // else {
-  //   matGram = trans(dictionary) * dictionary;
-  // }
-
-  for (size_t i = 0; i < data.n_cols; ++i)
-  {
-    // Report progress.
-    if ((i % 100) == 0)
-      Log::Debug << "Optimization at point " << i << "." << endl;
-
-    bool useCholesky = true;
-    LARS lars(useCholesky, matGram, lambda1, lambda2);
-
-    vec beta;
-    lars.Regress(dictionary, data.unsafe_col(i), beta, true);
-
-    codes.col(i) = beta;
-  }
-}
-
-void SparseCoding::OptimizeDictionary(const uvec& adjacencies)
-{
-  // Count the number of atomic neighbors for each point x^i.
-  uvec neighborCounts = zeros<uvec>(data.n_cols, 1);
-
-  if (adjacencies.n_elem > 0)
-  {
-    // This gets the column index.
-    // TODO: is this integer division intentional?
-    size_t curPointInd = (size_t) (adjacencies(0) / atoms);
-    size_t curCount = 1;
-
-    for (size_t l = 1; l < adjacencies.n_elem; ++l)
-    {
-      if ((size_t) (adjacencies(l) / atoms) == curPointInd)
-      {
-        ++curCount;
-      }
-      else
-      {
-        neighborCounts(curPointInd) = curCount;
-        curPointInd = (size_t) (adjacencies(l) / atoms);
-        curCount = 1;
-      }
-    }
-
-    neighborCounts(curPointInd) = curCount;
-  }
-
-  // Handle the case of inactive atoms (atoms not used in the given coding).
-  std::vector<size_t> inactiveAtoms;
-  std::vector<size_t> activeAtoms;
-  activeAtoms.reserve(atoms);
-
-  for (size_t j = 0; j < atoms; ++j)
-  {
-    if (accu(codes.row(j) != 0) == 0)
-      inactiveAtoms.push_back(j);
-    else
-      activeAtoms.push_back(j);
-  }
-
-  const size_t nActiveAtoms = activeAtoms.size();
-  const size_t nInactiveAtoms = inactiveAtoms.size();
-
-  // Efficient construction of Z restricted to active atoms.
-  mat matActiveZ;
-  if (inactiveAtoms.empty())
-  {
-    matActiveZ = codes;
-  }
-  else
-  {
-    uvec inactiveAtomsVec = conv_to<uvec>::from(inactiveAtoms);
-    RemoveRows(codes, inactiveAtomsVec, matActiveZ);
-  }
-
-  uvec atomReverseLookup = uvec(atoms);
-  for (size_t i = 0; i < nActiveAtoms; ++i)
-    atomReverseLookup(activeAtoms[i]) = i;
-
-  if (nInactiveAtoms > 0)
-  {
-    Log::Info << "There are " << nInactiveAtoms
-        << " inactive atoms. They will be re-initialized randomly.\n";
-  }
-
-  Log::Debug << "Solving Dual via Newton's Method.\n";
-
-  mat dictionaryEstimate;
-  // Solve using Newton's method in the dual - note that the final dot
-  // multiplication with inv(A) seems to be unavoidable. Although more
-  // expensive, the code written this way (we use solve()) should be more
-  // numerically stable than just using inv(A) for everything.
-  vec dualVars = zeros<vec>(nActiveAtoms);
-
-  //vec dualVars = 1e-14 * ones<vec>(nActiveAtoms);
-
-  // Method used by feature sign code - fails miserably here.  Perhaps the
-  // MATLAB optimizer fmincon does something clever?
-  //vec dualVars = 10.0 * randu(nActiveAtoms, 1);
-
-  //vec dualVars = diagvec(solve(dictionary, data * trans(codes))
-  //    - codes * trans(codes));
-  //for (size_t i = 0; i < dualVars.n_elem; i++)
-  //  if (dualVars(i) < 0)
-  //    dualVars(i) = 0;
-
-  bool converged = false;
-  mat codesXT = matActiveZ * trans(data);
-  mat codesZT = matActiveZ * trans(matActiveZ);
-
-  for (size_t t = 1; !converged; ++t)
-  {
-    mat A = codesZT + diagmat(dualVars);
-
-    mat matAInvZXT = solve(A, codesXT);
-
-    vec gradient = -(sum(square(matAInvZXT), 1) - ones<vec>(nActiveAtoms));
-
-    mat hessian = -(-2 * (matAInvZXT * trans(matAInvZXT)) % inv(A));
-
-    vec searchDirection = -solve(hessian, gradient);
-    //vec searchDirection = -gradient;
-
-    // Armijo line search.
-    const double c = 1e-4;
-    double alpha = 1.0;
-    const double rho = 0.9;
-    double sufficientDecrease = c * dot(gradient, searchDirection);
-
-    /*
-    {
-      double sumDualVars = sum(dualVars);
-      double fOld = -(-trace(trans(codesXT) * matAInvZXT) - sumDualVars);
-      Log::Debug << "fOld = " << fOld << "." << endl;
-      double fNew =
-          -(-trace(trans(codesXT) * solve(codesZT +
-          diagmat(dualVars + alpha * searchDirection), codesXT))
-          - (sumDualVars + alpha * sum(searchDirection)) );
-      Log::Debug << "fNew = " << fNew << "." << endl;
-    }
-    */
-
-    double improvement;
-    while (true)
-    {
-      // Calculate objective.
-      double sumDualVars = sum(dualVars);
-      double fOld = -(-trace(trans(codesXT) * matAInvZXT) - sumDualVars);
-      double fNew = -(-trace(trans(codesXT) * solve(codesZT +
-          diagmat(dualVars + alpha * searchDirection), codesXT)) -
-          (sumDualVars + alpha * sum(searchDirection)));
-
-      if (fNew <= fOld + alpha * sufficientDecrease)
-      {
-        searchDirection = alpha * searchDirection;
-        improvement = fOld - fNew;
-        break;
-      }
-
-      alpha *= rho;
-    }
-
-    // End of Armijo line search code.
-
-    dualVars += searchDirection;
-    double normGradient = norm(gradient, 2);
-    Log::Debug << "Newton Method iteration " << t << ":" << endl;
-    Log::Debug << "  Gradient norm: " << std::scientific << normGradient
-        << "." << endl;
-    Log::Debug << "  Improvement: " << std::scientific << improvement << ".\n";
-
-    if (improvement < NEWTON_TOL)
-      converged = true;
-  }
-
-  if (inactiveAtoms.empty())
-  {
-    dictionaryEstimate = trans(solve(codesZT + diagmat(dualVars), codesXT));
-  }
-  else
-  {
-    mat dictionaryActiveEstimate = trans(solve(codesZT + diagmat(dualVars),
-        codesXT));
-    dictionaryEstimate = zeros(data.n_rows, atoms);
-
-    for (size_t i = 0; i < nActiveAtoms; ++i)
-      dictionaryEstimate.col(activeAtoms[i]) = dictionaryActiveEstimate.col(i);
-
-    for (size_t i = 0; i < nInactiveAtoms; ++i)
-    {
-      vec vecdictionaryi = dictionaryEstimate.unsafe_col(inactiveAtoms[i]);
-      RandomAtom(vecdictionaryi);
-    }
-  }
-
-  dictionary = dictionaryEstimate;
-}
-
-void SparseCoding::ProjectDictionary()
-{
-  for (size_t j = 0; j < atoms; j++)
-  {
-    double normD_j = norm(dictionary.col(j), 2);
-    if ((normD_j > 1) && (normD_j - 1.0 > 1e-9))
-    {
-      Log::Warn << "Norm exceeded 1 by " << std::scientific << normD_j - 1.0
-          << ".  Shrinking...\n";
-      dictionary.col(j) /= normD_j;
-    }
-  }
-}
-
-double SparseCoding::Objective()
-{
-  double l11NormZ = sum(sum(abs(codes)));
-  double froNormResidual = norm(data - dictionary * codes, "fro");
-
-  if (lambda2 > 0)
-  {
-    double froNormZ = norm(codes, "fro");
-    return 0.5 *
-      (froNormResidual * froNormResidual + lambda2 * froNormZ * froNormZ) +
-      lambda1 * l11NormZ;
-  }
-  else
-  {
-    return 0.5 * froNormResidual * froNormResidual + lambda1 * l11NormZ;
-  }
-}
-
-void mlpack::sparse_coding::RemoveRows(const mat& X,
-                                       uvec rows_to_remove,
-                                       mat& X_mod)
-{
-  const size_t n_cols = X.n_cols;
-  const size_t n_rows = X.n_rows;
-  const size_t n_to_remove = rows_to_remove.n_elem;
-  const size_t n_to_keep = n_rows - n_to_remove;
-
-  if (n_to_remove == 0)
-  {
-    X_mod = X;
-  }
-  else
-  {
-    X_mod.set_size(n_to_keep, n_cols);
-
-    size_t cur_row = 0;
-    size_t remove_ind = 0;
-    // First, check 0 to first row to remove.
-    if (rows_to_remove(0) > 0)
-    {
-      // Note that this implies that n_rows > 1.
-      size_t height = rows_to_remove(0);
-      X_mod(span(cur_row, cur_row + height - 1), span::all) =
-          X(span(0, rows_to_remove(0) - 1), span::all);
-      cur_row += height;
-    }
-    // Now, check i'th row to remove to (i + 1)'th row to remove, until i is the
-    // penultimate row.
-    while (remove_ind < n_to_remove - 1)
-    {
-      size_t height = rows_to_remove[remove_ind + 1] -
-          rows_to_remove[remove_ind] - 1;
-
-      if (height > 0)
-      {
-        X_mod(span(cur_row, cur_row + height - 1), span::all) =
-            X(span(rows_to_remove[remove_ind] + 1,
-            rows_to_remove[remove_ind + 1] - 1), span::all);
-        cur_row += height;
-      }
-
-      remove_ind++;
-    }
-
-    // Now that i is the last row to remove, check last row to remove to last
-    // row.
-    if (rows_to_remove[remove_ind] < n_rows - 1)
-    {
-      X_mod(span(cur_row, n_to_keep - 1), span::all) =
-          X(span(rows_to_remove[remove_ind] + 1, n_rows - 1), span::all);
-    }
-  }
-}

Modified: mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding.hpp	2012-06-11 19:23:47 UTC (rev 13023)
+++ mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding.hpp	2012-06-11 20:10:20 UTC (rev 13024)
@@ -11,6 +11,11 @@
 #include <mlpack/core.hpp>
 #include <mlpack/methods/lars/lars.hpp>
 
+// Include our three simple dictionary initializers.
+#include "nothing_initializer.hpp"
+#include "data_dependent_random_initializer.hpp"
+#include "random_initializer.hpp"
+
 namespace mlpack {
 namespace sparse_coding {
 
@@ -78,7 +83,20 @@
  *   publisher={Royal Statistical Society}
  * }
  * @endcode
+ *
+ * Before the method is run, the dictionary is initialized using the
+ * DictionaryInitializationPolicy class.  Possible choices include the
+ * RandomInitializer, which provides an entirely random dictionary, the
+ * DataDependentRandomInitializer, which provides a random dictionary based
+ * loosely on characteristics of the dataset, and the NothingInitializer, which
+ * does not initialize the dictionary -- instead, the user should set the
+ * dictionary using the Dictionary() mutator method.
+ *
+ * @tparam DictionaryInitializationPolicy The class to use to initialize the
+ *     dictionary; must have 'void Initialize(const arma::mat& data, arma::mat&
+ *     dictionary)' function.
  */
+template<typename DictionaryInitializer = DataDependentRandomInitializer>
 class SparseCoding
 {
  public:
@@ -96,28 +114,6 @@
                const double lambda2 = 0);
 
   /**
-   * Initialize dictionary by drawing k vectors uniformly at random from the
-   * unit sphere.
-   */
-  void RandomInitDictionary();
-
-  /**
-   * Initialize dictionary by initializing each atom to a normalized mixture of
-   * a small number of randomly selected points in X.
-   */
-  void DataDependentRandomInitDictionary();
-
-  /**
-   * Initialize an atom to a normalized mixture of a small number of randomly
-   * selected points in X.
-   *
-   * @param atom The atom to initialize
-   */
-  void RandomAtom(arma::vec& atom);
-
-  // core algorithm functions
-
-  /**
    * Run Sparse Coding with Dictionary Learning.
    *
    * @param maxIterations Maximum number of iterations to run algorithm.  If 0,
@@ -131,7 +127,7 @@
   void OptimizeCode();
 
   /**
-   * Learn dictionary via Newton method based on Lagrange dual
+   * Learn dictionary via Newton method based on Lagrange dual.
    *
    * @param adjacencies Indices of entries (unrolled column by column) of
    *    the coding matrix Z that are non-zero (the adjacency matrix for the
@@ -186,10 +182,13 @@
 };
 
 void RemoveRows(const arma::mat& X,
-                arma::uvec rows_to_remove,
-                arma::mat& X_mod);
+                const arma::uvec& rowsToRemove,
+                arma::mat& modX);
 
 }; // namespace sparse_coding
 }; // namespace mlpack
 
+// Include implementation.
+#include "sparse_coding_impl.hpp"
+
 #endif

Copied: mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp (from rev 13003, mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding.cpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding_impl.hpp	2012-06-11 20:10:20 UTC (rev 13024)
@@ -0,0 +1,399 @@
+/**
+ * @file sparse_coding_impl.hpp
+ * @author Nishant Mehta
+ *
+ * Implementation of Sparse Coding with Dictionary Learning using l1 (LASSO) or
+ * l1+l2 (Elastic Net) regularization.
+ */
+#ifndef __MLPACK_METHODS_SPARSE_CODING_SPARSE_CODING_IMPL_HPP
+#define __MLPACK_METHODS_SPARSE_CODING_SPARSE_CODING_IMPL_HPP
+
+// In case it hasn't already been included.
+#include "sparse_coding.hpp"
+
+namespace mlpack {
+namespace sparse_coding {
+
+// TODO: parameterizable; options to methods?
+#define OBJ_TOL 1e-2 // 1E-9
+#define NEWTON_TOL 1e-6 // 1E-9
+
+template<typename DictionaryInitializer>
+SparseCoding<DictionaryInitializer>::SparseCoding(const arma::mat& data,
+                                                  const size_t atoms,
+                                                  const double lambda1,
+                                                  const double lambda2) :
+    atoms(atoms),
+    data(data),
+    codes(atoms, data.n_cols),
+    lambda1(lambda1),
+    lambda2(lambda2)
+{
+  // Initialize the dictionary.
+  DictionaryInitializer::Initialize(data, atoms, dictionary);
+}
+
+template<typename DictionaryInitializer>
+void SparseCoding<DictionaryInitializer>::DoSparseCoding(
+    const size_t maxIterations)
+{
+  double lastObjVal = DBL_MAX;
+
+  Log::Info << "Initial Coding Step." << std::endl;
+
+  OptimizeCode();
+  arma::uvec adjacencies = find(codes);
+
+  Log::Info << "  Sparsity level: "
+      << 100.0 * ((double) (adjacencies.n_elem)) / ((double)
+      (atoms * data.n_cols)) << "%" << std::endl;
+  Log::Info << "  Objective value: " << Objective() << "." << std::endl;
+
+  for (size_t t = 1; t != maxIterations; ++t)
+  {
+    Log::Info << "Iteration " << t << " of " << maxIterations << "."
+        << std::endl;
+
+    Log::Info << "Performing dictionary step... ";
+    OptimizeDictionary(adjacencies);
+    Log::Info << "objective value: " << Objective() << "." << std::endl;
+
+    Log::Info << "Performing coding step..." << std::endl;
+    OptimizeCode();
+    adjacencies = find(codes);
+    Log::Info << "  Sparsity level: "
+        << 100.0 *
+        ((double) (adjacencies.n_elem)) / ((double) (atoms * data.n_cols))
+        << "%" << std::endl;
+
+    double curObjVal = Objective();
+    Log::Info << "  Objective value: " << curObjVal << "." << std::endl;
+
+    double objValImprov = lastObjVal - curObjVal;
+    Log::Info << "  Improvement: " << std::scientific << objValImprov << "."
+        << std::endl;
+
+    if (objValImprov < OBJ_TOL)
+    {
+      Log::Info << "Converged within tolerance " << OBJ_TOL << ".\n";
+      break;
+    }
+
+    lastObjVal = curObjVal;
+  }
+}
+
+template<typename DictionaryInitializer>
+void SparseCoding<DictionaryInitializer>::OptimizeCode()
+{
+  // When using Cholesky version of LARS, this is correct even if lambda2 > 0.
+  arma::mat matGram = trans(dictionary) * dictionary;
+  // mat matGram;
+  // if(lambda2 > 0) {
+  //   matGram = trans(dictionary) * dictionary + lambda2 * eye(atoms, atoms);
+  // }
+  // else {
+  //   matGram = trans(dictionary) * dictionary;
+  // }
+
+  for (size_t i = 0; i < data.n_cols; ++i)
+  {
+    // Report progress.
+    if ((i % 100) == 0)
+      Log::Debug << "Optimization at point " << i << "." << std::endl;
+
+    bool useCholesky = true;
+    regression::LARS lars(useCholesky, matGram, lambda1, lambda2);
+
+    arma::vec beta;
+    lars.Regress(dictionary, data.unsafe_col(i), beta, true);
+
+    codes.col(i) = beta;
+  }
+}
+
+template<typename DictionaryInitializer>
+void SparseCoding<DictionaryInitializer>::OptimizeDictionary(
+      const arma::uvec& adjacencies)
+{
+  // Count the number of atomic neighbors for each point x^i.
+  arma::uvec neighborCounts = arma::zeros<arma::uvec>(data.n_cols, 1);
+
+  if (adjacencies.n_elem > 0)
+  {
+    // This gets the column index.
+    // TODO: is this integer division intentional?
+    size_t curPointInd = (size_t) (adjacencies(0) / atoms);
+    size_t curCount = 1;
+
+    for (size_t l = 1; l < adjacencies.n_elem; ++l)
+    {
+      if ((size_t) (adjacencies(l) / atoms) == curPointInd)
+      {
+        ++curCount;
+      }
+      else
+      {
+        neighborCounts(curPointInd) = curCount;
+        curPointInd = (size_t) (adjacencies(l) / atoms);
+        curCount = 1;
+      }
+    }
+
+    neighborCounts(curPointInd) = curCount;
+  }
+
+  // Handle the case of inactive atoms (atoms not used in the given coding).
+  std::vector<size_t> inactiveAtoms;
+  std::vector<size_t> activeAtoms;
+  activeAtoms.reserve(atoms);
+
+  for (size_t j = 0; j < atoms; ++j)
+  {
+    if (accu(codes.row(j) != 0) == 0)
+      inactiveAtoms.push_back(j);
+    else
+      activeAtoms.push_back(j);
+  }
+
+  const size_t nActiveAtoms = activeAtoms.size();
+  const size_t nInactiveAtoms = inactiveAtoms.size();
+
+  // Efficient construction of Z restricted to active atoms.
+  arma::mat matActiveZ;
+  if (inactiveAtoms.empty())
+  {
+    matActiveZ = codes;
+  }
+  else
+  {
+    arma::uvec inactiveAtomsVec =
+        arma::conv_to<arma::uvec>::from(inactiveAtoms);
+    RemoveRows(codes, inactiveAtomsVec, matActiveZ);
+  }
+
+  arma::uvec atomReverseLookup(atoms);
+  for (size_t i = 0; i < nActiveAtoms; ++i)
+    atomReverseLookup(activeAtoms[i]) = i;
+
+  if (nInactiveAtoms > 0)
+  {
+    Log::Info << "There are " << nInactiveAtoms
+        << " inactive atoms. They will be re-initialized randomly.\n";
+  }
+
+  Log::Debug << "Solving Dual via Newton's Method.\n";
+
+  arma::mat dictionaryEstimate;
+  // Solve using Newton's method in the dual - note that the final dot
+  // multiplication with inv(A) seems to be unavoidable. Although more
+  // expensive, the code written this way (we use solve()) should be more
+  // numerically stable than just using inv(A) for everything.
+  arma::vec dualVars = arma::zeros<arma::vec>(nActiveAtoms);
+
+  //vec dualVars = 1e-14 * ones<vec>(nActiveAtoms);
+
+  // Method used by feature sign code - fails miserably here.  Perhaps the
+  // MATLAB optimizer fmincon does something clever?
+  //vec dualVars = 10.0 * randu(nActiveAtoms, 1);
+
+  //vec dualVars = diagvec(solve(dictionary, data * trans(codes))
+  //    - codes * trans(codes));
+  //for (size_t i = 0; i < dualVars.n_elem; i++)
+  //  if (dualVars(i) < 0)
+  //    dualVars(i) = 0;
+
+  bool converged = false;
+  arma::mat codesXT = matActiveZ * trans(data);
+  arma::mat codesZT = matActiveZ * trans(matActiveZ);
+
+  for (size_t t = 1; !converged; ++t)
+  {
+    arma::mat A = codesZT + diagmat(dualVars);
+
+    arma::mat matAInvZXT = solve(A, codesXT);
+
+    arma::vec gradient = -(arma::sum(arma::square(matAInvZXT), 1) -
+        arma::ones<arma::vec>(nActiveAtoms));
+
+    arma::mat hessian = -(-2 * (matAInvZXT * trans(matAInvZXT)) % inv(A));
+
+    arma::vec searchDirection = -solve(hessian, gradient);
+    //vec searchDirection = -gradient;
+
+    // Armijo line search.
+    const double c = 1e-4;
+    double alpha = 1.0;
+    const double rho = 0.9;
+    double sufficientDecrease = c * dot(gradient, searchDirection);
+
+    /*
+    {
+      double sumDualVars = sum(dualVars);
+      double fOld = -(-trace(trans(codesXT) * matAInvZXT) - sumDualVars);
+      Log::Debug << "fOld = " << fOld << "." << std::endl;
+      double fNew =
+          -(-trace(trans(codesXT) * solve(codesZT +
+          diagmat(dualVars + alpha * searchDirection), codesXT))
+          - (sumDualVars + alpha * sum(searchDirection)) );
+      Log::Debug << "fNew = " << fNew << "." << std::endl;
+    }
+    */
+
+    double improvement;
+    while (true)
+    {
+      // Calculate objective.
+      double sumDualVars = sum(dualVars);
+      double fOld = -(-trace(trans(codesXT) * matAInvZXT) - sumDualVars);
+      double fNew = -(-trace(trans(codesXT) * solve(codesZT +
+          diagmat(dualVars + alpha * searchDirection), codesXT)) -
+          (sumDualVars + alpha * sum(searchDirection)));
+
+      if (fNew <= fOld + alpha * sufficientDecrease)
+      {
+        searchDirection = alpha * searchDirection;
+        improvement = fOld - fNew;
+        break;
+      }
+
+      alpha *= rho;
+    }
+
+    // End of Armijo line search code.
+
+    dualVars += searchDirection;
+    double normGradient = norm(gradient, 2);
+    Log::Debug << "Newton Method iteration " << t << ":" << std::endl;
+    Log::Debug << "  Gradient norm: " << std::scientific << normGradient
+        << "." << std::endl;
+    Log::Debug << "  Improvement: " << std::scientific << improvement << ".\n";
+
+    if (improvement < NEWTON_TOL)
+      converged = true;
+  }
+
+  if (inactiveAtoms.empty())
+  {
+    dictionaryEstimate = trans(solve(codesZT + diagmat(dualVars), codesXT));
+  }
+  else
+  {
+    arma::mat dictionaryActiveEstimate = trans(solve(codesZT +
+        diagmat(dualVars), codesXT));
+    dictionaryEstimate = arma::zeros(data.n_rows, atoms);
+
+    for (size_t i = 0; i < nActiveAtoms; ++i)
+      dictionaryEstimate.col(activeAtoms[i]) = dictionaryActiveEstimate.col(i);
+
+    for (size_t i = 0; i < nInactiveAtoms; ++i)
+    {
+      // Make a new random atom estimate.
+      dictionaryEstimate.col(inactiveAtoms[i]) =
+          (data.col(math::RandInt(data.n_cols)) +
+           data.col(math::RandInt(data.n_cols)) +
+           data.col(math::RandInt(data.n_cols)));
+
+      dictionaryEstimate.col(inactiveAtoms[i]) /=
+          norm(dictionaryEstimate.col(inactiveAtoms[i]), 2);
+    }
+  }
+
+  dictionary = dictionaryEstimate;
+}
+
+template<typename DictionaryInitializer>
+void SparseCoding<DictionaryInitializer>::ProjectDictionary()
+{
+  for (size_t j = 0; j < atoms; j++)
+  {
+    double normD_j = norm(dictionary.col(j), 2);
+    if ((normD_j > 1) && (normD_j - 1.0 > 1e-9))
+    {
+      Log::Warn << "Norm exceeded 1 by " << std::scientific << normD_j - 1.0
+          << ".  Shrinking...\n";
+      dictionary.col(j) /= normD_j;
+    }
+  }
+}
+
+template<typename DictionaryInitializer>
+double SparseCoding<DictionaryInitializer>::Objective()
+{
+  double l11NormZ = sum(sum(abs(codes)));
+  double froNormResidual = norm(data - dictionary * codes, "fro");
+
+  if (lambda2 > 0)
+  {
+    double froNormZ = norm(codes, "fro");
+    return 0.5 *
+      (froNormResidual * froNormResidual + lambda2 * froNormZ * froNormZ) +
+      lambda1 * l11NormZ;
+  }
+  else
+  {
+    return 0.5 * froNormResidual * froNormResidual + lambda1 * l11NormZ;
+  }
+}
+
+void RemoveRows(const arma::mat& X,
+                const arma::uvec& rowsToRemove,
+                arma::mat& modX)
+{
+  const size_t cols = X.n_cols;
+  const size_t rows = X.n_rows;
+  const size_t nRemove = rowsToRemove.n_elem;
+  const size_t nKeep = rows - nRemove;
+
+  if (nRemove == 0)
+  {
+    modX = X;
+  }
+  else
+  {
+    modX.set_size(nKeep, cols);
+
+    size_t curRow = 0;
+    size_t removeInd = 0;
+    // First, check 0 to first row to remove.
+    if (rowsToRemove(0) > 0)
+    {
+      // Note that this implies that n_rows > 1.
+      size_t height = rowsToRemove(0);
+      modX(arma::span(curRow, curRow + height - 1), arma::span::all) =
+          X(arma::span(0, rowsToRemove(0) - 1), arma::span::all);
+      curRow += height;
+    }
+    // Now, check i'th row to remove to (i + 1)'th row to remove, until i is the
+    // penultimate row.
+    while (removeInd < nRemove - 1)
+    {
+      size_t height = rowsToRemove[removeInd + 1] -
+          rowsToRemove[removeInd] - 1;
+
+      if (height > 0)
+      {
+        modX(arma::span(curRow, curRow + height - 1), arma::span::all) =
+            X(arma::span(rowsToRemove[removeInd] + 1,
+            rowsToRemove[removeInd + 1] - 1), arma::span::all);
+        curRow += height;
+      }
+
+      removeInd++;
+    }
+
+    // Now that i is the last row to remove, check last row to remove to last
+    // row.
+    if (rowsToRemove[removeInd] < rows - 1)
+    {
+      modX(arma::span(curRow, nKeep - 1), arma::span::all) =
+          X(arma::span(rowsToRemove[removeInd] + 1, rows - 1),
+          arma::span::all);
+    }
+  }
+}
+
+}; // namespace sparse_coding
+}; // namespace mlpack
+
+#endif

Modified: mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp	2012-06-11 19:23:47 UTC (rev 13023)
+++ mlpack/trunk/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp	2012-06-11 20:10:20 UTC (rev 13024)
@@ -63,14 +63,10 @@
     matX.col(i) /= norm(matX.col(i), 2);
 
   // Run the sparse coding algorithm.
-  SparseCoding sc(matX, nAtoms, lambda1, lambda2);
+  SparseCoding<> sc(matX, nAtoms, lambda1, lambda2);
 
-  if (strlen(initialDictionaryFullpath) == 0)
+  if (strlen(initialDictionaryFullpath) != 0)
   {
-    sc.DataDependentRandomInitDictionary();
-  }
-  else
-  {
     mat matInitialD;
     data::Load(initialDictionaryFullpath, matInitialD);