[mlpack-git] master: Refactor LCC to same API as SparseCoding. (efb930e)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 16 14:12:32 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/cd5986e141b41781fdc13a9c89443f9be33b56bd...31c10fef76ac1d85c6415c92d2ccd429c430105f
>---------------------------------------------------------------
commit efb930e9b50851ea54c5905043a9038e14016c7b
Author: Ryan Curtin <ryan at ratml.org>
Date: Tue Dec 15 01:37:28 2015 +0000
Refactor LCC to same API as SparseCoding.
>---------------------------------------------------------------
efb930e9b50851ea54c5905043a9038e14016c7b
.../methods/local_coordinate_coding/CMakeLists.txt | 7 +-
.../{lcc_impl.hpp => lcc.cpp} | 112 ++++-----------------
src/mlpack/methods/local_coordinate_coding/lcc.hpp | 100 ++++++++++++++----
.../methods/local_coordinate_coding/lcc_main.cpp | 22 ++--
src/mlpack/tests/local_coordinate_coding_test.cpp | 29 +++---
5 files changed, 131 insertions(+), 139 deletions(-)
diff --git a/src/mlpack/methods/local_coordinate_coding/CMakeLists.txt b/src/mlpack/methods/local_coordinate_coding/CMakeLists.txt
index 6c7d302..99b58e8 100644
--- a/src/mlpack/methods/local_coordinate_coding/CMakeLists.txt
+++ b/src/mlpack/methods/local_coordinate_coding/CMakeLists.txt
@@ -4,8 +4,9 @@
# In this library, these are specified twice, once here, and once for the individual library it belongs to, so make sure
# that you have files in both sections
set(SOURCES
- lcc.hpp
- lcc_impl.hpp
+ lcc.hpp
+ lcc.cpp
+ lcc_impl.hpp
)
# add directory name to sources
@@ -18,7 +19,7 @@ endforeach()
set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
add_executable(local_coordinate_coding
- lcc_main.cpp
+ lcc_main.cpp
)
target_link_libraries(local_coordinate_coding
diff --git a/src/mlpack/methods/local_coordinate_coding/lcc_impl.hpp b/src/mlpack/methods/local_coordinate_coding/lcc.cpp
similarity index 69%
rename from src/mlpack/methods/local_coordinate_coding/lcc_impl.hpp
rename to src/mlpack/methods/local_coordinate_coding/lcc.cpp
index 4630d68..54c2fa6 100644
--- a/src/mlpack/methods/local_coordinate_coding/lcc_impl.hpp
+++ b/src/mlpack/methods/local_coordinate_coding/lcc.cpp
@@ -1,101 +1,28 @@
/**
- * @file lcc_impl.hpp
+ * @file lcc.cpp
* @author Nishant Mehta
*
- * Implementation of Local Coordinate Coding
+ * Implementation of Local Coordinate Coding.
*/
-#ifndef __MLPACK_METHODS_LOCAL_COORDINATE_CODING_LCC_IMPL_HPP
-#define __MLPACK_METHODS_LOCAL_COORDINATE_CODING_LCC_IMPL_HPP
-
-// In case it hasn't been included yet.
#include "lcc.hpp"
namespace mlpack {
namespace lcc {
-template<typename DictionaryInitializer>
-LocalCoordinateCoding<DictionaryInitializer>::LocalCoordinateCoding(
- const arma::mat& data,
+LocalCoordinateCoding::LocalCoordinateCoding(
const size_t atoms,
- const double lambda) :
- atoms(atoms),
- data(data),
- codes(atoms, data.n_cols),
- lambda(lambda)
-{
- // Initialize the dictionary.
- DictionaryInitializer::Initialize(data, atoms, dictionary);
-}
-
-template<typename DictionaryInitializer>
-void LocalCoordinateCoding<DictionaryInitializer>::Encode(
+ const double lambda,
const size_t maxIterations,
- const double objTolerance)
+ const double tolerance) :
+ atoms(atoms),
+ lambda(lambda),
+ maxIterations(maxIterations),
+ tolerance(tolerance)
{
- Timer::Start("local_coordinate_coding");
-
- double lastObjVal = DBL_MAX;
-
- // Take the initial coding step, which has to happen before entering the main
- // loop.
- Log::Info << "Initial Coding Step." << std::endl;
-
- OptimizeCode();
- arma::uvec adjacencies = find(codes);
-
- Log::Info << " Sparsity level: " << 100.0 * ((double)(adjacencies.n_elem)) /
- ((double)(atoms * data.n_cols)) << "%.\n";
- Log::Info << " Objective value: " << Objective(adjacencies) << "."
- << std::endl;
-
- for (size_t t = 1; t != maxIterations; t++)
- {
- Log::Info << "Iteration " << t << " of " << maxIterations << "."
- << std::endl;
-
- // First step: optimize the dictionary.
- Log::Info << "Performing dictionary step..." << std::endl;
- OptimizeDictionary(adjacencies);
- double dsObjVal = Objective(adjacencies);
- Log::Info << " Objective value: " << Objective(adjacencies) << "."
- << std::endl;
-
- // Second step: perform the coding.
- Log::Info << "Performing coding step..." << std::endl;
- OptimizeCode();
- adjacencies = find(codes);
- Log::Info << " Sparsity level: " << 100.0 * ((double) (adjacencies.n_elem))
- / ((double)(atoms * data.n_cols)) << "%.\n";
-
- // Terminate if the objective increased in the coding step.
- double curObjVal = Objective(adjacencies);
- if (curObjVal > dsObjVal)
- {
- Log::Warn << "Objective increased in coding step! Terminating."
- << std::endl;
- break;
- }
-
- // Find the new objective value and improvement so we can check for
- // convergence.
- double improvement = lastObjVal - curObjVal;
- Log::Info << "Objective value: " << curObjVal << " (improvement "
- << std::scientific << improvement << ")." << std::endl;
-
- if (improvement < objTolerance)
- {
- Log::Info << "Converged within tolerance " << objTolerance << ".\n";
- break;
- }
-
- lastObjVal = curObjVal;
- }
-
- Timer::Stop("local_coordinate_coding");
+ // Nothing to do.
}
-template<typename DictionaryInitializer>
-void LocalCoordinateCoding<DictionaryInitializer>::OptimizeCode()
+void LocalCoordinateCoding::Encode(const arma::mat& data, arma::mat& codes)
{
arma::mat invSqDists = 1.0 / (repmat(trans(sum(square(dictionary))), 1,
data.n_cols) + repmat(sum(square(data)), atoms, 1) - 2 * trans(dictionary)
@@ -104,9 +31,10 @@ void LocalCoordinateCoding<DictionaryInitializer>::OptimizeCode()
arma::mat dictGram = trans(dictionary) * dictionary;
arma::mat dictGramTD(dictGram.n_rows, dictGram.n_cols);
+ codes.set_size(atoms, data.n_cols);
for (size_t i = 0; i < data.n_cols; i++)
{
- // report progress
+ // Report progress.
if ((i % 100) == 0)
{
Log::Debug << "Optimization at point " << i << "." << std::endl;
@@ -128,9 +56,9 @@ void LocalCoordinateCoding<DictionaryInitializer>::OptimizeCode()
}
}
-template<typename DictionaryInitializer>
-void LocalCoordinateCoding<DictionaryInitializer>::OptimizeDictionary(
- arma::uvec adjacencies)
+void LocalCoordinateCoding::OptimizeDictionary(const arma::mat& data,
+ const arma::mat& codes,
+ const arma::uvec& adjacencies)
{
// Count number of atomic neighbors for each point x^i.
arma::uvec neighborCounts = arma::zeros<arma::uvec>(data.n_cols, 1);
@@ -288,9 +216,9 @@ void LocalCoordinateCoding<DictionaryInitializer>::OptimizeDictionary(
}
}
-template<typename DictionaryInitializer>
-double LocalCoordinateCoding<DictionaryInitializer>::Objective(
- arma::uvec adjacencies) const
+double LocalCoordinateCoding::Objective(const arma::mat& data,
+ const arma::mat& codes,
+ const arma::uvec& adjacencies) const
{
double weightedL1NormZ = 0;
@@ -310,5 +238,3 @@ double LocalCoordinateCoding<DictionaryInitializer>::Objective(
} // namespace lcc
} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/local_coordinate_coding/lcc.hpp b/src/mlpack/methods/local_coordinate_coding/lcc.hpp
index 185e941..ec1e801 100644
--- a/src/mlpack/methods/local_coordinate_coding/lcc.hpp
+++ b/src/mlpack/methods/local_coordinate_coding/lcc.hpp
@@ -71,21 +71,56 @@ namespace lcc {
* }
* @endcode
*/
-template<typename DictionaryInitializer =
- sparse_coding::DataDependentRandomInitializer>
class LocalCoordinateCoding
{
public:
/**
- * Set the parameters to LocalCoordinateCoding.
+ * Set the parameters to LocalCoordinateCoding, and train the dictionary.
+ * This constructor will also initialize the dictionary using the given
+ * DictionaryInitializer before training.
+ *
+ * If you want to initialize the dictionary to a custom matrix, consider
+ * either writing your own DictionaryInitializer class (with void
+ * Initialize(const arma::mat& data, arma::mat& dictionary) function), or call
+ * the constructor that does not take a data matrix, then call Dictionary() to
+ * set the dictionary matrix to a matrix of your choosing, and then call
+ * Train() with sparse_coding::NothingInitializer (i.e.
+ * Train<sparse_coding::NothingInitializer>(data)).
*
* @param data Data matrix.
* @param atoms Number of atoms in dictionary.
* @param lambda Regularization parameter for weighted l1-norm penalty.
+ * @param maxIterations Maximum number of iterations for training (0 runs
+ * until convergence).
+ * @param tolerance Tolerance for the objective function.
*/
+ template<
+ typename DictionaryInitializer =
+ sparse_coding::DataDependentRandomInitializer
+ >
LocalCoordinateCoding(const arma::mat& data,
const size_t atoms,
- const double lambda);
+ const double lambda,
+ const size_t maxIterations = 0,
+ const double tolerance = 0.01,
+ const DictionaryInitializer& initializer =
+ DictionaryInitializer());
+
+ /**
+ * Set the parameters to LocalCoordinateCoding. This constructor will not
+ * train the model, and a subsequent call to Train() will be required before
+ * the model can encode points with Encode().
+ *
+ * @param atoms Number of atoms in dictionary.
+ * @param lambda Regularization parameter for weighted l1-norm penalty.
+ * @param maxIterations Maximum number of iterations for training (0 runs
+ * until convergence).
+ * @param tolerance Tolerance for the objective function.
+ */
+ LocalCoordinateCoding(const size_t atoms,
+ const double lambda,
+ const size_t maxIterations = 0,
+ const double tolerance = 0.01);
/**
* Run local coordinate coding.
@@ -95,13 +130,21 @@ class LocalCoordinateCoding
* function changes by a value lower than this tolerance, the optimization
* terminates.
*/
- void Encode(const size_t maxIterations = 0,
- const double objTolerance = 0.01);
+ template<
+ typename DictionaryInitializer =
+ sparse_coding::DataDependentRandomInitializer
+ >
+ void Train(const arma::mat& data,
+ const DictionaryInitializer& initializer =
+ DictionaryInitializer());
/**
* Code each point via distance-weighted LARS.
+ *
+ * @param data Matrix containing points to encode.
+ * @param codes Output matrix to store codes in.
*/
- void OptimizeCode();
+ void Encode(const arma::mat& data, arma::mat& codes);
/**
* Learn dictionary by solving linear system.
@@ -110,41 +153,56 @@ class LocalCoordinateCoding
* the coding matrix Z that are non-zero (the adjacency matrix for the
* bipartite graph of points and atoms)
*/
- void OptimizeDictionary(arma::uvec adjacencies);
+ void OptimizeDictionary(const arma::mat& data,
+ const arma::mat& codes,
+ const arma::uvec& adjacencies);
/**
* Compute objective function given the list of adjacencies.
*/
- double Objective(arma::uvec adjacencies) const;
+ double Objective(const arma::mat& data,
+ const arma::mat& codes,
+ const arma::uvec& adjacencies) const;
- //! Access the data.
- const arma::mat& Data() const { return data; }
+ //! Get the number of atoms.
+ size_t Atoms() const { return atoms; }
+ //! Modify the number of atoms.
+ size_t& Atoms() { return atoms; }
//! Accessor for dictionary.
const arma::mat& Dictionary() const { return dictionary; }
//! Mutator for dictionary.
arma::mat& Dictionary() { return dictionary; }
- //! Accessor the codes.
- const arma::mat& Codes() const { return codes; }
- //! Modify the codes.
- arma::mat& Codes() { return codes; }
+ //! Get the L1 regularization parameter.
+ double Lambda() const { return lambda; }
+ //! Modify the L1 regularization parameter.
+ double& Lambda() { return lambda; }
+
+ //! Get the maximum number of iterations.
+ size_t MaxIterations() const { return maxIterations; }
+ //! Modify the maximum number of iterations.
+ size_t& MaxIterations() { return maxIterations; }
+
+ //! Get the objective tolerance.
+ double Tolerance() const { return tolerance; }
+ //! Modify the objective tolerance.
+ double& Tolerance() { return tolerance; }
private:
//! Number of atoms in dictionary.
size_t atoms;
- //! Data matrix (columns are points).
- const arma::mat& data;
-
//! Dictionary (columns are atoms).
arma::mat dictionary;
- //! Codes (columns are points).
- arma::mat codes;
-
//! l1 regularization term.
double lambda;
+
+ //! Maximum number of iterations during training.
+ size_t maxIterations;
+ //! Tolerance for main objective.
+ double tolerance;
};
} // namespace lcc
diff --git a/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp b/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp
index 7f47f81..26887e8 100644
--- a/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp
+++ b/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp
@@ -111,7 +111,7 @@ int main(int argc, char* argv[])
// If there is an initial dictionary, be sure we do not initialize one.
if (initialDictionaryFile != "")
{
- LocalCoordinateCoding<NothingInitializer> lcc(input, atoms, lambda);
+ LocalCoordinateCoding lcc(atoms, lambda, maxIterations, objTolerance);
// Load initial dictionary directly into LCC object.
data::Load(initialDictionaryFile, lcc.Dictionary(), true);
@@ -132,26 +132,32 @@ int main(int argc, char* argv[])
}
// Run LCC.
- lcc.Encode(maxIterations, objTolerance);
+ lcc.Train<NothingInitializer>(input);
// Save the results.
Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
data::Save(dictionaryFile, lcc.Dictionary());
+
+ mat codes;
+ lcc.Encode(input, codes);
+
Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
- data::Save(codesFile, lcc.Codes());
+ data::Save(codesFile, codes);
}
else
{
// No initial dictionary.
- LocalCoordinateCoding<> lcc(input, atoms, lambda);
-
- // Run LCC.
- lcc.Encode(maxIterations, objTolerance);
+ LocalCoordinateCoding lcc(input, atoms, lambda, maxIterations,
+ objTolerance);
// Save the results.
Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
data::Save(dictionaryFile, lcc.Dictionary());
+
+ mat codes;
+ lcc.Encode(input, codes);
+
Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
- data::Save(codesFile, lcc.Codes());
+ data::Save(codesFile, codes);
}
}
diff --git a/src/mlpack/tests/local_coordinate_coding_test.cpp b/src/mlpack/tests/local_coordinate_coding_test.cpp
index 7da242f..98fea7f 100644
--- a/src/mlpack/tests/local_coordinate_coding_test.cpp
+++ b/src/mlpack/tests/local_coordinate_coding_test.cpp
@@ -59,20 +59,22 @@ BOOST_AUTO_TEST_CASE(LocalCoordinateCodingTestCodingStep)
X.col(i) /= norm(X.col(i), 2);
}
- LocalCoordinateCoding<> lcc(X, nAtoms, lambda1);
- lcc.OptimizeCode();
+ mat Z;
+ LocalCoordinateCoding lcc(X, nAtoms, lambda1);
+ lcc.Encode(X, Z);
mat D = lcc.Dictionary();
- mat Z = lcc.Codes();
- for(uword i = 0; i < nPoints; i++) {
- vec sq_dists = vec(nAtoms);
- for(uword j = 0; j < nAtoms; j++) {
+ for (uword i = 0; i < nPoints; i++)
+ {
+ vec sqDists = vec(nAtoms);
+ for (uword j = 0; j < nAtoms; j++)
+ {
vec diff = D.unsafe_col(j) - X.unsafe_col(i);
- sq_dists[j] = dot(diff, diff);
+ sqDists[j] = dot(diff, diff);
}
- mat Dprime = D * diagmat(1.0 / sq_dists);
- mat zPrime = Z.unsafe_col(i) % sq_dists;
+ mat Dprime = D * diagmat(1.0 / sqDists);
+ mat zPrime = Z.unsafe_col(i) % sqDists;
vec errCorr = trans(Dprime) * (Dprime * zPrime - X.unsafe_col(i));
VerifyCorrectness(zPrime, errCorr, 0.5 * lambda1);
@@ -96,11 +98,11 @@ BOOST_AUTO_TEST_CASE(LocalCoordinateCodingTestDictionaryStep)
X.col(i) /= norm(X.col(i), 2);
}
- LocalCoordinateCoding<> lcc(X, nAtoms, lambda);
- lcc.OptimizeCode();
- mat Z = lcc.Codes();
+ mat Z;
+ LocalCoordinateCoding lcc(X, nAtoms, lambda);
+ lcc.Encode(X, Z);
uvec adjacencies = find(Z);
- lcc.OptimizeDictionary(adjacencies);
+ lcc.OptimizeDictionary(X, Z, adjacencies);
mat D = lcc.Dictionary();
@@ -113,7 +115,6 @@ BOOST_AUTO_TEST_CASE(LocalCoordinateCodingTestDictionaryStep)
grad = lambda * grad + (D * Z - X) * trans(Z);
BOOST_REQUIRE_SMALL(norm(grad, "fro"), tol);
-
}
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-git mailing list