[mlpack-svn] r13184 - mlpack/trunk/src/mlpack/methods/local_coordinate_coding
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Jul 9 14:41:47 EDT 2012
Author: rcurtin
Date: 2012-07-09 14:41:47 -0400 (Mon, 09 Jul 2012)
New Revision: 13184
Modified:
mlpack/trunk/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp
Log:
Severe cleanup of executable, so it actually works again.
Modified: mlpack/trunk/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp 2012-07-09 18:34:21 UTC (rev 13183)
+++ mlpack/trunk/src/mlpack/methods/local_coordinate_coding/lcc_main.cpp 2012-07-09 18:41:47 UTC (rev 13184)
@@ -7,102 +7,150 @@
#include <mlpack/core.hpp>
#include "lcc.hpp"
-PROGRAM_INFO("LCC", "An implementation of Local Coordinate Coding");
+PROGRAM_INFO("Local Coordinate Coding",
+ "An implementation of Local Coordinate Coding (LCC), which "
+ "codes data that approximately lives on a manifold using a variation of l1-"
+ "norm regularized sparse coding. Given a dense data matrix X with n points"
+ " and d dimensions, LCC seeks to find a dense dictionary matrix D with k "
+ "atoms in d dimensions, and a coding matrix Z with n points in k "
+ "dimensions. Because of the regularization method used, the atoms in D "
+ "should lie close to the manifold on which the data points lie."
+ "\n\n"
+ "The original data matrix X can then be reconstructed as D * Z. Therefore,"
+ " this program finds a representation of each point in X as a sparse linear"
+ " combination of atoms in the dictionary D."
+ "\n\n"
+ "The coding is found with an algorithm which alternates between a "
+ "dictionary step, which updates the dictionary D, and a coding step, which "
+ "updates the coding matrix Z."
+ "\n\n"
+ "To run this program, the input matrix X must be specified (with -i), along"
+ " with the number of atoms in the dictionary (-k). An initial dictionary "
+ "may also be specified with the --initial_dictionary option. The l1-norm "
+ "regularization parameter is specified with -l. For example, to run LCC on"
+ " the dataset in data.csv using 200 atoms and an l1-regularization "
+ "parameter of 0.1, saving the dictionary into dict.csv and the codes into "
+ "codes.csv, use "
+ "\n\n"
+ "$ local_coordinate_coding -i data.csv -k 200 -l 0.1 -d dict.csv -c "
+ "codes.csv"
+ "\n\n"
+ "The maximum number of iterations may be specified with the -n option. "
+ "Optionally, the input data matrix X can be normalized before coding with "
+ "the -N option.");
-PARAM_DOUBLE_REQ("lambda", "weighted l1-norm regularization parameter.", "l");
+PARAM_STRING_REQ("input_file", "Filename of the input data.", "i");
+PARAM_INT_REQ("atoms", "Number of atoms in the dictionary.", "k");
-PARAM_INT_REQ("n_atoms", "number of atoms in dictionary.", "k");
+PARAM_DOUBLE("lambda", "Weighted l1-norm regularization parameter.", "l", 0.0);
-PARAM_INT_REQ("n_iterations", "number of iterations for sparse coding.", "");
+PARAM_INT("max_iterations", "Maximum number of iterations for LCC (0 indicates "
+ "no limit).", "n", 0);
-PARAM_STRING_REQ("data", "path to the input data.", "");
-PARAM_STRING("initial_dictionary", "Filename for initial dictionary.", "", "");
-PARAM_STRING("results_dir", "Directory for results.", "", "");
+PARAM_STRING("initial_dictionary", "Filename for optional initial dictionary.",
+ "D", "");
+PARAM_STRING("dictionary_file", "Filename to save the output dictionary to.",
+ "d", "dictionary.csv");
+PARAM_STRING("codes_file", "Filename to save the output codes to.", "c",
+ "codes.csv");
+
+PARAM_FLAG("normalize", "If set, the input data matrix will be normalized "
+ "before coding.", "N");
+
+PARAM_INT("seed", "Random seed. If 0, 'std::time(NULL)' is used.", "s", 0);
+
using namespace arma;
using namespace std;
using namespace mlpack;
+using namespace mlpack::math;
using namespace mlpack::lcc;
+using namespace mlpack::sparse_coding; // For NothingInitializer.
int main(int argc, char* argv[])
{
CLI::ParseCommandLine(argc, argv);
- double lambda = CLI::GetParam<double>("lambda");
+ if (CLI::GetParam<int>("seed") != 0)
+ RandomSeed((size_t) CLI::GetParam<int>("seed"));
+ else
+ RandomSeed((size_t) std::time(NULL));
- // if using fx-run, one could just leave resultsDir blank
- const char* resultsDir = CLI::GetParam<string>("results_dir").c_str();
+ const double lambda = CLI::GetParam<double>("lambda");
- const char* dataFullpath = CLI::GetParam<string>("data").c_str();
+ const string inputFile = CLI::GetParam<string>("input_file");
+ const string dictionaryFile = CLI::GetParam<string>("dictionary_file");
+ const string codesFile = CLI::GetParam<string>("codes_file");
+ const string initialDictionaryFile =
+ CLI::GetParam<string>("initial_dictionary");
- const char* initialDictionaryFullpath =
- CLI::GetParam<string>("initial_dictionary").c_str();
+ const size_t maxIterations = CLI::GetParam<int>("max_iterations");
+ const size_t atoms = CLI::GetParam<int>("atoms");
- size_t nIterations = CLI::GetParam<int>("n_iterations");
+ const bool normalize = CLI::HasParam("normalize");
- size_t nAtoms = CLI::GetParam<int>("n_atoms");
+ mat input;
+ data::Load(inputFile, input, true);
- mat matX;
- matX.load(dataFullpath);
+ Log::Info << "Loaded " << input.n_cols << " points in " << input.n_rows
+ << " dimensions." << endl;
- uword nPoints = matX.n_cols;
-
- // normalize each point since these are images
- for (uword i = 0; i < nPoints; i++)
+ // Normalize each point if the user asked for it.
+ if (normalize)
{
- matX.col(i) /= norm(matX.col(i), 2);
+ Log::Info << "Normalizing data before coding..." << endl;
+ for (size_t i = 0; i < input.n_cols; ++i)
+ input.col(i) /= norm(input.col(i), 2);
}
- // run Local Coordinate Coding
- LocalCoordinateCoding<> lcc(matX, nAtoms, lambda);
+ // If there is an initial dictionary, be sure we do not initialize one.
+ if (initialDictionaryFile != "")
+ {
+ LocalCoordinateCoding<NothingInitializer> lcc(input, atoms, lambda);
- if (strlen(initialDictionaryFullpath) == 0)
- {
-// lcc.DataDependentRandomInitDictionary();
- }
- else
- {
- mat matInitialD;
- matInitialD.load(initialDictionaryFullpath);
- if (matInitialD.n_cols != nAtoms)
+ // Load initial dictionary directly into LCC object.
+ data::Load(initialDictionaryFile, lcc.Dictionary(), true);
+
+ // Validate size of initial dictionary.
+ if (lcc.Dictionary().n_cols != atoms)
{
- Log::Fatal << "The specified initial dictionary to load has "
- << matInitialD.n_cols << " atoms, but the learned dictionary "
- << "was specified to have " << nAtoms << " atoms!\n";
+ Log::Fatal << "The initial dictionary has " << lcc.Dictionary().n_cols
+ << " atoms, but the number of atoms was specified to be " << atoms
+ << "!" << endl;
}
- if (matInitialD.n_rows != matX.n_rows)
+ if (lcc.Dictionary().n_rows != input.n_rows)
{
- Log::Fatal << "The specified initial dictionary to load has "
- << matInitialD.n_rows << " dimensions, but the specified data "
- << "has " << matX.n_rows << " dimensions!\n";
+ Log::Fatal << "The initial dictionary has " << lcc.Dictionary().n_rows
+ << " dimensions, but the data has " << input.n_rows << " dimensions!"
+ << endl;
}
- lcc.Dictionary() = matInitialD;
- }
+ // Run LCC.
+ Timer::Start("local_coordinate_coding");
+ lcc.Encode(maxIterations);
+ Timer::Stop("local_coordinate_coding");
- Timer::Start("local_coordinate_coding");
- lcc.Encode(nIterations);
- Timer::Stop("local_coordinate_coding");
-
- mat learnedD = lcc.Dictionary();
- mat learnedZ = lcc.Codes();
-
- if (strlen(resultsDir) == 0)
- {
- data::Save("D.csv", learnedD);
- data::Save("Z.csv", learnedZ);
+ // Save the results.
+ Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
+ data::Save(dictionaryFile, lcc.Dictionary());
+ Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
+ data::Save(codesFile, lcc.Codes());
}
else
{
- char* dataFullpath = (char*) malloc(320 * sizeof(char));
+ // No initial dictionary.
+ LocalCoordinateCoding<> lcc(input, atoms, lambda);
- sprintf(dataFullpath, "%s/D.csv", resultsDir);
- data::Save(dataFullpath, learnedD);
+ // Run LCC.
+ Timer::Start("local_coordinate_coding");
+ lcc.Encode(maxIterations);
+ Timer::Stop("local_coordinate_coding");
- sprintf(dataFullpath, "%s/Z.csv", resultsDir);
- data::Save(dataFullpath, learnedZ);
-
- free(dataFullpath);
+ // Save the results.
+ Log::Info << "Saving dictionary matrix to '" << dictionaryFile << "'.\n";
+ data::Save(dictionaryFile, lcc.Dictionary());
+ Log::Info << "Saving sparse codes to '" << codesFile << "'.\n";
+ data::Save(codesFile, lcc.Codes());
}
}
More information about the mlpack-svn
mailing list