[mlpack-svn] r13050 - mlpack/trunk/src/mlpack/methods/pca
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Jun 13 12:20:58 EDT 2012
Author: rmohan
Date: 2012-06-13 12:20:58 -0400 (Wed, 13 Jun 2012)
New Revision: 13050
Removed:
mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp
mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp
mlpack/trunk/src/mlpack/methods/pca/nmf.hpp
mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp
mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp
Log:
Move NMF out of PCA
Deleted: mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp 2012-06-13 16:20:50 UTC (rev 13049)
+++ mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp 2012-06-13 16:20:58 UTC (rev 13050)
@@ -1,95 +0,0 @@
-/**
- * @file mdistupdate.hpp
- * @author Mohan Rajendran
- *
- * Update rules for the Non-negative Matrix Factorization. This follows a method
- * described in the paper 'Algorithms for Non-negative Matrix Factorization'
- * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
- * that the Frobenius norm \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ is
- * non-increasing between subsequent iterations. Both of the update rules
- * for W and H are defined in this file.
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_MDISTUPDATE_HPP
-#define __MLPACK_METHODS_NMF_MDISTUPDATE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * The update rule for the basis matrix W. The formula used is
- * \f[
- * W_{ia} \leftarrow W_{ia} \frac{(VH^T)_{ia}}{(WHH^T)_{ia}}
- * \f]
- */
-class MultiplicativeDistanceW
-{
- public:
- // Empty constructor required for the WUpdateRule template
- MultiplicativeDistanceW() { }
-
- /**
- * The update function that actually updates the W matrix. The function takes
- * in all the salient matrices and only changes the value of the W matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- */
-
- inline static void Update(const arma::mat& V,
- arma::mat& W,
- const arma::mat& H)
- {
- // Simple implementation. This can be left here.
- arma::mat t1,t2;
-
- t1 = V*H.t();
- t2 = W*H*H.t();
-
- W = (W%t1)/t2;
- }
-}; // Class MultiplicativeDistanceW
-
-/**
- * The update rule for the encoding matrix H. The formula used is
- * \f[
- * H_{a\mu} \leftarrow H_{a\mu} \frac{(W^T V)_{a\mu}}{(W^T WH)_{a\mu}}
- * \f]
- */
-class MultiplicativeDistanceH
-{
- public:
- // Empty constructor required for the HUpdateRule template
- MultiplicativeDistanceH() { }
-
- /**
- * The update function that actually updates the H matrix. The function takes
- * in all the salient matrices and only changes the value of the H matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- */
-
- inline static void Update(const arma::mat& V,
- const arma::mat& W,
- arma::mat& H)
- {
- // Simple implementation. This can be left here.
- arma::mat t1,t2;
-
- t1 = W.t()*V;
- t2 = W.t()*W*H;
-
- H = (H%t1)/t2;
- }
-}; // Class MultiplicativeDistanceH
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif
Deleted: mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp 2012-06-13 16:20:50 UTC (rev 13049)
+++ mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp 2012-06-13 16:20:58 UTC (rev 13050)
@@ -1,112 +0,0 @@
-/**
- * @file mdivupdate.hpp
- * @author Mohan Rajendran
- *
- * Update rules for the Non-negative Matrix Factorization. This follows a method
- * described in the paper 'Algorithms for Non-negative Matrix Factorization'
- * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
- * that the the 'divergence'
- * \f$ \sum_i \sum_j (V_{ij} log\frac{V_{ij}}{(WH)_{ij}}-V_{ij}+(WH)_{ij}) \f$is
- * non-increasing between subsequent iterations. Both of the update rules
- * for W and H are defined in this file.
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_MDIVUPDATE_HPP
-#define __MLPACK_METHODS_NMF_MDIVUPDATE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * The update rule for the basis matrix W. The formula used is
- * \f[
- * W_{ia} \leftarrow W_{ia} \frac{\sum_{\mu} H_{a\mu} V_{i\mu}/(WH)_{i\mu}}
- * {\sum_{\nu} H_{a\nu}}
- * \f]
- */
-class MultiplicativeDistanceW
-{
- public:
- // Empty constructor required for the WUpdateRule template
- MultiplicativeDivergenceW() { }
-
- /**
- * The update function that actually updates the W matrix. The function takes
- * in all the salient matrices and only changes the value of the W matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- */
-
- inline static void Update(const arma::mat& V,
- arma::mat& W,
- const arma::mat& H)
- {
- // Simple implementation. This can be left here.
- arma::mat t1;
- arma::rowvec t2;
-
- t1 = W*H;
- for(size_t i=0;i<W.n_rows;i++)
- {
- for(size_t j=0;j<W.n_cols;j++)
- {
- t2 = H.row(j)%V.row(i)/t1.row(i);
- W(i,j) = W(i,j)*sum(t2)/sum(H.row(i));
- }
- }
-
- }
-}; // Class MultiplicativeDivergenceW
-
-/**
- * The update rule for the encoding matrix H. The formula used is
- * \f[
- * H_{a\mu} \leftarrow H_{a\mu} \frac{\sum_{i} W_{ia} V_{i\mu}/(WH)_{i\mu}}
- * {\sum_{k} H_{ka}}
- * \f]
- */
-class MultiplicativeDivergenceH
-{
- public:
- // Empty constructor required for the HUpdateRule template
- MultiplicativeDistanceH() { }
-
- /**
- * The update function that actually updates the H matrix. The function takes
- * in all the salient matrices and only changes the value of the H matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- */
-
- inline static void Update(const arma::mat& V,
- const arma::mat& W,
- arma::mat& H)
- {
- // Simple implementation. This can be left here.
- arma::mat t1;
- arma::colvec t2;
-
- t1 = W*H;
- for(size_t i=0;i<H.n_rows;i++)
- {
- for(size_t j=0;j<H.n_cols;j++)
- {
- t2 = W.col(i)%V.col(j)/t1.col(j);
- H(i,j) = H(i,j)*sum(t2)/sum(H.col(i));
- }
- }
-
- }
-}; // Class MultiplicativeDivergenceH
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif
Deleted: mlpack/trunk/src/mlpack/methods/pca/nmf.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/nmf.hpp 2012-06-13 16:20:50 UTC (rev 13049)
+++ mlpack/trunk/src/mlpack/methods/pca/nmf.hpp 2012-06-13 16:20:58 UTC (rev 13050)
@@ -1,101 +0,0 @@
-/**
- * @file nmf.hpp
- * @author Mohan Rajendran
- *
- * Defines the NMF class to perform Non-negative Matrix Factorization
- * on the given matrix.
- */
-#ifndef __MLPACK_METHODS_NMF_NMF_HPP
-#define __MLPACK_METHODS_NMF_NMF_HPP
-
-#include <mlpack/core.hpp>
-#include "mdistupdate.hpp"
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * This class implements the NMF on the given matrix V. Non-negative Matrix
- * Factorization decomposes V in the form \f$ V \approx WH \f$ where W is
- * called the basis matrix and H is called the encoding matrix. V is taken
- * to be of size n*m and the obtained W is n*r and H is r*m. The size r is
- * called the rank of the factorization.
- *
- * The implementation requires the supply of two templates. One for the update
- * rule for updating the W matrix during each iteration and another rule for
- * updating the H matrix during each iteration. This allows the user to
- * try out various update rules for performing the factorization.
- *
- * A simple example of how to run NMF is shown below.
- *
- * @code
- * extern arma::mat V; // Matrix that we want to perform NMF on.
- * size_t r = 10; // Rank of decomposition
- * arma::mat W; // Basis matrix
- * arma::mat H; // Encoding matrix
- *
- * NMF<> nmf(); // Default options
- * nmf.Apply(V,W,H,r);
- * @endcode
- *
- * @tparam WUpdateRule The update rule for calculating W matrix at each
- * iteration; @see MultiplicativeDistanceW for an example.
- * @tparam HUpdateRule The update rule for calculating H matrix at each
- * iteration; @see MultiplicativeDistanceH for an example.
- */
-template<typename WUpdateRule = MultiplicativeDistanceW,
- typename HUpdateRule = MultiplicativeDistanceH>
-class NMF
-{
- public:
- /**
- * Create the NMF object and (optionally) set the parameters which NMF will
- * run with. This implementation allows us to use different update rules for
- * the updation of the basis and encoding matrices over each iteration.
- *
- * @param maxIterations Maximum number of iterations allowed before giving up
- * @param maxResidue The maximum root mean square of the difference between
- * two subsequent iteration of product WH at which to terminate iteration.
- * A low residual value denotes that subsequent iterationas are not
- * producing much different values of W and H. Once the difference goes
- * below the supplied value, the iteration terminates.
- * @param WUpdate Optional WUpdateRule object; for when the update rule for
- * the W vector has states that it needs to store.
- * @param HUpdate Optional HUpdateRule object; for when the update rule for
- * the H vector has states that it needs to store.
- */
- NMF(const size_t maxIterations = 1000,
- const double maxResidue = 1e-10,
- const WUpdateRule WUpdate = WUpdateRule(),
- const HUpdateRule HUpdate = HUpdateRule());
-
- /**
- * Apply the Non-Negative Matrix Factorization on the provided matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- * @param r Rank r of the factorization
- */
- void Apply(const arma::mat& V, arma::mat& W, arma::mat& H,
- size_t& r) const;
-
- private:
- //! The maximum number of iterations allowed before giving up
- size_t maxIterations;
- //! The maximum residue below which iteration is considered converged
- double maxResidue;
- //! Instantiated W Update Rule
- WUpdateRule WUpdate;
- //! Instantiated H Update Rule
- HUpdateRule HUpdate;
-
-}; // class NMF
-
-}; // namespace nmf
-}; // namespace mlpack
-
-// Include implementation.
-#include "nmf_impl.hpp"
-
-#endif
Deleted: mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp 2012-06-13 16:20:50 UTC (rev 13049)
+++ mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp 2012-06-13 16:20:58 UTC (rev 13050)
@@ -1,89 +0,0 @@
-/**
- * @file nmf.cpp
- * @author Mohan Rajendran
- *
- * Implementation of NMF class to perform Non-Negative Matrix Factorization
- * on the given matrix.
- */
-#include "nmf.hpp"
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * Construct the NMF object.
- */
-template<typename WUpdateRule,
- typename HUpdateRule>
-NMF<WUpdateRule,
- HUpdateRule>::
-NMF(const size_t maxIterations,
- const double maxResidue,
- const WUpdateRule WUpdate,
- const HUpdateRule HUpdate) :
- maxIterations(maxIterations),
- maxResidue(maxResidue),
- WUpdate(WUpdate),
- HUpdate(HUpdate)
-{
- if (maxResidue < 0.0)
- {
- Log::Warn << "NMF::NMF(): maxResidue must be a positive value ("
- << maxResidue << " given). Setting to the default value of "
- << "1e-10.\n";
- this->maxResidue = 1e-10;
- }
-}
-
-/**
- * Apply the Non-Negative Matrix Factorization on the provided matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- * @param r Rank r of the factorization
- */
-template<typename WUpdateRule,
- typename HUpdateRule>
-void NMF<WUpdateRule,
- HUpdateRule>::
-Apply(const arma::mat& V, arma::mat& W, arma::mat& H, size_t& r) const
-{
- size_t n = V.n_rows;
- size_t m = V.n_cols;
- // old and new product WH for residue checking
- arma::mat WHold,WH,diff;
-
- // Allocate random values to the starting iteration
- W.randu(n,r);
- H.randu(r,m);
- // Store the original calculated value for residue checking
- WHold = W*H;
-
- size_t iteration = 0;
- double residue;
- double sqrRes = maxResidue*maxResidue;
-
- do
- {
- // Update step.
- // Update the value of W and H based on the Update Rules provided
- WUpdate.Update(V,W,H);
- HUpdate.Update(V,W,H);
-
- // Calculate square of residue after iteration
- WH = W*H;
- diff = WHold-WH;
- diff = diff%diff;
- residue = accu(diff)/(double)(n*m);
- WHold = WH;
-
- iteration++;
-
- } while (residue >= sqrRes && iteration != maxIterations);
-
- Log::Debug << "Iterations: " << iteration << std::endl;
-}
-
-}; // namespace nmf
-}; // namespace mlpack
Deleted: mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp 2012-06-13 16:20:50 UTC (rev 13049)
+++ mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp 2012-06-13 16:20:58 UTC (rev 13050)
@@ -1,65 +0,0 @@
-/**
- * @file nmf_main.cpp
- * @author Mohan Rajendran
- *
- * Main executable to run NMF.
- */
-#include <mlpack/core.hpp>
-
-#include "nmf.hpp"
-
-using namespace mlpack;
-using namespace mlpack::nmf;
-using namespace std;
-
-// Document program.
-PROGRAM_INFO("Non-negative Matrix Factorization", "This program performs the "
- "non-negative matrix factorization on the given vector. It will store the "
- "calculated factors in the reference matrix arguments supplied.");
-
-// Parameters for program.
-PARAM_STRING_REQ("input_file", "Input matrix to perform NMF on.", "i");
-PARAM_STRING_REQ("W_output_file", "File to save the calculated W matrix to.",
- "w");
-PARAM_STRING_REQ("H_output_file", "File to save the calculated H matrix to.",
- "h");
-PARAM_INT_REQ("rank", "Rank of the factorization.", "r");
-PARAM_INT("max_iterations", "Number of iterations before NMF terminates",
- "m", 1000);
-PARAM_DOUBLE("max_residue", "The maximum root mean square allowed below which "
- "the program termiates", "e", 1e-10);
-
-int main(int argc, char** argv)
-{
- // Parse commandline.
- CLI::ParseCommandLine(argc, argv);
-
- // Load input dataset.
- string inputFile = CLI::GetParam<string>("input_file");
- arma::mat V;
- data::Load(inputFile.c_str(), V);
- arma::mat W;
- arma::mat H;
-
- // Find out the rank of the factorization.
- size_t r = CLI::GetParam<int>("rank");
- if (r<1)
- {
- Log::Fatal << "The rank of the factorization cannot be less than 1. "
- << std::endl;
- }
-
- size_t maxiterations = CLI::GetParam<int>("max_iterations");
- double maxresidue = CLI::GetParam<double>("max_residue");
-
- // Perform NMF.
- NMF<> nmf(maxiterations,maxresidue);
- Log::Info << "Performing NMF on the given matrix..." << endl;
- nmf.Apply(V,W,H,r);
-
- // Save results
- string outputFile = CLI::GetParam<string>("W_output_file");
- data::Save(outputFile, W);
- outputFile = CLI::GetParam<string>("H_output_file");
- data::Save(outputFile, H);
-}
More information about the mlpack-svn
mailing list