[mlpack-svn] r13047 - mlpack/trunk/src/mlpack/methods/pca
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Jun 13 12:18:45 EDT 2012
Author: rmohan
Date: 2012-06-13 12:18:45 -0400 (Wed, 13 Jun 2012)
New Revision: 13047
Added:
mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp
mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp
mlpack/trunk/src/mlpack/methods/pca/nmf.hpp
mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp
mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp
Modified:
mlpack/trunk/src/mlpack/methods/pca/CMakeLists.txt
Log:
Basic implementation of NMF
Modified: mlpack/trunk/src/mlpack/methods/pca/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/CMakeLists.txt 2012-06-13 15:48:27 UTC (rev 13046)
+++ mlpack/trunk/src/mlpack/methods/pca/CMakeLists.txt 2012-06-13 16:18:45 UTC (rev 13047)
@@ -3,8 +3,10 @@
# Define the files we need to compile
# Anything not in this list will not be compiled into MLPACK.
set(SOURCES
- pca.hpp
- pca.cpp
+ mdistupdate.hpp
+ mdivupdate.hpp
+ nmf.hpp
+ nmf_impl.hpp
)
# Add directory name to sources.
@@ -16,10 +18,10 @@
# the parent scope).
set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
-add_executable(pca
- pca_main.cpp
+add_executable(nmf
+ nmf_main.cpp
)
-target_link_libraries(pca
+target_link_libraries(nmf
mlpack
)
-install(TARGETS pca RUNTIME DESTINATION bin)
+install(TARGETS nmf RUNTIME DESTINATION bin)
Added: mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp 2012-06-13 16:18:45 UTC (rev 13047)
@@ -0,0 +1,95 @@
+/**
+ * @file mdistupdate.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * described in the paper 'Algorithms for Non-negative Matrix Factorization'
+ * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
+ * that the Frobenius norm \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ is
+ * non-increasing between subsequent iterations. Both of the update rules
+ * for W and H are defined in this file.
+ *
+ */
+
+#ifndef __MLPACK_METHODS_NMF_MDISTUPDATE_HPP
+#define __MLPACK_METHODS_NMF_MDISTUPDATE_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * The update rule for the basis matrix W. The formula used is
+ * \f[
+ * W_{ia} \leftarrow W_{ia} \frac{(VH^T)_{ia}}{(WHH^T)_{ia}}
+ * \f]
+ */
+class MultiplicativeDistanceW
+{
+ public:
+ // Empty constructor required for the WUpdateRule template
+ MultiplicativeDistanceW() { }
+
+ /**
+ * The update function that actually updates the W matrix. The function takes
+ * in all the salient matrices and only changes the value of the W matrix.
+ *
+ * @param V Input matrix to be factorized
+ * @param W Basis matrix to be output
+ * @param H Encoding matrix to output
+ */
+
+ inline static void Update(const arma::mat& V,
+ arma::mat& W,
+ const arma::mat& H)
+ {
+ // Simple implementation. This can be left here.
+ arma::mat t1,t2;
+
+ t1 = V*H.t();
+ t2 = W*H*H.t();
+
+ W = (W%t1)/t2;
+ }
+}; // Class MultiplicativeDistanceW
+
+/**
+ * The update rule for the encoding matrix H. The formula used is
+ * \f[
+ * H_{a\mu} \leftarrow H_{a\mu} \frac{(W^T V)_{a\mu}}{(W^T WH)_{a\mu}}
+ * \f]
+ */
+class MultiplicativeDistanceH
+{
+ public:
+ // Empty constructor required for the HUpdateRule template
+ MultiplicativeDistanceH() { }
+
+ /**
+ * The update function that actually updates the H matrix. The function takes
+ * in all the salient matrices and only changes the value of the H matrix.
+ *
+ * @param V Input matrix to be factorized
+ * @param W Basis matrix to be output
+ * @param H Encoding matrix to output
+ */
+
+ inline static void Update(const arma::mat& V,
+ const arma::mat& W,
+ arma::mat& H)
+ {
+ // Simple implementation. This can be left here.
+ arma::mat t1,t2;
+
+ t1 = W.t()*V;
+ t2 = W.t()*W*H;
+
+ H = (H%t1)/t2;
+ }
+}; // Class MultiplicativeDistanceH
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif
Added: mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp 2012-06-13 16:18:45 UTC (rev 13047)
@@ -0,0 +1,112 @@
+/**
+ * @file mdivupdate.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * described in the paper 'Algorithms for Non-negative Matrix Factorization'
+ * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
+ * that the the 'divergence'
+ * \f$ \sum_i \sum_j (V_{ij} log\frac{V_{ij}}{(WH)_{ij}}-V_{ij}+(WH)_{ij}) \f$is
+ * non-increasing between subsequent iterations. Both of the update rules
+ * for W and H are defined in this file.
+ *
+ */
+
+#ifndef __MLPACK_METHODS_NMF_MDIVUPDATE_HPP
+#define __MLPACK_METHODS_NMF_MDIVUPDATE_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * The update rule for the basis matrix W. The formula used is
+ * \f[
+ * W_{ia} \leftarrow W_{ia} \frac{\sum_{\mu} H_{a\mu} V_{i\mu}/(WH)_{i\mu}}
+ * {\sum_{\nu} H_{a\nu}}
+ * \f]
+ */
+class MultiplicativeDistanceW
+{
+ public:
+ // Empty constructor required for the WUpdateRule template
+ MultiplicativeDivergenceW() { }
+
+ /**
+ * The update function that actually updates the W matrix. The function takes
+ * in all the salient matrices and only changes the value of the W matrix.
+ *
+ * @param V Input matrix to be factorized
+ * @param W Basis matrix to be output
+ * @param H Encoding matrix to output
+ */
+
+ inline static void Update(const arma::mat& V,
+ arma::mat& W,
+ const arma::mat& H)
+ {
+ // Simple implementation. This can be left here.
+ arma::mat t1;
+ arma::rowvec t2;
+
+ t1 = W*H;
+ for(size_t i=0;i<W.n_rows;i++)
+ {
+ for(size_t j=0;j<W.n_cols;j++)
+ {
+ t2 = H.row(j)%V.row(i)/t1.row(i);
+ W(i,j) = W(i,j)*sum(t2)/sum(H.row(i));
+ }
+ }
+
+ }
+}; // Class MultiplicativeDivergenceW
+
+/**
+ * The update rule for the encoding matrix H. The formula used is
+ * \f[
+ * H_{a\mu} \leftarrow H_{a\mu} \frac{\sum_{i} W_{ia} V_{i\mu}/(WH)_{i\mu}}
+ * {\sum_{k} H_{ka}}
+ * \f]
+ */
+class MultiplicativeDivergenceH
+{
+ public:
+ // Empty constructor required for the HUpdateRule template
+ MultiplicativeDistanceH() { }
+
+ /**
+ * The update function that actually updates the H matrix. The function takes
+ * in all the salient matrices and only changes the value of the H matrix.
+ *
+ * @param V Input matrix to be factorized
+ * @param W Basis matrix to be output
+ * @param H Encoding matrix to output
+ */
+
+ inline static void Update(const arma::mat& V,
+ const arma::mat& W,
+ arma::mat& H)
+ {
+ // Simple implementation. This can be left here.
+ arma::mat t1;
+ arma::colvec t2;
+
+ t1 = W*H;
+ for(size_t i=0;i<H.n_rows;i++)
+ {
+ for(size_t j=0;j<H.n_cols;j++)
+ {
+ t2 = W.col(i)%V.col(j)/t1.col(j);
+ H(i,j) = H(i,j)*sum(t2)/sum(H.col(i));
+ }
+ }
+
+ }
+}; // Class MultiplicativeDivergenceH
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif
Added: mlpack/trunk/src/mlpack/methods/pca/nmf.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/nmf.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/pca/nmf.hpp 2012-06-13 16:18:45 UTC (rev 13047)
@@ -0,0 +1,101 @@
+/**
+ * @file nmf.hpp
+ * @author Mohan Rajendran
+ *
+ * Defines the NMF class to perform Non-negative Matrix Factorization
+ * on the given matrix.
+ */
+#ifndef __MLPACK_METHODS_NMF_NMF_HPP
+#define __MLPACK_METHODS_NMF_NMF_HPP
+
+#include <mlpack/core.hpp>
+#include "mdistupdate.hpp"
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * This class implements the NMF on the given matrix V. Non-negative Matrix
+ * Factorization decomposes V in the form \f$ V \approx WH \f$ where W is
+ * called the basis matrix and H is called the encoding matrix. V is taken
+ * to be of size n*m and the obtained W is n*r and H is r*m. The size r is
+ * called the rank of the factorization.
+ *
+ * The implementation requires the supply of two templates. One for the update
+ * rule for updating the W matrix during each iteration and another rule for
+ * updating the H matrix during each iteration. This allows the user to
+ * try out various update rules for performing the factorization.
+ *
+ * A simple example of how to run NMF is shown below.
+ *
+ * @code
+ * extern arma::mat V; // Matrix that we want to perform NMF on.
+ * size_t r = 10; // Rank of decomposition
+ * arma::mat W; // Basis matrix
+ * arma::mat H; // Encoding matrix
+ *
+ * NMF<> nmf(); // Default options
+ * nmf.Apply(V,W,H,r);
+ * @endcode
+ *
+ * @tparam WUpdateRule The update rule for calculating W matrix at each
+ * iteration; @see MultiplicativeDistanceW for an example.
+ * @tparam HUpdateRule The update rule for calculating H matrix at each
+ * iteration; @see MultiplicativeDistanceH for an example.
+ */
+template<typename WUpdateRule = MultiplicativeDistanceW,
+ typename HUpdateRule = MultiplicativeDistanceH>
+class NMF
+{
+ public:
+ /**
+ * Create the NMF object and (optionally) set the parameters which NMF will
+ * run with. This implementation allows us to use different update rules for
+ * the updation of the basis and encoding matrices over each iteration.
+ *
+ * @param maxIterations Maximum number of iterations allowed before giving up
+ * @param maxResidue The maximum root mean square of the difference between
+ * two subsequent iteration of product WH at which to terminate iteration.
+ * A low residual value denotes that subsequent iterationas are not
+ * producing much different values of W and H. Once the difference goes
+ * below the supplied value, the iteration terminates.
+ * @param WUpdate Optional WUpdateRule object; for when the update rule for
+ * the W vector has states that it needs to store.
+ * @param HUpdate Optional HUpdateRule object; for when the update rule for
+ * the H vector has states that it needs to store.
+ */
+ NMF(const size_t maxIterations = 1000,
+ const double maxResidue = 1e-10,
+ const WUpdateRule WUpdate = WUpdateRule(),
+ const HUpdateRule HUpdate = HUpdateRule());
+
+ /**
+ * Apply the Non-Negative Matrix Factorization on the provided matrix.
+ *
+ * @param V Input matrix to be factorized
+ * @param W Basis matrix to be output
+ * @param H Encoding matrix to output
+ * @param r Rank r of the factorization
+ */
+ void Apply(const arma::mat& V, arma::mat& W, arma::mat& H,
+ size_t& r) const;
+
+ private:
+ //! The maximum number of iterations allowed before giving up
+ size_t maxIterations;
+ //! The maximum residue below which iteration is considered converged
+ double maxResidue;
+ //! Instantiated W Update Rule
+ WUpdateRule WUpdate;
+ //! Instantiated H Update Rule
+ HUpdateRule HUpdate;
+
+}; // class NMF
+
+}; // namespace nmf
+}; // namespace mlpack
+
+// Include implementation.
+#include "nmf_impl.hpp"
+
+#endif
Added: mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp 2012-06-13 16:18:45 UTC (rev 13047)
@@ -0,0 +1,89 @@
+/**
+ * @file nmf.cpp
+ * @author Mohan Rajendran
+ *
+ * Implementation of NMF class to perform Non-Negative Matrix Factorization
+ * on the given matrix.
+ */
+#include "nmf.hpp"
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * Construct the NMF object.
+ */
+template<typename WUpdateRule,
+ typename HUpdateRule>
+NMF<WUpdateRule,
+ HUpdateRule>::
+NMF(const size_t maxIterations,
+ const double maxResidue,
+ const WUpdateRule WUpdate,
+ const HUpdateRule HUpdate) :
+ maxIterations(maxIterations),
+ maxResidue(maxResidue),
+ WUpdate(WUpdate),
+ HUpdate(HUpdate)
+{
+ if (maxResidue < 0.0)
+ {
+ Log::Warn << "NMF::NMF(): maxResidue must be a positive value ("
+ << maxResidue << " given). Setting to the default value of "
+ << "1e-10.\n";
+ this->maxResidue = 1e-10;
+ }
+}
+
+/**
+ * Apply the Non-Negative Matrix Factorization on the provided matrix.
+ *
+ * @param V Input matrix to be factorized
+ * @param W Basis matrix to be output
+ * @param H Encoding matrix to output
+ * @param r Rank r of the factorization
+ */
+template<typename WUpdateRule,
+ typename HUpdateRule>
+void NMF<WUpdateRule,
+ HUpdateRule>::
+Apply(const arma::mat& V, arma::mat& W, arma::mat& H, size_t& r) const
+{
+ size_t n = V.n_rows;
+ size_t m = V.n_cols;
+ // old and new product WH for residue checking
+ arma::mat WHold,WH,diff;
+
+ // Allocate random values to the starting iteration
+ W.randu(n,r);
+ H.randu(r,m);
+ // Store the original calculated value for residue checking
+ WHold = W*H;
+
+ size_t iteration = 0;
+ double residue;
+ double sqrRes = maxResidue*maxResidue;
+
+ do
+ {
+ // Update step.
+ // Update the value of W and H based on the Update Rules provided
+ WUpdate.Update(V,W,H);
+ HUpdate.Update(V,W,H);
+
+ // Calculate square of residue after iteration
+ WH = W*H;
+ diff = WHold-WH;
+ diff = diff%diff;
+ residue = accu(diff)/(double)(n*m);
+ WHold = WH;
+
+ iteration++;
+
+ } while (residue >= sqrRes && iteration != maxIterations);
+
+ Log::Debug << "Iterations: " << iteration << std::endl;
+}
+
+}; // namespace nmf
+}; // namespace mlpack
Added: mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp 2012-06-13 16:18:45 UTC (rev 13047)
@@ -0,0 +1,65 @@
+/**
+ * @file nmf_main.cpp
+ * @author Mohan Rajendran
+ *
+ * Main executable to run NMF.
+ */
+#include <mlpack/core.hpp>
+
+#include "nmf.hpp"
+
+using namespace mlpack;
+using namespace mlpack::nmf;
+using namespace std;
+
+// Document program.
+PROGRAM_INFO("Non-negative Matrix Factorization", "This program performs the "
+ "non-negative matrix factorization on the given vector. It will store the "
+ "calculated factors in the reference matrix arguments supplied.");
+
+// Parameters for program.
+PARAM_STRING_REQ("input_file", "Input matrix to perform NMF on.", "i");
+PARAM_STRING_REQ("W_output_file", "File to save the calculated W matrix to.",
+ "w");
+PARAM_STRING_REQ("H_output_file", "File to save the calculated H matrix to.",
+ "h");
+PARAM_INT_REQ("rank", "Rank of the factorization.", "r");
+PARAM_INT("max_iterations", "Number of iterations before NMF terminates",
+ "m", 1000);
+PARAM_DOUBLE("max_residue", "The maximum root mean square allowed below which "
+ "the program termiates", "e", 1e-10);
+
+int main(int argc, char** argv)
+{
+ // Parse commandline.
+ CLI::ParseCommandLine(argc, argv);
+
+ // Load input dataset.
+ string inputFile = CLI::GetParam<string>("input_file");
+ arma::mat V;
+ data::Load(inputFile.c_str(), V);
+ arma::mat W;
+ arma::mat H;
+
+ // Find out the rank of the factorization.
+ size_t r = CLI::GetParam<int>("rank");
+ if (r<1)
+ {
+ Log::Fatal << "The rank of the factorization cannot be less than 1. "
+ << std::endl;
+ }
+
+ size_t maxiterations = CLI::GetParam<int>("max_iterations");
+ double maxresidue = CLI::GetParam<double>("max_residue");
+
+ // Perform NMF.
+ NMF<> nmf(maxiterations,maxresidue);
+ Log::Info << "Performing NMF on the given matrix..." << endl;
+ nmf.Apply(V,W,H,r);
+
+ // Save results
+ string outputFile = CLI::GetParam<string>("W_output_file");
+ data::Save(outputFile, W);
+ outputFile = CLI::GetParam<string>("H_output_file");
+ data::Save(outputFile, H);
+}
More information about the mlpack-svn
mailing list