[mlpack-svn] r13050 - mlpack/trunk/src/mlpack/methods/pca

Wed Jun 13 12:20:58 EDT 2012

Author: rmohan
Date: 2012-06-13 12:20:58 -0400 (Wed, 13 Jun 2012)
New Revision: 13050

Removed:
   mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp
   mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp
   mlpack/trunk/src/mlpack/methods/pca/nmf.hpp
   mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp
   mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp
Log:
Move NMF out of PCA


Deleted: mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp
===================================================================

--- mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp	2012-06-13 16:20:50 UTC (rev 13049)
+++ mlpack/trunk/src/mlpack/methods/pca/mdistupdate.hpp	2012-06-13 16:20:58 UTC (rev 13050)
@@ -1,95 +0,0 @@
-/**
- * @file mdistupdate.hpp
- * @author Mohan Rajendran
- *
- * Update rules for the Non-negative Matrix Factorization. This follows a method
- * described in the paper 'Algorithms for Non-negative Matrix Factorization' 
- * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
- * that the Frobenius norm \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ is
- * non-increasing between subsequent iterations. Both of the update rules
- * for W and H are defined in this file.
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_MDISTUPDATE_HPP
-#define __MLPACK_METHODS_NMF_MDISTUPDATE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * The update rule for the basis matrix W. The formula used is 
- * \f[ 
- * W_{ia} \leftarrow W_{ia} \frac{(VH^T)_{ia}}{(WHH^T)_{ia}}
- * \f]
- */
-class MultiplicativeDistanceW
-{
- public:
-  // Empty constructor required for the WUpdateRule template
-  MultiplicativeDistanceW() { }
-
-  /**
-   * The update function that actually updates the W matrix. The function takes
-   * in all the salient matrices and only changes the value of the W matrix.
-   *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix; updated in place by this rule
-   * @param H Encoding matrix; read only, not modified here
-   */
-
-  inline static void Update(const arma::mat& V,
-                     arma::mat& W, 
-                     const arma::mat& H)
-  {
-    // Simple implementation. This can be left here.
-    arma::mat t1,t2;
-    
-    t1 = V*H.t();
-    t2 = W*H*H.t();
-    
-    W = (W%t1)/t2;
-  }
-}; // Class MultiplicativeDistanceW
-
-/**
- * The update rule for the encoding matrix H. The formula used is
- * \f[
- * H_{a\mu} \leftarrow H_{a\mu} \frac{(W^T V)_{a\mu}}{(W^T WH)_{a\mu}}
- * \f]
- */
-class MultiplicativeDistanceH
-{
- public:
-  // Empty constructor required for the HUpdateRule template
-  MultiplicativeDistanceH() { }
-
-  /**
-   * The update function that actually updates the H matrix. The function takes
-   * in all the salient matrices and only changes the value of the H matrix.
-   *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix; read only, not modified here
-   * @param H Encoding matrix; updated in place by this rule
-   */
-
-  inline static void Update(const arma::mat& V,
-                     const arma::mat& W, 
-                     arma::mat& H)
-  {
-    // Simple implementation. This can be left here.
-    arma::mat t1,t2;
-    
-    t1 = W.t()*V;
-    t2 = W.t()*W*H;
-
-    H = (H%t1)/t2;
-  }
-}; // Class MultiplicativeDistanceH
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif

Deleted: mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp	2012-06-13 16:20:50 UTC (rev 13049)
+++ mlpack/trunk/src/mlpack/methods/pca/mdivupdate.hpp	2012-06-13 16:20:58 UTC (rev 13050)
@@ -1,112 +0,0 @@
-/**
- * @file mdivupdate.hpp
- * @author Mohan Rajendran
- *
- * Update rules for the Non-negative Matrix Factorization. This follows a method
- * described in the paper 'Algorithms for Non-negative Matrix Factorization' 
- * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
- * that the 'divergence'
- * \f$ \sum_i \sum_j (V_{ij} \log\frac{V_{ij}}{(WH)_{ij}}-V_{ij}+(WH)_{ij}) \f$ is
- * non-increasing between subsequent iterations. Both of the update rules
- * for W and H are defined in this file.
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_MDIVUPDATE_HPP
-#define __MLPACK_METHODS_NMF_MDIVUPDATE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * The update rule for the basis matrix W. The formula used is 
- * \f[ 
- * W_{ia} \leftarrow W_{ia} \frac{\sum_{\mu} H_{a\mu} V_{i\mu}/(WH)_{i\mu}}
- * {\sum_{\nu} H_{a\nu}}
- * \f]
- */
-class MultiplicativeDistanceW
-{
- public:
-  // Empty constructor required for the WUpdateRule template
-  MultiplicativeDivergenceW() { }
-
-  /**
-   * The update function that actually updates the W matrix. The function takes
-   * in all the salient matrices and only changes the value of the W matrix.
-   *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix; updated in place by this rule
-   * @param H Encoding matrix; read only, not modified here
-   */
-
-  inline static void Update(const arma::mat& V,
-                     arma::mat& W, 
-                     const arma::mat& H)
-  {
-    // Simple implementation. This can be left here.
-    arma::mat t1;
-    arma::rowvec t2;
-
-    t1 = W*H;
-    for(size_t i=0;i<W.n_rows;i++)
-    {
-      for(size_t j=0;j<W.n_cols;j++)
-      {
-        t2 = H.row(j)%V.row(i)/t1.row(i);
-        W(i,j) = W(i,j)*sum(t2)/sum(H.row(i));
-      }
-    }
-
-  }
-}; // Class MultiplicativeDivergenceW
-
-/**
- * The update rule for the encoding matrix H. The formula used is
- * \f[
- * H_{a\mu} \leftarrow H_{a\mu} \frac{\sum_{i} W_{ia} V_{i\mu}/(WH)_{i\mu}}
- * {\sum_{k} H_{ka}}
- * \f]
- */
-class MultiplicativeDivergenceH
-{
- public:
-  // Empty constructor required for the HUpdateRule template
-  MultiplicativeDistanceH() { }
-
-  /**
-   * The update function that actually updates the H matrix. The function takes
-   * in all the salient matrices and only changes the value of the H matrix.
-   *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix; read only, not modified here
-   * @param H Encoding matrix; updated in place by this rule
-   */
-
-  inline static void Update(const arma::mat& V,
-                     const arma::mat& W, 
-                     arma::mat& H)
-  {
-    // Simple implementation. This can be left here.
-    arma::mat t1;
-    arma::colvec t2;
-    
-    t1 = W*H;
-    for(size_t i=0;i<H.n_rows;i++)
-    {
-      for(size_t j=0;j<H.n_cols;j++)
-      {
-        t2 = W.col(i)%V.col(j)/t1.col(j);
-        H(i,j) = H(i,j)*sum(t2)/sum(H.col(i));
-      }
-    }
-
-  }
-}; // Class MultiplicativeDivergenceH
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif

Deleted: mlpack/trunk/src/mlpack/methods/pca/nmf.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/nmf.hpp	2012-06-13 16:20:50 UTC (rev 13049)
+++ mlpack/trunk/src/mlpack/methods/pca/nmf.hpp	2012-06-13 16:20:58 UTC (rev 13050)
@@ -1,101 +0,0 @@
-/**
- * @file nmf.hpp
- * @author Mohan Rajendran
- *
- * Defines the NMF class to perform Non-negative Matrix Factorization 
- * on the given matrix.
- */
-#ifndef __MLPACK_METHODS_NMF_NMF_HPP
-#define __MLPACK_METHODS_NMF_NMF_HPP
-
-#include <mlpack/core.hpp>
-#include "mdistupdate.hpp"
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * This class implements the NMF on the given matrix V. Non-negative Matrix 
- * Factorization decomposes V in the form \f$ V \approx WH \f$ where W is 
- * called the basis matrix and H is called the encoding matrix. V is taken 
- * to be of size n*m and the obtained W is n*r and H is r*m. The size r is 
- * called the rank of the factorization.
- * 
- * The implementation requires the supply of two templates. One for the update
- * rule for updating the W matrix during each iteration and another rule for
- * updating the H matrix during each iteration. This allows the user to
- * try out various update rules for performing the factorization.
- *
- * A simple example of how to run NMF is shown below.
- *
- * @code
- * extern arma::mat V; // Matrix that we want to perform NMF on.
- * size_t r = 10; // Rank of decomposition
- * arma::mat W; // Basis matrix
- * arma::mat H; // Encoding matrix
- * 
- * NMF<> nmf; // Default options
- * nmf.Apply(V,W,H,r);
- * @endcode
- *
- * @tparam WUpdateRule The update rule for calculating W matrix at each 
- * iteration; @see MultiplicativeDistanceW for an example.
- * @tparam HUpdateRule The update rule for calculating H matrix at each
- * iteration; @see MultiplicativeDistanceH for an example.
- */
-template<typename WUpdateRule = MultiplicativeDistanceW,
-         typename HUpdateRule = MultiplicativeDistanceH>
-class NMF
-{
- public:
-  /**
-   * Create the NMF object and (optionally) set the parameters which NMF will
-   * run with. This implementation allows us to use different update rules for
-   * updating the basis and encoding matrices over each iteration.
-   * 
-   * @param maxIterations Maximum number of iterations allowed before giving up
-   * @param maxResidue The maximum root mean square of the difference between
-   *    the product WH of two subsequent iterations at which to terminate.
-   *    A low residual value denotes that subsequent iterations are not
-   *    producing much different values of W and H. Once the difference goes
-   *    below the supplied value, the iteration terminates.
-   * @param WUpdate Optional WUpdateRule object; for when the update rule for
-   *    the W vector has states that it needs to store.
-   * @param HUpdate Optional HUpdateRule object; for when the update rule for
-   *    the H vector has states that it needs to store.
-   */
-  NMF(const size_t maxIterations = 1000,
-      const double maxResidue = 1e-10,
-      const WUpdateRule WUpdate = WUpdateRule(),
-      const HUpdateRule HUpdate = HUpdateRule());
-
-  /**
-   * Apply the Non-Negative Matrix Factorization on the provided matrix.
-   *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix to be output
-   * @param H Encoding matrix to output
-   * @param r Rank r of the factorization
-   */
-  void Apply(const arma::mat& V, arma::mat& W, arma::mat& H,
-              size_t& r) const;
-
-  private:
-  //! The  maximum number of iterations allowed before giving up
-  size_t maxIterations;
-  //! The maximum residue below which iteration is considered converged
-  double maxResidue;
-  //! Instantiated W Update Rule
-  WUpdateRule WUpdate;
-  //! Instantiated H Update Rule
-  HUpdateRule HUpdate;
-
-}; // class NMF
-
-}; // namespace nmf
-}; // namespace mlpack
-
-// Include implementation.
-#include "nmf_impl.hpp"
-
-#endif

Deleted: mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp	2012-06-13 16:20:50 UTC (rev 13049)
+++ mlpack/trunk/src/mlpack/methods/pca/nmf_impl.hpp	2012-06-13 16:20:58 UTC (rev 13050)
@@ -1,89 +0,0 @@
-/**
- * @file nmf_impl.hpp
- * @author Mohan Rajendran
- *
- * Implementation of NMF class to perform Non-Negative Matrix Factorization
- * on the given matrix.
- */
-#include "nmf.hpp"
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * Construct the NMF object.
- */
-template<typename WUpdateRule,
-         typename HUpdateRule>
-NMF<WUpdateRule,
-    HUpdateRule>::
-NMF(const size_t maxIterations,
-      const double maxResidue,
-      const WUpdateRule WUpdate,
-      const HUpdateRule HUpdate) :
-    maxIterations(maxIterations),
-    maxResidue(maxResidue),
-    WUpdate(WUpdate),
-    HUpdate(HUpdate)
-{
-  if (maxResidue < 0.0)
-  {
-    Log::Warn << "NMF::NMF(): maxResidue must be a positive value ("
-        << maxResidue << " given). Setting to the default value of "
-        << "1e-10.\n";
-    this->maxResidue = 1e-10;
-  } 
-}
-
-/**
- * Apply the Non-Negative Matrix Factorization on the provided matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- * @param r Rank r of the factorization
- */
-template<typename WUpdateRule,
-         typename HUpdateRule>
-void NMF<WUpdateRule,
-    HUpdateRule>::
-Apply(const arma::mat& V, arma::mat& W, arma::mat& H, size_t& r) const
-{
-  size_t n = V.n_rows;
-  size_t m = V.n_cols;
-  // old and new product WH for residue checking
-  arma::mat WHold,WH,diff;
-  
-  // Allocate random values to the starting iteration
-  W.randu(n,r);
-  H.randu(r,m);
-  // Store the original calculated value for residue checking
-  WHold = W*H;
-  
-  size_t iteration = 0;
-  double residue;
-  double sqrRes = maxResidue*maxResidue;
-
-  do
-  {
-    // Update step.
-    // Update the value of W and H based on the Update Rules provided
-    WUpdate.Update(V,W,H);
-    HUpdate.Update(V,W,H);
-
-    // Calculate square of residue after iteration
-    WH = W*H;
-    diff = WHold-WH;
-    diff = diff%diff;
-    residue = accu(diff)/(double)(n*m);
-    WHold = WH;
-
-    iteration++;
-  
-  } while (residue >= sqrRes  && iteration != maxIterations);
-
-  Log::Debug << "Iterations: " << iteration << std::endl;
-}
-
-}; // namespace nmf
-}; // namespace mlpack

Deleted: mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp	2012-06-13 16:20:50 UTC (rev 13049)
+++ mlpack/trunk/src/mlpack/methods/pca/nmf_main.cpp	2012-06-13 16:20:58 UTC (rev 13050)
@@ -1,65 +0,0 @@
-/**
- * @file nmf_main.cpp
- * @author Mohan Rajendran
- *
- * Main executable to run NMF.
- */
-#include <mlpack/core.hpp>
-
-#include "nmf.hpp"
-
-using namespace mlpack;
-using namespace mlpack::nmf;
-using namespace std;
-
-// Document program.
-PROGRAM_INFO("Non-negative Matrix Factorization", "This program performs the "
-    "non-negative matrix factorization on the given vector. It will store the "
-    "calculated factors in the reference matrix arguments supplied.");
-
-// Parameters for program.
-PARAM_STRING_REQ("input_file", "Input matrix to perform NMF on.", "i");
-PARAM_STRING_REQ("W_output_file", "File to save the calculated W matrix to.",
-    "w");
-PARAM_STRING_REQ("H_output_file", "File to save the calculated H matrix to.",
-    "h");
-PARAM_INT_REQ("rank", "Rank of the factorization.", "r");
-PARAM_INT("max_iterations", "Number of iterations before NMF terminates", 
-    "m", 1000);
-PARAM_DOUBLE("max_residue", "The maximum root mean square allowed below which "
-    "the program termiates", "e", 1e-10);
-
-int main(int argc, char** argv)
-{
-  // Parse commandline.
-  CLI::ParseCommandLine(argc, argv);
-
-  // Load input dataset.
-  string inputFile = CLI::GetParam<string>("input_file");
-  arma::mat V;
-  data::Load(inputFile.c_str(), V);
-  arma::mat W;
-  arma::mat H;
-
-  // Find out the rank of the factorization.
-  size_t r = CLI::GetParam<int>("rank");
-  if (r<1)
-  {
-    Log::Fatal << "The rank of the factorization cannot be less than 1. "
-          << std::endl;
-  }
-  
-  size_t maxiterations = CLI::GetParam<int>("max_iterations");
-  double maxresidue = CLI::GetParam<double>("max_residue");
-
-  // Perform NMF. 
-  NMF<> nmf(maxiterations,maxresidue);
-  Log::Info << "Performing NMF on the given matrix..." << endl;
-  nmf.Apply(V,W,H,r);
-
-  // Save results
-  string outputFile = CLI::GetParam<string>("W_output_file");
-  data::Save(outputFile, W);
-  outputFile = CLI::GetParam<string>("H_output_file");
-  data::Save(outputFile, H);
-}