[mlpack-svn] r13323 - mlpack/trunk/src/mlpack/methods/nmf

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Aug 2 22:13:51 EDT 2012


Author: rcurtin
Date: 2012-08-02 22:13:51 -0400 (Thu, 02 Aug 2012)
New Revision: 13323

Added:
   mlpack/trunk/src/mlpack/methods/nmf/als_update_rules.hpp
   mlpack/trunk/src/mlpack/methods/nmf/mult_dist_update_rules.hpp
   mlpack/trunk/src/mlpack/methods/nmf/mult_div_update_rules.hpp
   mlpack/trunk/src/mlpack/methods/nmf/random_init.hpp
Removed:
   mlpack/trunk/src/mlpack/methods/nmf/alsupdate.hpp
   mlpack/trunk/src/mlpack/methods/nmf/mdistupdate.hpp
   mlpack/trunk/src/mlpack/methods/nmf/mdivupdate.hpp
   mlpack/trunk/src/mlpack/methods/nmf/randominit.hpp
Modified:
   mlpack/trunk/src/mlpack/methods/nmf/CMakeLists.txt
   mlpack/trunk/src/mlpack/methods/nmf/nmf.hpp
   mlpack/trunk/src/mlpack/methods/nmf/nmf_impl.hpp
   mlpack/trunk/src/mlpack/methods/nmf/nmf_main.cpp
Log:
Rename various parts of NMF to different names which line up more with the style
guide; clean up main executable documentation and allow the choice between
different update rules.


Modified: mlpack/trunk/src/mlpack/methods/nmf/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/CMakeLists.txt	2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/CMakeLists.txt	2012-08-03 02:13:51 UTC (rev 13323)
@@ -3,10 +3,10 @@
 # Define the files we need to compile
 # Anything not in this list will not be compiled into MLPACK.
 set(SOURCES
-  mdistupdate.hpp
-  mdivupdate.hpp
-  alsupdate.hpp
-  randominit.hpp
+  mult_dist_update_rules.hpp
+  mult_div_update_rules.hpp
+  als_update_rules.hpp
+  random_init.hpp
 #  randomacolinit.hpp
   nmf.hpp
   nmf_impl.hpp

Copied: mlpack/trunk/src/mlpack/methods/nmf/als_update_rules.hpp (from rev 13215, mlpack/trunk/src/mlpack/methods/nmf/alsupdate.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/als_update_rules.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/nmf/als_update_rules.hpp	2012-08-03 02:13:51 UTC (rev 13323)
@@ -0,0 +1,98 @@
+/**
+ * @file als_update_rules.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * titled 'Alternating Least Squares' described in the paper 'Positive Matrix
+ * Factorization: A Non-negative Factor Model with Optimal Utilization of
+ * Error Estimates of Data Values' by P. Paatero and U. Tapper. It uses least
+ * squares projection formula to reduce the error value of
+ * \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ by alternately calculating W and H
+ * respectively while holding the other matrix constant.
+ */
+#ifndef __MLPACK_METHODS_NMF_ALS_UPDATE_RULES_HPP
+#define __MLPACK_METHODS_NMF_ALS_UPDATE_RULES_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * The update rule for the basis matrix W. The formula used is
+ * \f[
+ * W^T = \frac{HV^T}{HH^T}
+ * \f]
+ */
+class WAlternatingLeastSquaresRule
+{
+ public:
+  // Empty constructor required for the WUpdateRule template.
+  WAlternatingLeastSquaresRule() { }
+
+  /**
+   * The update function that actually updates the W matrix. The function takes
+   * in all the matrices and only changes the value of the W matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be updated.
+   * @param H Encoding matrix.
+   */
+  inline static void Update(const arma::mat& /* V */,
+                            arma::mat& W,
+                            const arma::mat& H)
+  {
+    W = (inv(H * H.t()) * H * H.t()).t();
+
+    // Set all negative numbers to 0.
+    for (size_t i = 0; i < W.n_elem; i++)
+    {
+      if (W(i) < 0.0)
+      {
+        W(i) = 0.0;
+      }
+    }
+  }
+};
+
+/**
+ * The update rule for the encoding matrix H. The formula used is
+ * \f[
+ * H = \frac{W^TV}{W^TW}
+ * \f]
+ */
+class HAlternatingLeastSquaresRule
+{
+ public:
+  // Empty constructor required for the HUpdateRule template.
+  HAlternatingLeastSquaresRule() { }
+
+  /**
+   * The update function that actually updates the H matrix. The function takes
+   * in all the matrices and only changes the value of the H matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix.
+   * @param H Encoding matrix to be updated.
+   */
+  inline static void Update(const arma::mat& V,
+                            const arma::mat& W,
+                            arma::mat& H)
+  {
+    H = inv(W.t() * W) * W.t() * V;
+
+    // Set all negative numbers to 0.
+    for (size_t i = 0; i < H.n_elem; i++)
+    {
+      if (H(i) < 0.0)
+      {
+        H(i) = 0.0;
+      }
+    }
+  }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif

Deleted: mlpack/trunk/src/mlpack/methods/nmf/alsupdate.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/alsupdate.hpp	2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/alsupdate.hpp	2012-08-03 02:13:51 UTC (rev 13323)
@@ -1,104 +0,0 @@
-/**
- * @file alsupdate.hpp
- * @author Mohan Rajendran
- *
- * Update rules for the Non-negative Matrix Factorization. This follows a method
- * titled 'Alternating Least Squares' describes in the paper 'Positive Matrix
- * Factorization: A Non-negative Factor Model with Optimal Utilization of 
- * Error Estimates of Data Values' by P. Paatero and U. Tapper. It uses least 
- * squares projection formula to reduce the error value of 
- * \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ by alternately calculating W and H
- * respectively while holding the other matrix constant.
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_ALSUPDATE_HPP
-#define __MLPACK_METHODS_NMF_ALSUPDATE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * The update rule for the basis matrix W. The formula used is 
- * \f[ 
- * W^T = \frac{HV^T}{HH^T}
- * \f]
- */
-class AlternatingLeastSquareW
-{
- public:
-  // Empty constructor required for the WUpdateRule template
-  AlternatingLeastSquareW() { }
-
-  /**
-   * The update function that actually updates the W matrix. The function takes
-   * in all the salient matrices and only changes the value of the W matrix.
-   *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix to be output
-   * @param H Encoding matrix to output
-   */
-
-  inline static void Update(const arma::mat& V,
-                     arma::mat& W, 
-                     const arma::mat& H)
-  {
-    // Simple implementation. This can be left here.
-    W = (inv(H*H.t())*H*H.t()).t();
-    
-    // Set all negative numbers to machine epsilon
-    for(size_t i=0;i<W.n_rows*W.n_cols;i++)
-    {
-      if(W(i) < 0.0)
-      {
-        W(i) = eps(W);
-      }
-    }
-  }
-}; // Class AlternatingLeastSquareW
-
-/**
- * The update rule for the encoding matrix H. The formula used is
- * \f[
- * H = \frac{W^TV}{W^TW}
- * \f]
- */
-class AlternatingLeastSquareH
-{
- public:
-  // Empty constructor required for the HUpdateRule template
-  AlternatingLeastSquareH() { }
-
-  /**
-   * The update function that actually updates the H matrix. The function takes
-   * in all the salient matrices and only changes the value of the H matrix.
-   *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix to be output
-   * @param H Encoding matrix to output
-   */
-
-  inline static void Update(const arma::mat& V,
-                     const arma::mat& W, 
-                     arma::mat& H)
-  {
-    // Simple implementation. This can be left here.
-    H = inv(W.t()*W)*W.t()*V;
-
-    // Set all negative numbers to machine epsilon
-    for(size_t i=0;i<H.n_rows*H.n_cols;i++)
-    {
-      if(H(i) < 0.0)
-      {
-        H(i) = eps(H);
-      }
-    }
-  }
-}; // Class AlternatingLeastSquareH
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif

Deleted: mlpack/trunk/src/mlpack/methods/nmf/mdistupdate.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/mdistupdate.hpp	2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/mdistupdate.hpp	2012-08-03 02:13:51 UTC (rev 13323)
@@ -1,95 +0,0 @@
-/**
- * @file mdistupdate.hpp
- * @author Mohan Rajendran
- *
- * Update rules for the Non-negative Matrix Factorization. This follows a method
- * described in the paper 'Algorithms for Non-negative Matrix Factorization' 
- * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
- * that the Frobenius norm \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ is
- * non-increasing between subsequent iterations. Both of the update rules
- * for W and H are defined in this file.
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_MDISTUPDATE_HPP
-#define __MLPACK_METHODS_NMF_MDISTUPDATE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * The update rule for the basis matrix W. The formula used is 
- * \f[ 
- * W_{ia} \leftarrow W_{ia} \frac{(VH^T)_{ia}}{(WHH^T)_{ia}}
- * \f]
- */
-class MultiplicativeDistanceW
-{
- public:
-  // Empty constructor required for the WUpdateRule template
-  MultiplicativeDistanceW() { }
-
-  /**
-   * The update function that actually updates the W matrix. The function takes
-   * in all the salient matrices and only changes the value of the W matrix.
-   *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix to be output
-   * @param H Encoding matrix to output
-   */
-
-  inline static void Update(const arma::mat& V,
-                     arma::mat& W, 
-                     const arma::mat& H)
-  {
-    // Simple implementation. This can be left here.
-    arma::mat t1,t2;
-    
-    t1 = V*H.t();
-    t2 = W*H*H.t();
-    
-    W = (W%t1)/t2;
-  }
-}; // Class MultiplicativeDistanceW
-
-/**
- * The update rule for the encoding matrix H. The formula used is
- * \f[
- * H_{a\mu} \leftarrow H_{a\mu} \frac{(W^T V)_{a\mu}}{(W^T WH)_{a\mu}}
- * \f]
- */
-class MultiplicativeDistanceH
-{
- public:
-  // Empty constructor required for the HUpdateRule template
-  MultiplicativeDistanceH() { }
-
-  /**
-   * The update function that actually updates the H matrix. The function takes
-   * in all the salient matrices and only changes the value of the H matrix.
-   *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix to be output
-   * @param H Encoding matrix to output
-   */
-
-  inline static void Update(const arma::mat& V,
-                     const arma::mat& W, 
-                     arma::mat& H)
-  {
-    // Simple implementation. This can be left here.
-    arma::mat t1,t2;
-    
-    t1 = W.t()*V;
-    t2 = W.t()*W*H;
-
-    H = (H%t1)/t2;
-  }
-}; // Class MultiplicativeDistanceH
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif

Deleted: mlpack/trunk/src/mlpack/methods/nmf/mdivupdate.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/mdivupdate.hpp	2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/mdivupdate.hpp	2012-08-03 02:13:51 UTC (rev 13323)
@@ -1,112 +0,0 @@
-/**
- * @file mdivupdate.hpp
- * @author Mohan Rajendran
- *
- * Update rules for the Non-negative Matrix Factorization. This follows a method
- * described in the paper 'Algorithms for Non-negative Matrix Factorization' 
- * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
- * that the the 'divergence' 
- * \f$ \sum_i \sum_j (V_{ij} log\frac{V_{ij}}{(WH)_{ij}}-V_{ij}+(WH)_{ij}) \f$is
- * non-increasing between subsequent iterations. Both of the update rules
- * for W and H are defined in this file.
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_MDIVUPDATE_HPP
-#define __MLPACK_METHODS_NMF_MDIVUPDATE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * The update rule for the basis matrix W. The formula used is 
- * \f[ 
- * W_{ia} \leftarrow W_{ia} \frac{\sum_{\mu} H_{a\mu} V_{i\mu}/(WH)_{i\mu}}
- * {\sum_{\nu} H_{a\nu}}
- * \f]
- */
-class MultiplicativeDivergenceW
-{
- public:
-  // Empty constructor required for the WUpdateRule template
-  MultiplicativeDivergenceW() { }
-
-  /**
-   * The update function that actually updates the W matrix. The function takes
-   * in all the salient matrices and only changes the value of the W matrix.
-   *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix to be output
-   * @param H Encoding matrix to output
-   */
-
-  inline static void Update(const arma::mat& V,
-                     arma::mat& W, 
-                     const arma::mat& H)
-  {
-    // Simple implementation. This can be left here.
-    arma::mat t1;
-    arma::rowvec t2;
-
-    t1 = W*H;
-    for(size_t i=0;i<W.n_rows;i++)
-    {
-      for(size_t j=0;j<W.n_cols;j++)
-      {
-        t2 = H.row(j)%V.row(i)/t1.row(i);
-        W(i,j) = W(i,j)*sum(t2)/sum(H.row(j));
-      }
-    }
-
-  }
-}; // Class MultiplicativeDivergenceW
-
-/**
- * The update rule for the encoding matrix H. The formula used is
- * \f[
- * H_{a\mu} \leftarrow H_{a\mu} \frac{\sum_{i} W_{ia} V_{i\mu}/(WH)_{i\mu}}
- * {\sum_{k} H_{ka}}
- * \f]
- */
-class MultiplicativeDivergenceH
-{
- public:
-  // Empty constructor required for the HUpdateRule template
-  MultiplicativeDivergenceH() { }
-
-  /**
-   * The update function that actually updates the H matrix. The function takes
-   * in all the salient matrices and only changes the value of the H matrix.
-   *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix to be output
-   * @param H Encoding matrix to output
-   */
-
-  inline static void Update(const arma::mat& V,
-                     const arma::mat& W, 
-                     arma::mat& H)
-  {
-    // Simple implementation. This can be left here.
-    arma::mat t1;
-    arma::colvec t2;
-    
-    t1 = W*H;
-    for(size_t i=0;i<H.n_rows;i++)
-    {
-      for(size_t j=0;j<H.n_cols;j++)
-      {
-        t2 = W.col(i)%V.col(j)/t1.col(j);
-        H(i,j) = H(i,j)*sum(t2)/sum(W.col(i));
-      }
-    }
-
-  }
-}; // Class MultiplicativeDivergenceH
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif

Copied: mlpack/trunk/src/mlpack/methods/nmf/mult_dist_update_rules.hpp (from rev 13215, mlpack/trunk/src/mlpack/methods/nmf/mdistupdate.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/mult_dist_update_rules.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/nmf/mult_dist_update_rules.hpp	2012-08-03 02:13:51 UTC (rev 13323)
@@ -0,0 +1,81 @@
+/**
+ * @file mult_dist_update_rules.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * described in the paper 'Algorithms for Non-negative Matrix Factorization'
+ * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
+ * that the Frobenius norm \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ is
+ * non-increasing between subsequent iterations. Both of the update rules
+ * for W and H are defined in this file.
+ */
+#ifndef __MLPACK_METHODS_NMF_MULT_DIST_UPDATE_RULES_HPP
+#define __MLPACK_METHODS_NMF_MULT_DIST_UPDATE_RULES_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * The update rule for the basis matrix W. The formula used is
+ * \f[
+ * W_{ia} \leftarrow W_{ia} \frac{(VH^T)_{ia}}{(WHH^T)_{ia}}
+ * \f]
+ */
+class WMultiplicativeDistanceRule
+{
+ public:
+  // Empty constructor required for the WUpdateRule template.
+  WMultiplicativeDistanceRule() { }
+
+  /**
+   * The update function that actually updates the W matrix. The function takes
+   * in all the matrices and only changes the value of the W matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be updated.
+   * @param H Encoding matrix.
+   */
+
+  inline static void Update(const arma::mat& V,
+                            arma::mat& W,
+                            const arma::mat& H)
+  {
+    W = (W % (V * H.t())) / (W * H * H.t());
+  }
+};
+
+/**
+ * The update rule for the encoding matrix H. The formula used is
+ * \f[
+ * H_{a\mu} \leftarrow H_{a\mu} \frac{(W^T V)_{a\mu}}{(W^T WH)_{a\mu}}
+ * \f]
+ */
+class HMultiplicativeDistanceRule
+{
+ public:
+  // Empty constructor required for the HUpdateRule template.
+  HMultiplicativeDistanceRule() { }
+
+  /**
+   * The update function that actually updates the H matrix. The function takes
+   * in all the matrices and only changes the value of the H matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix.
+   * @param H Encoding matrix to be updated.
+   */
+
+  inline static void Update(const arma::mat& V,
+                            const arma::mat& W,
+                            arma::mat& H)
+  {
+    H = (H % (W.t() * V)) / (W.t() * W * H);
+  }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif

Copied: mlpack/trunk/src/mlpack/methods/nmf/mult_div_update_rules.hpp (from rev 13215, mlpack/trunk/src/mlpack/methods/nmf/mdivupdate.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/mult_div_update_rules.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/nmf/mult_div_update_rules.hpp	2012-08-03 02:13:51 UTC (rev 13323)
@@ -0,0 +1,106 @@
+/**
+ * @file mult_div_update_rules.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * described in the paper 'Algorithms for Non-negative Matrix Factorization'
+ * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
+ * that the the 'divergence'
+ * \f$ \sum_i \sum_j (V_{ij} log\frac{V_{ij}}{(WH)_{ij}}-V_{ij}+(WH)_{ij}) \f$is
+ * non-increasing between subsequent iterations. Both of the update rules
+ * for W and H are defined in this file.
+ */
+#ifndef __MLPACK_METHODS_NMF_MULT_DIV_UPDATE_RULES_HPP
+#define __MLPACK_METHODS_NMF_MULT_DIV_UPDATE_RULES_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * The update rule for the basis matrix W. The formula used is
+ * \f[
+ * W_{ia} \leftarrow W_{ia} \frac{\sum_{\mu} H_{a\mu} V_{i\mu}/(WH)_{i\mu}}
+ * {\sum_{\nu} H_{a\nu}}
+ * \f]
+ */
+class WMultiplicativeDivergenceRule
+{
+ public:
+  // Empty constructor required for the WUpdateRule template.
+  WMultiplicativeDivergenceRule() { }
+
+  /**
+   * The update function that actually updates the W matrix. The function takes
+   * in all the matrices and only changes the value of the W matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be updated.
+   * @param H Encoding matrix.
+   */
+  inline static void Update(const arma::mat& V,
+                            arma::mat& W,
+                            const arma::mat& H)
+  {
+    // Simple implementation left in the header file.
+    arma::mat t1;
+    arma::rowvec t2;
+
+    t1 = W * H;
+    for (size_t i = 0; i < W.n_rows; ++i)
+    {
+      for (size_t j = 0; j < W.n_cols; ++j)
+      {
+        t2 = H.row(j) % V.row(i) / t1.row(i);
+        W(i, j) = W(i, j) * sum(t2) / sum(H.row(j));
+      }
+    }
+  }
+};
+
+/**
+ * The update rule for the encoding matrix H. The formula used is
+ * \f[
+ * H_{a\mu} \leftarrow H_{a\mu} \frac{\sum_{i} W_{ia} V_{i\mu}/(WH)_{i\mu}}
+ * {\sum_{k} H_{ka}}
+ * \f]
+ */
+class HMultiplicativeDivergenceRule
+{
+ public:
+  // Empty constructor required for the HUpdateRule template.
+  HMultiplicativeDivergenceRule() { }
+
+  /**
+   * The update function that actually updates the H matrix. The function takes
+   * in all the matrices and only changes the value of the H matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix.
+   * @param H Encoding matrix to updated.
+   */
+  inline static void Update(const arma::mat& V,
+                            const arma::mat& W,
+                            arma::mat& H)
+  {
+    // Simple implementation left in the header file.
+    arma::mat t1;
+    arma::colvec t2;
+
+    t1 = W * H;
+    for (size_t i = 0; i < H.n_rows; i++)
+    {
+      for (size_t j = 0; j < H.n_cols; j++)
+      {
+        t2 = W.col(i) % V.col(j) / t1.col(j);
+        H(i,j) = H(i,j) * sum(t2) / sum(W.col(i));
+      }
+    }
+  }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif

Modified: mlpack/trunk/src/mlpack/methods/nmf/nmf.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/nmf.hpp	2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/nmf.hpp	2012-08-03 02:13:51 UTC (rev 13323)
@@ -2,31 +2,32 @@
  * @file nmf.hpp
  * @author Mohan Rajendran
  *
- * Defines the NMF class to perform Non-negative Matrix Factorization 
+ * Defines the NMF class to perform Non-negative Matrix Factorization
  * on the given matrix.
  */
 #ifndef __MLPACK_METHODS_NMF_NMF_HPP
 #define __MLPACK_METHODS_NMF_NMF_HPP
 
 #include <mlpack/core.hpp>
-#include "mdistupdate.hpp"
-#include "randominit.hpp"
+#include "mult_dist_update_rules.hpp"
+#include "random_init.hpp"
 
 namespace mlpack {
 namespace nmf {
 
 /**
- * This class implements the NMF on the given matrix V. Non-negative Matrix 
- * Factorization decomposes V in the form \f$ V \approx WH \f$ where W is 
- * called the basis matrix and H is called the encoding matrix. V is taken 
- * to be of size n*m and the obtained W is n*r and H is r*m. The size r is 
+ * This class implements the NMF on the given matrix V. Non-negative Matrix
+ * Factorization decomposes V in the form \f$ V \approx WH \f$ where W is
+ * called the basis matrix and H is called the encoding matrix. V is taken
+ * to be of size n x m and the obtained W is n x r and H is r x m. The size r is
  * called the rank of the factorization.
- * 
- * The implementation requires the supply of two templates. One for the update
- * rule for updating the W matrix during each iteration and another rule for
- * updating the H matrix during each iteration. This allows the user to
- * try out various update rules for performing the factorization.
  *
+ * The implementation requires two template types; the first contains the update
+ * rule for the W matrix during each iteration and the other contains the update
+ * rule for the H matrix during each iteration.  This templatization allows the
+ * user to try various update rules (including ones not supplied with MLPACK)
+ * for factorization.
+ *
  * A simple example of how to run NMF is shown below.
  *
  * @code
@@ -34,69 +35,110 @@
  * size_t r = 10; // Rank of decomposition
  * arma::mat W; // Basis matrix
  * arma::mat H; // Encoding matrix
- * 
+ *
  * NMF<> nmf(); // Default options
- * nmf.Apply(V,W,H,r);
+ * nmf.Apply(V, W, H, r);
  * @endcode
  *
- * @tparam WUpdateRule The update rule for calculating W matrix at each 
- * iteration; @see MultiplicativeDistanceW for an example.
+ * For more information on non-negative matrix factorization, see the following
+ * paper:
+ *
+ * @code
+ * @article{
+ *   title = {{Learning the parts of objects by non-negative matrix
+ *       factorization}},
+ *   author = {Lee, Daniel D. and Seung, H. Sebastian},
+ *   journal = {Nature},
+ *   month = {Oct},
+ *   year = {1999},
+ *   number = {6755},
+ *   pages = {788--791},
+ *   publisher = {Nature Publishing Group},
+ *   url = {http://dx.doi.org/10.1038/44565}
+ * }
+ * @endcode
+ *
+ * @tparam WUpdateRule The update rule for calculating W matrix at each
+ *     iteration; @see MultiplicativeDistanceW for an example.
  * @tparam HUpdateRule The update rule for calculating H matrix at each
- * iteration; @see MultiplicativeDistanceH for an example.
+ *     iteration; @see MultiplicativeDistanceH for an example.
  */
-template<typename InitializeRule = RandomInitialization,
-         typename WUpdateRule = MultiplicativeDistanceW,
-         typename HUpdateRule = MultiplicativeDistanceH>
+template<typename InitializationRule = RandomInitialization,
+         typename WUpdateRule = WMultiplicativeDistanceRule,
+         typename HUpdateRule = HMultiplicativeDistanceRule>
 class NMF
 {
  public:
   /**
    * Create the NMF object and (optionally) set the parameters which NMF will
-   * run with. This implementation allows us to use different update rules for
-   * the updation of the basis and encoding matrices over each iteration.
-   * 
-   * @param maxIterations Maximum number of iterations allowed before giving up
-   * @param maxResidue The maximum root mean square of the difference between 
-   *    two subsequent iteration of product WH at which to terminate iteration. 
-   *    A low residual value denotes that subsequent iterationas are not 
-   *    producing much different values of W and H. Once the difference goes 
-   *    below the supplied value, the iteration terminates.
+   * run with.  The minimum residue refers to the root mean square of the
+   * difference between two subsequent iterations of the product W * H.  A low
+   * residue indicates that subsequent iterations are not producing much change
+   * in W and H.  Once the residue goes below the specified minimum residue, the
+   * algorithm terminates.
+   *
+   * @param maxIterations Maximum number of iterations allowed before giving up.
+   *     A value of 0 indicates no limit.
+   * @param minResidue The minimum allowed residue before the algorithm
+   *     terminates.
    * @param Initialize Optional Initialization object for initializing the
-   *    W and H matrices
+   *     W and H matrices
    * @param WUpdate Optional WUpdateRule object; for when the update rule for
-   *    the W vector has states that it needs to store.
+   *     the W vector has states that it needs to store.
    * @param HUpdate Optional HUpdateRule object; for when the update rule for
-   *    the H vector has states that it needs to store.
+   *     the H vector has states that it needs to store.
    */
   NMF(const size_t maxIterations = 10000,
-      const double maxResidue = 1e-5,
-      const InitializeRule Initialize = InitializeRule(),
-      const WUpdateRule WUpdate = WUpdateRule(),
-      const HUpdateRule HUpdate = HUpdateRule());
+      const double minResidue = 1e-5,
+      const InitializationRule initializateRule = InitializationRule(),
+      const WUpdateRule wUpdate = WUpdateRule(),
+      const HUpdateRule hUpdate = HUpdateRule());
 
   /**
-   * Apply the Non-Negative Matrix Factorization on the provided matrix.
+   * Apply Non-Negative Matrix Factorization to the provided matrix.
    *
-   * @param V Input matrix to be factorized
-   * @param W Basis matrix to be output
-   * @param H Encoding matrix to output
-   * @param r Rank r of the factorization
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be output.
+   * @param H Encoding matrix to output.
+   * @param r Rank r of the factorization.
    */
-  void Apply(const arma::mat& V, arma::mat& W, arma::mat& H,
-              size_t& r) const;
+  void Apply(const arma::mat& V, const size_t r, arma::mat& W, arma::mat& H)
+      const;
 
-  private:
-  //! The  maximum number of iterations allowed before giving up
+ private:
+  //! The maximum number of iterations allowed before giving up.
   size_t maxIterations;
-  //! The maximum residue below which iteration is considered converged
-  double maxResidue;
-  //! Instantiated W&H Initialization Rule
-  InitializeRule Initialize;
-  //! Instantiated W Update Rule
-  WUpdateRule WUpdate;
-  //! Instantiated H Update Rule
-  HUpdateRule HUpdate;
+  //! The minimum residue, below which iteration is considered converged.
+  double minResidue;
+  //! Instantiated initialization Rule.
+  InitializationRule initializeRule;
+  //! Instantiated W update rule.
+  WUpdateRule wUpdate;
+  //! Instantiated H update rule.
+  HUpdateRule hUpdate;
 
+ public:
+  //! Access the maximum number of iterations.
+  size_t MaxIterations() const { return maxIterations; }
+  //! Modify the maximum number of iterations.
+  size_t& MaxIterations() { return maxIterations; }
+  //! Access the minimum residue before termination.
+  double MinResidue() const { return minResidue; }
+  //! Modify the minimum residue before termination.
+  double& MinResidue() { return minResidue; }
+  //! Access the initialization rule.
+  const InitializationRule& InitializeRule() const { return initializeRule; }
+  //! Modify the initialization rule.
+  InitializationRule& InitializeRule() { return initializeRule; }
+  //! Access the W update rule.
+  const WUpdateRule& WUpdate() const { return wUpdate; }
+  //! Modify the W update rule.
+  WUpdateRule& WUpdate() { return wUpdate; }
+  //! Access the H update rule.
+  const HUpdateRule& HUpdate() const { return hUpdate; }
+  //! Modify the H update rule.
+  HUpdateRule& HUpdate() { return hUpdate; }
+
 }; // class NMF
 
 }; // namespace nmf

Modified: mlpack/trunk/src/mlpack/methods/nmf/nmf_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/nmf_impl.hpp	2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/nmf_impl.hpp	2012-08-03 02:13:51 UTC (rev 13323)
@@ -12,79 +12,76 @@
 /**
  * Construct the NMF object.
  */
-template<typename InitializeRule,
+template<typename InitializationRule,
          typename WUpdateRule,
          typename HUpdateRule>
-NMF<InitializeRule,
-    WUpdateRule,
-    HUpdateRule>::
-NMF(const size_t maxIterations,
-      const double maxResidue,
-      const InitializeRule Initialize,
-      const WUpdateRule WUpdate,
-      const HUpdateRule HUpdate) :
+NMF<InitializationRule, WUpdateRule, HUpdateRule>::NMF(
+    const size_t maxIterations,
+    const double minResidue,
+    const InitializationRule initializeRule,
+    const WUpdateRule wUpdate,
+    const HUpdateRule hUpdate) :
     maxIterations(maxIterations),
-    maxResidue(maxResidue),
-    Initialize(Initialize),
-    WUpdate(WUpdate),
-    HUpdate(HUpdate)
+    minResidue(minResidue),
+    initializeRule(initializeRule),
+    wUpdate(wUpdate),
+    hUpdate(hUpdate)
 {
-  if (maxResidue < 0.0)
+  if (minResidue < 0.0)
   {
-    Log::Warn << "NMF::NMF(): maxResidue must be a positive value ("
-        << maxResidue << " given). Setting to the default value of "
-        << "1e-10.\n";
-    this->maxResidue = 1e-10;
-  } 
-
-  math::RandomSeed((size_t) std::time(NULL));    
+    Log::Warn << "NMF::NMF(): minResidue must be a positive value ("
+        << minResidue << " given). Setting to the default value of 1e-10.\n";
+    this->minResidue = 1e-10;
+  }
 }
 
 /**
- * Apply the Non-Negative Matrix Factorization on the provided matrix.
+ * Apply Non-Negative Matrix Factorization to the provided matrix.
  *
  * @param V Input matrix to be factorized
  * @param W Basis matrix to be output
  * @param H Encoding matrix to output
  * @param r Rank r of the factorization
  */
-template<typename InitializeRule,
+template<typename InitializationRule,
          typename WUpdateRule,
          typename HUpdateRule>
-void NMF<InitializeRule,
-    WUpdateRule,
-    HUpdateRule>::
-Apply(const arma::mat& V, arma::mat& W, arma::mat& H, size_t& r) const
+void NMF<InitializationRule, WUpdateRule, HUpdateRule>::Apply(
+    const arma::mat& V,
+    const size_t r,
+    arma::mat& W,
+    arma::mat& H) const
 {
-  size_t n = V.n_rows;
-  size_t m = V.n_cols;
+  const size_t n = V.n_rows;
+  const size_t m = V.n_cols;
 
-  // Intialize W and H
-  Initialize.Init(V,W,H,r);
+  // Initialize W and H.
+  initializeRule.Initialize(V, r, W, H);
 
-  //Log::Debug << "Initialized W and H." << std::endl;
+  Log::Info << "Initialized W and H." << std::endl;
 
-  size_t iteration = 0;
-  size_t nm = n*m;
-  double residue = maxResidue;
-  double normOld,norm;
-  arma::mat WH;    
-  
-  while (residue >= maxResidue  && iteration != maxIterations)
+  size_t iteration = 1;
+  const size_t nm = n * m;
+  double residue = minResidue;
+  double normOld;
+  double norm;
+  arma::mat WH;
+
+  while (residue >= minResidue && iteration != maxIterations)
   {
     // Update step.
     // Update the value of W and H based on the Update Rules provided
-    WUpdate.Update(V,W,H);
-    HUpdate.Update(V,W,H);
+    wUpdate.Update(V, W, H);
+    hUpdate.Update(V, W, H);
 
-    // Calculate norm of WH after each iteration
-    WH = W*H;
-    norm = sqrt(accu(WH%WH)/nm);
-    
-    if(iteration!=0)
+    // Calculate norm of WH after each iteration.
+    WH = W * H;
+    norm = sqrt(accu(WH % WH) / nm);
+
+    if (iteration != 0)
     {
-      residue = fabs(normOld-norm);
-      if(normOld > 1.0)
+      residue = fabs(normOld - norm);
+      if (normOld > 1.0)
       {
         residue /= normOld;
       }
@@ -92,21 +89,14 @@
 
     normOld = norm;
 
-    /*
-      WH = W*H;
-      diff = WHold-WH;
-    diff = diff%diff;
-    residue = accu(diff)/(double)(n*m);
-    WHold = WH;*/
+    Log::Debug << "NMF iteration " << iteration << ": residue "
+        << sqrt(residue) << std::endl;
 
-    //Log::Debug << "Iteration: " << iteration << " Residue: " 
-    //      << sqrt(residue) << std::endl;
-
     iteration++;
-      
   }
 
-  //Log::Debug << "Iterations: " << iteration << std::endl;
+  Log::Info << "NMF converged to residue of " << sqrt(residue) << " in "
+      << iteration << " iterations." << std::endl;
 }
 
 }; // namespace nmf

Modified: mlpack/trunk/src/mlpack/methods/nmf/nmf_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/nmf_main.cpp	2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/nmf_main.cpp	2012-08-03 02:13:51 UTC (rev 13323)
@@ -8,31 +8,58 @@
 
 #include "nmf.hpp"
 
+#include "random_init.hpp"
+#include "mult_dist_update_rules.hpp"
+#include "mult_div_update_rules.hpp"
+#include "als_update_rules.hpp"
+
 using namespace mlpack;
 using namespace mlpack::nmf;
 using namespace std;
 
 // Document program.
-PROGRAM_INFO("Non-negative Matrix Factorization", "This program performs the "
-    "non-negative matrix factorization on the given vector. It will store the "
-    "calculated factors in the reference matrix arguments supplied.");
+PROGRAM_INFO("Non-negative Matrix Factorization", "This program performs "
+    "non-negative matrix factorization on the given dataset, storing the "
+    "resulting decomposed matrices in the specified files.  For an input "
+    "dataset V, NMF decomposes V into two matrices W and H such that "
+    "\n\n"
+    "V = W * H"
+    "\n\n"
+    "where all elements in W and H are non-negative.  If V is of size (n x m),"
+    " then W will be of size (n x r) and H will be of size (r x m), where r is "
+    "the rank of the factorization (specified by --rank)."
+    "\n\n"
+    "Optionally, the desired update rules for each NMF iteration can be chosen "
+    "from the following list:"
+    "\n\n"
+    " - multdist: multiplicative distance-based update rules (Lee and Seung "
+    "1999)\n"
+    " - multdiv: multiplicative divergence-based update rules (Lee and Seung "
+    "1999)\n"
+    " - als: alternating least squares update rules (Paatero and Tapper 1994)"
+    "\n\n"
+    "The maximum number of iterations is specified with --max_iterations, and "
+    "the minimum residue required for algorithm termination is specified with "
+    "--min_residue.");
 
 // Parameters for program.
-PARAM_STRING_REQ("input_file", "Input matrix to perform NMF on.", "i");
-PARAM_STRING_REQ("W_output_file", "File to save the calculated W matrix to.",
-    "w");
-PARAM_STRING_REQ("H_output_file", "File to save the calculated H matrix to.",
-    "h");
+PARAM_STRING_REQ("input_file", "Input dataset to perform NMF on.", "i");
+PARAM_STRING_REQ("w_file", "File to save the calculated W matrix to.", "w");
+PARAM_STRING_REQ("h_file", "File to save the calculated H matrix to.", "h");
 PARAM_INT_REQ("rank", "Rank of the factorization.", "r");
-PARAM_INT("max_iterations", "Number of iterations before NMF terminates", 
-    "m", 10000);
+
+PARAM_INT("max_iterations", "Number of iterations before NMF terminates (0 runs"
+    " until convergence.", "m", 10000);
 PARAM_INT("seed", "Random seed.  If 0, 'std::time(NULL)' is used.", "s", 0);
-PARAM_DOUBLE("max_residue", "The maximum root mean square allowed below which "
-    "the program termiates", "e", 1e-5);
+PARAM_DOUBLE("min_residue", "The minimum root mean square residue allowed for "
+    "each iteration, below which the program terminates.", "e", 1e-5);
 
+PARAM_STRING("update_rules", "Update rules for each iteration; ( multdist | "
+    "multdiv | als ).", "u", "multdist");
+
 int main(int argc, char** argv)
 {
-  // Parse commandline.
+  // Parse command line.
   CLI::ParseCommandLine(argc, argv);
 
   // Initialize random seed.
@@ -41,32 +68,65 @@
   else
     math::RandomSeed((size_t) std::time(NULL));
 
+  // Gather parameters.
+  const string inputFile = CLI::GetParam<string>("input_file");
+  const string hOutputFile = CLI::GetParam<string>("h_file");
+  const string wOutputFile = CLI::GetParam<string>("w_file");
+  const size_t r = CLI::GetParam<int>("rank");
+  const size_t maxIterations = CLI::GetParam<int>("max_iterations");
+  const double minResidue = CLI::GetParam<double>("min_residue");
+  const string updateRules = CLI::GetParam<string>("update_rules");
+
+  // Validate rank.
+  if (r < 1)
+  {
+    Log::Fatal << "The rank of the factorization cannot be less than 1."
+        << std::endl;
+  }
+
+  if ((updateRules != "multdist") &&
+      (updateRules != "multdiv") &&
+      (updateRules != "als"))
+  {
+    Log::Fatal << "Invalid update rules ('" << updateRules << "'); must be '"
+        << "multdist', 'multdiv', or 'als'." << std::endl;
+  }
+
   // Load input dataset.
-  string inputFile = CLI::GetParam<string>("input_file");
   arma::mat V;
-  data::Load(inputFile.c_str(), V);
+  data::Load(inputFile, V, true);
+
   arma::mat W;
   arma::mat H;
 
-  // Find out the rank of the factorization.
-  size_t r = CLI::GetParam<int>("rank");
-  if (r<1)
+  // Perform NMF with the specified update rules.
+  if (updateRules == "multdist")
   {
-    Log::Fatal << "The rank of the factorization cannot be less than 1. "
-          << std::endl;
+    Log::Info << "Performing NMF with multiplicative distance-based update "
+        << "rules." << std::endl;
+    NMF<> nmf(maxIterations, minResidue);
+    nmf.Apply(V, r, W, H);
   }
-  
-  size_t maxiterations = CLI::GetParam<int>("max_iterations");
-  double maxresidue = CLI::GetParam<double>("max_residue");
+  else if (updateRules == "multdiv")
+  {
+    Log::Info << "Performing NMF with multiplicative divergence-based update "
+        << "rules." << std::endl;
+    NMF<RandomInitialization,
+        WMultiplicativeDivergenceRule,
+        HMultiplicativeDivergenceRule> nmf(maxIterations, minResidue);
+    nmf.Apply(V, r, W, H);
+  }
+  else if (updateRules == "als")
+  {
+    Log::Info << "Performing NMF with alternating least squared update rules."
+        << std::endl;
+    NMF<RandomInitialization,
+        WAlternatingLeastSquaresRule,
+        HAlternatingLeastSquaresRule> nmf(maxIterations, minResidue);
+    nmf.Apply(V, r, W, H);
+  }
 
-  // Perform NMF. 
-  NMF<> nmf(maxiterations,maxresidue);
-  Log::Info << "Performing NMF on the given matrix..." << endl;
-  nmf.Apply(V,W,H,r);
-
-  // Save results
-  string outputFile = CLI::GetParam<string>("W_output_file");
-  data::Save(outputFile, W);
-  outputFile = CLI::GetParam<string>("H_output_file");
-  data::Save(outputFile, H);
+  // Save results.
+  data::Save(wOutputFile, W, false);
+  data::Save(hOutputFile, H, false);
 }

Copied: mlpack/trunk/src/mlpack/methods/nmf/random_init.hpp (from rev 13215, mlpack/trunk/src/mlpack/methods/nmf/randominit.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/random_init.hpp	                        (rev 0)
+++ mlpack/trunk/src/mlpack/methods/nmf/random_init.hpp	2012-08-03 02:13:51 UTC (rev 13323)
@@ -0,0 +1,40 @@
+/**
+ * @file randominit.hpp
+ * @author Mohan Rajendran
+ *
+ * Intialization rule for Non-Negative Matrix Factorization (NMF). This simple
+ * initialization is performed by assigning a random matrix to W and H.
+ */
+#ifndef __MLPACK_METHODS_NMF_RANDOM_INIT_HPP
+#define __MLPACK_METHODS_NMF_RANDOM_INIT_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+class RandomInitialization
+{
+ public:
+  // Empty constructor required for the InitializeRule template
+  RandomInitialization() { }
+
+  inline static void Initialize(const arma::mat& V,
+                                const size_t r,
+                                arma::mat& W,
+                                arma::mat& H)
+  {
+    // Simple implementation (left in the header file due to its simplicity).
+    size_t n = V.n_rows;
+    size_t m = V.n_cols;
+
+    // Intialize to random values.
+    W.randu(n, r);
+    H.randu(r, m);
+  }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif

Deleted: mlpack/trunk/src/mlpack/methods/nmf/randominit.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/randominit.hpp	2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/randominit.hpp	2012-08-03 02:13:51 UTC (rev 13323)
@@ -1,44 +0,0 @@
-/**
- * @file randominit.hpp
- * @author Mohan Rajendran
- *
- * Intialization rule for the Non-negative Matrix Factorization. This simple
- * initialization is performed by assigning a random matrix to W and H
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_RANDOMINIT_HPP
-#define __MLPACK_METHODS_NMF_RANDOMINIT_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-class RandomInitialization
-{
- public:
-  // Empty constructor required for the InitializeRule template
-  RandomInitialization() { }
-
-  inline static void Init(const arma::mat& V,
-                     arma::mat& W,
-                     arma::mat& H,
-                     const size_t& r)
-  {
-    // Simple inplementation. This can be left here.
-
-    size_t n = V.n_rows;
-    size_t m = V.n_cols;
-  
-    // Intialize to random values
-    W.randu(n,r);
-    H.randu(r,m);
-  }
-  
-}; // Class RandomInitialization
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif




More information about the mlpack-svn mailing list