[mlpack-svn] r13323 - mlpack/trunk/src/mlpack/methods/nmf
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Aug 2 22:13:51 EDT 2012
Author: rcurtin
Date: 2012-08-02 22:13:51 -0400 (Thu, 02 Aug 2012)
New Revision: 13323
Added:
mlpack/trunk/src/mlpack/methods/nmf/als_update_rules.hpp
mlpack/trunk/src/mlpack/methods/nmf/mult_dist_update_rules.hpp
mlpack/trunk/src/mlpack/methods/nmf/mult_div_update_rules.hpp
mlpack/trunk/src/mlpack/methods/nmf/random_init.hpp
Removed:
mlpack/trunk/src/mlpack/methods/nmf/alsupdate.hpp
mlpack/trunk/src/mlpack/methods/nmf/mdistupdate.hpp
mlpack/trunk/src/mlpack/methods/nmf/mdivupdate.hpp
mlpack/trunk/src/mlpack/methods/nmf/randominit.hpp
Modified:
mlpack/trunk/src/mlpack/methods/nmf/CMakeLists.txt
mlpack/trunk/src/mlpack/methods/nmf/nmf.hpp
mlpack/trunk/src/mlpack/methods/nmf/nmf_impl.hpp
mlpack/trunk/src/mlpack/methods/nmf/nmf_main.cpp
Log:
Rename various parts of NMF to different names which line up more with the style
guide; clean up main executable documentation and allow the choice between
different update rules.
Modified: mlpack/trunk/src/mlpack/methods/nmf/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/CMakeLists.txt 2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/CMakeLists.txt 2012-08-03 02:13:51 UTC (rev 13323)
@@ -3,10 +3,10 @@
# Define the files we need to compile
# Anything not in this list will not be compiled into MLPACK.
set(SOURCES
- mdistupdate.hpp
- mdivupdate.hpp
- alsupdate.hpp
- randominit.hpp
+ mult_dist_update_rules.hpp
+ mult_div_update_rules.hpp
+ als_update_rules.hpp
+ random_init.hpp
# randomacolinit.hpp
nmf.hpp
nmf_impl.hpp
Copied: mlpack/trunk/src/mlpack/methods/nmf/als_update_rules.hpp (from rev 13215, mlpack/trunk/src/mlpack/methods/nmf/alsupdate.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/als_update_rules.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/nmf/als_update_rules.hpp 2012-08-03 02:13:51 UTC (rev 13323)
@@ -0,0 +1,98 @@
+/**
+ * @file als_update_rules.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * titled 'Alternating Least Squares' described in the paper 'Positive Matrix
+ * Factorization: A Non-negative Factor Model with Optimal Utilization of
+ * Error Estimates of Data Values' by P. Paatero and U. Tapper. It uses least
+ * squares projection formula to reduce the error value of
+ * \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ by alternately calculating W and H
+ * respectively while holding the other matrix constant.
+ */
+#ifndef __MLPACK_METHODS_NMF_ALS_UPDATE_RULES_HPP
+#define __MLPACK_METHODS_NMF_ALS_UPDATE_RULES_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * The update rule for the basis matrix W. The formula used is
+ * \f[
+ * W^T = \frac{HV^T}{HH^T}
+ * \f]
+ */
+class WAlternatingLeastSquaresRule
+{
+ public:
+ // Empty constructor required for the WUpdateRule template.
+ WAlternatingLeastSquaresRule() { }
+
+ /**
+ * The update function that actually updates the W matrix. The function takes
+ * in all the matrices and only changes the value of the W matrix.
+ *
+ * @param V Input matrix to be factorized.
+ * @param W Basis matrix to be updated.
+ * @param H Encoding matrix.
+ */
+ inline static void Update(const arma::mat& /* V */,
+ arma::mat& W,
+ const arma::mat& H)
+ {
+ W = (inv(H * H.t()) * H * H.t()).t();
+
+ // Set all negative numbers to 0.
+ for (size_t i = 0; i < W.n_elem; i++)
+ {
+ if (W(i) < 0.0)
+ {
+ W(i) = 0.0;
+ }
+ }
+ }
+};
+
+/**
+ * The update rule for the encoding matrix H. The formula used is
+ * \f[
+ * H = \frac{W^TV}{W^TW}
+ * \f]
+ */
+class HAlternatingLeastSquaresRule
+{
+ public:
+ // Empty constructor required for the HUpdateRule template.
+ HAlternatingLeastSquaresRule() { }
+
+ /**
+ * The update function that actually updates the H matrix. The function takes
+ * in all the matrices and only changes the value of the H matrix.
+ *
+ * @param V Input matrix to be factorized.
+ * @param W Basis matrix.
+ * @param H Encoding matrix to be updated.
+ */
+ inline static void Update(const arma::mat& V,
+ const arma::mat& W,
+ arma::mat& H)
+ {
+ H = inv(W.t() * W) * W.t() * V;
+
+ // Set all negative numbers to 0.
+ for (size_t i = 0; i < H.n_elem; i++)
+ {
+ if (H(i) < 0.0)
+ {
+ H(i) = 0.0;
+ }
+ }
+ }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif
Deleted: mlpack/trunk/src/mlpack/methods/nmf/alsupdate.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/alsupdate.hpp 2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/alsupdate.hpp 2012-08-03 02:13:51 UTC (rev 13323)
@@ -1,104 +0,0 @@
-/**
- * @file alsupdate.hpp
- * @author Mohan Rajendran
- *
- * Update rules for the Non-negative Matrix Factorization. This follows a method
- * titled 'Alternating Least Squares' describes in the paper 'Positive Matrix
- * Factorization: A Non-negative Factor Model with Optimal Utilization of
- * Error Estimates of Data Values' by P. Paatero and U. Tapper. It uses least
- * squares projection formula to reduce the error value of
- * \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ by alternately calculating W and H
- * respectively while holding the other matrix constant.
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_ALSUPDATE_HPP
-#define __MLPACK_METHODS_NMF_ALSUPDATE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * The update rule for the basis matrix W. The formula used is
- * \f[
- * W^T = \frac{HV^T}{HH^T}
- * \f]
- */
-class AlternatingLeastSquareW
-{
- public:
- // Empty constructor required for the WUpdateRule template
- AlternatingLeastSquareW() { }
-
- /**
- * The update function that actually updates the W matrix. The function takes
- * in all the salient matrices and only changes the value of the W matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- */
-
- inline static void Update(const arma::mat& V,
- arma::mat& W,
- const arma::mat& H)
- {
- // Simple implementation. This can be left here.
- W = (inv(H*H.t())*H*H.t()).t();
-
- // Set all negative numbers to machine epsilon
- for(size_t i=0;i<W.n_rows*W.n_cols;i++)
- {
- if(W(i) < 0.0)
- {
- W(i) = eps(W);
- }
- }
- }
-}; // Class AlternatingLeastSquareW
-
-/**
- * The update rule for the encoding matrix H. The formula used is
- * \f[
- * H = \frac{W^TV}{W^TW}
- * \f]
- */
-class AlternatingLeastSquareH
-{
- public:
- // Empty constructor required for the HUpdateRule template
- AlternatingLeastSquareH() { }
-
- /**
- * The update function that actually updates the H matrix. The function takes
- * in all the salient matrices and only changes the value of the H matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- */
-
- inline static void Update(const arma::mat& V,
- const arma::mat& W,
- arma::mat& H)
- {
- // Simple implementation. This can be left here.
- H = inv(W.t()*W)*W.t()*V;
-
- // Set all negative numbers to machine epsilon
- for(size_t i=0;i<H.n_rows*H.n_cols;i++)
- {
- if(H(i) < 0.0)
- {
- H(i) = eps(H);
- }
- }
- }
-}; // Class AlternatingLeastSquareH
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif
Deleted: mlpack/trunk/src/mlpack/methods/nmf/mdistupdate.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/mdistupdate.hpp 2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/mdistupdate.hpp 2012-08-03 02:13:51 UTC (rev 13323)
@@ -1,95 +0,0 @@
-/**
- * @file mdistupdate.hpp
- * @author Mohan Rajendran
- *
- * Update rules for the Non-negative Matrix Factorization. This follows a method
- * described in the paper 'Algorithms for Non-negative Matrix Factorization'
- * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
- * that the Frobenius norm \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ is
- * non-increasing between subsequent iterations. Both of the update rules
- * for W and H are defined in this file.
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_MDISTUPDATE_HPP
-#define __MLPACK_METHODS_NMF_MDISTUPDATE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * The update rule for the basis matrix W. The formula used is
- * \f[
- * W_{ia} \leftarrow W_{ia} \frac{(VH^T)_{ia}}{(WHH^T)_{ia}}
- * \f]
- */
-class MultiplicativeDistanceW
-{
- public:
- // Empty constructor required for the WUpdateRule template
- MultiplicativeDistanceW() { }
-
- /**
- * The update function that actually updates the W matrix. The function takes
- * in all the salient matrices and only changes the value of the W matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- */
-
- inline static void Update(const arma::mat& V,
- arma::mat& W,
- const arma::mat& H)
- {
- // Simple implementation. This can be left here.
- arma::mat t1,t2;
-
- t1 = V*H.t();
- t2 = W*H*H.t();
-
- W = (W%t1)/t2;
- }
-}; // Class MultiplicativeDistanceW
-
-/**
- * The update rule for the encoding matrix H. The formula used is
- * \f[
- * H_{a\mu} \leftarrow H_{a\mu} \frac{(W^T V)_{a\mu}}{(W^T WH)_{a\mu}}
- * \f]
- */
-class MultiplicativeDistanceH
-{
- public:
- // Empty constructor required for the HUpdateRule template
- MultiplicativeDistanceH() { }
-
- /**
- * The update function that actually updates the H matrix. The function takes
- * in all the salient matrices and only changes the value of the H matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- */
-
- inline static void Update(const arma::mat& V,
- const arma::mat& W,
- arma::mat& H)
- {
- // Simple implementation. This can be left here.
- arma::mat t1,t2;
-
- t1 = W.t()*V;
- t2 = W.t()*W*H;
-
- H = (H%t1)/t2;
- }
-}; // Class MultiplicativeDistanceH
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif
Deleted: mlpack/trunk/src/mlpack/methods/nmf/mdivupdate.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/mdivupdate.hpp 2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/mdivupdate.hpp 2012-08-03 02:13:51 UTC (rev 13323)
@@ -1,112 +0,0 @@
-/**
- * @file mdivupdate.hpp
- * @author Mohan Rajendran
- *
- * Update rules for the Non-negative Matrix Factorization. This follows a method
- * described in the paper 'Algorithms for Non-negative Matrix Factorization'
- * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
- * that the the 'divergence'
- * \f$ \sum_i \sum_j (V_{ij} log\frac{V_{ij}}{(WH)_{ij}}-V_{ij}+(WH)_{ij}) \f$is
- * non-increasing between subsequent iterations. Both of the update rules
- * for W and H are defined in this file.
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_MDIVUPDATE_HPP
-#define __MLPACK_METHODS_NMF_MDIVUPDATE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-/**
- * The update rule for the basis matrix W. The formula used is
- * \f[
- * W_{ia} \leftarrow W_{ia} \frac{\sum_{\mu} H_{a\mu} V_{i\mu}/(WH)_{i\mu}}
- * {\sum_{\nu} H_{a\nu}}
- * \f]
- */
-class MultiplicativeDivergenceW
-{
- public:
- // Empty constructor required for the WUpdateRule template
- MultiplicativeDivergenceW() { }
-
- /**
- * The update function that actually updates the W matrix. The function takes
- * in all the salient matrices and only changes the value of the W matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- */
-
- inline static void Update(const arma::mat& V,
- arma::mat& W,
- const arma::mat& H)
- {
- // Simple implementation. This can be left here.
- arma::mat t1;
- arma::rowvec t2;
-
- t1 = W*H;
- for(size_t i=0;i<W.n_rows;i++)
- {
- for(size_t j=0;j<W.n_cols;j++)
- {
- t2 = H.row(j)%V.row(i)/t1.row(i);
- W(i,j) = W(i,j)*sum(t2)/sum(H.row(j));
- }
- }
-
- }
-}; // Class MultiplicativeDivergenceW
-
-/**
- * The update rule for the encoding matrix H. The formula used is
- * \f[
- * H_{a\mu} \leftarrow H_{a\mu} \frac{\sum_{i} W_{ia} V_{i\mu}/(WH)_{i\mu}}
- * {\sum_{k} H_{ka}}
- * \f]
- */
-class MultiplicativeDivergenceH
-{
- public:
- // Empty constructor required for the HUpdateRule template
- MultiplicativeDivergenceH() { }
-
- /**
- * The update function that actually updates the H matrix. The function takes
- * in all the salient matrices and only changes the value of the H matrix.
- *
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- */
-
- inline static void Update(const arma::mat& V,
- const arma::mat& W,
- arma::mat& H)
- {
- // Simple implementation. This can be left here.
- arma::mat t1;
- arma::colvec t2;
-
- t1 = W*H;
- for(size_t i=0;i<H.n_rows;i++)
- {
- for(size_t j=0;j<H.n_cols;j++)
- {
- t2 = W.col(i)%V.col(j)/t1.col(j);
- H(i,j) = H(i,j)*sum(t2)/sum(W.col(i));
- }
- }
-
- }
-}; // Class MultiplicativeDivergenceH
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif
Copied: mlpack/trunk/src/mlpack/methods/nmf/mult_dist_update_rules.hpp (from rev 13215, mlpack/trunk/src/mlpack/methods/nmf/mdistupdate.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/mult_dist_update_rules.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/nmf/mult_dist_update_rules.hpp 2012-08-03 02:13:51 UTC (rev 13323)
@@ -0,0 +1,81 @@
+/**
+ * @file mult_dist_update_rules.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * described in the paper 'Algorithms for Non-negative Matrix Factorization'
+ * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
+ * that the Frobenius norm \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ is
+ * non-increasing between subsequent iterations. Both of the update rules
+ * for W and H are defined in this file.
+ */
+#ifndef __MLPACK_METHODS_NMF_MULT_DIST_UPDATE_RULES_HPP
+#define __MLPACK_METHODS_NMF_MULT_DIST_UPDATE_RULES_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * The update rule for the basis matrix W. The formula used is
+ * \f[
+ * W_{ia} \leftarrow W_{ia} \frac{(VH^T)_{ia}}{(WHH^T)_{ia}}
+ * \f]
+ */
+class WMultiplicativeDistanceRule
+{
+ public:
+ // Empty constructor required for the WUpdateRule template.
+ WMultiplicativeDistanceRule() { }
+
+ /**
+ * The update function that actually updates the W matrix. The function takes
+ * in all the matrices and only changes the value of the W matrix.
+ *
+ * @param V Input matrix to be factorized.
+ * @param W Basis matrix to be updated.
+ * @param H Encoding matrix.
+ */
+
+ inline static void Update(const arma::mat& V,
+ arma::mat& W,
+ const arma::mat& H)
+ {
+ W = (W % (V * H.t())) / (W * H * H.t());
+ }
+};
+
+/**
+ * The update rule for the encoding matrix H. The formula used is
+ * \f[
+ * H_{a\mu} \leftarrow H_{a\mu} \frac{(W^T V)_{a\mu}}{(W^T WH)_{a\mu}}
+ * \f]
+ */
+class HMultiplicativeDistanceRule
+{
+ public:
+ // Empty constructor required for the HUpdateRule template.
+ HMultiplicativeDistanceRule() { }
+
+ /**
+ * The update function that actually updates the H matrix. The function takes
+ * in all the matrices and only changes the value of the H matrix.
+ *
+ * @param V Input matrix to be factorized.
+ * @param W Basis matrix.
+ * @param H Encoding matrix to be updated.
+ */
+
+ inline static void Update(const arma::mat& V,
+ const arma::mat& W,
+ arma::mat& H)
+ {
+ H = (H % (W.t() * V)) / (W.t() * W * H);
+ }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif
Copied: mlpack/trunk/src/mlpack/methods/nmf/mult_div_update_rules.hpp (from rev 13215, mlpack/trunk/src/mlpack/methods/nmf/mdivupdate.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/mult_div_update_rules.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/nmf/mult_div_update_rules.hpp 2012-08-03 02:13:51 UTC (rev 13323)
@@ -0,0 +1,106 @@
+/**
+ * @file mult_div_update_rules.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * described in the paper 'Algorithms for Non-negative Matrix Factorization'
+ * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
+ * that the the 'divergence'
+ * \f$ \sum_i \sum_j (V_{ij} log\frac{V_{ij}}{(WH)_{ij}}-V_{ij}+(WH)_{ij}) \f$is
+ * non-increasing between subsequent iterations. Both of the update rules
+ * for W and H are defined in this file.
+ */
+#ifndef __MLPACK_METHODS_NMF_MULT_DIV_UPDATE_RULES_HPP
+#define __MLPACK_METHODS_NMF_MULT_DIV_UPDATE_RULES_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * The update rule for the basis matrix W. The formula used is
+ * \f[
+ * W_{ia} \leftarrow W_{ia} \frac{\sum_{\mu} H_{a\mu} V_{i\mu}/(WH)_{i\mu}}
+ * {\sum_{\nu} H_{a\nu}}
+ * \f]
+ */
+class WMultiplicativeDivergenceRule
+{
+ public:
+ // Empty constructor required for the WUpdateRule template.
+ WMultiplicativeDivergenceRule() { }
+
+ /**
+ * The update function that actually updates the W matrix. The function takes
+ * in all the matrices and only changes the value of the W matrix.
+ *
+ * @param V Input matrix to be factorized.
+ * @param W Basis matrix to be updated.
+ * @param H Encoding matrix.
+ */
+ inline static void Update(const arma::mat& V,
+ arma::mat& W,
+ const arma::mat& H)
+ {
+ // Simple implementation left in the header file.
+ arma::mat t1;
+ arma::rowvec t2;
+
+ t1 = W * H;
+ for (size_t i = 0; i < W.n_rows; ++i)
+ {
+ for (size_t j = 0; j < W.n_cols; ++j)
+ {
+ t2 = H.row(j) % V.row(i) / t1.row(i);
+ W(i, j) = W(i, j) * sum(t2) / sum(H.row(j));
+ }
+ }
+ }
+};
+
+/**
+ * The update rule for the encoding matrix H. The formula used is
+ * \f[
+ * H_{a\mu} \leftarrow H_{a\mu} \frac{\sum_{i} W_{ia} V_{i\mu}/(WH)_{i\mu}}
+ * {\sum_{k} H_{ka}}
+ * \f]
+ */
+class HMultiplicativeDivergenceRule
+{
+ public:
+ // Empty constructor required for the HUpdateRule template.
+ HMultiplicativeDivergenceRule() { }
+
+ /**
+ * The update function that actually updates the H matrix. The function takes
+ * in all the matrices and only changes the value of the H matrix.
+ *
+ * @param V Input matrix to be factorized.
+ * @param W Basis matrix.
+ * @param H Encoding matrix to updated.
+ */
+ inline static void Update(const arma::mat& V,
+ const arma::mat& W,
+ arma::mat& H)
+ {
+ // Simple implementation left in the header file.
+ arma::mat t1;
+ arma::colvec t2;
+
+ t1 = W * H;
+ for (size_t i = 0; i < H.n_rows; i++)
+ {
+ for (size_t j = 0; j < H.n_cols; j++)
+ {
+ t2 = W.col(i) % V.col(j) / t1.col(j);
+ H(i,j) = H(i,j) * sum(t2) / sum(W.col(i));
+ }
+ }
+ }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif
Modified: mlpack/trunk/src/mlpack/methods/nmf/nmf.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/nmf.hpp 2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/nmf.hpp 2012-08-03 02:13:51 UTC (rev 13323)
@@ -2,31 +2,32 @@
* @file nmf.hpp
* @author Mohan Rajendran
*
- * Defines the NMF class to perform Non-negative Matrix Factorization
+ * Defines the NMF class to perform Non-negative Matrix Factorization
* on the given matrix.
*/
#ifndef __MLPACK_METHODS_NMF_NMF_HPP
#define __MLPACK_METHODS_NMF_NMF_HPP
#include <mlpack/core.hpp>
-#include "mdistupdate.hpp"
-#include "randominit.hpp"
+#include "mult_dist_update_rules.hpp"
+#include "random_init.hpp"
namespace mlpack {
namespace nmf {
/**
- * This class implements the NMF on the given matrix V. Non-negative Matrix
- * Factorization decomposes V in the form \f$ V \approx WH \f$ where W is
- * called the basis matrix and H is called the encoding matrix. V is taken
- * to be of size n*m and the obtained W is n*r and H is r*m. The size r is
+ * This class implements the NMF on the given matrix V. Non-negative Matrix
+ * Factorization decomposes V in the form \f$ V \approx WH \f$ where W is
+ * called the basis matrix and H is called the encoding matrix. V is taken
+ * to be of size n x m and the obtained W is n x r and H is r x m. The size r is
* called the rank of the factorization.
- *
- * The implementation requires the supply of two templates. One for the update
- * rule for updating the W matrix during each iteration and another rule for
- * updating the H matrix during each iteration. This allows the user to
- * try out various update rules for performing the factorization.
*
+ * The implementation requires two template types; the first contains the update
+ * rule for the W matrix during each iteration and the other contains the update
+ * rule for the H matrix during each iteration. This templatization allows the
+ * user to try various update rules (including ones not supplied with MLPACK)
+ * for factorization.
+ *
* A simple example of how to run NMF is shown below.
*
* @code
@@ -34,69 +35,110 @@
* size_t r = 10; // Rank of decomposition
* arma::mat W; // Basis matrix
* arma::mat H; // Encoding matrix
- *
+ *
* NMF<> nmf(); // Default options
- * nmf.Apply(V,W,H,r);
+ * nmf.Apply(V, W, H, r);
* @endcode
*
- * @tparam WUpdateRule The update rule for calculating W matrix at each
- * iteration; @see MultiplicativeDistanceW for an example.
+ * For more information on non-negative matrix factorization, see the following
+ * paper:
+ *
+ * @code
+ * @article{
+ * title = {{Learning the parts of objects by non-negative matrix
+ * factorization}},
+ * author = {Lee, Daniel D. and Seung, H. Sebastian},
+ * journal = {Nature},
+ * month = {Oct},
+ * year = {1999},
+ * number = {6755},
+ * pages = {788--791},
+ * publisher = {Nature Publishing Group},
+ * url = {http://dx.doi.org/10.1038/44565}
+ * }
+ * @endcode
+ *
+ * @tparam WUpdateRule The update rule for calculating W matrix at each
+ * iteration; @see MultiplicativeDistanceW for an example.
* @tparam HUpdateRule The update rule for calculating H matrix at each
- * iteration; @see MultiplicativeDistanceH for an example.
+ * iteration; @see MultiplicativeDistanceH for an example.
*/
-template<typename InitializeRule = RandomInitialization,
- typename WUpdateRule = MultiplicativeDistanceW,
- typename HUpdateRule = MultiplicativeDistanceH>
+template<typename InitializationRule = RandomInitialization,
+ typename WUpdateRule = WMultiplicativeDistanceRule,
+ typename HUpdateRule = HMultiplicativeDistanceRule>
class NMF
{
public:
/**
* Create the NMF object and (optionally) set the parameters which NMF will
- * run with. This implementation allows us to use different update rules for
- * the updation of the basis and encoding matrices over each iteration.
- *
- * @param maxIterations Maximum number of iterations allowed before giving up
- * @param maxResidue The maximum root mean square of the difference between
- * two subsequent iteration of product WH at which to terminate iteration.
- * A low residual value denotes that subsequent iterationas are not
- * producing much different values of W and H. Once the difference goes
- * below the supplied value, the iteration terminates.
+ * run with. The minimum residue refers to the root mean square of the
+ * difference between two subsequent iterations of the product W * H. A low
+ * residue indicates that subsequent iterations are not producing much change
+ * in W and H. Once the residue goes below the specified minimum residue, the
+ * algorithm terminates.
+ *
+ * @param maxIterations Maximum number of iterations allowed before giving up.
+ * A value of 0 indicates no limit.
+ * @param minResidue The minimum allowed residue before the algorithm
+ * terminates.
* @param Initialize Optional Initialization object for initializing the
- * W and H matrices
+ * W and H matrices
* @param WUpdate Optional WUpdateRule object; for when the update rule for
- * the W vector has states that it needs to store.
+ * the W vector has states that it needs to store.
* @param HUpdate Optional HUpdateRule object; for when the update rule for
- * the H vector has states that it needs to store.
+ * the H vector has states that it needs to store.
*/
NMF(const size_t maxIterations = 10000,
- const double maxResidue = 1e-5,
- const InitializeRule Initialize = InitializeRule(),
- const WUpdateRule WUpdate = WUpdateRule(),
- const HUpdateRule HUpdate = HUpdateRule());
+ const double minResidue = 1e-5,
+ const InitializationRule initializateRule = InitializationRule(),
+ const WUpdateRule wUpdate = WUpdateRule(),
+ const HUpdateRule hUpdate = HUpdateRule());
/**
- * Apply the Non-Negative Matrix Factorization on the provided matrix.
+ * Apply Non-Negative Matrix Factorization to the provided matrix.
*
- * @param V Input matrix to be factorized
- * @param W Basis matrix to be output
- * @param H Encoding matrix to output
- * @param r Rank r of the factorization
+ * @param V Input matrix to be factorized.
+ * @param W Basis matrix to be output.
+ * @param H Encoding matrix to output.
+ * @param r Rank r of the factorization.
*/
- void Apply(const arma::mat& V, arma::mat& W, arma::mat& H,
- size_t& r) const;
+ void Apply(const arma::mat& V, const size_t r, arma::mat& W, arma::mat& H)
+ const;
- private:
- //! The maximum number of iterations allowed before giving up
+ private:
+ //! The maximum number of iterations allowed before giving up.
size_t maxIterations;
- //! The maximum residue below which iteration is considered converged
- double maxResidue;
- //! Instantiated W&H Initialization Rule
- InitializeRule Initialize;
- //! Instantiated W Update Rule
- WUpdateRule WUpdate;
- //! Instantiated H Update Rule
- HUpdateRule HUpdate;
+ //! The minimum residue, below which iteration is considered converged.
+ double minResidue;
+ //! Instantiated initialization Rule.
+ InitializationRule initializeRule;
+ //! Instantiated W update rule.
+ WUpdateRule wUpdate;
+ //! Instantiated H update rule.
+ HUpdateRule hUpdate;
+ public:
+ //! Access the maximum number of iterations.
+ size_t MaxIterations() const { return maxIterations; }
+ //! Modify the maximum number of iterations.
+ size_t& MaxIterations() { return maxIterations; }
+ //! Access the minimum residue before termination.
+ double MinResidue() const { return minResidue; }
+ //! Modify the minimum residue before termination.
+ double& MinResidue() { return minResidue; }
+ //! Access the initialization rule.
+ const InitializationRule& InitializeRule() const { return initializeRule; }
+ //! Modify the initialization rule.
+ InitializationRule& InitializeRule() { return initializeRule; }
+ //! Access the W update rule.
+ const WUpdateRule& WUpdate() const { return wUpdate; }
+ //! Modify the W update rule.
+ WUpdateRule& WUpdate() { return wUpdate; }
+ //! Access the H update rule.
+ const HUpdateRule& HUpdate() const { return hUpdate; }
+ //! Modify the H update rule.
+ HUpdateRule& HUpdate() { return hUpdate; }
+
}; // class NMF
}; // namespace nmf
Modified: mlpack/trunk/src/mlpack/methods/nmf/nmf_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/nmf_impl.hpp 2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/nmf_impl.hpp 2012-08-03 02:13:51 UTC (rev 13323)
@@ -12,79 +12,76 @@
/**
* Construct the NMF object.
*/
-template<typename InitializeRule,
+template<typename InitializationRule,
typename WUpdateRule,
typename HUpdateRule>
-NMF<InitializeRule,
- WUpdateRule,
- HUpdateRule>::
-NMF(const size_t maxIterations,
- const double maxResidue,
- const InitializeRule Initialize,
- const WUpdateRule WUpdate,
- const HUpdateRule HUpdate) :
+NMF<InitializationRule, WUpdateRule, HUpdateRule>::NMF(
+ const size_t maxIterations,
+ const double minResidue,
+ const InitializationRule initializeRule,
+ const WUpdateRule wUpdate,
+ const HUpdateRule hUpdate) :
maxIterations(maxIterations),
- maxResidue(maxResidue),
- Initialize(Initialize),
- WUpdate(WUpdate),
- HUpdate(HUpdate)
+ minResidue(minResidue),
+ initializeRule(initializeRule),
+ wUpdate(wUpdate),
+ hUpdate(hUpdate)
{
- if (maxResidue < 0.0)
+ if (minResidue < 0.0)
{
- Log::Warn << "NMF::NMF(): maxResidue must be a positive value ("
- << maxResidue << " given). Setting to the default value of "
- << "1e-10.\n";
- this->maxResidue = 1e-10;
- }
-
- math::RandomSeed((size_t) std::time(NULL));
+ Log::Warn << "NMF::NMF(): minResidue must be a positive value ("
+ << minResidue << " given). Setting to the default value of 1e-10.\n";
+ this->minResidue = 1e-10;
+ }
}
/**
- * Apply the Non-Negative Matrix Factorization on the provided matrix.
+ * Apply Non-Negative Matrix Factorization to the provided matrix.
*
* @param V Input matrix to be factorized
* @param W Basis matrix to be output
* @param H Encoding matrix to output
* @param r Rank r of the factorization
*/
-template<typename InitializeRule,
+template<typename InitializationRule,
typename WUpdateRule,
typename HUpdateRule>
-void NMF<InitializeRule,
- WUpdateRule,
- HUpdateRule>::
-Apply(const arma::mat& V, arma::mat& W, arma::mat& H, size_t& r) const
+void NMF<InitializationRule, WUpdateRule, HUpdateRule>::Apply(
+ const arma::mat& V,
+ const size_t r,
+ arma::mat& W,
+ arma::mat& H) const
{
- size_t n = V.n_rows;
- size_t m = V.n_cols;
+ const size_t n = V.n_rows;
+ const size_t m = V.n_cols;
- // Intialize W and H
- Initialize.Init(V,W,H,r);
+ // Initialize W and H.
+ initializeRule.Initialize(V, r, W, H);
- //Log::Debug << "Initialized W and H." << std::endl;
+ Log::Info << "Initialized W and H." << std::endl;
- size_t iteration = 0;
- size_t nm = n*m;
- double residue = maxResidue;
- double normOld,norm;
- arma::mat WH;
-
- while (residue >= maxResidue && iteration != maxIterations)
+ size_t iteration = 1;
+ const size_t nm = n * m;
+ double residue = minResidue;
+ double normOld;
+ double norm;
+ arma::mat WH;
+
+ while (residue >= minResidue && iteration != maxIterations)
{
// Update step.
// Update the value of W and H based on the Update Rules provided
- WUpdate.Update(V,W,H);
- HUpdate.Update(V,W,H);
+ wUpdate.Update(V, W, H);
+ hUpdate.Update(V, W, H);
- // Calculate norm of WH after each iteration
- WH = W*H;
- norm = sqrt(accu(WH%WH)/nm);
-
- if(iteration!=0)
+ // Calculate norm of WH after each iteration.
+ WH = W * H;
+ norm = sqrt(accu(WH % WH) / nm);
+
+ if (iteration != 0)
{
- residue = fabs(normOld-norm);
- if(normOld > 1.0)
+ residue = fabs(normOld - norm);
+ if (normOld > 1.0)
{
residue /= normOld;
}
@@ -92,21 +89,14 @@
normOld = norm;
- /*
- WH = W*H;
- diff = WHold-WH;
- diff = diff%diff;
- residue = accu(diff)/(double)(n*m);
- WHold = WH;*/
+ Log::Debug << "NMF iteration " << iteration << ": residue "
+ << sqrt(residue) << std::endl;
- //Log::Debug << "Iteration: " << iteration << " Residue: "
- // << sqrt(residue) << std::endl;
-
iteration++;
-
}
- //Log::Debug << "Iterations: " << iteration << std::endl;
+ Log::Info << "NMF converged to residue of " << sqrt(residue) << " in "
+ << iteration << " iterations." << std::endl;
}
}; // namespace nmf
Modified: mlpack/trunk/src/mlpack/methods/nmf/nmf_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/nmf_main.cpp 2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/nmf_main.cpp 2012-08-03 02:13:51 UTC (rev 13323)
@@ -8,31 +8,58 @@
#include "nmf.hpp"
+#include "random_init.hpp"
+#include "mult_dist_update_rules.hpp"
+#include "mult_div_update_rules.hpp"
+#include "als_update_rules.hpp"
+
using namespace mlpack;
using namespace mlpack::nmf;
using namespace std;
// Document program.
-PROGRAM_INFO("Non-negative Matrix Factorization", "This program performs the "
- "non-negative matrix factorization on the given vector. It will store the "
- "calculated factors in the reference matrix arguments supplied.");
+PROGRAM_INFO("Non-negative Matrix Factorization", "This program performs "
+ "non-negative matrix factorization on the given dataset, storing the "
+ "resulting decomposed matrices in the specified files. For an input "
+ "dataset V, NMF decomposes V into two matrices W and H such that "
+ "\n\n"
+ "V = W * H"
+ "\n\n"
+ "where all elements in W and H are non-negative. If V is of size (n x m),"
+ " then W will be of size (n x r) and H will be of size (r x m), where r is "
+ "the rank of the factorization (specified by --rank)."
+ "\n\n"
+ "Optionally, the desired update rules for each NMF iteration can be chosen "
+ "from the following list:"
+ "\n\n"
+ " - multdist: multiplicative distance-based update rules (Lee and Seung "
+ "1999)\n"
+ " - multdiv: multiplicative divergence-based update rules (Lee and Seung "
+ "1999)\n"
+ " - als: alternating least squares update rules (Paatero and Tapper 1994)"
+ "\n\n"
+ "The maximum number of iterations is specified with --max_iterations, and "
+ "the minimum residue required for algorithm termination is specified with "
+ "--min_residue.");
// Parameters for program.
-PARAM_STRING_REQ("input_file", "Input matrix to perform NMF on.", "i");
-PARAM_STRING_REQ("W_output_file", "File to save the calculated W matrix to.",
- "w");
-PARAM_STRING_REQ("H_output_file", "File to save the calculated H matrix to.",
- "h");
+PARAM_STRING_REQ("input_file", "Input dataset to perform NMF on.", "i");
+PARAM_STRING_REQ("w_file", "File to save the calculated W matrix to.", "w");
+PARAM_STRING_REQ("h_file", "File to save the calculated H matrix to.", "h");
PARAM_INT_REQ("rank", "Rank of the factorization.", "r");
-PARAM_INT("max_iterations", "Number of iterations before NMF terminates",
- "m", 10000);
+
+PARAM_INT("max_iterations", "Number of iterations before NMF terminates (0 runs"
+ " until convergence.", "m", 10000);
PARAM_INT("seed", "Random seed. If 0, 'std::time(NULL)' is used.", "s", 0);
-PARAM_DOUBLE("max_residue", "The maximum root mean square allowed below which "
- "the program termiates", "e", 1e-5);
+PARAM_DOUBLE("min_residue", "The minimum root mean square residue allowed for "
+ "each iteration, below which the program terminates.", "e", 1e-5);
+PARAM_STRING("update_rules", "Update rules for each iteration; ( multdist | "
+ "multdiv | als ).", "u", "multdist");
+
int main(int argc, char** argv)
{
- // Parse commandline.
+ // Parse command line.
CLI::ParseCommandLine(argc, argv);
// Initialize random seed.
@@ -41,32 +68,65 @@
else
math::RandomSeed((size_t) std::time(NULL));
+ // Gather parameters.
+ const string inputFile = CLI::GetParam<string>("input_file");
+ const string hOutputFile = CLI::GetParam<string>("h_file");
+ const string wOutputFile = CLI::GetParam<string>("w_file");
+ const size_t r = CLI::GetParam<int>("rank");
+ const size_t maxIterations = CLI::GetParam<int>("max_iterations");
+ const double minResidue = CLI::GetParam<double>("min_residue");
+ const string updateRules = CLI::GetParam<string>("update_rules");
+
+ // Validate rank.
+ if (r < 1)
+ {
+ Log::Fatal << "The rank of the factorization cannot be less than 1."
+ << std::endl;
+ }
+
+ if ((updateRules != "multdist") &&
+ (updateRules != "multdiv") &&
+ (updateRules != "als"))
+ {
+ Log::Fatal << "Invalid update rules ('" << updateRules << "'); must be '"
+ << "multdist', 'multdiv', or 'als'." << std::endl;
+ }
+
// Load input dataset.
- string inputFile = CLI::GetParam<string>("input_file");
arma::mat V;
- data::Load(inputFile.c_str(), V);
+ data::Load(inputFile, V, true);
+
arma::mat W;
arma::mat H;
- // Find out the rank of the factorization.
- size_t r = CLI::GetParam<int>("rank");
- if (r<1)
+ // Perform NMF with the specified update rules.
+ if (updateRules == "multdist")
{
- Log::Fatal << "The rank of the factorization cannot be less than 1. "
- << std::endl;
+ Log::Info << "Performing NMF with multiplicative distance-based update "
+ << "rules." << std::endl;
+ NMF<> nmf(maxIterations, minResidue);
+ nmf.Apply(V, r, W, H);
}
-
- size_t maxiterations = CLI::GetParam<int>("max_iterations");
- double maxresidue = CLI::GetParam<double>("max_residue");
+ else if (updateRules == "multdiv")
+ {
+ Log::Info << "Performing NMF with multiplicative divergence-based update "
+ << "rules." << std::endl;
+ NMF<RandomInitialization,
+ WMultiplicativeDivergenceRule,
+ HMultiplicativeDivergenceRule> nmf(maxIterations, minResidue);
+ nmf.Apply(V, r, W, H);
+ }
+ else if (updateRules == "als")
+ {
+ Log::Info << "Performing NMF with alternating least squared update rules."
+ << std::endl;
+ NMF<RandomInitialization,
+ WAlternatingLeastSquaresRule,
+ HAlternatingLeastSquaresRule> nmf(maxIterations, minResidue);
+ nmf.Apply(V, r, W, H);
+ }
- // Perform NMF.
- NMF<> nmf(maxiterations,maxresidue);
- Log::Info << "Performing NMF on the given matrix..." << endl;
- nmf.Apply(V,W,H,r);
-
- // Save results
- string outputFile = CLI::GetParam<string>("W_output_file");
- data::Save(outputFile, W);
- outputFile = CLI::GetParam<string>("H_output_file");
- data::Save(outputFile, H);
+ // Save results.
+ data::Save(wOutputFile, W, false);
+ data::Save(hOutputFile, H, false);
}
Copied: mlpack/trunk/src/mlpack/methods/nmf/random_init.hpp (from rev 13215, mlpack/trunk/src/mlpack/methods/nmf/randominit.hpp)
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/random_init.hpp (rev 0)
+++ mlpack/trunk/src/mlpack/methods/nmf/random_init.hpp 2012-08-03 02:13:51 UTC (rev 13323)
@@ -0,0 +1,40 @@
+/**
+ * @file randominit.hpp
+ * @author Mohan Rajendran
+ *
+ * Intialization rule for Non-Negative Matrix Factorization (NMF). This simple
+ * initialization is performed by assigning a random matrix to W and H.
+ */
+#ifndef __MLPACK_METHODS_NMF_RANDOM_INIT_HPP
+#define __MLPACK_METHODS_NMF_RANDOM_INIT_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+class RandomInitialization
+{
+ public:
+ // Empty constructor required for the InitializeRule template
+ RandomInitialization() { }
+
+ inline static void Initialize(const arma::mat& V,
+ const size_t r,
+ arma::mat& W,
+ arma::mat& H)
+ {
+ // Simple implementation (left in the header file due to its simplicity).
+ size_t n = V.n_rows;
+ size_t m = V.n_cols;
+
+ // Intialize to random values.
+ W.randu(n, r);
+ H.randu(r, m);
+ }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif
Deleted: mlpack/trunk/src/mlpack/methods/nmf/randominit.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/nmf/randominit.hpp 2012-08-02 21:44:04 UTC (rev 13322)
+++ mlpack/trunk/src/mlpack/methods/nmf/randominit.hpp 2012-08-03 02:13:51 UTC (rev 13323)
@@ -1,44 +0,0 @@
-/**
- * @file randominit.hpp
- * @author Mohan Rajendran
- *
- * Intialization rule for the Non-negative Matrix Factorization. This simple
- * initialization is performed by assigning a random matrix to W and H
- *
- */
-
-#ifndef __MLPACK_METHODS_NMF_RANDOMINIT_HPP
-#define __MLPACK_METHODS_NMF_RANDOMINIT_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace nmf {
-
-class RandomInitialization
-{
- public:
- // Empty constructor required for the InitializeRule template
- RandomInitialization() { }
-
- inline static void Init(const arma::mat& V,
- arma::mat& W,
- arma::mat& H,
- const size_t& r)
- {
- // Simple inplementation. This can be left here.
-
- size_t n = V.n_rows;
- size_t m = V.n_cols;
-
- // Intialize to random values
- W.randu(n,r);
- H.randu(r,m);
- }
-
-}; // Class RandomInitialization
-
-}; // namespace nmf
-}; // namespace mlpack
-
-#endif
More information about the mlpack-svn
mailing list