[mlpack-git] master, mlpack-1.0.x: added module 'lmf'(Latent Matrix Factorization) to accommodate SVD based update rules alongside NMF based update rule. CF module is updated to use LMF module. (25bd564)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:47:24 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 25bd564e1c80fd1458a93ba157e7610135e8491b
Author: sumedhghaisas <sumedhghaisas at gmail.com>
Date:   Wed May 21 20:23:43 2014 +0000

    added module 'lmf'(Latent Matrix Factorization) to accommodate SVD based update rules alongside NMF based update rule. CF module is updated to use LMF module.


>---------------------------------------------------------------

25bd564e1c80fd1458a93ba157e7610135e8491b
 src/mlpack/methods/CMakeLists.txt                  |   1 +
 src/mlpack/methods/cf/cf.hpp                       |  12 +--
 src/mlpack/methods/cf/cf_impl.hpp                  |  12 +--
 src/mlpack/methods/{cf => lmf}/CMakeLists.txt      |  15 +--
 .../methods/{cf => lmf/init_rules}/CMakeLists.txt  |  12 +--
 .../{nmf => lmf/init_rules}/random_acol_init.hpp   |  23 ++++-
 .../{nmf => lmf/init_rules}/random_init.hpp        |  23 ++++-
 src/mlpack/methods/{nmf/nmf.hpp => lmf/lmf.hpp}    | 106 +++++++-------------
 .../methods/{nmf/nmf_impl.hpp => lmf/lmf_impl.hpp} |  51 +++-------
 .../methods/{nmf/nmf_main.cpp => lmf/lmf_main.cpp} |  60 +++++-------
 .../{cf => lmf/update_rules}/CMakeLists.txt        |  13 +--
 src/mlpack/methods/lmf/update_rules/nmf_als.hpp    | 109 +++++++++++++++++++++
 .../methods/lmf/update_rules/nmf_mult_dist.hpp     |  87 ++++++++++++++++
 .../update_rules/nmf_mult_div.hpp}                 |  89 ++++++++---------
 14 files changed, 370 insertions(+), 243 deletions(-)

diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index 9e15448..a434eca 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -18,6 +18,7 @@ set(DIRS
   nca
   neighbor_search
   nmf
+  lmf
   pca
   radical
   range_search
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 4eda91f..3e12902 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -12,14 +12,12 @@
 
 #include <mlpack/core.hpp>
 #include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-#include <mlpack/methods/nmf/nmf.hpp>
-#include <mlpack/methods/nmf/als_update_rules.hpp>
+#include <mlpack/methods/lmf/lmf.hpp>
+#include <mlpack/methods/lmf/update_rules/nmf_als.hpp>
 #include <set>
 #include <map>
 #include <iostream>
 
-using namespace mlpack::nmf;
-
 namespace mlpack {
 namespace cf /** Collaborative filtering. */{
 
@@ -56,10 +54,8 @@ namespace cf /** Collaborative filtering. */{
  *     Apply(arma::sp_mat& data, size_t rank, arma::mat& W, arma::mat& H).
  */
 template<
-    typename FactorizerType = NMF<RandomInitialization,
-                                  WAlternatingLeastSquaresRule,
-                                  HAlternatingLeastSquaresRule>
->
+    typename FactorizerType = lmf::LMF<lmf::RandomInitialization, 
+                                       lmf::NMF_ALSUpdate> >
 class CF
 {
  public:
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index c2b6232..15fbe34 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -8,13 +8,6 @@
  * specified data set.
  *
  */
-#include "cf.hpp"
-#include <mlpack/methods/nmf/nmf.hpp>
-#include <mlpack/methods/nmf/als_update_rules.hpp>
-
-using namespace mlpack::nmf;
-using namespace mlpack::neighbor;
-using namespace std;
 
 namespace mlpack {
 namespace cf {
@@ -85,7 +78,6 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
 
   // Operations independent of the query:
   // Decompose the sparse data matrix to user and data matrices.
-  // Presently only ALS (via NMF) is supported as an optimizer.
   factorizer.Apply(cleanedData, rank, w, h);
 
   // Generate new table by multiplying approximate values.
@@ -106,7 +98,7 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
 
   // Calculate the neighborhood of the queried users.
   // This should be a templatized option.
-  AllkNN a(rating, query);
+  neighbor::AllkNN a(rating, query);
   arma::mat resultingDistances; // Temporary storage.
   a.Search(numUsersForSimilarity, neighborhood, resultingDistances);
 
@@ -162,7 +154,7 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
     // warning.
     if (recommendations(values.n_rows - 1, i) == cleanedData.n_rows + 1)
       Log::Warn << "Could not provide " << values.n_rows << " recommendations "
-          << "for user " << users(i) << " (not enough un-rated items)!" << endl;
+          << "for user " << users(i) << " (not enough un-rated items)!" << std::endl;
   }
 }
 
diff --git a/src/mlpack/methods/cf/CMakeLists.txt b/src/mlpack/methods/lmf/CMakeLists.txt
similarity index 69%
copy from src/mlpack/methods/cf/CMakeLists.txt
copy to src/mlpack/methods/lmf/CMakeLists.txt
index 6413af4..e9205ae 100644
--- a/src/mlpack/methods/cf/CMakeLists.txt
+++ b/src/mlpack/methods/lmf/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Define the files we need to compile
 # Anything not in this list will not be compiled into MLPACK.
 set(SOURCES
-  cf.hpp
-  cf_impl.hpp
+  lmf.hpp
+  lmf_impl.hpp
 )
 
 # Add directory name to sources.
@@ -14,10 +14,13 @@ endforeach()
 # the parent scope).
 set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
 
-add_executable(cf
-  cf_main.cpp
+add_subdirectory(update_rules)
+add_subdirectory(init_rules)
+
+add_executable(lmf
+  lmf_main.cpp
 )
-target_link_libraries(cf
+target_link_libraries(lmf
   mlpack
 )
-install(TARGETS cf RUNTIME DESTINATION bin)
+install(TARGETS lmf RUNTIME DESTINATION bin)
diff --git a/src/mlpack/methods/cf/CMakeLists.txt b/src/mlpack/methods/lmf/init_rules/CMakeLists.txt
similarity index 75%
copy from src/mlpack/methods/cf/CMakeLists.txt
copy to src/mlpack/methods/lmf/init_rules/CMakeLists.txt
index 6413af4..a31d281 100644
--- a/src/mlpack/methods/cf/CMakeLists.txt
+++ b/src/mlpack/methods/lmf/init_rules/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Define the files we need to compile
 # Anything not in this list will not be compiled into MLPACK.
 set(SOURCES
-  cf.hpp
-  cf_impl.hpp
+  random_init.hpp
+  random_acol_init.hpp
 )
 
 # Add directory name to sources.
@@ -13,11 +13,3 @@ endforeach()
 # Append sources (with directory name) to list of all MLPACK sources (used at
 # the parent scope).
 set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
-
-add_executable(cf
-  cf_main.cpp
-)
-target_link_libraries(cf
-  mlpack
-)
-install(TARGETS cf RUNTIME DESTINATION bin)
diff --git a/src/mlpack/methods/nmf/random_acol_init.hpp b/src/mlpack/methods/lmf/init_rules/random_acol_init.hpp
similarity index 68%
copy from src/mlpack/methods/nmf/random_acol_init.hpp
copy to src/mlpack/methods/lmf/init_rules/random_acol_init.hpp
index 43d9b39..b110e49 100644
--- a/src/mlpack/methods/nmf/random_acol_init.hpp
+++ b/src/mlpack/methods/lmf/init_rules/random_acol_init.hpp
@@ -7,14 +7,29 @@
  * the paper 'Algorithms, Initializations and Convergence' by Langville et al.
  * This method sets each of the columns of W by averaging p randomly chosen
  * columns of V.
+ *
+ * This file is part of MLPACK 1.0.8.
+ *
+ * MLPACK is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+ * details (LICENSE.txt).
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * MLPACK.  If not, see <http://www.gnu.org/licenses/>.
  */
-#ifndef __MLPACK_METHODS_NMF_RANDOM_ACOL_INIT_HPP
-#define __MLPACK_METHODS_NMF_RANDOM_ACOL_INIT_HPP
+#ifndef __MLPACK_METHODS_LMF_RANDOM_ACOL_INIT_HPP
+#define __MLPACK_METHODS_LMF_RANDOM_ACOL_INIT_HPP
 
 #include <mlpack/core.hpp>
 
 namespace mlpack {
-namespace nmf {
+namespace lmf {
 
 /**
  * This class initializes the W matrix of the NMF algorithm by averaging p
@@ -66,7 +81,7 @@ class RandomAcolInitialization
   }
 }; // Class RandomAcolInitialization
 
-}; // namespace nmf
+}; // namespace lmf
 }; // namespace mlpack
 
 #endif
diff --git a/src/mlpack/methods/nmf/random_init.hpp b/src/mlpack/methods/lmf/init_rules/random_init.hpp
similarity index 50%
copy from src/mlpack/methods/nmf/random_init.hpp
copy to src/mlpack/methods/lmf/init_rules/random_init.hpp
index 0ec52ae..de91724 100644
--- a/src/mlpack/methods/nmf/random_init.hpp
+++ b/src/mlpack/methods/lmf/init_rules/random_init.hpp
@@ -4,14 +4,29 @@
  *
  * Intialization rule for Non-Negative Matrix Factorization (NMF). This simple
  * initialization is performed by assigning a random matrix to W and H.
+ *
+ * This file is part of MLPACK 1.0.8.
+ *
+ * MLPACK is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+ * details (LICENSE.txt).
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * MLPACK.  If not, see <http://www.gnu.org/licenses/>.
  */
-#ifndef __MLPACK_METHODS_NMF_RANDOM_INIT_HPP
-#define __MLPACK_METHODS_NMF_RANDOM_INIT_HPP
+#ifndef __MLPACK_METHODS_LMF_RANDOM_INIT_HPP
+#define __MLPACK_METHODS_LMF_RANDOM_INIT_HPP
 
 #include <mlpack/core.hpp>
 
 namespace mlpack {
-namespace nmf {
+namespace lmf {
 
 class RandomInitialization
 {
@@ -35,7 +50,7 @@ class RandomInitialization
   }
 };
 
-}; // namespace nmf
+}; // namespace lmf
 }; // namespace mlpack
 
 #endif
diff --git a/src/mlpack/methods/nmf/nmf.hpp b/src/mlpack/methods/lmf/lmf.hpp
similarity index 50%
copy from src/mlpack/methods/nmf/nmf.hpp
copy to src/mlpack/methods/lmf/lmf.hpp
index bdc0d25..16d4c63 100644
--- a/src/mlpack/methods/nmf/nmf.hpp
+++ b/src/mlpack/methods/lmf/lmf.hpp
@@ -1,78 +1,52 @@
-/**
- * @file nmf.hpp
- * @author Mohan Rajendran
- *
- * Defines the NMF class to perform Non-negative Matrix Factorization
- * on the given matrix.
- */
-#ifndef __MLPACK_METHODS_NMF_NMF_HPP
-#define __MLPACK_METHODS_NMF_NMF_HPP
+#ifndef __MLPACK_METHODS_LMF_LMF_HPP
+#define __MLPACK_METHODS_LMF_LMF_HPP
 
 #include <mlpack/core.hpp>
-#include "mult_dist_update_rules.hpp"
-#include "random_init.hpp"
+#include "update_rules/nmf_mult_dist.hpp"
+#include "init_rules/random_init.hpp"
 
 namespace mlpack {
-namespace nmf {
+namespace lmf {
 
 /**
- * This class implements the NMF on the given matrix V. Non-negative Matrix
+ * This class implements the LMF on the given matrix V. Latent Matrix
  * Factorization decomposes V in the form \f$ V \approx WH \f$ where W is
  * called the basis matrix and H is called the encoding matrix. V is taken
  * to be of size n x m and the obtained W is n x r and H is r x m. The size r is
  * called the rank of the factorization.
  *
- * The implementation requires two template types; the first contains the update
- * rule for the W matrix during each iteration and the other contains the update
- * rule for the H matrix during each iteration.  This templatization allows the
+ * The implementation requires two template types; the first contains the
+ * initialization rule for the W and H matrix and the other contains the update
+ * rule to be used during each iteration.  This templatization allows the
  * user to try various update rules (including ones not supplied with MLPACK)
  * for factorization.
  *
- * A simple example of how to run NMF is shown below.
+ * A simple example of how to run LMF is shown below.
  *
  * @code
- * extern arma::mat V; // Matrix that we want to perform NMF on.
+ * extern arma::mat V; // Matrix that we want to perform LMF on.
  * size_t r = 10; // Rank of decomposition
  * arma::mat W; // Basis matrix
  * arma::mat H; // Encoding matrix
  *
- * NMF<> nmf(); // Default options
- * nmf.Apply(V, W, H, r);
+ * LMF<> lmf; // Default options
+ * lmf.Apply(V, r, W, H);
  * @endcode
  *
- * For more information on non-negative matrix factorization, see the following
- * paper:
- *
- * @code
- * @article{
- *   title = {{Learning the parts of objects by non-negative matrix
- *       factorization}},
- *   author = {Lee, Daniel D. and Seung, H. Sebastian},
- *   journal = {Nature},
- *   month = {Oct},
- *   year = {1999},
- *   number = {6755},
- *   pages = {788--791},
- *   publisher = {Nature Publishing Group},
- *   url = {http://dx.doi.org/10.1038/44565}
- * }
- * @endcode
- *
- * @tparam WUpdateRule The update rule for calculating W matrix at each
- *     iteration.
- * @tparam HUpdateRule The update rule for calculating H matrix at each
+ * @tparam InitializationRule The initialization rule for initializing W and H
+ *     matrix.
+ * @tparam UpdateRule The update rule for calculating W and H matrix at each
  *     iteration.
  *
- * @see WMultiplicativeDistanceRule, HMultiplicativeDistanceRule
+ * @see NMF_MultiplicativeDistanceUpdate
  */
 template<typename InitializationRule = RandomInitialization,
-         typename WUpdateRule = WMultiplicativeDistanceRule,
-         typename HUpdateRule = HMultiplicativeDistanceRule>
-class NMF
+         typename UpdateRule = NMF_MultiplicativeDistanceUpdate>
+class LMF
 {
  public:
   /**
-   * Create the NMF object and (optionally) set the parameters which NMF will
+   * Create the LMF object and (optionally) set the parameters which LMF will
    * run with.  The minimum residue refers to the root mean square of the
    * difference between two subsequent iterations of the product W * H.  A low
    * residue indicates that subsequent iterations are not producing much change
@@ -85,19 +59,16 @@ class NMF
    *     terminates.
    * @param Initialize Optional Initialization object for initializing the
    *     W and H matrices
-   * @param WUpdate Optional WUpdateRule object; for when the update rule for
-   *     the W vector has states that it needs to store.
-   * @param HUpdate Optional HUpdateRule object; for when the update rule for
-   *     the H vector has states that it needs to store.
+   * @param update Optional UpdateRule object; for when the update rule for
+   *     the W and H matrices has state that it needs to store.
    */
-  NMF(const size_t maxIterations = 10000,
+  LMF(const size_t maxIterations = 10000,
       const double minResidue = 1e-10,
       const InitializationRule initializeRule = InitializationRule(),
-      const WUpdateRule wUpdate = WUpdateRule(),
-      const HUpdateRule hUpdate = HUpdateRule());
+      const UpdateRule update = UpdateRule());
 
   /**
-   * Apply Non-Negative Matrix Factorization to the provided matrix.
+   * Apply Latent Matrix Factorization to the provided matrix.
    *
    * @param V Input matrix to be factorized.
    * @param W Basis matrix to be output.
@@ -117,10 +88,8 @@ class NMF
   double minResidue;
   //! Instantiated initialization Rule.
   InitializationRule initializeRule;
-  //! Instantiated W update rule.
-  WUpdateRule wUpdate;
-  //! Instantiated H update rule.
-  HUpdateRule hUpdate;
+  //! Instantiated update rule.
+  UpdateRule update;
 
  public:
   //! Access the maximum number of iterations.
@@ -135,24 +104,17 @@ class NMF
   const InitializationRule& InitializeRule() const { return initializeRule; }
   //! Modify the initialization rule.
   InitializationRule& InitializeRule() { return initializeRule; }
-  //! Access the W update rule.
-  const WUpdateRule& WUpdate() const { return wUpdate; }
-  //! Modify the W update rule.
-  WUpdateRule& WUpdate() { return wUpdate; }
-  //! Access the H update rule.
-  const HUpdateRule& HUpdate() const { return hUpdate; }
-  //! Modify the H update rule.
-  HUpdateRule& HUpdate() { return hUpdate; }
-
-  // Returns a string representation of this object. 
-  std::string ToString() const;
+  //! Access the update rule.
+  const UpdateRule& Update() const { return update; }
+  //! Modify the update rule.
+  UpdateRule& Update() { return update; }
 
-}; // class NMF
+}; // class LMF
 
-}; // namespace nmf
+}; // namespace lmf
 }; // namespace mlpack
 
 // Include implementation.
-#include "nmf_impl.hpp"
+#include "lmf_impl.hpp"
 
 #endif
diff --git a/src/mlpack/methods/nmf/nmf_impl.hpp b/src/mlpack/methods/lmf/lmf_impl.hpp
similarity index 55%
copy from src/mlpack/methods/nmf/nmf_impl.hpp
copy to src/mlpack/methods/lmf/lmf_impl.hpp
index 5cc097b..d4bf896 100644
--- a/src/mlpack/methods/nmf/nmf_impl.hpp
+++ b/src/mlpack/methods/lmf/lmf_impl.hpp
@@ -1,42 +1,31 @@
-/**
- * @file nmf.cpp
- * @author Mohan Rajendran
- *
- * Implementation of NMF class to perform Non-Negative Matrix Factorization
- * on the given matrix.
- */
-
 namespace mlpack {
-namespace nmf {
+namespace lmf {
 
 /**
- * Construct the NMF object.
+ * Construct the LMF object.
  */
 template<typename InitializationRule,
-         typename WUpdateRule,
-         typename HUpdateRule>
-NMF<InitializationRule, WUpdateRule, HUpdateRule>::NMF(
+         typename UpdateRule>
+LMF<InitializationRule, UpdateRule>::LMF(
     const size_t maxIterations,
     const double minResidue,
     const InitializationRule initializeRule,
-    const WUpdateRule wUpdate,
-    const HUpdateRule hUpdate) :
+    const UpdateRule update) :
     maxIterations(maxIterations),
     minResidue(minResidue),
     initializeRule(initializeRule),
-    wUpdate(wUpdate),
-    hUpdate(hUpdate)
+    update(update)
 {
   if (minResidue < 0.0)
   {
-    Log::Warn << "NMF::NMF(): minResidue must be a positive value ("
+    Log::Warn << "LMF::LMF(): minResidue must be a positive value ("
         << minResidue << " given). Setting to the default value of 1e-10.\n";
     this->minResidue = 1e-10;
   }
 }
 
 /**
- * Apply Non-Negative Matrix Factorization to the provided matrix.
+ * Apply Latent Matrix Factorization to the provided matrix.
  *
  * @param V Input matrix to be factorized
  * @param W Basis matrix to be output
@@ -44,10 +33,9 @@ NMF<InitializationRule, WUpdateRule, HUpdateRule>::NMF(
  * @param r Rank r of the factorization
  */
 template<typename InitializationRule,
-         typename WUpdateRule,
-         typename HUpdateRule>
+         typename UpdateRule>
 template<typename MatType>
-void NMF<InitializationRule, WUpdateRule, HUpdateRule>::Apply(
+void LMF<InitializationRule, UpdateRule>::Apply(
     const MatType& V,
     const size_t r,
     arma::mat& W,
@@ -72,8 +60,8 @@ void NMF<InitializationRule, WUpdateRule, HUpdateRule>::Apply(
   {
     // Update step.
     // Update the value of W and H based on the Update Rules provided
-    wUpdate.Update(V, W, H);
-    hUpdate.Update(V, W, H);
+    update.WUpdate(V, W, H);
+    update.HUpdate(V, W, H);
 
     // Calculate norm of WH after each iteration.
     WH = W * H;
@@ -90,22 +78,9 @@ void NMF<InitializationRule, WUpdateRule, HUpdateRule>::Apply(
     iteration++;
   }
 
-  Log::Info << "NMF converged to residue of " << sqrt(residue) << " in "
+  Log::Info << "LMF converged to residue of " << sqrt(residue) << " in "
       << iteration << " iterations." << std::endl;
 }
 
-//Return a String of the object
-template<typename InitializationRule,
-         typename WUpdateRule,
-         typename HUpdateRule>
-std::string NMF<InitializationRule, WUpdateRule, HUpdateRule>::ToString() const
-{
-  std::ostringstream convert;
-  convert << "Non negative matrix factorization [" << this << "]" << std::endl;
-  convert << "  Max Iterations: " << maxIterations << std::endl;
-  convert << "  Minimum Residue: " << minResidue<< std::endl;
-  return convert.str();
-}
-
 }; // namespace nmf
 }; // namespace mlpack
diff --git a/src/mlpack/methods/nmf/nmf_main.cpp b/src/mlpack/methods/lmf/lmf_main.cpp
similarity index 71%
copy from src/mlpack/methods/nmf/nmf_main.cpp
copy to src/mlpack/methods/lmf/lmf_main.cpp
index feddb7a..04a79dc 100644
--- a/src/mlpack/methods/nmf/nmf_main.cpp
+++ b/src/mlpack/methods/lmf/lmf_main.cpp
@@ -1,43 +1,41 @@
-/**
- * @file nmf_main.cpp
- * @author Mohan Rajendran
- *
- * Main executable to run NMF.
- */
 #include <mlpack/core.hpp>
 
-#include "nmf.hpp"
+#include "lmf.hpp"
 
-#include "random_init.hpp"
-#include "mult_dist_update_rules.hpp"
-#include "mult_div_update_rules.hpp"
-#include "als_update_rules.hpp"
+#include "init_rules/random_init.hpp"
+#include "update_rules/nmf_mult_dist.hpp"
+#include "update_rules/nmf_mult_div.hpp"
+#include "update_rules/nmf_als.hpp"
 
 using namespace mlpack;
-using namespace mlpack::nmf;
+using namespace mlpack::lmf;
 using namespace std;
 
 // Document program.
-PROGRAM_INFO("Non-negative Matrix Factorization", "This program performs "
-    "non-negative matrix factorization on the given dataset, storing the "
+PROGRAM_INFO("Latent Matrix Factorization", "This program performs "
+    "matrix factorization on the given dataset, storing the "
     "resulting decomposed matrices in the specified files.  For an input "
-    "dataset V, NMF decomposes V into two matrices W and H such that "
+    "dataset V, LMF decomposes V into two matrices W and H such that "
     "\n\n"
     "V = W * H"
     "\n\n"
-    "where all elements in W and H are non-negative.  If V is of size (n x m),"
+    "If V is of size (n x m),"
     " then W will be of size (n x r) and H will be of size (r x m), where r is "
     "the rank of the factorization (specified by --rank)."
     "\n\n"
-    "Optionally, the desired update rules for each NMF iteration can be chosen "
+    "Optionally, the desired update rules for each LMF iteration can be chosen "
     "from the following list:"
     "\n\n"
     " - multdist: multiplicative distance-based update rules (Lee and Seung "
-    "1999)\n"
+    "1999): non-negative matrix factorization. Matrix V should contain\n"
+    "non-negative elements.\n"
     " - multdiv: multiplicative divergence-based update rules (Lee and Seung "
-    "1999)\n"
-    " - als: alternating least squares update rules (Paatero and Tapper 1994)"
-    "\n\n"
+    "1999): non-negative matrix factorization. Matrix V should contain\n"
+    "non-negative elements.\n"
+    " - als: alternating least squares update rules (Paatero and Tapper 1994)\n"
+    "non-negative matrix factorization. Matrix V should contain\n"
+    "non-negative elements.\n"
+    "\n"
     "The maximum number of iterations is specified with --max_iterations, and "
     "the minimum residue required for algorithm termination is specified with "
     "--min_residue.");
@@ -102,28 +100,24 @@ int main(int argc, char** argv)
   // Perform NMF with the specified update rules.
   if (updateRules == "multdist")
   {
-    Log::Info << "Performing NMF with multiplicative distance-based update "
+    Log::Info << "Performing LMF with multiplicative distance-based update(Non-negative Matrix Factorization) "
         << "rules." << std::endl;
-    NMF<> nmf(maxIterations, minResidue);
+    LMF<> nmf(maxIterations, minResidue);
     nmf.Apply(V, r, W, H);
   }
   else if (updateRules == "multdiv")
   {
-    Log::Info << "Performing NMF with multiplicative divergence-based update "
+    Log::Info << "Performing NMF with multiplicative divergence-based update(Non-negative Matrix Factorization) "
         << "rules." << std::endl;
-    NMF<RandomInitialization,
-        WMultiplicativeDivergenceRule,
-        HMultiplicativeDivergenceRule> nmf(maxIterations, minResidue);
-    nmf.Apply(V, r, W, H);
+    LMF<RandomInitialization,NMF_MultiplicativeDivergenceUpdate> lmf(maxIterations, minResidue);
+    lmf.Apply(V, r, W, H);
   }
   else if (updateRules == "als")
   {
-    Log::Info << "Performing NMF with alternating least squared update rules."
+    Log::Info << "Performing NMF with alternating least squared update rules.(Non-negative Matrix Factorization)"
         << std::endl;
-    NMF<RandomInitialization,
-        WAlternatingLeastSquaresRule,
-        HAlternatingLeastSquaresRule> nmf(maxIterations, minResidue);
-    nmf.Apply(V, r, W, H);
+    LMF<RandomInitialization, NMF_ALSUpdate> lmf(maxIterations, minResidue);
+    lmf.Apply(V, r, W, H);
   }
 
   // Save results.
diff --git a/src/mlpack/methods/cf/CMakeLists.txt b/src/mlpack/methods/lmf/update_rules/CMakeLists.txt
similarity index 75%
copy from src/mlpack/methods/cf/CMakeLists.txt
copy to src/mlpack/methods/lmf/update_rules/CMakeLists.txt
index 6413af4..011ec27 100644
--- a/src/mlpack/methods/cf/CMakeLists.txt
+++ b/src/mlpack/methods/lmf/update_rules/CMakeLists.txt
@@ -1,8 +1,9 @@
 # Define the files we need to compile
 # Anything not in this list will not be compiled into MLPACK.
 set(SOURCES
-  cf.hpp
-  cf_impl.hpp
+  nmf_als.hpp
+  nmf_mult_dist.hpp
+  nmf_mult_div.hpp
 )
 
 # Add directory name to sources.
@@ -13,11 +14,3 @@ endforeach()
 # Append sources (with directory name) to list of all MLPACK sources (used at
 # the parent scope).
 set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
-
-add_executable(cf
-  cf_main.cpp
-)
-target_link_libraries(cf
-  mlpack
-)
-install(TARGETS cf RUNTIME DESTINATION bin)
diff --git a/src/mlpack/methods/lmf/update_rules/nmf_als.hpp b/src/mlpack/methods/lmf/update_rules/nmf_als.hpp
new file mode 100644
index 0000000..d559ea3
--- /dev/null
+++ b/src/mlpack/methods/lmf/update_rules/nmf_als.hpp
@@ -0,0 +1,109 @@
+/**
+ * @file nmf_als.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * titled 'Alternating Least Squares' described in the paper 'Positive Matrix
+ * Factorization: A Non-negative Factor Model with Optimal Utilization of
+ * Error Estimates of Data Values' by P. Paatero and U. Tapper. It uses least
+ * squares projection formula to reduce the error value of
+ * \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ by alternately calculating W and H
+ * respectively while holding the other matrix constant.
+ *
+ * This file is part of MLPACK 1.0.8.
+ *
+ * MLPACK is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+ * details (LICENSE.txt).
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * MLPACK.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_ALS_HPP
+#define __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_ALS_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace lmf {
+
+/**
+ * The alternating least square update rules of matrices W and H.
+ */
+class NMF_ALSUpdate
+{
+ public:
+  // Empty constructor required for the UpdateRule template.
+  NMF_ALSUpdate() { }
+
+  /**
+   * The update rule for the basis matrix W. The formula used is
+   * \f[
+   * W^T = \frac{HV^T}{HH^T}
+   * \f]
+   * The function takes in all the matrices and only changes the
+   * value of the W matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be updated.
+   * @param H Encoding matrix.
+   */
+  template<typename MatType>
+  inline static void WUpdate(const MatType& V,
+                             arma::mat& W,
+                             const arma::mat& H)
+  {
+    // The call to inv() sometimes fails; so we are using the pseudoinverse.
+    // W = (inv(H * H.t()) * H * V.t()).t();
+    W = V * H.t() * pinv(H * H.t());
+
+    // Set all negative numbers to 0.
+    for (size_t i = 0; i < W.n_elem; i++)
+    {
+      if (W(i) < 0.0)
+      {
+        W(i) = 0.0;
+      }
+    }
+  }
+
+  /**
+   * The update rule for the encoding matrix H. The formula used is
+   * \f[
+   * H = \frac{W^TV}{W^TW}
+   * \f]
+   * The function takes in all the matrices and only changes the
+   * value of the H matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix.
+   * @param H Encoding matrix to be updated.
+   */
+  template<typename MatType>
+  inline static void HUpdate(const MatType& V,
+                             const arma::mat& W,
+                             arma::mat& H)
+  {
+    H = pinv(W.t() * W) * W.t() * V;
+
+    // Set all negative numbers to 0.
+    for (size_t i = 0; i < H.n_elem; i++)
+    {
+      if (H(i) < 0.0)
+      {
+        H(i) = 0.0;
+      }
+    }
+  }
+};
+
+}; // namespace lmf
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/lmf/update_rules/nmf_mult_dist.hpp b/src/mlpack/methods/lmf/update_rules/nmf_mult_dist.hpp
new file mode 100644
index 0000000..72a3541
--- /dev/null
+++ b/src/mlpack/methods/lmf/update_rules/nmf_mult_dist.hpp
@@ -0,0 +1,87 @@
+/**
+ * @file nmf_mult_dist.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * described in the paper 'Algorithms for Non-negative Matrix Factorization'
+ * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
+ * that the Frobenius norm \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ is
+ * non-increasing between subsequent iterations. Both of the update rules
+ * for W and H are defined in this file.
+ *
+ * This file is part of MLPACK 1.0.8.
+ *
+ * MLPACK is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+ * details (LICENSE.txt).
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * MLPACK.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIST_UPDATE_RULES_HPP
+#define __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIST_UPDATE_RULES_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace lmf {
+
+/**
+ * The multiplicative distance update rules for matrices W and H.
+ */
+class NMF_MultiplicativeDistanceUpdate
+{
+ public:
+  // Empty constructor required for the UpdateRule template.
+  NMF_MultiplicativeDistanceUpdate() { }
+
+  /**
+   * The update rule for the basis matrix W. The formula used is
+   * \f[
+   * W_{ia} \leftarrow W_{ia} \frac{(VH^T)_{ia}}{(WHH^T)_{ia}}
+   * \f]
+   * The function takes in all the matrices and only changes the W matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be updated.
+   * @param H Encoding matrix.
+   */
+  template<typename MatType>
+  inline static void WUpdate(const MatType& V,
+                             arma::mat& W,
+                             const arma::mat& H)
+  {
+    W = (W % (V * H.t())) / (W * H * H.t());
+  }
+
+  /**
+   * The update rule for the encoding matrix H. The formula used is
+   * \f[
+   * H_{a\mu} \leftarrow H_{a\mu} \frac{(W^T V)_{a\mu}}{(W^T WH)_{a\mu}}
+   * \f]
+   * The function takes in all the matrices and only changes the
+   * value of the H matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix.
+   * @param H Encoding matrix to be updated.
+   */
+  template<typename MatType>
+  inline static void HUpdate(const MatType& V,
+                             const arma::mat& W,
+                             arma::mat& H)
+  {
+    H = (H % (W.t() * V)) / (W.t() * W * H);
+  }
+};
+
+}; // namespace lmf
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/nmf/mult_div_update_rules.hpp b/src/mlpack/methods/lmf/update_rules/nmf_mult_div.hpp
similarity index 55%
copy from src/mlpack/methods/nmf/mult_div_update_rules.hpp
copy to src/mlpack/methods/lmf/update_rules/nmf_mult_div.hpp
index ca8eb9f..73ee0d5 100644
--- a/src/mlpack/methods/nmf/mult_div_update_rules.hpp
+++ b/src/mlpack/methods/lmf/update_rules/nmf_mult_div.hpp
@@ -1,5 +1,5 @@
 /**
- * @file mult_div_update_rules.hpp
+ * @file nmf_mult_div.hpp
  * @author Mohan Rajendran
  *
  * Update rules for the Non-negative Matrix Factorization. This follows a method
@@ -9,38 +9,52 @@
  * \f$ \sum_i \sum_j (V_{ij} log\frac{V_{ij}}{(WH)_{ij}}-V_{ij}+(WH)_{ij}) \f$is
  * non-increasing between subsequent iterations. Both of the update rules
  * for W and H are defined in this file.
+ *
+ * This file is part of MLPACK 1.0.8.
+ *
+ * MLPACK is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+ * details (LICENSE.txt).
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * MLPACK.  If not, see <http://www.gnu.org/licenses/>.
  */
-#ifndef __MLPACK_METHODS_NMF_MULT_DIV_UPDATE_RULES_HPP
-#define __MLPACK_METHODS_NMF_MULT_DIV_UPDATE_RULES_HPP
+#ifndef __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIV_HPP
+#define __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIV_HPP
 
 #include <mlpack/core.hpp>
 
 namespace mlpack {
-namespace nmf {
+namespace lmf {
 
-/**
- * The update rule for the basis matrix W. The formula used is
- * \f[
- * W_{ia} \leftarrow W_{ia} \frac{\sum_{\mu} H_{a\mu} V_{i\mu}/(WH)_{i\mu}}
- * {\sum_{\nu} H_{a\nu}}
- * \f]
- */
-class WMultiplicativeDivergenceRule
+
+class NMF_MultiplicativeDivergenceUpdate
 {
  public:
   // Empty constructor required for the WUpdateRule template.
-  WMultiplicativeDivergenceRule() { }
+  NMF_MultiplicativeDivergenceUpdate() { }
 
   /**
-   * The update function that actually updates the W matrix. The function takes
-   * in all the matrices and only changes the value of the W matrix.
+   * The update rule for the basis matrix W. The formula used is
+   * \f[
+   * W_{ia} \leftarrow W_{ia} \frac{\sum_{\mu} H_{a\mu} V_{i\mu}/(WH)_{i\mu}}
+   * {\sum_{\nu} H_{a\nu}}
+   * \f]
+   * The function takes in all the matrices and only changes the 
+   * value of the W matrix.
    *
    * @param V Input matrix to be factorized.
    * @param W Basis matrix to be updated.
    * @param H Encoding matrix.
    */
   template<typename MatType>
-  inline static void Update(const MatType& V,
+  inline static void WUpdate(const MatType& V,
                             arma::mat& W,
                             const arma::mat& H)
   {
@@ -60,44 +74,29 @@ class WMultiplicativeDivergenceRule
         t2.set_size(H.n_cols);
         for (size_t k = 0; k < t2.n_elem; ++k)
         {
-          // This may produce NaNs if V(i, k) = 0.
-          // Technically the math in the paper does not define what to do in
-          // this case, but considering the basic intent of the update rules,
-          // we'll make this modification and take t2(k) = 0.0.
           t2(k) = H(j, k) * V(i, k) / t1(i, k);
-          if (t2(k) != t2(k))
-            t2(k) = 0.0;
         }
 
-        W(i, j) *= sum(t2) / sum(H.row(j));
+        W(i, j) = W(i, j) * sum(t2) / sum(H.row(j));
       }
     }
   }
-};
-
-/**
- * The update rule for the encoding matrix H. The formula used is
- * \f[
- * H_{a\mu} \leftarrow H_{a\mu} \frac{\sum_{i} W_{ia} V_{i\mu}/(WH)_{i\mu}}
- * {\sum_{k} H_{ka}}
- * \f]
- */
-class HMultiplicativeDivergenceRule
-{
- public:
-  // Empty constructor required for the HUpdateRule template.
-  HMultiplicativeDivergenceRule() { }
 
   /**
-   * The update function that actually updates the H matrix. The function takes
-   * in all the matrices and only changes the value of the H matrix.
+   * The update rule for the encoding matrix H. The formula used is
+   * \f[
+   * H_{a\mu} \leftarrow H_{a\mu} \frac{\sum_{i} W_{ia} V_{i\mu}/(WH)_{i\mu}}
+   * {\sum_{k} W_{ka}}
+   * \f]
+   * The function takes in all the matrices and only changes the value 
+   * of the H matrix.
    *
    * @param V Input matrix to be factorized.
    * @param W Basis matrix.
    * @param H Encoding matrix to updated.
    */
   template<typename MatType>
-  inline static void Update(const MatType& V,
+  inline static void HUpdate(const MatType& V,
                             const arma::mat& W,
                             arma::mat& H)
   {
@@ -117,22 +116,16 @@ class HMultiplicativeDivergenceRule
         t2.set_size(W.n_rows);
         for (size_t k = 0; k < t2.n_elem; ++k)
         {
-          // This may produce NaNs if V(i, k) = 0.
-          // Technically the math in the paper does not define what to do in
-          // this case, but considering the basic intent of the update rules,
-          // we'll make this modification and take t2(k) = 0.0.
           t2(k) = W(k, i) * V(k, j) / t1(k, j);
-          if (t2(k) != t2(k))
-            t2(k) = 0.0;
         }
 
-        H(i, j) *= sum(t2) / sum(W.col(i));
+        H(i,j) = H(i,j) * sum(t2) / sum(W.col(i));
       }
     }
   }
 };
 
-}; // namespace nmf
+}; // namespace lmf
 }; // namespace mlpack
 
 #endif



More information about the mlpack-git mailing list