[mlpack-git] master, mlpack-1.0.x: Patch from Siddharth: templatize CF to accept arbitrary types of factorizers. (efda5e0)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:44:59 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit efda5e046343e11daa809c32bf8117e27a3c393b
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sun Mar 2 04:39:49 2014 +0000

    Patch from Siddharth: templatize CF to accept arbitrary types of factorizers.


>---------------------------------------------------------------

efda5e046343e11daa809c32bf8117e27a3c393b
 src/mlpack/methods/cf/CMakeLists.txt          |  2 +-
 src/mlpack/methods/cf/cf.hpp                  | 38 ++++++++++++++----
 src/mlpack/methods/cf/{cf.cpp => cf_impl.hpp} | 58 ++++++++++++++++-----------
 src/mlpack/methods/cf/cf_main.cpp             |  2 +-
 src/mlpack/tests/cf_test.cpp                  |  8 ++--
 src/mlpack/tests/to_string_test.cpp           |  2 +-
 6 files changed, 72 insertions(+), 38 deletions(-)

diff --git a/src/mlpack/methods/cf/CMakeLists.txt b/src/mlpack/methods/cf/CMakeLists.txt
index af6824f..6413af4 100644
--- a/src/mlpack/methods/cf/CMakeLists.txt
+++ b/src/mlpack/methods/cf/CMakeLists.txt
@@ -2,7 +2,7 @@
 # Anything not in this list will not be compiled into MLPACK.
 set(SOURCES
   cf.hpp
-  cf.cpp
+  cf_impl.hpp
 )
 
 # Add directory name to sources.
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 798306e..38d1550 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -12,10 +12,14 @@
 
 #include <mlpack/core.hpp>
 #include <mlpack/methods/neighbor_search/neighbor_search.hpp>
+#include <mlpack/methods/nmf/nmf.hpp>
+#include <mlpack/methods/nmf/als_update_rules.hpp>
 #include <set>
 #include <map>
 #include <iostream>
 
+using namespace mlpack::nmf;
+
 namespace mlpack {
 namespace cf /** Collaborative filtering. */{
 
@@ -32,7 +36,7 @@ namespace cf /** Collaborative filtering. */{
  * arma::Mat<size_t> recommendations; // Recommendations
  * size_t numRecommendations = 10;
  *
- * CF cf(data); // Default options.
+ * CF<> cf(data); // Default options.
  *
  * // Generate the default number of recommendations for all users.
  * cf.GetRecommendations(recommendations);
@@ -49,8 +53,17 @@ namespace cf /** Collaborative filtering. */{
  * should have three rows.  The first represents the user; the second represents
  * the item; and the third represents the rating.  The user and item, while they
  * are in a matrix that holds doubles, should hold integer (or size_t) values.
- * The user and item indices are assumed to be starting from 0.
+ * The user and item indices are assumed to start at 0.
+ *
+ * @tparam FactorizerType The type of matrix factorization to use to decompose
+ *     the rating matrix (a W and H matrix).  This must implement the method
+ *     Apply(arma::sp_mat& data, size_t rank, arma::mat& W, arma::mat& H).
  */
+template<
+    typename FactorizerType = NMF<RandomInitialization,
+                                  WAlternatingLeastSquaresRule,
+                                  HAlternatingLeastSquaresRule>
+>
 class CF
 {
  public:
@@ -82,13 +95,13 @@ class CF
     this->numRecs = recs;
   }
 
-  //! Gets numRecs
-  size_t NumRecs()
+  //! Gets the number of recommendations.
+  size_t NumRecs() const
   {
     return numRecs;
   }
 
-  //! Sets number of user for calculating similarity.
+  //! Sets number of users for calculating similarity.
   void NumUsersForSimilarity(const size_t num)
   {
     if (num < 1)
@@ -101,7 +114,7 @@ class CF
   }
 
   //! Gets number of users for calculating similarity.
-  size_t NumUsersForSimilarity()
+  size_t NumUsersForSimilarity() const
   {
     return numUsersForSimilarity;
   }
@@ -113,11 +126,17 @@ class CF
   }
 
   //! Gets rank parameter for matrix factorization.
-  size_t Rank()
+  size_t Rank() const
   {
     return rank;
   }
 
+  //! Sets the factorizer object used for matrix factorization.
+  void Factorizer(const FactorizerType& f)
+  {
+    this->factorizer = f;
+  }
+
   //! Get the User Matrix.
   const arma::mat& W() const { return w; }
   //! Get the Item Matrix.
@@ -182,6 +201,8 @@ class CF
   size_t numUsersForSimilarity;
   //! Rank used for matrix factorization.
   size_t rank;
+  //! Instantiated factorizer object.
+  FactorizerType factorizer;
   //! User matrix.
   arma::mat w;
   //! Item matrix.
@@ -214,4 +235,7 @@ class CF
 }; // namespace cf
 }; // namespace mlpack
 
+//Include implementation
+#include "cf_impl.hpp"
+
 #endif
diff --git a/src/mlpack/methods/cf/cf.cpp b/src/mlpack/methods/cf/cf_impl.hpp
similarity index 81%
rename from src/mlpack/methods/cf/cf.cpp
rename to src/mlpack/methods/cf/cf_impl.hpp
index 2ee382d..89d6c37 100644
--- a/src/mlpack/methods/cf/cf.cpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -22,10 +22,11 @@ namespace cf {
 /**
  * Construct the CF object.
  */
-CF::CF(arma::mat& data,
-       const size_t numRecs,
-       const size_t numUsersForSimilarity,
-       const size_t rank) :
+template<typename FactorizerType>
+CF<FactorizerType>::CF(arma::mat& data,
+                      const size_t numRecs,
+                      const size_t numUsersForSimilarity,
+                      const size_t rank) :
     data(data),
     numRecs(numRecs),
     numUsersForSimilarity(numUsersForSimilarity),
@@ -49,10 +50,15 @@ CF::CF(arma::mat& data,
     this->numUsersForSimilarity = 5;
   }
 
+  //Set default factorizer
+  FactorizerType f(10000, 1e-5);
+  Factorizer(f);
+
   CleanData();
 }
 
-void CF::GetRecommendations(arma::Mat<size_t>& recommendations)
+template<typename FactorizerType>
+void CF<FactorizerType>::GetRecommendations(arma::Mat<size_t>& recommendations)
 {
   // Used to save user IDs.
   arma::Col<size_t> users =
@@ -65,8 +71,9 @@ void CF::GetRecommendations(arma::Mat<size_t>& recommendations)
   GetRecommendations(recommendations, users);
 }
 
-void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
-                            arma::Col<size_t>& users)
+template<typename FactorizerType>
+void CF<FactorizerType>::GetRecommendations(arma::Mat<size_t>& recommendations,
+                                            arma::Col<size_t>& users)
 {
   // Base function for calculating recommendations.
 
@@ -87,11 +94,8 @@ void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
 
   // Operations independent of the query:
   // Decompose the sparse data matrix to user and data matrices.
-  // Presently only ALS (via NMF) is supported as an optimizer.  This should be
-  // converted to a template when more optimizers are available.
-  NMF<RandomInitialization, WAlternatingLeastSquaresRule,
-      HAlternatingLeastSquaresRule> als(10000, 1e-5);
-  als.Apply(cleanedData, rank, w, h);
+  // The factorizer is a template parameter; ALS (via NMF) is the default.
+  factorizer.Apply(cleanedData, rank, w, h);
 
   // Generate new table by multiplying approximate values.
   rating = w * h;
@@ -171,8 +175,9 @@ void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
   }
 }
 
-void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
-                            arma::Col<size_t>& users,size_t num)
+template<typename FactorizerType>
+void CF<FactorizerType>::GetRecommendations(arma::Mat<size_t>& recommendations,
+                                            arma::Col<size_t>& users,size_t num)
 {
   //Setting Number of Recommendations
   NumRecs(num);
@@ -180,8 +185,10 @@ void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
   GetRecommendations(recommendations,users);
 }
 
-void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
-                            arma::Col<size_t>& users,size_t num,size_t s)
+template<typename FactorizerType>
+void CF<FactorizerType>::GetRecommendations(arma::Mat<size_t>& recommendations,
+                                            arma::Col<size_t>& users,size_t num,
+                                            size_t s)
 {
   //Setting number of users that should be used for calculating
   //neighbours
@@ -192,7 +199,8 @@ void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
   GetRecommendations(recommendations,users,num);
 }
 
-void CF::CleanData()
+template<typename FactorizerType>
+void CF<FactorizerType>::CleanData()
 {
   // Generate list of locations for batch insert constructor for sparse
   // matrices.
@@ -222,12 +230,13 @@ void CF::CleanData()
  * @param neighbor Index of item being inserted as a recommendation.
  * @param value Value of recommendation.
  */
-void CF::InsertNeighbor(const size_t queryIndex,
-                        const size_t pos,
-                        const size_t neighbor,
-                        const double value,
-                        arma::Mat<size_t>& recommendations,
-                        arma::mat& values) const
+template<typename FactorizerType>
+void CF<FactorizerType>::InsertNeighbor(const size_t queryIndex,
+                                        const size_t pos,
+                                        const size_t neighbor,
+                                        const double value,
+                                        arma::Mat<size_t>& recommendations,
+                                        arma::mat& values) const
 {
   // We only memmove() if there is actually a need to shift something.
   if (pos < (recommendations.n_rows - 1))
@@ -247,7 +256,8 @@ void CF::InsertNeighbor(const size_t queryIndex,
 }
 
 // Return string of object.
-std::string CF::ToString() const
+template<typename FactorizerType>
+std::string CF<FactorizerType>::ToString() const
 {
   std::ostringstream convert;
   convert << "Collaborative Filtering [" << this << "]" << std::endl;
diff --git a/src/mlpack/methods/cf/cf_main.cpp b/src/mlpack/methods/cf/cf_main.cpp
index ac18182..4c001cf 100644
--- a/src/mlpack/methods/cf/cf_main.cpp
+++ b/src/mlpack/methods/cf/cf_main.cpp
@@ -70,7 +70,7 @@ int main(int argc, char** argv)
 
   // Perform decomposition to prepare for recommendations.
   Log::Info << "Performing CF matrix decomposition on dataset..." << endl;
-  CF c(dataset);
+  CF<> c(dataset);
   c.NumRecs(numRecs);
   c.NumUsersForSimilarity(neighborhood);
 
diff --git a/src/mlpack/tests/cf_test.cpp b/src/mlpack/tests/cf_test.cpp
index 58289b8..b9cfcd5 100644
--- a/src/mlpack/tests/cf_test.cpp
+++ b/src/mlpack/tests/cf_test.cpp
@@ -33,7 +33,7 @@ BOOST_AUTO_TEST_CASE(CFConstructorTest)
   // Number of users for similarity (not the default).
   const size_t numUsersForSimilarity = 8;
 
-  CF c(dataset, numRecs, numUsersForSimilarity);
+  CF<> c(dataset, numRecs, numUsersForSimilarity);
 
   // Check parameters.
   BOOST_REQUIRE_EQUAL(c.NumRecs(), numRecs);
@@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsAllUsersTest)
   data::Load("GroupLens100k.csv", dataset);
 
   // Creat a CF object
-  CF c(dataset);
+  CF<> c(dataset);
 
   // Set number of recommendations.
   c.NumRecs(numRecs);
@@ -106,7 +106,7 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsQueriedUserTest)
   arma::mat dataset;
   data::Load("GroupLens100k.csv", dataset);
 
-  CF c(dataset);
+  CF<> c(dataset);
 
   // Generate recommendations when query set is specified.
   c.GetRecommendations(recommendations, users);
@@ -161,7 +161,7 @@ BOOST_AUTO_TEST_CASE(RecommendationAccuracyTest)
   }
 
   // Now create the CF object.
-  CF c(dataset);
+  CF<> c(dataset);
 
   // Obtain 150 recommendations for the users in savedCols, and make sure the
   // missing item shows up in most of them.  First, create the list of users,
diff --git a/src/mlpack/tests/to_string_test.cpp b/src/mlpack/tests/to_string_test.cpp
index 269e6e1..1a61574 100644
--- a/src/mlpack/tests/to_string_test.cpp
+++ b/src/mlpack/tests/to_string_test.cpp
@@ -292,7 +292,7 @@ BOOST_AUTO_TEST_CASE(CFString)
   c(0, 2) = 1;
   c(1, 2) = 3;
   c(2, 2) = 0.7;
-  mlpack::cf::CF d(c, a, a);
+  mlpack::cf::CF<> d(c, a, a);
   Log::Debug << d;
   std::string s = d.ToString();
   BOOST_REQUIRE_NE(s, "");



More information about the mlpack-git mailing list