[mlpack-git] master: * added NMF, SVDBatch, SVDIncompleteIncremental and SVDCompleteIncremental to CF executable (ff1c084)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:58:29 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit ff1c08472ae15583d14036bb9ddde79333107294
Author: sumedhghaisas <sumedhghaisas at gmail.com>
Date:   Sun Aug 17 21:58:26 2014 +0000

    * added NMF, SVDBatch, SVDIncompleteIncremental and SVDCompleteIncremental to CF executable


>---------------------------------------------------------------

ff1c08472ae15583d14036bb9ddde79333107294
 src/mlpack/methods/cf/cf.hpp          | 16 +-------
 src/mlpack/methods/cf/cf_impl.hpp     | 46 ++--------------------
 src/mlpack/methods/cf/cf_main.cpp     | 73 ++++++++++++++++++++++-------------
 src/mlpack/methods/cf/svd_wrapper.hpp |  3 ++
 src/mlpack/tests/to_string_test.cpp   |  3 +-
 5 files changed, 55 insertions(+), 86 deletions(-)

diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 0312ccb..6f6b7b1 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -79,20 +79,6 @@ class CF
 {
  public:
   /**
-   * Initialize the CF object. Store a reference to the data that we
-   * will be using. There are parameters that can be set; default values
-   * are provided for each of them.  If the rank is left unset (or is set to 0),
-   * a simple density-based heuristic will be used to choose a rank.
-   *
-   * @param data Initial (user, item, rating) matrix.
-   * @param numUsersForSimilarity Size of the neighborhood.
-   * @param rank Rank parameter for matrix factorization.
-   */
-  CF(arma::mat& data,
-     const size_t numUsersForSimilarity = 5,
-     const size_t rank = 0);
-  
-  /**
    * Initialize the CF object using an instantiated factorizer. Store a
    * reference to the data that we will be using. There are parameters that can
    * be set; default values are provided for each of them. If the rank is left
@@ -105,7 +91,7 @@ class CF
    * @param rank Rank parameter for matrix factorization.
    */
   CF(arma::mat& data,
-     FactorizerType& factorizer,
+     FactorizerType factorizer = FactorizerType(),
      const size_t numUsersForSimilarity = 5,
      const size_t rank = 0);
    
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index c455018..53b0a05 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -29,6 +29,7 @@ void ApplyFactorizer(arma::mat& data,
         FactorizerTraits<FactorizerType>::UsesCoordinateList == false,
         int*>::type = 0)
 {
+  (void)data;
   factorizer.Apply(cleanedData, rank, w, h);
 }
 
@@ -48,57 +49,16 @@ void ApplyFactorizer(arma::mat& data,
         FactorizerTraits<FactorizerType>::UsesCoordinateList == true,
         int*>::type = 0)
 {
+  (void)cleanedData;
   factorizer.Apply(data, rank, w, h);
 }
 
 /**
- * Construct the CF object.
- */
-template<typename FactorizerType>
-CF<FactorizerType>::CF(arma::mat& data,
-                       const size_t numUsersForSimilarity,
-                       const size_t rank) :
-    numUsersForSimilarity(numUsersForSimilarity),
-    rank(rank),
-    factorizer()
-{
-  // Validate neighbourhood size.
-  if(numUsersForSimilarity < 1)
-  {
-    Log::Warn << "CF::CF(): neighbourhood size should be > 0("
-        << numUsersForSimilarity << " given). Setting value to 5.\n";
-    //Setting Default Value of 5
-    this->numUsersForSimilarity = 5;
-  }
-
-  CleanData(data);
-
-  // Check if the user wanted us to choose a rank for them.
-  if(rank == 0)
-  {
-    // This is a simple heuristic that picks a rank based on the density of the
-    // dataset between 5 and 105.
-    const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
-    const size_t rankEstimate = size_t(density) + 5;
-
-    // Set to heuristic value.
-    Log::Info << "No rank given for decomposition; using rank of "
-        << rankEstimate << " calculated by density-based heuristic."
-        << std::endl;
-    this->rank = rankEstimate;
-  }
-
-  // Operations independent of the query:
-  // Decompose the sparse data matrix to user and data matrices.
-  ApplyFactorizer<FactorizerType>(data, cleanedData, factorizer, this->rank, w, h);
-}
-
-/**
  * Construct the CF object using an instantiated factorizer.
  */
 template<typename FactorizerType>
 CF<FactorizerType>::CF(arma::mat& data,
-                       FactorizerType& factorizer,
+                       FactorizerType factorizer,
                        const size_t numUsersForSimilarity,
                        const size_t rank) :
     numUsersForSimilarity(numUsersForSimilarity),
diff --git a/src/mlpack/methods/cf/cf_main.cpp b/src/mlpack/methods/cf/cf_main.cpp
index 7c873d4..7998c79 100644
--- a/src/mlpack/methods/cf/cf_main.cpp
+++ b/src/mlpack/methods/cf/cf_main.cpp
@@ -6,10 +6,13 @@
  */
 
 #include <mlpack/core.hpp>
+
+#include <mlpack/methods/amf/amf.hpp>
 #include "cf.hpp"
 
 using namespace mlpack;
 using namespace mlpack::cf;
+using namespace mlpack::amf;
 using namespace std;
 
 // Document program.
@@ -40,11 +43,8 @@ PARAM_STRING("query_file", "List of users for which recommendations are to "
 PARAM_STRING("output_file","File to save output recommendations to.", "o",
     "recommendations.csv");
 
-// These features are not yet available in the CF code.
-//PARAM_STRING("algorithm", "Algorithm used for CF ('als' or 'svd').", "a",
-//    "als");
-//PARAM_STRING("nearest_neighbor_algorithm", "Similarity search procedure to "
-//    "be used for generating recommendations.", "s", "knn");
+PARAM_STRING("algorithm", "Algorithm used for matrix factorization.", "a",
+    "NMF");
 
 PARAM_INT("recommendations", "Number of recommendations to generate for each "
     "query user.", "r", 5);
@@ -53,6 +53,39 @@ PARAM_INT("neighborhood", "Size of the neighborhood of similar users to "
 
 PARAM_INT("rank", "Rank of decomposed matrices.", "R", 2);
 
+template<typename Factorizer>
+void ComputeRecommendations(Factorizer factorizer,
+                            arma::mat& dataset,
+                            const size_t numRecs,
+                            const size_t neighbourhood,
+                            const size_t rank,
+                            arma::Mat<size_t>& recommendations)
+{
+  CF<Factorizer> c(dataset, factorizer, neighbourhood, rank);
+
+  // Reading users.
+  const string queryFile = CLI::GetParam<string>("query_file");
+  if (queryFile != "")
+  {
+    // User matrix.
+    arma::Mat<size_t> userTmp;
+    arma::Col<size_t> users;
+    data::Load(queryFile, userTmp, true, false /* Don't transpose. */);
+    users = userTmp.col(0);
+
+    Log::Info << "Generating recommendations for " << users.n_elem << " users "
+        << "in '" << queryFile << "'." << endl;
+    c.GetRecommendations(numRecs, recommendations, users);
+  }
+  else
+  {
+    Log::Info << "Generating recommendations for all users." << endl;
+    c.GetRecommendations(numRecs, recommendations);
+  }
+}
+                            
+#define CR(x) ComputeRecommendations(x, dataset, numRecs, neighborhood, rank, recommendations)
+
 int main(int argc, char** argv)
 {
   // Parse command line options.
@@ -73,29 +106,17 @@ int main(int argc, char** argv)
 
   // Perform decomposition to prepare for recommendations.
   Log::Info << "Performing CF matrix decomposition on dataset..." << endl;
-  CF<> c(dataset);
-  c.NumUsersForSimilarity(neighborhood);
-  c.Rank(rank);
   
-  // Reading users.
-  const string queryFile = CLI::GetParam<string>("query_file");
-  if (queryFile != "")
-  {
-    // User matrix.
-    arma::Mat<size_t> userTmp;
-    arma::Col<size_t> users;
-    data::Load(queryFile, userTmp, true, false /* Don't transpose. */);
-    users = userTmp.col(0);
+  const string algo = CLI::GetParam<string>("algorithm");
   
-    Log::Info << "Generating recommendations for " << users.n_elem << " users "
-        << "in '" << queryFile << "'." << endl;
-    c.GetRecommendations(numRecs, recommendations, users);
-  }
-  else
-  {
-    Log::Info << "Generating recommendations for all users." << endl;
-    c.GetRecommendations(numRecs, recommendations);
-  }
+  if(algo == "NMF") 
+    CR(NMFALSFactorizer());  
+  else if(algo == "SVDBatch") 
+    CR(SparseSVDBatchFactorizer());
+  else if(algo == "SVDIncompleteIncremental") 
+    CR(SparseSVDIncompleteIncrementalFactorizer());
+  else if(algo == "SVDCompleteIncremental")
+    CR(SparseSVDCompleteIncrementalFactorizer());                 
 
   const string outputFile = CLI::GetParam<string>("output_file");
   data::Save(outputFile, recommendations);
diff --git a/src/mlpack/methods/cf/svd_wrapper.hpp b/src/mlpack/methods/cf/svd_wrapper.hpp
index 3835e89..89b6d5a 100644
--- a/src/mlpack/methods/cf/svd_wrapper.hpp
+++ b/src/mlpack/methods/cf/svd_wrapper.hpp
@@ -74,6 +74,9 @@ class SVDWrapper
   Factorizer factorizer;
 }; // class SVDWrapper
 
+//! add simple typedefs
+typedef SVDWrapper<DummyClass> ArmaSVDFactorizer;
+
 //! include the implementation
 #include "svd_wrapper_impl.hpp"
 
diff --git a/src/mlpack/tests/to_string_test.cpp b/src/mlpack/tests/to_string_test.cpp
index 0b86d18..d06ccd4 100644
--- a/src/mlpack/tests/to_string_test.cpp
+++ b/src/mlpack/tests/to_string_test.cpp
@@ -281,7 +281,6 @@ BOOST_AUTO_TEST_CASE(MRKDString)
 
 BOOST_AUTO_TEST_CASE(CFString)
 {
-  size_t a = 1 ;
   arma::mat c(3, 3);
   c(0, 0) = 1;
   c(1, 0) = 2;
@@ -292,7 +291,7 @@ BOOST_AUTO_TEST_CASE(CFString)
   c(0, 2) = 1;
   c(1, 2) = 3;
   c(2, 2) = 0.7;
-  mlpack::cf::CF<> d(c, a, a);
+  mlpack::cf::CF<> d(c);
   Log::Debug << d;
   std::string s = d.ToString();
   BOOST_REQUIRE_NE(s, "");



More information about the mlpack-git mailing list