[mlpack-svn] r17059 - in mlpack/trunk/src/mlpack: methods/cf tests

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Sun Aug 17 17:58:27 EDT 2014


Author: sumedhghaisas
Date: Sun Aug 17 17:58:26 2014
New Revision: 17059

Log:
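* Added NMF, SVDBatch, SVDIncompleteIncremental and SVDCompleteIncremental as selectable factorizers in the CF executable, chosen through the new "algorithm" option (default: NMF). The CF constructor that took no factorizer was removed; the remaining constructor now takes the factorizer by value with a default-constructed default.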


Modified:
   mlpack/trunk/src/mlpack/methods/cf/cf.hpp
   mlpack/trunk/src/mlpack/methods/cf/cf_impl.hpp
   mlpack/trunk/src/mlpack/methods/cf/cf_main.cpp
   mlpack/trunk/src/mlpack/methods/cf/svd_wrapper.hpp
   mlpack/trunk/src/mlpack/tests/to_string_test.cpp

Modified: mlpack/trunk/src/mlpack/methods/cf/cf.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/cf/cf.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/cf/cf.hpp	Sun Aug 17 17:58:26 2014
@@ -79,20 +79,6 @@
 {
  public:
   /**
-   * Initialize the CF object. Store a reference to the data that we
-   * will be using. There are parameters that can be set; default values
-   * are provided for each of them.  If the rank is left unset (or is set to 0),
-   * a simple density-based heuristic will be used to choose a rank.
-   *
-   * @param data Initial (user, item, rating) matrix.
-   * @param numUsersForSimilarity Size of the neighborhood.
-   * @param rank Rank parameter for matrix factorization.
-   */
-  CF(arma::mat& data,
-     const size_t numUsersForSimilarity = 5,
-     const size_t rank = 0);
-  
-  /**
    * Initialize the CF object using an instantiated factorizer. Store a
    * reference to the data that we will be using. There are parameters that can
    * be set; default values are provided for each of them. If the rank is left
@@ -105,7 +91,7 @@
    * @param rank Rank parameter for matrix factorization.
    */
   CF(arma::mat& data,
-     FactorizerType& factorizer,
+     FactorizerType factorizer = FactorizerType(),
      const size_t numUsersForSimilarity = 5,
      const size_t rank = 0);
    

Modified: mlpack/trunk/src/mlpack/methods/cf/cf_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/cf/cf_impl.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/cf/cf_impl.hpp	Sun Aug 17 17:58:26 2014
@@ -29,6 +29,7 @@
         FactorizerTraits<FactorizerType>::UsesCoordinateList == false,
         int*>::type = 0)
 {
+  (void)data;
   factorizer.Apply(cleanedData, rank, w, h);
 }
 
@@ -48,57 +49,16 @@
         FactorizerTraits<FactorizerType>::UsesCoordinateList == true,
         int*>::type = 0)
 {
+  (void)cleanedData;
   factorizer.Apply(data, rank, w, h);
 }
 
 /**
- * Construct the CF object.
- */
-template<typename FactorizerType>
-CF<FactorizerType>::CF(arma::mat& data,
-                       const size_t numUsersForSimilarity,
-                       const size_t rank) :
-    numUsersForSimilarity(numUsersForSimilarity),
-    rank(rank),
-    factorizer()
-{
-  // Validate neighbourhood size.
-  if(numUsersForSimilarity < 1)
-  {
-    Log::Warn << "CF::CF(): neighbourhood size should be > 0("
-        << numUsersForSimilarity << " given). Setting value to 5.\n";
-    //Setting Default Value of 5
-    this->numUsersForSimilarity = 5;
-  }
-
-  CleanData(data);
-
-  // Check if the user wanted us to choose a rank for them.
-  if(rank == 0)
-  {
-    // This is a simple heuristic that picks a rank based on the density of the
-    // dataset between 5 and 105.
-    const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
-    const size_t rankEstimate = size_t(density) + 5;
-
-    // Set to heuristic value.
-    Log::Info << "No rank given for decomposition; using rank of "
-        << rankEstimate << " calculated by density-based heuristic."
-        << std::endl;
-    this->rank = rankEstimate;
-  }
-
-  // Operations independent of the query:
-  // Decompose the sparse data matrix to user and data matrices.
-  ApplyFactorizer<FactorizerType>(data, cleanedData, factorizer, this->rank, w, h);
-}
-
-/**
  * Construct the CF object using an instantiated factorizer.
  */
 template<typename FactorizerType>
 CF<FactorizerType>::CF(arma::mat& data,
-                       FactorizerType& factorizer,
+                       FactorizerType factorizer,
                        const size_t numUsersForSimilarity,
                        const size_t rank) :
     numUsersForSimilarity(numUsersForSimilarity),

Modified: mlpack/trunk/src/mlpack/methods/cf/cf_main.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/cf/cf_main.cpp	(original)
+++ mlpack/trunk/src/mlpack/methods/cf/cf_main.cpp	Sun Aug 17 17:58:26 2014
@@ -6,10 +6,13 @@
  */
 
 #include <mlpack/core.hpp>
+
+#include <mlpack/methods/amf/amf.hpp>
 #include "cf.hpp"
 
 using namespace mlpack;
 using namespace mlpack::cf;
+using namespace mlpack::amf;
 using namespace std;
 
 // Document program.
@@ -40,11 +43,8 @@
 PARAM_STRING("output_file","File to save output recommendations to.", "o",
     "recommendations.csv");
 
-// These features are not yet available in the CF code.
-//PARAM_STRING("algorithm", "Algorithm used for CF ('als' or 'svd').", "a",
-//    "als");
-//PARAM_STRING("nearest_neighbor_algorithm", "Similarity search procedure to "
-//    "be used for generating recommendations.", "s", "knn");
+PARAM_STRING("algorithm", "Algorithm used for matrix factorization.", "a",
+    "NMF");
 
 PARAM_INT("recommendations", "Number of recommendations to generate for each "
     "query user.", "r", 5);
@@ -53,29 +53,15 @@
 
 PARAM_INT("rank", "Rank of decomposed matrices.", "R", 2);
 
-int main(int argc, char** argv)
+template<typename Factorizer>
+void ComputeRecommendations(Factorizer factorizer,
+                            arma::mat& dataset,
+                            const size_t numRecs,
+                            const size_t neighbourhood,
+                            const size_t rank,
+                            arma::Mat<size_t>& recommendations)
 {
-  // Parse command line options.
-  CLI::ParseCommandLine(argc, argv);
-
-  // Read from the input file.
-  const string inputFile = CLI::GetParam<string>("input_file");
-  arma::mat dataset;
-  data::Load(inputFile, dataset, true);
-
-  // Recommendation matrix.
-  arma::Mat<size_t> recommendations;
-
-  // Get parameters.
-  const size_t numRecs = (size_t) CLI::GetParam<int>("recommendations");
-  const size_t neighborhood = (size_t) CLI::GetParam<int>("neighborhood");
-  const size_t rank = (size_t) CLI::GetParam<int>("rank");
-
-  // Perform decomposition to prepare for recommendations.
-  Log::Info << "Performing CF matrix decomposition on dataset..." << endl;
-  CF<> c(dataset);
-  c.NumUsersForSimilarity(neighborhood);
-  c.Rank(rank);
+  CF<Factorizer> c(dataset, factorizer, neighbourhood, rank);
 
   // Reading users.
   const string queryFile = CLI::GetParam<string>("query_file");
@@ -96,6 +82,41 @@
     Log::Info << "Generating recommendations for all users." << endl;
     c.GetRecommendations(numRecs, recommendations);
   }
+}
+                            
+#define CR(x) ComputeRecommendations(x, dataset, numRecs, neighborhood, rank, recommendations)
+
+int main(int argc, char** argv)
+{
+  // Parse command line options.
+  CLI::ParseCommandLine(argc, argv);
+
+  // Read from the input file.
+  const string inputFile = CLI::GetParam<string>("input_file");
+  arma::mat dataset;
+  data::Load(inputFile, dataset, true);
+
+  // Recommendation matrix.
+  arma::Mat<size_t> recommendations;
+
+  // Get parameters.
+  const size_t numRecs = (size_t) CLI::GetParam<int>("recommendations");
+  const size_t neighborhood = (size_t) CLI::GetParam<int>("neighborhood");
+  const size_t rank = (size_t) CLI::GetParam<int>("rank");
+
+  // Perform decomposition to prepare for recommendations.
+  Log::Info << "Performing CF matrix decomposition on dataset..." << endl;
+  
+  const string algo = CLI::GetParam<string>("algorithm");
+  
+  if(algo == "NMF") 
+    CR(NMFALSFactorizer());  
+  else if(algo == "SVDBatch") 
+    CR(SparseSVDBatchFactorizer());
+  else if(algo == "SVDIncompleteIncremental") 
+    CR(SparseSVDIncompleteIncrementalFactorizer());
+  else if(algo == "SVDCompleteIncremental")
+    CR(SparseSVDCompleteIncrementalFactorizer());                 
 
   const string outputFile = CLI::GetParam<string>("output_file");
   data::Save(outputFile, recommendations);

Modified: mlpack/trunk/src/mlpack/methods/cf/svd_wrapper.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/cf/svd_wrapper.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/cf/svd_wrapper.hpp	Sun Aug 17 17:58:26 2014
@@ -74,6 +74,9 @@
   Factorizer factorizer;
 }; // class SVDWrapper
 
+//! add simple typedefs
+typedef SVDWrapper<DummyClass> ArmaSVDFactorizer;
+
 //! include the implementation
 #include "svd_wrapper_impl.hpp"
 

Modified: mlpack/trunk/src/mlpack/tests/to_string_test.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/tests/to_string_test.cpp	(original)
+++ mlpack/trunk/src/mlpack/tests/to_string_test.cpp	Sun Aug 17 17:58:26 2014
@@ -281,7 +281,6 @@
 
 BOOST_AUTO_TEST_CASE(CFString)
 {
-  size_t a = 1 ;
   arma::mat c(3, 3);
   c(0, 0) = 1;
   c(1, 0) = 2;
@@ -292,7 +291,7 @@
   c(0, 2) = 1;
   c(1, 2) = 3;
   c(2, 2) = 0.7;
-  mlpack::cf::CF<> d(c, a, a);
+  mlpack::cf::CF<> d(c);
   Log::Debug << d;
   std::string s = d.ToString();
   BOOST_REQUIRE_NE(s, "");



More information about the mlpack-svn mailing list