[mlpack-svn] r16329 - mlpack/trunk/src/mlpack/methods/cf

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Sat Feb 22 10:55:19 EST 2014


Author: rcurtin
Date: Sat Feb 22 10:55:19 2014
New Revision: 16329

Log:
Patch from Siddharth: make the rank parameterizable, and merge the constructors
into one.


Modified:
   mlpack/trunk/src/mlpack/methods/cf/cf.cpp
   mlpack/trunk/src/mlpack/methods/cf/cf.hpp

Modified: mlpack/trunk/src/mlpack/methods/cf/cf.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/cf/cf.cpp	(original)
+++ mlpack/trunk/src/mlpack/methods/cf/cf.cpp	Sat Feb 22 10:55:19 2014
@@ -21,58 +21,32 @@
 /**
  * Construct the CF object.
  */
-CF::CF(arma::mat& data) :
-     data(data)
-{
-  Log::Info<<"Constructor (param: input data, default: numRecs;neighbourhood)"<<endl;
-  this->numRecs = 5;
-  this->numUsersForSimilarity = 5;
-
-  CleanData();
-}
-
-CF::CF(const size_t numRecs,arma::mat& data) :
-     data(data)
+CF::CF(arma::mat& data,
+       const size_t numRecs,
+       const size_t numUsersForSimilarity,
+       const size_t rank) :
+    data(data),
+    numRecs(numRecs),
+    numUsersForSimilarity(numUsersForSimilarity),
+    rank(rank)
 {
   // Validate number of recommendation factor.
   if (numRecs < 1)
   {
-    Log::Warn << "CF::CF(): number of recommendations shoud be > 0("
+    Log::Warn << "CF::CF(): number of recommendations should be > 0("
         << numRecs << " given). Setting value to 5.\n";
     //Setting Default Value of 5
     this->numRecs = 5;
   }
-  else
-    this->numRecs = numRecs;
-  this->numUsersForSimilarity = 5;
 
-  CleanData();
-}
-
-CF::CF(const size_t numRecs, const size_t numUsersForSimilarity,
-     arma::mat& data) :
-     data(data)
-{
-  // Validate number of recommendation factor.
-  if (numRecs < 1)
-  {
-    Log::Warn << "CF::CF(): number of recommendations shoud be > 0("
-        << numRecs << " given). Setting value to 5.\n";
-    //Setting Default Value of 5
-    this->numRecs = 5;
-  }
-  else
-    this->numRecs = numRecs;
   // Validate neighbourhood size.
   if (numUsersForSimilarity < 1)
   {
-    Log::Warn << "CF::CF(): neighbourhood size shoud be > 0("
+    Log::Warn << "CF::CF(): neighbourhood size should be > 0("
         << numUsersForSimilarity << " given). Setting value to 5.\n";
     //Setting Default Value of 5
     this->numUsersForSimilarity = 5;
   }
-  else
-    this->numUsersForSimilarity = numUsersForSimilarity;
 
   CleanData();
 }
@@ -95,13 +69,23 @@
 {
   // Base function for calculating recommendations.
 
+  // Check if the user wanted us to choose a rank for them.
+  if (rank == 0)
+  {
+    // This is a simple heuristic that picks a rank between 5 and 105 based on
+    // the density of the dataset.
+    const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
+    const size_t rankEstimate = size_t(density) + 5;
+
+    // Set to heuristic value.
+    Log::Info << "No rank given for decomposition; using rank of "
+        << rankEstimate << " calculated by density-based heuristic."
+        << std::endl;
+    rank = rankEstimate;
+  }
+
   // Operations independent of the query:
   // Decompose the sparse data matrix to user and data matrices.
-  // This is a simple heuristic that picks a rank based on the density of the
-  // dataset between 5 and 105.
-  const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
-  size_t rank = size_t(density) + 5;
-
   // Presently only ALS (via NMF) is supported as an optimizer.  This should be
   // converted to a template when more optimizers are available.
   NMF<RandomInitialization, WAlternatingLeastSquaresRule,

Modified: mlpack/trunk/src/mlpack/methods/cf/cf.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/cf/cf.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/cf/cf.hpp	Sat Feb 22 10:55:19 2014
@@ -55,35 +55,23 @@
 {
  public:
   /**
-   * Create a CF object and (optionally) set the parameters with which
-   * collaborative filtering will be run.
+   * Initialize the CF object. Store a copy of the data that we
+   * will be using. There are parameters that can be set; default values
+   * are provided for each of them.  If the rank is left unset (or is set to 0),
+   * a simple density-based heuristic will be used to choose a rank.
    *
-   * @param data Initial (user,item,rating) matrix.
+   * @param data Initial (user, item, rating) matrix.
    * @param numRecs Desired number of recommendations for each user.
    * @param numUsersForSimilarity Size of the neighborhood.
+   * @param rank Rank parameter for matrix factorization.
    */
-  CF(const size_t numRecs,const size_t numUsersForSimilarity,
-     arma::mat& data);
-
-  /**
-   * Create a CF object and (optionally) set the parameters which CF
-   * will be run with.
-   *
-   * @param data Initial User,Item,Rating Matrix
-   * @param numRecs Number of Recommendations for each user.
-   */
-  CF(const size_t numRecs, arma::mat& data);
-
-  /**
-   * Create a CF object and (optionally) set the parameters which CF
-   * will be run with.
-   *
-   * @param data Initial User,Item,Rating Matrix
-   */
-  CF(arma::mat& data);
+  CF(arma::mat& data,
+     const size_t numRecs = 5,
+     const size_t numUsersForSimilarity = 5,
+     const size_t rank = 0);
 
   //! Sets number of Recommendations.
-  void NumRecs(size_t recs)
+  void NumRecs(const size_t recs)
   {
     if (recs < 1)
     {
@@ -101,7 +89,7 @@
   }
 
   //! Sets number of user for calculating similarity.
-  void NumUsersForSimilarity(size_t num)
+  void NumUsersForSimilarity(const size_t num)
   {
     if (num < 1)
     {
@@ -112,12 +100,24 @@
     this->numUsersForSimilarity = num;
   }
 
-  //! Gets number of users for calculating similarity/
+  //! Gets number of users for calculating similarity.
   size_t NumUsersForSimilarity()
   {
     return numUsersForSimilarity;
   }
 
+  //! Sets rank parameter for matrix factorization.
+  void Rank(const size_t rankValue)
+  {
+    this->rank = rankValue;
+  }
+
+  //! Gets rank parameter for matrix factorization.
+  size_t Rank()
+  {
+    return rank;
+  }
+
   //! Get the User Matrix.
   const arma::mat& W() const { return w; }
   //! Get the Item Matrix.
@@ -174,18 +174,20 @@
   std::string ToString() const;
 
  private:
+  //! Initial data matrix.
+  arma::mat data;
   //! Number of recommendations.
   size_t numRecs;
   //! Number of users for similarity.
   size_t numUsersForSimilarity;
+  //! Rank used for matrix factorization.
+  size_t rank;
   //! User matrix.
   arma::mat w;
   //! Item matrix.
   arma::mat h;
   //! Rating matrix.
   arma::mat rating;
-  //! Initial data matrix.
-  arma::mat data;
   //! Cleaned data matrix.
   arma::sp_mat cleanedData;
   //! Converts the User, Item, Value Matrix to User-Item Table



More information about the mlpack-svn mailing list