[mlpack-git] master, mlpack-1.0.x: Patch from Siddharth: make the rank parameterizable, and merge the constructors into one. (ce48c53)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:44:40 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit ce48c53adcbe87512c6d144d52f5fdc46f824e00
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sat Feb 22 15:55:19 2014 +0000

    Patch from Siddharth: make the rank parameterizable, and merge the constructors
    into one.


>---------------------------------------------------------------

ce48c53adcbe87512c6d144d52f5fdc46f824e00
 src/mlpack/methods/cf/cf.cpp | 66 +++++++++++++++++---------------------------
 src/mlpack/methods/cf/cf.hpp | 56 +++++++++++++++++++------------------
 2 files changed, 54 insertions(+), 68 deletions(-)

diff --git a/src/mlpack/methods/cf/cf.cpp b/src/mlpack/methods/cf/cf.cpp
index b9a6ea5..d3248f1 100644
--- a/src/mlpack/methods/cf/cf.cpp
+++ b/src/mlpack/methods/cf/cf.cpp
@@ -21,58 +21,32 @@ namespace cf {
 /**
  * Construct the CF object.
  */
-CF::CF(arma::mat& data) :
-     data(data)
-{
-  Log::Info<<"Constructor (param: input data, default: numRecs;neighbourhood)"<<endl;
-  this->numRecs = 5;
-  this->numUsersForSimilarity = 5;
-
-  CleanData();
-}
-
-CF::CF(const size_t numRecs,arma::mat& data) :
-     data(data)
+CF::CF(arma::mat& data,
+       const size_t numRecs,
+       const size_t numUsersForSimilarity,
+       const size_t rank) :
+    data(data),
+    numRecs(numRecs),
+    numUsersForSimilarity(numUsersForSimilarity),
+    rank(rank)
 {
   // Validate number of recommendation factor.
   if (numRecs < 1)
   {
-    Log::Warn << "CF::CF(): number of recommendations shoud be > 0("
+    Log::Warn << "CF::CF(): number of recommendations should be > 0("
         << numRecs << " given). Setting value to 5.\n";
     //Setting Default Value of 5
     this->numRecs = 5;
   }
-  else
-    this->numRecs = numRecs;
-  this->numUsersForSimilarity = 5;
 
-  CleanData();
-}
-
-CF::CF(const size_t numRecs, const size_t numUsersForSimilarity,
-     arma::mat& data) :
-     data(data)
-{
-  // Validate number of recommendation factor.
-  if (numRecs < 1)
-  {
-    Log::Warn << "CF::CF(): number of recommendations shoud be > 0("
-        << numRecs << " given). Setting value to 5.\n";
-    //Setting Default Value of 5
-    this->numRecs = 5;
-  }
-  else
-    this->numRecs = numRecs;
   // Validate neighbourhood size.
   if (numUsersForSimilarity < 1)
   {
-    Log::Warn << "CF::CF(): neighbourhood size shoud be > 0("
+    Log::Warn << "CF::CF(): neighbourhood size should be > 0("
         << numUsersForSimilarity << " given). Setting value to 5.\n";
     //Setting Default Value of 5
     this->numUsersForSimilarity = 5;
   }
-  else
-    this->numUsersForSimilarity = numUsersForSimilarity;
 
   CleanData();
 }
@@ -95,13 +69,23 @@ void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
 {
   // Base function for calculating recommendations.
 
+  // Check if the user wanted us to choose a rank for them.
+  if (rank == 0)
+  {
+    // This is a simple heuristic that picks a rank between 5 and 105, based on
+    // the density of the dataset.
+    const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
+    const size_t rankEstimate = size_t(density) + 5;
+
+    // Set to heuristic value.
+    Log::Info << "No rank given for decomposition; using rank of "
+        << rankEstimate << " calculated by density-based heuristic."
+        << std::endl;
+    rank = rankEstimate;
+  }
+
   // Operations independent of the query:
   // Decompose the sparse data matrix to user and data matrices.
-  // This is a simple heuristic that picks a rank based on the density of the
-  // dataset between 5 and 105.
-  const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
-  size_t rank = size_t(density) + 5;
-
   // Presently only ALS (via NMF) is supported as an optimizer.  This should be
   // converted to a template when more optimizers are available.
   NMF<RandomInitialization, WAlternatingLeastSquaresRule,
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index f244363..798306e 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -55,35 +55,23 @@ class CF
 {
  public:
   /**
-   * Create a CF object and (optionally) set the parameters with which
-   * collaborative filtering will be run.
+   * Initialize the CF object. Store a reference to the data that we
+   * will be using. There are parameters that can be set; default values
+   * are provided for each of them.  If the rank is left unset (or is set to 0),
+   * a simple density-based heuristic will be used to choose a rank.
    *
-   * @param data Initial (user,item,rating) matrix.
+   * @param data Initial (user, item, rating) matrix.
    * @param numRecs Desired number of recommendations for each user.
    * @param numUsersForSimilarity Size of the neighborhood.
+   * @param rank Rank parameter for matrix factorization.
    */
-  CF(const size_t numRecs,const size_t numUsersForSimilarity,
-     arma::mat& data);
-
-  /**
-   * Create a CF object and (optionally) set the parameters which CF
-   * will be run with.
-   *
-   * @param data Initial User,Item,Rating Matrix
-   * @param numRecs Number of Recommendations for each user.
-   */
-  CF(const size_t numRecs, arma::mat& data);
-
-  /**
-   * Create a CF object and (optionally) set the parameters which CF
-   * will be run with.
-   *
-   * @param data Initial User,Item,Rating Matrix
-   */
-  CF(arma::mat& data);
+  CF(arma::mat& data,
+     const size_t numRecs = 5,
+     const size_t numUsersForSimilarity = 5,
+     const size_t rank = 0);
 
   //! Sets number of Recommendations.
-  void NumRecs(size_t recs)
+  void NumRecs(const size_t recs)
   {
     if (recs < 1)
     {
@@ -101,7 +89,7 @@ class CF
   }
 
   //! Sets number of user for calculating similarity.
-  void NumUsersForSimilarity(size_t num)
+  void NumUsersForSimilarity(const size_t num)
   {
     if (num < 1)
     {
@@ -112,12 +100,24 @@ class CF
     this->numUsersForSimilarity = num;
   }
 
-  //! Gets number of users for calculating similarity/
+  //! Gets number of users for calculating similarity.
   size_t NumUsersForSimilarity()
   {
     return numUsersForSimilarity;
   }
 
+  //! Sets rank parameter for matrix factorization.
+  void Rank(const size_t rankValue)
+  {
+    this->rank = rankValue;
+  }
+
+  //! Gets rank parameter for matrix factorization.
+  size_t Rank()
+  {
+    return rank;
+  }
+
   //! Get the User Matrix.
   const arma::mat& W() const { return w; }
   //! Get the Item Matrix.
@@ -174,18 +174,20 @@ class CF
   std::string ToString() const;
 
  private:
+  //! Initial data matrix.
+  arma::mat data;
   //! Number of recommendations.
   size_t numRecs;
   //! Number of users for similarity.
   size_t numUsersForSimilarity;
+  //! Rank used for matrix factorization.
+  size_t rank;
   //! User matrix.
   arma::mat w;
   //! Item matrix.
   arma::mat h;
   //! Rating matrix.
   arma::mat rating;
-  //! Initial data matrix.
-  arma::mat data;
   //! Cleaned data matrix.
   arma::sp_mat cleanedData;
   //! Converts the User, Item, Value Matrix to User-Item Table



More information about the mlpack-git mailing list