[mlpack-svn] r15539 - in mlpack/trunk/src/mlpack: core/optimizers core/optimizers/als methods methods/cf

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Jul 24 03:29:57 EDT 2013


Author: muditrajgupta
Date: Wed Jul 24 03:29:56 2013
New Revision: 15539

Log:
CF framework and ALS added

Added:
   mlpack/trunk/src/mlpack/core/optimizers/als/
   mlpack/trunk/src/mlpack/core/optimizers/als/CMakeLists.txt
   mlpack/trunk/src/mlpack/core/optimizers/als/als.hpp
   mlpack/trunk/src/mlpack/core/optimizers/als/als_impl.hpp
   mlpack/trunk/src/mlpack/core/optimizers/als/als_update_rules.hpp
   mlpack/trunk/src/mlpack/core/optimizers/als/random_acol_init.hpp
   mlpack/trunk/src/mlpack/core/optimizers/als/random_init.hpp
   mlpack/trunk/src/mlpack/methods/cf/
   mlpack/trunk/src/mlpack/methods/cf/CMakeLists.txt
   mlpack/trunk/src/mlpack/methods/cf/cf.cpp
   mlpack/trunk/src/mlpack/methods/cf/cf.hpp
   mlpack/trunk/src/mlpack/methods/cf/cf_main.cpp
Modified:
   mlpack/trunk/src/mlpack/core/optimizers/CMakeLists.txt
   mlpack/trunk/src/mlpack/methods/CMakeLists.txt

Modified: mlpack/trunk/src/mlpack/core/optimizers/CMakeLists.txt
==============================================================================
--- mlpack/trunk/src/mlpack/core/optimizers/CMakeLists.txt	(original)
+++ mlpack/trunk/src/mlpack/core/optimizers/CMakeLists.txt	Wed Jul 24 03:29:56 2013
@@ -2,6 +2,7 @@
   aug_lagrangian
   lbfgs
   sgd
+  als
 )
 
 foreach(dir ${DIRS})

Added: mlpack/trunk/src/mlpack/core/optimizers/als/CMakeLists.txt
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/CMakeLists.txt	Wed Jul 24 03:29:56 2013
@@ -0,0 +1,19 @@
+# Define the files we need to compile
+# Anything not in this list will not be compiled into MLPACK.
+set(SOURCES
+  als_update_rules.hpp
+  random_init.hpp
+  random_acol_init.hpp
+  als.hpp
+  als_impl.hpp
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all MLPACK sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
+

Added: mlpack/trunk/src/mlpack/core/optimizers/als/als.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/als.hpp	Wed Jul 24 03:29:56 2013
@@ -0,0 +1,133 @@
+/**
+ * @file als.hpp
+ * @author Mohan Rajendran
+ * @author Mudit Raj Gupta
+ *
+ * Defines the ALS class for non-negative Matrix Factorization
+ * on a given sparcely populated matrix.
+ */
+#ifndef __MLPACK_CORE_OPTIMIZERS_ALS_ALS_HPP
+#define __MLPACK_CORE_OPTIMIZERS_ALS_ALS_HPP
+
+#include <mlpack/core.hpp>
+#include "als_update_rules.hpp"
+#include "random_init.hpp"
+
+namespace mlpack {
+namespace als {
+
+/**
+ * This class implements the NMF on the given matrix V. Non-negative Matrix
+ * Factorization decomposes V in the form \f$ V \approx WH \f$ where W is
+ * called the basis matrix and H is called the encoding matrix. V is taken
+ * to be of size n x m and the obtained W is n x r and H is r x m. The size r is
+ * called the rank of the factorization.
+ * 
+ * For more information on non-negative matrix factorization, see the following
+ * paper:
+ *
+ * @code
+ * @article{
+ *   title = {{Learning the parts of objects by non-negative matrix
+ *       factorization}},
+ *   author = {Lee, Daniel D. and Seung, H. Sebastian},
+ *   journal = {Nature},
+ *   month = {Oct},
+ *   year = {1999},
+ *   number = {6755},
+ *   pages = {788--791},
+ *   publisher = {Nature Publishing Group},
+ *   url = {http://dx.doi.org/10.1038/44565}
+ * }
+ * @endcode
+ *
+ * @tparam WUpdateRule The update rule for calculating W matrix at each
+ *     iteration; @see MultiplicativeDistanceW for an example.
+ * @tparam HUpdateRule The update rule for calculating H matrix at each
+ *     iteration; @see MultiplicativeDistanceH for an example.
+ */
+template<typename InitializationRule = RandomInitialization,
+         typename WUpdateRule = WAlternatingLeastSquaresRule,
+         typename HUpdateRule = HAlternatingLeastSquaresRule>
+class ALS
+{
+ public:
+  /**
+   * Create the NMF object and (optionally) set the parameters which NMF will
+   * run with.  The minimum residue refers to the root mean square of the
+   * difference between two subsequent iterations of the product W * H.  A low
+   * residue indicates that subsequent iterations are not producing much change
+   * in W and H.  Once the residue goes below the specified minimum residue, the
+   * algorithm terminates.
+   *
+   * @param maxIterations Maximum number of iterations allowed before giving up.
+   *     A value of 0 indicates no limit.
+   * @param minResidue The minimum allowed residue before the algorithm
+   *     terminates.
+   * @param Initialize Optional Initialization object for initializing the
+   *     W and H matrices
+   * @param WUpdate Optional WUpdateRule object; for when the update rule for
+   *     the W vector has states that it needs to store.
+   * @param HUpdate Optional HUpdateRule object; for when the update rule for
+   *     the H vector has states that it needs to store.
+   */
+  ALS(const size_t maxIterations = 10000,
+      const double minResidue = 1e-10,
+      const InitializationRule initializeRule = InitializationRule(),
+      const WUpdateRule wUpdate = WUpdateRule(),
+      const HUpdateRule hUpdate = HUpdateRule());
+
+  /**
+   * Apply Non-Negative Matrix Factorization to the provided matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be output.
+   * @param H Encoding matrix to output.
+   * @param r Rank r of the factorization.
+   */
+ void Apply(const arma::sp_mat& V, const size_t r, arma::mat& W, arma::mat& H)
+      const;
+
+ private:
+  //! The maximum number of iterations allowed before giving up.
+  size_t maxIterations;
+  //! The minimum residue, below which iteration is considered converged.
+  double minResidue;
+  //! Instantiated initialization Rule.
+  InitializationRule initializeRule;
+  //! Instantiated W update rule.
+  WUpdateRule wUpdate;
+  //! Instantiated H update rule.
+  HUpdateRule hUpdate;
+
+ public:
+  //! Access the maximum number of iterations.
+  size_t MaxIterations() const { return maxIterations; }
+  //! Modify the maximum number of iterations.
+  size_t& MaxIterations() { return maxIterations; }
+  //! Access the minimum residue before termination.
+  double MinResidue() const { return minResidue; }
+  //! Modify the minimum residue before termination.
+  double& MinResidue() { return minResidue; }
+  //! Access the initialization rule.
+  const InitializationRule& InitializeRule() const { return initializeRule; }
+  //! Modify the initialization rule.
+  InitializationRule& InitializeRule() { return initializeRule; }
+  //! Access the W update rule.
+  const WUpdateRule& WUpdate() const { return wUpdate; }
+  //! Modify the W update rule.
+  WUpdateRule& WUpdate() { return wUpdate; }
+  //! Access the H update rule.
+  const HUpdateRule& HUpdate() const { return hUpdate; }
+  //! Modify the H update rule.
+  HUpdateRule& HUpdate() { return hUpdate; }
+
+}; // class ALs
+
+}; // namespace nmf
+}; // namespace mlpack
+
+// Include implementation.
+#include "als_impl.hpp"
+
+#endif

Added: mlpack/trunk/src/mlpack/core/optimizers/als/als_impl.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/als_impl.hpp	Wed Jul 24 03:29:56 2013
@@ -0,0 +1,97 @@
+/**
+ * @file nmf.cpp
+ * @author Mohan Rajendran
+ * @author Mudit Raj Gupta
+ *
+ * Implementation of NMF class to perform Non-Negative Matrix Factorization
+ * on the given matrix.
+ *
+ */
+
+namespace mlpack {
+namespace als {
+
+/**
+ * Construct the NMF object.
+ */
+template<typename InitializationRule,
+         typename WUpdateRule,
+         typename HUpdateRule>
+ALS<InitializationRule, WUpdateRule, HUpdateRule>::ALS(
+    const size_t maxIterations,
+    const double minResidue,
+    const InitializationRule initializeRule,
+    const WUpdateRule wUpdate,
+    const HUpdateRule hUpdate) :
+    maxIterations(maxIterations),
+    minResidue(minResidue),
+    initializeRule(initializeRule),
+    wUpdate(wUpdate),
+    hUpdate(hUpdate)
+{
+  if (minResidue < 0.0)
+  {
+    Log::Warn << "ALS::ALS(): minResidue must be a positive value ("
+        << minResidue << " given). Setting to the default value of 1e-10.\n";
+    this->minResidue = 1e-10;
+  }
+}
+
+/**
+ * Apply Non-Negative Matrix Factorization to the provided matrix.
+ *
+ * @param V Input matrix to be factorized
+ * @param W Basis matrix to be output
+ * @param H Encoding matrix to output
+ * @param r Rank r of the factorization
+ */
+template<typename InitializationRule,
+         typename WUpdateRule,
+         typename HUpdateRule>
+void ALS<InitializationRule, WUpdateRule, HUpdateRule>::Apply(
+    const arma::sp_mat& V,
+    const size_t r,
+    arma::mat& W,
+    arma::mat& H) const
+{
+  const size_t n = V.n_rows;
+  const size_t m = V.n_cols;
+
+  // Initialize W and H.
+  initializeRule.Initialize(V, r, W, H);
+
+  Log::Info << "Initialized W and H." << std::endl;
+  size_t iteration = 1;
+  const size_t nm = n * m;
+  double residue = minResidue;
+  double normOld = 0;
+  arma::mat WH;
+  double normNew = 0;
+  while (residue >= minResidue && iteration != maxIterations)
+  {
+    // Update step.
+    // Update the value of W and H based on the Update Rules provided
+    wUpdate.Update(V, W, H);
+    hUpdate.Update(V, W, H);
+  
+    // Calculate norm of WH after each iteration.
+    WH = W * H;
+    normNew = sqrt(accu(WH % WH) / nm);
+
+    if (iteration != 0)
+    {
+      residue = fabs(normOld - normNew);
+      residue /= normOld;
+    }
+
+    normOld = normNew;
+
+    iteration++;
+  }
+
+  Log::Info << "ALS converged to residue of " << sqrt(residue) << " in "
+      << iteration << " iterations." << std::endl;
+}
+
+}; // namespace als
+}; // namespace mlpack

Added: mlpack/trunk/src/mlpack/core/optimizers/als/als_update_rules.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/als_update_rules.hpp	Wed Jul 24 03:29:56 2013
@@ -0,0 +1,101 @@
+/**
+ * @file als_update_rules.hpp
+ * @author Mohan Rajendran
+ * @author Mudit Raj Gupta
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * titled 'Alternating Least Squares' described in the paper 'Positive Matrix
+ * Factorization: A Non-negative Factor Model with Optimal Utilization of
+ * Error Estimates of Data Values' by P. Paatero and U. Tapper. It uses least
+ * squares projection formula to reduce the error value of
+ * \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ by alternately calculating W and H
+ * respectively while holding the other matrix constant.
+ */
+#ifndef __MLPACK_CORE_OPTIMIZERS_ALS_ALS_UPDATE_RULES_HPP
+#define __MLPACK_CORE_OPTIMIZERS_ALS_ALS_UPDATE_RULES_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace als {
+
+/**
+ * The update rule for the basis matrix W. The formula used is
+ * \f[
+ * W^T = \frac{HV^T}{HH^T}
+ * \f]
+ */
+class WAlternatingLeastSquaresRule
+{
+ public:
+  // Empty constructor required for the WUpdateRule template.
+  WAlternatingLeastSquaresRule() { }
+
+  /**
+   * The update function that actually updates the W matrix. The function takes
+   * in all the matrices and only changes the value of the W matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be updated.
+   * @param H Encoding matrix.
+   */
+  inline static void Update(const arma::sp_mat& V,
+                            arma::mat& W,
+                            const arma::mat& H)
+  {
+    // The call to inv() sometimes fails; so we are using the psuedoinverse.
+    // W = (inv(H * H.t()) * H * V.t()).t();
+    W = V * H.t() * pinv(H * H.t());
+
+    // Set all negative numbers to machine epsilon
+    for (size_t i = 0; i < W.n_elem; i++)
+    {
+      if (W(i) < 0.0)
+      {
+        W(i) = 0.0;
+      }
+    }
+  }
+};
+
+/**
+ * The update rule for the encoding matrix H. The formula used is
+ * \f[
+ * H = \frac{W^TV}{W^TW}
+ * \f]
+ */
+class HAlternatingLeastSquaresRule
+{
+ public:
+  // Empty constructor required for the HUpdateRule template.
+  HAlternatingLeastSquaresRule() { }
+
+  /**
+   * The update function that actually updates the H matrix. The function takes
+   * in all the matrices and only changes the value of the H matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix.
+   * @param H Encoding matrix to be updated.
+   */
+  inline static void Update(const arma::sp_mat& V,
+                            const arma::mat& W,
+                            arma::mat& H)
+  {
+    H = pinv(W.t() * W) * W.t() * V;
+
+    // Set all negative numbers to 0.
+    for (size_t i = 0; i < H.n_elem; i++)
+    {
+      if (H(i) < 0.0)
+      {
+        H(i) = 0.0;
+      }
+    }
+  }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif

Added: mlpack/trunk/src/mlpack/core/optimizers/als/random_acol_init.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/random_acol_init.hpp	Wed Jul 24 03:29:56 2013
@@ -0,0 +1,71 @@
+/**
+ * @file random_acol_init.hpp
+ * @author Mohan Rajendran
+ * @author Mudit Raj Gupta
+ *
+ * Intialization rule for Non-Negative Matrix Factorization. This simple
+ * initialization is performed by the random Acol initialization introduced in
+ * the paper 'Algorithms, Initializations and Convergence' by Langville et al.
+ * This method sets each of the columns of W by averaging p randomly chosen
+ * columns of V.
+ */
+#ifndef __MLPACK_METHODS_NMF_RANDOM_ACOL_INIT_HPP
+#define __MLPACK_METHODS_NMF_RANDOM_ACOL_INIT_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * This class initializes the W matrix of the NMF algorithm by averaging p
+ * randomly chosen columns of V.  In this case, p is a template parameter.  H is
+ * then set randomly.
+ *
+ * @tparam The number of random columns to average for each column of W.
+ */
+template<int p = 5>
+class RandomAcolInitialization
+{
+ public:
+  // Empty constructor required for the InitializeRule template
+  RandomAcolInitialization()
+  { }
+
+  inline static void Initialize(const arma::mat& V,
+                                const size_t r,
+                                arma::mat& W,
+                                arma::mat& H)
+  {
+    const size_t n = V.n_rows;
+    const size_t m = V.n_cols;
+
+    if (p > m)
+    {
+      Log::Warn << "Number of random columns is more than the number of columns"
+          << "available in the V matrix; weird results may ensue!" << std::endl;
+    }
+
+    W.zeros(n, r);
+
+    // Initialize W matrix with random columns.
+    for (size_t col = 0; col < r; col++)
+    {
+      for (size_t randCol = 0; randCol < p; randCol++)
+      {
+        W.col(col) += V.col(math::RandInt(0, m));
+      }
+    }
+
+    // Now divide by p.
+    W /= p;
+
+    // Initialize H to random values.
+    H.randu(r, m);
+  }
+}; // Class RandomAcolInitialization
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif

Added: mlpack/trunk/src/mlpack/core/optimizers/als/random_init.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/random_init.hpp	Wed Jul 24 03:29:56 2013
@@ -0,0 +1,42 @@
+/**
+ * @file randominit.hpp
+ * @author Mohan Rajendran
+ * @author Mudit Raj Gupta
+ *
+ * Intialization rule for Non-Negative Matrix Factorization (NMF). This simple
+ * initialization is performed by assigning a random matrix to W and H.
+ *
+ */
+#ifndef __MLPACK_METHODS_NMF_RANDOM_INIT_HPP
+#define __MLPACK_METHODS_NMF_RANDOM_INIT_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace als {
+
+class RandomInitialization
+{
+ public:
+  // Empty constructor required for the InitializeRule template
+  RandomInitialization() { }
+
+  inline static void Initialize(const arma::sp_mat& V,
+                                const size_t r,
+                                arma::mat& W,
+                                arma::mat& H)
+  {
+    // Simple implementation (left in the header file due to its simplicity).
+    size_t n = V.n_rows;
+    size_t m = V.n_cols;
+
+    // Intialize to random values.
+    W.randu(n, r);
+    H.randu(r, m);
+  }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif

Modified: mlpack/trunk/src/mlpack/methods/CMakeLists.txt
==============================================================================
--- mlpack/trunk/src/mlpack/methods/CMakeLists.txt	(original)
+++ mlpack/trunk/src/mlpack/methods/CMakeLists.txt	Wed Jul 24 03:29:56 2013
@@ -21,6 +21,7 @@
   range_search
   rann
   sparse_coding
+  cf
 )
 
 foreach(dir ${DIRS})

Added: mlpack/trunk/src/mlpack/methods/cf/CMakeLists.txt
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/cf/CMakeLists.txt	Wed Jul 24 03:29:56 2013
@@ -0,0 +1,23 @@
+# Define the files we need to compile
+# Anything not in this list will not be compiled into MLPACK.
+set(SOURCES
+  cf.hpp
+  cf.cpp
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all MLPACK sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
+
+add_executable(cf
+  cf_main.cpp
+)
+target_link_libraries(cf
+  mlpack
+)
+install(TARGETS cf RUNTIME DESTINATION bin)

Added: mlpack/trunk/src/mlpack/methods/cf/cf.cpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/cf/cf.cpp	Wed Jul 24 03:29:56 2013
@@ -0,0 +1,313 @@
+/**
+ * @file cf.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Collaborative Filtering.
+ * 
+ * Implementation of CF class to perform Collaborative Filtering on the 
+ * specified data set.
+ *
+ */ 
+
+#include "cf.hpp"
+
+using namespace mlpack::als;
+using namespace std;
+
+namespace mlpack {
+namespace cf {
+
+/**
+ * Construct the CF object.
+ */
+CF::CF(arma::mat& data) :
+     data(data)
+{
+  Log::Info<<"Constructor (param: input data, default: numRecs;neighbourhood)"<<endl;
+  this->numRecs = 5;  
+  this->numUsersForSimilarity = 5;
+}
+
+CF::CF(const size_t numRecs,arma::mat& data) :
+     data(data)
+{
+  // Validate number of recommendation factor.
+  if (numRecs < 1)
+  {
+    Log::Warn << "CF::CF(): number of recommendations shoud be > 0("
+        << numRecs << " given). Setting value to 5.\n";
+    //Setting Default Value of 5
+    this->numRecs = 5;
+  }
+  else
+    this->numRecs = numRecs;   
+  this->numUsersForSimilarity = 5;
+}
+
+CF::CF(const size_t numRecs, const size_t numUsersForSimilarity,
+     arma::mat& data) :
+     data(data)
+{
+  // Validate number of recommendation factor.
+  if (numRecs < 1)
+  {
+    Log::Warn << "CF::CF(): number of recommendations shoud be > 0("
+        << numRecs << " given). Setting value to 5.\n";
+    //Setting Default Value of 5
+    this->numRecs = 5;
+  }
+  else
+    this->numRecs = numRecs;  
+  // Validate neighbourhood size.
+  if (numUsersForSimilarity < 1)
+  {
+    Log::Warn << "CF::CF(): neighbourhood size shoud be > 0("
+        << numUsersForSimilarity << " given). Setting value to 5.\n";
+    //Setting Default Value of 5
+    this->numUsersForSimilarity = 5;
+  }
+  else
+    this->numUsersForSimilarity = numUsersForSimilarity;
+}
+
+void CF::GetRecommendations(arma::Mat<size_t>& recommendations, 
+                            arma::Col<size_t>& users)
+{
+  Log::Info<<"GetRecommendations (param: recommendations,users)"<<endl;
+  //Base function for calculating Recommendations
+  //Operations Independent of the query
+  CalculateAprroximateRatings();
+  //Query Dependent Operations
+  Query(recommendations,users);  
+}
+
+void CF::CalculateAprroximateRatings() 
+{
+  Log::Info<<"CalculatineApproximateRating"<<endl;
+  //Build the initial rating tables with missing values
+  //if(cleanedData.n_rows==0)
+  CleanData();
+  //Decompose the size_tiial table size_to user and item 
+  Decompose();
+  //Generate new table by multiplying approximate values
+  GenerateRating();
+}
+
+void CF::GetRecommendations(arma::Mat<size_t>& recommendations) 
+{
+  Log::Info<<"GetRecommendations (param: recommendations)"<<endl;
+  //Build the initial rating tables with missing values
+  CleanData();
+  //Used to save user IDs
+  arma::Col<size_t> users = 
+    arma::zeros<arma::Col<size_t> >(cleanedData.n_cols,1);
+  //Getting all user IDs
+  for (size_t i=0;i<cleanedData.n_cols;i++)
+    users(i) = i+1;
+  //Calling Base Function for Recommendations
+  GetRecommendations(recommendations,users);
+}
+
+void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
+                            arma::Col<size_t>& users,size_t num) 
+{
+  //Setting Number of Recommendations  
+  NumRecs(num); 
+  //Calling Base Function for Recommendations
+  GetRecommendations(recommendations,users);
+}
+
+void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
+                            arma::Col<size_t>& users,size_t num,size_t s) 
+{
+  //Setting number of users that should be used for calculating 
+  //neighbours
+  NumUsersForSimilarity(s);
+  //Setting Number of Recommendations  
+  NumRecs(num);
+  //Calling Base Function for Recommendations
+  GetRecommendations(recommendations,users,num);
+}
+
+void CF::CleanData()
+{
+  Log::Info<<"CleanData";
+  //Temporarily stores max user id 
+  double maxUserID;
+  //Temporarily stores max item id
+  double maxItemID;
+  //Calculating max users and items
+  maxUserID = data(0,0);
+  maxItemID = data(1,0);
+  for (size_t i=1;i<data.n_cols;i++)
+  {
+      if(data(0,i)>maxUserID)
+        maxUserID = data(0,i);
+      if(data(1,i)>maxItemID)
+        maxItemID = data(1,i);
+  }
+  //Temporarily stores sparcely populated rating matrix
+  arma::sp_mat tmp((size_t)maxItemID,(size_t)maxUserID);
+  //Temporarily stores mask matrix 
+  arma::mat lMask = arma::ones<arma::mat>((size_t)maxItemID,
+                                          (size_t)maxUserID);
+  //Calculates the initial User-Item table
+  for (size_t i=0;i<data.n_cols;i++)
+    tmp(data(1,i)-1,data(0,i)-1) = data(2,i);
+  //Mask
+  for (size_t i=0;i<data.n_cols;i++)
+    lMask(data(1,i)-1,data(0,i)-1) = -1.0;
+  //Storing in a global variable
+  cleanedData = tmp;
+  //Storing the mask
+  mask=lMask;
+ // data::Save("cleanedData.csv",cleanedData);
+  data::Save("mask.csv",mask);
+}
+
+void CF::Decompose() 
+{ 
+   Log::Info<<"Decompose"<<endl;
+   size_t rank = 2;
+  //Presenltly only ALS is supported as an Optimizer
+  //Should be converted to a template
+  ALS<RandomInitialization, WAlternatingLeastSquaresRule,
+      HAlternatingLeastSquaresRule> als(10000, 1e-5);
+  als.Apply(cleanedData,rank,w,h);
+  data::Save("w.csv",w);
+  data::Save("h.csv",h);
+}
+
+void CF::GenerateRating() 
+{
+  Log::Info<<"GenerateRatings"<<endl;
+  //Calculating approximate rating 
+  rating = w*h;
+  data::Save("rating.csv",rating);
+}
+
+void CF::Query(arma::Mat<size_t>& recommendations,
+               arma::Col<size_t>& users) 
+{
+  Log::Info<<"Query"<<endl;
+  //Temproraily stores feature vector of queried users
+  arma::mat query(rating.n_rows, users.n_rows);// = arma::zeros<arma::mat>(rating.n_rows, users.n_rows);
+  //Calculates the feature vector of queried users
+  CreateQuery(query,users);
+  //Temporary storage for neighbourhood of the queried users
+  arma::Mat<size_t> neighbourhood;
+  //Calculates the neighbourhood of the queried users
+  GetNeighbourhood(query,neighbourhood); 
+  //Temporary storage for storing the average rating for each 
+  //user in their neighbourhood
+  arma::mat averages = arma::zeros<arma::mat>(rating.n_rows,query.n_cols);
+  //Calculates the average values
+  CalculateAverage(neighbourhood,averages);
+  //Calculates the top recommendations
+  CalculateTopRecommendations(recommendations,averages,users);
+}
+
+void CF::CreateQuery(arma::mat& query,arma::Col<size_t>& users) const
+{
+  Log::Info<<"CreateQuery"<<endl;
+  //Selecting feature vectors of queried users 
+  for(size_t i=0;i<users.n_rows;i++)
+    for(size_t j=0;j<rating.col(i).n_rows;j++)
+      query(j,i) = rating(j,users(i)-1);
+  data::Save("query.csv",query);
+}
+
+void CF::GetNeighbourhood(arma::mat& query, 
+                         arma::Mat<size_t>& neighbourhood) 
+{
+  Log::Info<<"GetNeighbourhood"<<endl;
+  if(numUsersForSimilarity>rating.n_cols)
+  {
+    Log::Warn << "CF::GetNeighbourhood(arma::mat,armaMat<size_t>):"
+        <<"neighbourhood size should be > total number of users("
+        << rating.n_cols << " given). Setting value to number of users.\n";
+    NumUsersForSimilarity(rating.n_cols);
+  }
+  //Creating an Alknn object
+  //Should be moved to templates
+  AllkNN a(rating, query);
+  //Temproraily storing distance between neighbours
+  arma::mat resultingDistances;
+  //Building neighbourhood
+  a.Search(numUsersForSimilarity, neighbourhood, 
+           resultingDistances); 
+  data::Save("neighbourhood.csv",neighbourhood);
+}
+
+void CF::CalculateAverage(arma::Mat<size_t>& neighbourhood, 
+                      arma::mat& averages) const
+{
+  Log::Info<<"CalculateAverage"<<endl;
+  //Temprorary Storage for calculating sum
+  arma::Col<double> tmp = arma::zeros<arma::Col<double> >(rating.n_rows,1);
+  size_t j;
+  //Iterating over all users
+  for(size_t i=0;i<neighbourhood.n_cols;i++)
+  {
+    tmp = arma::zeros<arma::Col<double> >(rating.n_rows,1);
+    //Iterating over all neighbours
+    for(j=0;j<neighbourhood.n_rows;j++)
+      tmp += rating.col(neighbourhood(j,i));  
+    //Calculating averages
+    averages.col(i) = tmp/j;  
+  }
+  data::Save("averages.csv",averages);
+}
+
+void CF::CalculateTopRecommendations(arma::Mat<size_t>& recommendations, 
+                                 arma::mat& averages,
+                                 arma::Col<size_t>& users) const
+{
+  Log::Info<<"CalculateTopRecommendations"<<endl;
+  int recos = numRecs;
+  if(averages.n_cols<numRecs)
+    recos = averages.n_rows;
+  //Stores recommendations
+  arma::Mat<size_t> rec = arma::zeros<arma::Mat<size_t> >(recos,users.n_rows);
+  //Stores valid ratings for items by a user
+  arma::Col<double> tmp = arma::zeros<arma::Col<double> >(rating.n_cols,1);
+  //Maps the items to their ratings
+  std::map<double,size_t> tmpMap;
+  //Iterator for the Item-Rating Map
+  std::map<double,size_t>::reverse_iterator iter;
+  std::map<double,size_t>::iterator it;
+  //Keeps count of number of recommendations provided for a user
+  size_t count;
+  //Iterate for all users
+  for(size_t i=0;i<users.n_rows;i++)
+  { 
+    count=0;
+    //Dot product between average rating and mask to dilute the ratings
+    // of the items that user i has already rated 
+    tmp = averages.col(users(i)-1) % mask.col(users(i)-1);
+    //Mapping Rating to Items
+    for(size_t j=0;j<tmp.n_rows;j++)
+      if(tmp(j)>=0)
+        tmpMap.insert(std::pair<double,size_t>(tmp(j),j+1));
+    //Iterating over Item-Rating Map
+    for(iter=tmpMap.rbegin();iter!=tmpMap.rend();++iter)
+    { 
+       //Saving recommendations to the recommendations table
+      rec(count,i) = (size_t)iter->second;
+      count++;
+      //break is desired number of recommendations are saved
+      if(count==numRecs)
+        break;
+    }
+    //Removing the items from the map
+    //note: Item 0 is just to maintain the consistency and it 
+    //represents not recommendations were available
+    for(it=tmpMap.begin();it!=tmpMap.end();++it)
+      tmpMap.erase(it);
+  }
+  //Saving to recommendations 
+  recommendations = rec;
+}
+
+}; // namespace mlpack
+}; // namespace cf

Added: mlpack/trunk/src/mlpack/methods/cf/cf.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/cf/cf.hpp	Wed Jul 24 03:29:56 2013
@@ -0,0 +1,225 @@
+/**
+ * @file cf.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Collaborative Filtering.
+ *
+ * Defines the CF class to perform Collaborative Filtering on the
+ * specified data set.
+ *
+ */
+
+#ifndef __MLPACK_METHODS_CF_CF_HPP
+#define __MLPACK_METHODS_CF_CF_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/core/optimizers/als/als.hpp>
+#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
+#include <set>
+#include <map>
+#include <iostream>
+
+namespace mlpack {
+namespace cf /** Collaborative Filtering. */{
+
+/**
+ * This class implements Collaborative Filtering (CF). This 
+ * implementation presently supports Alternating Least Squares 
+ * for collaborative filtering. 
+ * 
+ * The template parameters can (optionally) be supplied are: the algorithm  
+ * used for CF and the neighbourhood search for user similarity.
+ *
+ * A simple example of how to run Collaborative Filtering is shown below.
+ *
+ * @code
+ * extern arma::mat data; // (user,item,rating) table
+ * extern arma::Col<size_t> users; // users seeking recommendations
+ * arma::mat recommendations; // Recommendations
+ * size_t numRecommendations = 10;  
+ *
+ * CF<> cf(data); // Default options.
+ *
+ * //Default number of Recommendations for all users 
+ * cf.GenerateRecommendations(recommendations);
+ *
+ * //Default number of Recommendations for specified users 
+ * cf.GenerateRecommendations(recommendations, users);
+ *
+ * //10 Recommendations for specified users
+ * cf.GenerateRecommendations(recommendations, users, numRecommendations);
+ *
+ * @endcode
+ * 
+ */
+
+class CF
+{ 
+ public:
+/**
+  * Create a CF object and (optionally) set the parameters which CF
+  * will be run with. 
+  *
+  * @param data Initial User,Item,Rating Matrix
+  * @param numRecs Number of Recommendations for each user.
+  * @param numUsersForSimilarity Size of the neighbourhood.
+  */
+  CF(const size_t numRecs,const size_t numUsersForSimilarity,
+     arma::mat& data);
+/**
+  * Create a CF object and (optionally) set the parameters which CF
+  * will be run with. 
+  *
+  * @param data Initial User,Item,Rating Matrix
+  * @param numRecs Number of Recommendations for each user.
+  */
+  CF(const size_t numRecs, arma::mat& data);
+/**
+  * Create a CF object and (optionally) set the parameters which CF
+  * will be run with. 
+  *
+  * @param data Initial User,Item,Rating Matrix
+  */
+  CF(arma::mat& data);
+  //! Sets number of Recommendations.
+  void NumRecs(size_t recs) 
+  { 
+    if (recs < 1)
+    {
+      Log::Warn << "CF::NumRecs(): invalid value (< 1) "
+          "ignored." << std::endl;
+      return;
+    }
+    this->numRecs = recs; 
+  }
+  //! Sets number of user for calculating similarity.
+  void NumUsersForSimilarity(size_t num) 
+  { 
+    if (num < 1)
+    {
+      Log::Warn << "CF::NumUsersForSimilarity(): invalid value (< 1) "
+          "ignored." << std::endl;
+      return;
+    }
+    this->numUsersForSimilarity = num; 
+  }
+  //! Get the User Matrix.
+  const arma::mat& W() const { return w; }
+  //! Get the Item Matrix.
+  const arma::mat& H() const { return h; }
+  //! Get the Rating Matrix.
+  const arma::mat& Rating() const { return rating; }
+
+/*
+ * Generates default number of recommendations for all users.
+ *
+ * @param recommendations Matrix to save recommendations
+ */
+  void GetRecommendations(arma::Mat<size_t>& recommendations);
+
+/*
+ * Generates default number of recommendations for specified users.
+ *
+ * @param recommendations Matrix to save recommendations
+ * @param users Users for which recommendations are to be generated
+ */   
+  void GetRecommendations(arma::Mat<size_t>& recommendations, 
+                          arma::Col<size_t>& users);
+
+/*
+ * Generates a fixed number of recommendations for specified users.
+ *
+ * @param recommendations Matrix to save recommendations
+ * @param users Users for which recommendations are to be generated
+ * @param num Number of Recommendations
+ */
+  void GetRecommendations(arma::Mat<size_t>& recommendations, 
+                          arma::Col<size_t>& users, size_t num);
+
+/*
+ * Generates a fixed number of recommendations for specified users.
+ *
+ * @param recommendations Matrix to save recommendations
+ * @param users Users for which recommendations are to be generated
+ * @param num Number of Recommendations
+ * @param neighbours Number of user to be considered while calculating     
+ *        the neighbourhood
+ */
+  void GetRecommendations(arma::Mat<size_t>& recommendations, 
+                          arma::Col<size_t>& users, size_t num, 
+                          size_t neighbours);
+
+ private:
+  //! Number of Recommendations.
+  size_t numRecs;
+  //! Number of User for Similariy.
+  size_t numUsersForSimilarity;
+  //! User Matrix.
+  arma::mat w;
+  //! Item Matrix.
+  arma::mat h;
+  //! Rating Martix.
+  arma::mat rating;
+  //! Masking Matrix
+  arma::mat mask;
+  //! Initial Data Matrix.
+  arma::mat data;
+  //! Cleaned Data Matrix.
+  arma::sp_mat cleanedData;
+  //!Calculates a rating matrix with available data and approximations
+  void CalculateAprroximateRatings();
+  //!Converts the User, Item, Value Matrix to User-Item Table
+  void CleanData();
+  //!Decomposes the cleanedData size_to user and item matrices
+  void Decompose();
+  //!Create ratings from user and item matrices
+  void GenerateRating();
+/*
+ * Queries the obtained rating matrix.
+ *
+ * @param recommendations Matrix to save recommendations
+ * @param users Users for which recommendations are to be generated
+ */  
+  void Query(arma::Mat<size_t>& recommendations,arma::Col<size_t>& users);
+/*
+ * Selects item preferences of the users 
+ *
+ * @param query Matrix to store the item preference of the user.
+ * @param users Users for which recommendations are to be generated
+ */
+  void CreateQuery(arma::mat& query,arma::Col<size_t>& users) const;
+/*
+ * Generates the neighbourhood of users. 
+ *
+ * @param query Matrix to store the item preference of the user.
+ * @param modifiedRating Matrix to store the Modified Matix.
+ * @param neighbourhood Matrix to store user neighbourhood.
+ */
+  void GetNeighbourhood(arma::mat& query,
+                        arma::Mat<size_t>& neighbourhood);
+/*
+ * Calculates the Average rating users would have given to 
+ * unrated items based on their similarity with other users. 
+ *
+ * @param neighbourhood Matrix to store user neighbourhood.
+ * @param averages stores the average rating for each item.
+ */
+  void CalculateAverage(arma::Mat<size_t>& neighbourhood, 
+                        arma::mat& averages) const;
+/*
+ * Calculates the top recommendations given average rating 
+ * for each user.
+ *
+ * @param neighbourhood Matrix to store user neighbourhood.
+ * @param averages stores the average rating for each item.
+ */ 
+  void CalculateTopRecommendations(arma::Mat<size_t>& recommendations, 
+                                   arma::mat& averages,
+                                   arma::Col<size_t>& users) const;
+
+}; // class CF
+
+}; // namespace cf
+}; // namespace mlpack
+
+#endif

Added: mlpack/trunk/src/mlpack/methods/cf/cf_main.cpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/cf/cf_main.cpp	Wed Jul 24 03:29:56 2013
@@ -0,0 +1,71 @@
+/**
+ * @file cf_main.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Main executable to run CF.
+ *
+ */ 
+
+#include <mlpack/core.hpp>
+#include "cf.hpp"
+
+using namespace mlpack;
+using namespace mlpack::cf;
+using namespace std;
+
+// Document program.
+PROGRAM_INFO("Collaborating Filtering", "This program performs Collaborative "
+    "filtering(cf) on the given dataset. Given a list of user, item and "
+    "preferences the program output is a set of recommendations for users."
+    " Optionally, the users to be queried can be specified. The program also"
+    " provides the flexibility to select number of recommendations for each"
+    " user and also the neighbourhood. User, Item and Rating matrices can also"
+    " be extracted. Variable parameters include algorithm for performing " 
+    "cf, algorithm parameters and similarity measures to give recommendations");
+
+// Parameters for program.
+PARAM_STRING_REQ("input_file", "Input dataset to perform CF on.", "i");
+PARAM_STRING("output_file","Recommendations.", "o", "recommendations.csv");
+PARAM_STRING("algorithm", "Algorithm used for cf (als/svd).", "a","als");
+PARAM_STRING("ratings_file", "File to save ratings.", "R","ratings.csv");
+PARAM_STRING("user_file", "File to save the calculated User matrix to.", 
+             "U","user.csv");
+PARAM_STRING("item_file", "File to save the calculated Item matrix to.", 
+             "I","item.csv");
+PARAM_STRING("nearest_neighbour_algorithm", "Similarity Measure to be used "
+             "for generating recommendations", "s","knn");
+PARAM_STRING("query_file", "List of user for which recommendations are to "
+             "be generated (If unspecified then all)", "q","query.csv");
+PARAM_INT("number_of_Recommendations", "Number of Recommendations for each "
+          "user in query", "r",5);
+PARAM_INT("neighbourhood", "Size of the neighbourhood for all "
+          "user in query", "n",5);
+
+int main(int argc, char** argv)
+{
+  //Parse Command Line
+  CLI::ParseCommandLine(argc, argv);
+ 
+  //Read from the input file
+  string inputFile = CLI::GetParam<string>("input_file");
+  arma::mat dataset;
+  data::Load(inputFile.c_str(), dataset);
+    
+  //Recommendation matrix.
+  arma::Mat<size_t> recommendations; 
+   
+  //User Matrix
+  arma::Col<size_t> users;  
+
+  //Reading Users
+  string userf = CLI::GetParam<string>("query_file");
+  data::Load(userf.c_str(),users);
+
+  //Calculating Recommendations
+  CF c(dataset);
+
+  Log::Info << "Performing CF on dataset..." << endl;
+  c.GetRecommendations(recommendations,users);
+  string outputFile = CLI::GetParam<string>("output_file");
+  data::Save(outputFile, recommendations);
+}



More information about the mlpack-svn mailing list