[mlpack-svn] r15539 - in mlpack/trunk/src/mlpack: core/optimizers core/optimizers/als methods methods/cf
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Jul 24 03:29:57 EDT 2013
Author: muditrajgupta
Date: Wed Jul 24 03:29:56 2013
New Revision: 15539
Log:
CF framework and ALS added
Added:
mlpack/trunk/src/mlpack/core/optimizers/als/
mlpack/trunk/src/mlpack/core/optimizers/als/CMakeLists.txt
mlpack/trunk/src/mlpack/core/optimizers/als/als.hpp
mlpack/trunk/src/mlpack/core/optimizers/als/als_impl.hpp
mlpack/trunk/src/mlpack/core/optimizers/als/als_update_rules.hpp
mlpack/trunk/src/mlpack/core/optimizers/als/random_acol_init.hpp
mlpack/trunk/src/mlpack/core/optimizers/als/random_init.hpp
mlpack/trunk/src/mlpack/methods/cf/
mlpack/trunk/src/mlpack/methods/cf/CMakeLists.txt
mlpack/trunk/src/mlpack/methods/cf/cf.cpp
mlpack/trunk/src/mlpack/methods/cf/cf.hpp
mlpack/trunk/src/mlpack/methods/cf/cf_main.cpp
Modified:
mlpack/trunk/src/mlpack/core/optimizers/CMakeLists.txt
mlpack/trunk/src/mlpack/methods/CMakeLists.txt
Modified: mlpack/trunk/src/mlpack/core/optimizers/CMakeLists.txt
==============================================================================
--- mlpack/trunk/src/mlpack/core/optimizers/CMakeLists.txt (original)
+++ mlpack/trunk/src/mlpack/core/optimizers/CMakeLists.txt Wed Jul 24 03:29:56 2013
@@ -2,6 +2,7 @@
aug_lagrangian
lbfgs
sgd
+ als
)
foreach(dir ${DIRS})
Added: mlpack/trunk/src/mlpack/core/optimizers/als/CMakeLists.txt
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/CMakeLists.txt Wed Jul 24 03:29:56 2013
@@ -0,0 +1,19 @@
+# Define the files we need to compile
+# Anything not in this list will not be compiled into MLPACK.
+set(SOURCES
+ als_update_rules.hpp
+ random_init.hpp
+ random_acol_init.hpp
+ als.hpp
+ als_impl.hpp
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+ set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all MLPACK sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
+
Added: mlpack/trunk/src/mlpack/core/optimizers/als/als.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/als.hpp Wed Jul 24 03:29:56 2013
@@ -0,0 +1,133 @@
+/**
+ * @file als.hpp
+ * @author Mohan Rajendran
+ * @author Mudit Raj Gupta
+ *
+ * Defines the ALS class for non-negative Matrix Factorization
+ * on a given sparcely populated matrix.
+ */
+#ifndef __MLPACK_CORE_OPTIMIZERS_ALS_ALS_HPP
+#define __MLPACK_CORE_OPTIMIZERS_ALS_ALS_HPP
+
+#include <mlpack/core.hpp>
+#include "als_update_rules.hpp"
+#include "random_init.hpp"
+
+namespace mlpack {
+namespace als {
+
+/**
+ * This class implements the NMF on the given matrix V. Non-negative Matrix
+ * Factorization decomposes V in the form \f$ V \approx WH \f$ where W is
+ * called the basis matrix and H is called the encoding matrix. V is taken
+ * to be of size n x m and the obtained W is n x r and H is r x m. The size r is
+ * called the rank of the factorization.
+ *
+ * For more information on non-negative matrix factorization, see the following
+ * paper:
+ *
+ * @code
+ * @article{
+ * title = {{Learning the parts of objects by non-negative matrix
+ * factorization}},
+ * author = {Lee, Daniel D. and Seung, H. Sebastian},
+ * journal = {Nature},
+ * month = {Oct},
+ * year = {1999},
+ * number = {6755},
+ * pages = {788--791},
+ * publisher = {Nature Publishing Group},
+ * url = {http://dx.doi.org/10.1038/44565}
+ * }
+ * @endcode
+ *
+ * @tparam WUpdateRule The update rule for calculating W matrix at each
+ * iteration; @see MultiplicativeDistanceW for an example.
+ * @tparam HUpdateRule The update rule for calculating H matrix at each
+ * iteration; @see MultiplicativeDistanceH for an example.
+ */
+template<typename InitializationRule = RandomInitialization,
+ typename WUpdateRule = WAlternatingLeastSquaresRule,
+ typename HUpdateRule = HAlternatingLeastSquaresRule>
+class ALS
+{
+ public:
+ /**
+ * Create the NMF object and (optionally) set the parameters which NMF will
+ * run with. The minimum residue refers to the root mean square of the
+ * difference between two subsequent iterations of the product W * H. A low
+ * residue indicates that subsequent iterations are not producing much change
+ * in W and H. Once the residue goes below the specified minimum residue, the
+ * algorithm terminates.
+ *
+ * @param maxIterations Maximum number of iterations allowed before giving up.
+ * A value of 0 indicates no limit.
+ * @param minResidue The minimum allowed residue before the algorithm
+ * terminates.
+ * @param Initialize Optional Initialization object for initializing the
+ * W and H matrices
+ * @param WUpdate Optional WUpdateRule object; for when the update rule for
+ * the W vector has states that it needs to store.
+ * @param HUpdate Optional HUpdateRule object; for when the update rule for
+ * the H vector has states that it needs to store.
+ */
+ ALS(const size_t maxIterations = 10000,
+ const double minResidue = 1e-10,
+ const InitializationRule initializeRule = InitializationRule(),
+ const WUpdateRule wUpdate = WUpdateRule(),
+ const HUpdateRule hUpdate = HUpdateRule());
+
+ /**
+ * Apply Non-Negative Matrix Factorization to the provided matrix.
+ *
+ * @param V Input matrix to be factorized.
+ * @param W Basis matrix to be output.
+ * @param H Encoding matrix to output.
+ * @param r Rank r of the factorization.
+ */
+ void Apply(const arma::sp_mat& V, const size_t r, arma::mat& W, arma::mat& H)
+ const;
+
+ private:
+ //! The maximum number of iterations allowed before giving up.
+ size_t maxIterations;
+ //! The minimum residue, below which iteration is considered converged.
+ double minResidue;
+ //! Instantiated initialization Rule.
+ InitializationRule initializeRule;
+ //! Instantiated W update rule.
+ WUpdateRule wUpdate;
+ //! Instantiated H update rule.
+ HUpdateRule hUpdate;
+
+ public:
+ //! Access the maximum number of iterations.
+ size_t MaxIterations() const { return maxIterations; }
+ //! Modify the maximum number of iterations.
+ size_t& MaxIterations() { return maxIterations; }
+ //! Access the minimum residue before termination.
+ double MinResidue() const { return minResidue; }
+ //! Modify the minimum residue before termination.
+ double& MinResidue() { return minResidue; }
+ //! Access the initialization rule.
+ const InitializationRule& InitializeRule() const { return initializeRule; }
+ //! Modify the initialization rule.
+ InitializationRule& InitializeRule() { return initializeRule; }
+ //! Access the W update rule.
+ const WUpdateRule& WUpdate() const { return wUpdate; }
+ //! Modify the W update rule.
+ WUpdateRule& WUpdate() { return wUpdate; }
+ //! Access the H update rule.
+ const HUpdateRule& HUpdate() const { return hUpdate; }
+ //! Modify the H update rule.
+ HUpdateRule& HUpdate() { return hUpdate; }
+
+}; // class ALs
+
+}; // namespace nmf
+}; // namespace mlpack
+
+// Include implementation.
+#include "als_impl.hpp"
+
+#endif
Added: mlpack/trunk/src/mlpack/core/optimizers/als/als_impl.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/als_impl.hpp Wed Jul 24 03:29:56 2013
@@ -0,0 +1,97 @@
+/**
+ * @file nmf.cpp
+ * @author Mohan Rajendran
+ * @author Mudit Raj Gupta
+ *
+ * Implementation of NMF class to perform Non-Negative Matrix Factorization
+ * on the given matrix.
+ *
+ */
+
+namespace mlpack {
+namespace als {
+
+/**
+ * Construct the NMF object.
+ */
+template<typename InitializationRule,
+ typename WUpdateRule,
+ typename HUpdateRule>
+ALS<InitializationRule, WUpdateRule, HUpdateRule>::ALS(
+ const size_t maxIterations,
+ const double minResidue,
+ const InitializationRule initializeRule,
+ const WUpdateRule wUpdate,
+ const HUpdateRule hUpdate) :
+ maxIterations(maxIterations),
+ minResidue(minResidue),
+ initializeRule(initializeRule),
+ wUpdate(wUpdate),
+ hUpdate(hUpdate)
+{
+ if (minResidue < 0.0)
+ {
+ Log::Warn << "ALS::ALS(): minResidue must be a positive value ("
+ << minResidue << " given). Setting to the default value of 1e-10.\n";
+ this->minResidue = 1e-10;
+ }
+}
+
+/**
+ * Apply Non-Negative Matrix Factorization to the provided matrix.
+ *
+ * @param V Input matrix to be factorized
+ * @param W Basis matrix to be output
+ * @param H Encoding matrix to output
+ * @param r Rank r of the factorization
+ */
+template<typename InitializationRule,
+ typename WUpdateRule,
+ typename HUpdateRule>
+void ALS<InitializationRule, WUpdateRule, HUpdateRule>::Apply(
+ const arma::sp_mat& V,
+ const size_t r,
+ arma::mat& W,
+ arma::mat& H) const
+{
+ const size_t n = V.n_rows;
+ const size_t m = V.n_cols;
+
+ // Initialize W and H.
+ initializeRule.Initialize(V, r, W, H);
+
+ Log::Info << "Initialized W and H." << std::endl;
+ size_t iteration = 1;
+ const size_t nm = n * m;
+ double residue = minResidue;
+ double normOld = 0;
+ arma::mat WH;
+ double normNew = 0;
+ while (residue >= minResidue && iteration != maxIterations)
+ {
+ // Update step.
+ // Update the value of W and H based on the Update Rules provided
+ wUpdate.Update(V, W, H);
+ hUpdate.Update(V, W, H);
+
+ // Calculate norm of WH after each iteration.
+ WH = W * H;
+ normNew = sqrt(accu(WH % WH) / nm);
+
+ if (iteration != 0)
+ {
+ residue = fabs(normOld - normNew);
+ residue /= normOld;
+ }
+
+ normOld = normNew;
+
+ iteration++;
+ }
+
+ Log::Info << "ALS converged to residue of " << sqrt(residue) << " in "
+ << iteration << " iterations." << std::endl;
+}
+
+}; // namespace als
+}; // namespace mlpack
Added: mlpack/trunk/src/mlpack/core/optimizers/als/als_update_rules.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/als_update_rules.hpp Wed Jul 24 03:29:56 2013
@@ -0,0 +1,101 @@
+/**
+ * @file als_update_rules.hpp
+ * @author Mohan Rajendran
+ * @author Mudit Raj Gupta
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * titled 'Alternating Least Squares' described in the paper 'Positive Matrix
+ * Factorization: A Non-negative Factor Model with Optimal Utilization of
+ * Error Estimates of Data Values' by P. Paatero and U. Tapper. It uses least
+ * squares projection formula to reduce the error value of
+ * \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ by alternately calculating W and H
+ * respectively while holding the other matrix constant.
+ */
+#ifndef __MLPACK_CORE_OPTIMIZERS_ALS_ALS_UPDATE_RULES_HPP
+#define __MLPACK_CORE_OPTIMIZERS_ALS_ALS_UPDATE_RULES_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace als {
+
+/**
+ * The update rule for the basis matrix W. The formula used is
+ * \f[
+ * W^T = \frac{HV^T}{HH^T}
+ * \f]
+ */
+class WAlternatingLeastSquaresRule
+{
+ public:
+ // Empty constructor required for the WUpdateRule template.
+ WAlternatingLeastSquaresRule() { }
+
+ /**
+ * The update function that actually updates the W matrix. The function takes
+ * in all the matrices and only changes the value of the W matrix.
+ *
+ * @param V Input matrix to be factorized.
+ * @param W Basis matrix to be updated.
+ * @param H Encoding matrix.
+ */
+ inline static void Update(const arma::sp_mat& V,
+ arma::mat& W,
+ const arma::mat& H)
+ {
+ // The call to inv() sometimes fails; so we are using the psuedoinverse.
+ // W = (inv(H * H.t()) * H * V.t()).t();
+ W = V * H.t() * pinv(H * H.t());
+
+ // Set all negative numbers to machine epsilon
+ for (size_t i = 0; i < W.n_elem; i++)
+ {
+ if (W(i) < 0.0)
+ {
+ W(i) = 0.0;
+ }
+ }
+ }
+};
+
+/**
+ * The update rule for the encoding matrix H. The formula used is
+ * \f[
+ * H = \frac{W^TV}{W^TW}
+ * \f]
+ */
+class HAlternatingLeastSquaresRule
+{
+ public:
+ // Empty constructor required for the HUpdateRule template.
+ HAlternatingLeastSquaresRule() { }
+
+ /**
+ * The update function that actually updates the H matrix. The function takes
+ * in all the matrices and only changes the value of the H matrix.
+ *
+ * @param V Input matrix to be factorized.
+ * @param W Basis matrix.
+ * @param H Encoding matrix to be updated.
+ */
+ inline static void Update(const arma::sp_mat& V,
+ const arma::mat& W,
+ arma::mat& H)
+ {
+ H = pinv(W.t() * W) * W.t() * V;
+
+ // Set all negative numbers to 0.
+ for (size_t i = 0; i < H.n_elem; i++)
+ {
+ if (H(i) < 0.0)
+ {
+ H(i) = 0.0;
+ }
+ }
+ }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif
Added: mlpack/trunk/src/mlpack/core/optimizers/als/random_acol_init.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/random_acol_init.hpp Wed Jul 24 03:29:56 2013
@@ -0,0 +1,71 @@
+/**
+ * @file random_acol_init.hpp
+ * @author Mohan Rajendran
+ * @author Mudit Raj Gupta
+ *
+ * Intialization rule for Non-Negative Matrix Factorization. This simple
+ * initialization is performed by the random Acol initialization introduced in
+ * the paper 'Algorithms, Initializations and Convergence' by Langville et al.
+ * This method sets each of the columns of W by averaging p randomly chosen
+ * columns of V.
+ */
+#ifndef __MLPACK_METHODS_NMF_RANDOM_ACOL_INIT_HPP
+#define __MLPACK_METHODS_NMF_RANDOM_ACOL_INIT_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace nmf {
+
+/**
+ * This class initializes the W matrix of the NMF algorithm by averaging p
+ * randomly chosen columns of V. In this case, p is a template parameter. H is
+ * then set randomly.
+ *
+ * @tparam The number of random columns to average for each column of W.
+ */
+template<int p = 5>
+class RandomAcolInitialization
+{
+ public:
+ // Empty constructor required for the InitializeRule template
+ RandomAcolInitialization()
+ { }
+
+ inline static void Initialize(const arma::mat& V,
+ const size_t r,
+ arma::mat& W,
+ arma::mat& H)
+ {
+ const size_t n = V.n_rows;
+ const size_t m = V.n_cols;
+
+ if (p > m)
+ {
+ Log::Warn << "Number of random columns is more than the number of columns"
+ << "available in the V matrix; weird results may ensue!" << std::endl;
+ }
+
+ W.zeros(n, r);
+
+ // Initialize W matrix with random columns.
+ for (size_t col = 0; col < r; col++)
+ {
+ for (size_t randCol = 0; randCol < p; randCol++)
+ {
+ W.col(col) += V.col(math::RandInt(0, m));
+ }
+ }
+
+ // Now divide by p.
+ W /= p;
+
+ // Initialize H to random values.
+ H.randu(r, m);
+ }
+}; // Class RandomAcolInitialization
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif
Added: mlpack/trunk/src/mlpack/core/optimizers/als/random_init.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/core/optimizers/als/random_init.hpp Wed Jul 24 03:29:56 2013
@@ -0,0 +1,42 @@
+/**
+ * @file randominit.hpp
+ * @author Mohan Rajendran
+ * @author Mudit Raj Gupta
+ *
+ * Intialization rule for Non-Negative Matrix Factorization (NMF). This simple
+ * initialization is performed by assigning a random matrix to W and H.
+ *
+ */
+#ifndef __MLPACK_METHODS_NMF_RANDOM_INIT_HPP
+#define __MLPACK_METHODS_NMF_RANDOM_INIT_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace als {
+
+class RandomInitialization
+{
+ public:
+ // Empty constructor required for the InitializeRule template
+ RandomInitialization() { }
+
+ inline static void Initialize(const arma::sp_mat& V,
+ const size_t r,
+ arma::mat& W,
+ arma::mat& H)
+ {
+ // Simple implementation (left in the header file due to its simplicity).
+ size_t n = V.n_rows;
+ size_t m = V.n_cols;
+
+ // Intialize to random values.
+ W.randu(n, r);
+ H.randu(r, m);
+ }
+};
+
+}; // namespace nmf
+}; // namespace mlpack
+
+#endif
Modified: mlpack/trunk/src/mlpack/methods/CMakeLists.txt
==============================================================================
--- mlpack/trunk/src/mlpack/methods/CMakeLists.txt (original)
+++ mlpack/trunk/src/mlpack/methods/CMakeLists.txt Wed Jul 24 03:29:56 2013
@@ -21,6 +21,7 @@
range_search
rann
sparse_coding
+ cf
)
foreach(dir ${DIRS})
Added: mlpack/trunk/src/mlpack/methods/cf/CMakeLists.txt
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/cf/CMakeLists.txt Wed Jul 24 03:29:56 2013
@@ -0,0 +1,23 @@
+# Define the files we need to compile
+# Anything not in this list will not be compiled into MLPACK.
+set(SOURCES
+ cf.hpp
+ cf.cpp
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+ set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all MLPACK sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
+
+add_executable(cf
+ cf_main.cpp
+)
+target_link_libraries(cf
+ mlpack
+)
+install(TARGETS cf RUNTIME DESTINATION bin)
Added: mlpack/trunk/src/mlpack/methods/cf/cf.cpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/cf/cf.cpp Wed Jul 24 03:29:56 2013
@@ -0,0 +1,313 @@
+/**
+ * @file cf.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Collaborative Filtering.
+ *
+ * Implementation of CF class to perform Collaborative Filtering on the
+ * specified data set.
+ *
+ */
+
+#include "cf.hpp"
+
+using namespace mlpack::als;
+using namespace std;
+
+namespace mlpack {
+namespace cf {
+
+/**
+ * Construct the CF object.
+ */
+CF::CF(arma::mat& data) :
+ data(data)
+{
+ Log::Info<<"Constructor (param: input data, default: numRecs;neighbourhood)"<<endl;
+ this->numRecs = 5;
+ this->numUsersForSimilarity = 5;
+}
+
+CF::CF(const size_t numRecs,arma::mat& data) :
+ data(data)
+{
+ // Validate number of recommendation factor.
+ if (numRecs < 1)
+ {
+ Log::Warn << "CF::CF(): number of recommendations shoud be > 0("
+ << numRecs << " given). Setting value to 5.\n";
+ //Setting Default Value of 5
+ this->numRecs = 5;
+ }
+ else
+ this->numRecs = numRecs;
+ this->numUsersForSimilarity = 5;
+}
+
+CF::CF(const size_t numRecs, const size_t numUsersForSimilarity,
+ arma::mat& data) :
+ data(data)
+{
+ // Validate number of recommendation factor.
+ if (numRecs < 1)
+ {
+ Log::Warn << "CF::CF(): number of recommendations shoud be > 0("
+ << numRecs << " given). Setting value to 5.\n";
+ //Setting Default Value of 5
+ this->numRecs = 5;
+ }
+ else
+ this->numRecs = numRecs;
+ // Validate neighbourhood size.
+ if (numUsersForSimilarity < 1)
+ {
+ Log::Warn << "CF::CF(): neighbourhood size shoud be > 0("
+ << numUsersForSimilarity << " given). Setting value to 5.\n";
+ //Setting Default Value of 5
+ this->numUsersForSimilarity = 5;
+ }
+ else
+ this->numUsersForSimilarity = numUsersForSimilarity;
+}
+
+void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
+ arma::Col<size_t>& users)
+{
+ Log::Info<<"GetRecommendations (param: recommendations,users)"<<endl;
+ //Base function for calculating Recommendations
+ //Operations Independent of the query
+ CalculateAprroximateRatings();
+ //Query Dependent Operations
+ Query(recommendations,users);
+}
+
+void CF::CalculateAprroximateRatings()
+{
+ Log::Info<<"CalculatineApproximateRating"<<endl;
+ //Build the initial rating tables with missing values
+ //if(cleanedData.n_rows==0)
+ CleanData();
+ //Decompose the size_tiial table size_to user and item
+ Decompose();
+ //Generate new table by multiplying approximate values
+ GenerateRating();
+}
+
+void CF::GetRecommendations(arma::Mat<size_t>& recommendations)
+{
+ Log::Info<<"GetRecommendations (param: recommendations)"<<endl;
+ //Build the initial rating tables with missing values
+ CleanData();
+ //Used to save user IDs
+ arma::Col<size_t> users =
+ arma::zeros<arma::Col<size_t> >(cleanedData.n_cols,1);
+ //Getting all user IDs
+ for (size_t i=0;i<cleanedData.n_cols;i++)
+ users(i) = i+1;
+ //Calling Base Function for Recommendations
+ GetRecommendations(recommendations,users);
+}
+
+void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
+ arma::Col<size_t>& users,size_t num)
+{
+ //Setting Number of Recommendations
+ NumRecs(num);
+ //Calling Base Function for Recommendations
+ GetRecommendations(recommendations,users);
+}
+
+void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
+ arma::Col<size_t>& users,size_t num,size_t s)
+{
+ //Setting number of users that should be used for calculating
+ //neighbours
+ NumUsersForSimilarity(s);
+ //Setting Number of Recommendations
+ NumRecs(num);
+ //Calling Base Function for Recommendations
+ GetRecommendations(recommendations,users,num);
+}
+
+void CF::CleanData()
+{
+ Log::Info<<"CleanData";
+ //Temporarily stores max user id
+ double maxUserID;
+ //Temporarily stores max item id
+ double maxItemID;
+ //Calculating max users and items
+ maxUserID = data(0,0);
+ maxItemID = data(1,0);
+ for (size_t i=1;i<data.n_cols;i++)
+ {
+ if(data(0,i)>maxUserID)
+ maxUserID = data(0,i);
+ if(data(1,i)>maxItemID)
+ maxItemID = data(1,i);
+ }
+ //Temporarily stores sparcely populated rating matrix
+ arma::sp_mat tmp((size_t)maxItemID,(size_t)maxUserID);
+ //Temporarily stores mask matrix
+ arma::mat lMask = arma::ones<arma::mat>((size_t)maxItemID,
+ (size_t)maxUserID);
+ //Calculates the initial User-Item table
+ for (size_t i=0;i<data.n_cols;i++)
+ tmp(data(1,i)-1,data(0,i)-1) = data(2,i);
+ //Mask
+ for (size_t i=0;i<data.n_cols;i++)
+ lMask(data(1,i)-1,data(0,i)-1) = -1.0;
+ //Storing in a global variable
+ cleanedData = tmp;
+ //Storing the mask
+ mask=lMask;
+ // data::Save("cleanedData.csv",cleanedData);
+ data::Save("mask.csv",mask);
+}
+
+void CF::Decompose()
+{
+ Log::Info<<"Decompose"<<endl;
+ size_t rank = 2;
+ //Presenltly only ALS is supported as an Optimizer
+ //Should be converted to a template
+ ALS<RandomInitialization, WAlternatingLeastSquaresRule,
+ HAlternatingLeastSquaresRule> als(10000, 1e-5);
+ als.Apply(cleanedData,rank,w,h);
+ data::Save("w.csv",w);
+ data::Save("h.csv",h);
+}
+
+void CF::GenerateRating()
+{
+ Log::Info<<"GenerateRatings"<<endl;
+ //Calculating approximate rating
+ rating = w*h;
+ data::Save("rating.csv",rating);
+}
+
+void CF::Query(arma::Mat<size_t>& recommendations,
+ arma::Col<size_t>& users)
+{
+ Log::Info<<"Query"<<endl;
+ //Temproraily stores feature vector of queried users
+ arma::mat query(rating.n_rows, users.n_rows);// = arma::zeros<arma::mat>(rating.n_rows, users.n_rows);
+ //Calculates the feature vector of queried users
+ CreateQuery(query,users);
+ //Temporary storage for neighbourhood of the queried users
+ arma::Mat<size_t> neighbourhood;
+ //Calculates the neighbourhood of the queried users
+ GetNeighbourhood(query,neighbourhood);
+ //Temporary storage for storing the average rating for each
+ //user in their neighbourhood
+ arma::mat averages = arma::zeros<arma::mat>(rating.n_rows,query.n_cols);
+ //Calculates the average values
+ CalculateAverage(neighbourhood,averages);
+ //Calculates the top recommendations
+ CalculateTopRecommendations(recommendations,averages,users);
+}
+
+void CF::CreateQuery(arma::mat& query,arma::Col<size_t>& users) const
+{
+ Log::Info<<"CreateQuery"<<endl;
+ //Selecting feature vectors of queried users
+ for(size_t i=0;i<users.n_rows;i++)
+ for(size_t j=0;j<rating.col(i).n_rows;j++)
+ query(j,i) = rating(j,users(i)-1);
+ data::Save("query.csv",query);
+}
+
+void CF::GetNeighbourhood(arma::mat& query,
+ arma::Mat<size_t>& neighbourhood)
+{
+ Log::Info<<"GetNeighbourhood"<<endl;
+ if(numUsersForSimilarity>rating.n_cols)
+ {
+ Log::Warn << "CF::GetNeighbourhood(arma::mat,armaMat<size_t>):"
+ <<"neighbourhood size should be > total number of users("
+ << rating.n_cols << " given). Setting value to number of users.\n";
+ NumUsersForSimilarity(rating.n_cols);
+ }
+ //Creating an Alknn object
+ //Should be moved to templates
+ AllkNN a(rating, query);
+ //Temproraily storing distance between neighbours
+ arma::mat resultingDistances;
+ //Building neighbourhood
+ a.Search(numUsersForSimilarity, neighbourhood,
+ resultingDistances);
+ data::Save("neighbourhood.csv",neighbourhood);
+}
+
+void CF::CalculateAverage(arma::Mat<size_t>& neighbourhood,
+ arma::mat& averages) const
+{
+ Log::Info<<"CalculateAverage"<<endl;
+ //Temprorary Storage for calculating sum
+ arma::Col<double> tmp = arma::zeros<arma::Col<double> >(rating.n_rows,1);
+ size_t j;
+ //Iterating over all users
+ for(size_t i=0;i<neighbourhood.n_cols;i++)
+ {
+ tmp = arma::zeros<arma::Col<double> >(rating.n_rows,1);
+ //Iterating over all neighbours
+ for(j=0;j<neighbourhood.n_rows;j++)
+ tmp += rating.col(neighbourhood(j,i));
+ //Calculating averages
+ averages.col(i) = tmp/j;
+ }
+ data::Save("averages.csv",averages);
+}
+
+void CF::CalculateTopRecommendations(arma::Mat<size_t>& recommendations,
+ arma::mat& averages,
+ arma::Col<size_t>& users) const
+{
+ Log::Info<<"CalculateTopRecommendations"<<endl;
+ int recos = numRecs;
+ if(averages.n_cols<numRecs)
+ recos = averages.n_rows;
+ //Stores recommendations
+ arma::Mat<size_t> rec = arma::zeros<arma::Mat<size_t> >(recos,users.n_rows);
+ //Stores valid ratings for items by a user
+ arma::Col<double> tmp = arma::zeros<arma::Col<double> >(rating.n_cols,1);
+ //Maps the items to their ratings
+ std::map<double,size_t> tmpMap;
+ //Iterator for the Item-Rating Map
+ std::map<double,size_t>::reverse_iterator iter;
+ std::map<double,size_t>::iterator it;
+ //Keeps count of number of recommendations provided for a user
+ size_t count;
+ //Iterate for all users
+ for(size_t i=0;i<users.n_rows;i++)
+ {
+ count=0;
+ //Dot product between average rating and mask to dilute the ratings
+ // of the items that user i has already rated
+ tmp = averages.col(users(i)-1) % mask.col(users(i)-1);
+ //Mapping Rating to Items
+ for(size_t j=0;j<tmp.n_rows;j++)
+ if(tmp(j)>=0)
+ tmpMap.insert(std::pair<double,size_t>(tmp(j),j+1));
+ //Iterating over Item-Rating Map
+ for(iter=tmpMap.rbegin();iter!=tmpMap.rend();++iter)
+ {
+ //Saving recommendations to the recommendations table
+ rec(count,i) = (size_t)iter->second;
+ count++;
+ //break is desired number of recommendations are saved
+ if(count==numRecs)
+ break;
+ }
+ //Removing the items from the map
+ //note: Item 0 is just to maintain the consistency and it
+ //represents not recommendations were available
+ for(it=tmpMap.begin();it!=tmpMap.end();++it)
+ tmpMap.erase(it);
+ }
+ //Saving to recommendations
+ recommendations = rec;
+}
+
+}; // namespace mlpack
+}; // namespace cf
Added: mlpack/trunk/src/mlpack/methods/cf/cf.hpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/cf/cf.hpp Wed Jul 24 03:29:56 2013
@@ -0,0 +1,225 @@
+/**
+ * @file cf.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Collaborative Filtering.
+ *
+ * Defines the CF class to perform Collaborative Filtering on the
+ * specified data set.
+ *
+ */
+
+#ifndef __MLPACK_METHODS_CF_CF_HPP
+#define __MLPACK_METHODS_CF_CF_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/core/optimizers/als/als.hpp>
+#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
+#include <set>
+#include <map>
+#include <iostream>
+
+namespace mlpack {
+namespace cf /** Collaborative Filtering. */{
+
+/**
+ * This class implements Collaborative Filtering (CF). This
+ * implementation presently supports Alternating Least Squares
+ * for collaborative filtering.
+ *
+ * The template parameters can (optionally) be supplied are: the algorithm
+ * used for CF and the neighbourhood search for user similarity.
+ *
+ * A simple example of how to run Collaborative Filtering is shown below.
+ *
+ * @code
+ * extern arma::mat data; // (user,item,rating) table
+ * extern arma::Col<size_t> users; // users seeking recommendations
+ * arma::mat recommendations; // Recommendations
+ * size_t numRecommendations = 10;
+ *
+ * CF<> cf(data); // Default options.
+ *
+ * //Default number of Recommendations for all users
+ * cf.GenerateRecommendations(recommendations);
+ *
+ * //Default number of Recommendations for specified users
+ * cf.GenerateRecommendations(recommendations, users);
+ *
+ * //10 Recommendations for specified users
+ * cf.GenerateRecommendations(recommendations, users, numRecommendations);
+ *
+ * @endcode
+ *
+ */
+
+class CF
+{
+ public:
+/**
+ * Create a CF object and (optionally) set the parameters which CF
+ * will be run with.
+ *
+ * @param data Initial User,Item,Rating Matrix
+ * @param numRecs Number of Recommendations for each user.
+ * @param numUsersForSimilarity Size of the neighbourhood.
+ */
+ CF(const size_t numRecs,const size_t numUsersForSimilarity,
+ arma::mat& data);
+/**
+ * Create a CF object and (optionally) set the parameters which CF
+ * will be run with.
+ *
+ * @param data Initial User,Item,Rating Matrix
+ * @param numRecs Number of Recommendations for each user.
+ */
+ CF(const size_t numRecs, arma::mat& data);
+/**
+ * Create a CF object and (optionally) set the parameters which CF
+ * will be run with.
+ *
+ * @param data Initial User,Item,Rating Matrix
+ */
+ CF(arma::mat& data);
+ //! Sets number of Recommendations.
+ void NumRecs(size_t recs)
+ {
+ if (recs < 1)
+ {
+ Log::Warn << "CF::NumRecs(): invalid value (< 1) "
+ "ignored." << std::endl;
+ return;
+ }
+ this->numRecs = recs;
+ }
+ //! Sets number of user for calculating similarity.
+ void NumUsersForSimilarity(size_t num)
+ {
+ if (num < 1)
+ {
+ Log::Warn << "CF::NumUsersForSimilarity(): invalid value (< 1) "
+ "ignored." << std::endl;
+ return;
+ }
+ this->numUsersForSimilarity = num;
+ }
+ //! Get the User Matrix.
+ const arma::mat& W() const { return w; }
+ //! Get the Item Matrix.
+ const arma::mat& H() const { return h; }
+ //! Get the Rating Matrix.
+ const arma::mat& Rating() const { return rating; }
+
+/*
+ * Generates default number of recommendations for all users.
+ *
+ * @param recommendations Matrix to save recommendations
+ */
+ void GetRecommendations(arma::Mat<size_t>& recommendations);
+
+/*
+ * Generates default number of recommendations for specified users.
+ *
+ * @param recommendations Matrix to save recommendations
+ * @param users Users for which recommendations are to be generated
+ */
+ void GetRecommendations(arma::Mat<size_t>& recommendations,
+ arma::Col<size_t>& users);
+
+/*
+ * Generates a fixed number of recommendations for specified users.
+ *
+ * @param recommendations Matrix to save recommendations
+ * @param users Users for which recommendations are to be generated
+ * @param num Number of Recommendations
+ */
+ void GetRecommendations(arma::Mat<size_t>& recommendations,
+ arma::Col<size_t>& users, size_t num);
+
+/*
+ * Generates a fixed number of recommendations for specified users.
+ *
+ * @param recommendations Matrix to save recommendations
+ * @param users Users for which recommendations are to be generated
+ * @param num Number of Recommendations
+ * @param neighbours Number of user to be considered while calculating
+ * the neighbourhood
+ */
+ void GetRecommendations(arma::Mat<size_t>& recommendations,
+ arma::Col<size_t>& users, size_t num,
+ size_t neighbours);
+
+ private:
+ //! Number of Recommendations.
+ size_t numRecs;
+ //! Number of User for Similariy.
+ size_t numUsersForSimilarity;
+ //! User Matrix.
+ arma::mat w;
+ //! Item Matrix.
+ arma::mat h;
+ //! Rating Martix.
+ arma::mat rating;
+ //! Masking Matrix
+ arma::mat mask;
+ //! Initial Data Matrix.
+ arma::mat data;
+ //! Cleaned Data Matrix.
+ arma::sp_mat cleanedData;
+ //!Calculates a rating matrix with available data and approximations
+ void CalculateAprroximateRatings();
+ //!Converts the User, Item, Value Matrix to User-Item Table
+ void CleanData();
+ //!Decomposes the cleanedData size_to user and item matrices
+ void Decompose();
+ //!Create ratings from user and item matrices
+ void GenerateRating();
+/*
+ * Queries the obtained rating matrix.
+ *
+ * @param recommendations Matrix to save recommendations
+ * @param users Users for which recommendations are to be generated
+ */
+ void Query(arma::Mat<size_t>& recommendations,arma::Col<size_t>& users);
+/*
+ * Selects item preferences of the users
+ *
+ * @param query Matrix to store the item preference of the user.
+ * @param users Users for which recommendations are to be generated
+ */
+ void CreateQuery(arma::mat& query,arma::Col<size_t>& users) const;
+/*
+ * Generates the neighbourhood of users.
+ *
+ * @param query Matrix to store the item preference of the user.
+ * @param modifiedRating Matrix to store the Modified Matix.
+ * @param neighbourhood Matrix to store user neighbourhood.
+ */
+ void GetNeighbourhood(arma::mat& query,
+ arma::Mat<size_t>& neighbourhood);
+/*
+ * Calculates the Average rating users would have given to
+ * unrated items based on their similarity with other users.
+ *
+ * @param neighbourhood Matrix to store user neighbourhood.
+ * @param averages stores the average rating for each item.
+ */
+ void CalculateAverage(arma::Mat<size_t>& neighbourhood,
+ arma::mat& averages) const;
+/*
+ * Calculates the top recommendations given average rating
+ * for each user.
+ *
+ * @param neighbourhood Matrix to store user neighbourhood.
+ * @param averages stores the average rating for each item.
+ */
+ void CalculateTopRecommendations(arma::Mat<size_t>& recommendations,
+ arma::mat& averages,
+ arma::Col<size_t>& users) const;
+
+}; // class CF
+
+}; // namespace cf
+}; // namespace mlpack
+
+#endif
Added: mlpack/trunk/src/mlpack/methods/cf/cf_main.cpp
==============================================================================
--- (empty file)
+++ mlpack/trunk/src/mlpack/methods/cf/cf_main.cpp Wed Jul 24 03:29:56 2013
@@ -0,0 +1,71 @@
+/**
+ * @file cf_main.hpp
+ * @author Mudit Raj Gupta
+ *
+ * Main executable to run CF.
+ *
+ */
+
+#include <mlpack/core.hpp>
+#include "cf.hpp"
+
+using namespace mlpack;
+using namespace mlpack::cf;
+using namespace std;
+
+// Document program.
+PROGRAM_INFO("Collaborating Filtering", "This program performs Collaborative "
+ "filtering(cf) on the given dataset. Given a list of user, item and "
+ "preferences the program output is a set of recommendations for users."
+ " Optionally, the users to be queried can be specified. The program also"
+ " provides the flexibility to select number of recommendations for each"
+ " user and also the neighbourhood. User, Item and Rating matrices can also"
+ " be extracted. Variable parameters include algorithm for performing "
+ "cf, algorithm parameters and similarity measures to give recommendations");
+
+// Parameters for program.
+PARAM_STRING_REQ("input_file", "Input dataset to perform CF on.", "i");
+PARAM_STRING("output_file","Recommendations.", "o", "recommendations.csv");
+PARAM_STRING("algorithm", "Algorithm used for cf (als/svd).", "a","als");
+PARAM_STRING("ratings_file", "File to save ratings.", "R","ratings.csv");
+PARAM_STRING("user_file", "File to save the calculated User matrix to.",
+ "U","user.csv");
+PARAM_STRING("item_file", "File to save the calculated Item matrix to.",
+ "I","item.csv");
+PARAM_STRING("nearest_neighbour_algorithm", "Similarity Measure to be used "
+ "for generating recommendations", "s","knn");
+PARAM_STRING("query_file", "List of user for which recommendations are to "
+ "be generated (If unspecified then all)", "q","query.csv");
+PARAM_INT("number_of_Recommendations", "Number of Recommendations for each "
+ "user in query", "r",5);
+PARAM_INT("neighbourhood", "Size of the neighbourhood for all "
+ "user in query", "n",5);
+
+int main(int argc, char** argv)
+{
+ //Parse Command Line
+ CLI::ParseCommandLine(argc, argv);
+
+ //Read from the input file
+ string inputFile = CLI::GetParam<string>("input_file");
+ arma::mat dataset;
+ data::Load(inputFile.c_str(), dataset);
+
+ //Recommendation matrix.
+ arma::Mat<size_t> recommendations;
+
+ //User Matrix
+ arma::Col<size_t> users;
+
+ //Reading Users
+ string userf = CLI::GetParam<string>("query_file");
+ data::Load(userf.c_str(),users);
+
+ //Calculating Recommendations
+ CF c(dataset);
+
+ Log::Info << "Performing CF on dataset..." << endl;
+ c.GetRecommendations(recommendations,users);
+ string outputFile = CLI::GetParam<string>("output_file");
+ data::Save(outputFile, recommendations);
+}
More information about the mlpack-svn
mailing list