[mlpack-git] master: * Adding support for user cleaned matrix in CF (b28b2de)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu May 21 21:05:08 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/7e9cd46afb53817ae93ccbd02637d7726137ce4d...d2f2976c7a43f8ab9139064ae33304bcf9f4f884
>---------------------------------------------------------------
commit b28b2deb137beaca385e3e9a6ef07bcc27652973
Author: sumedhghaisas <sumedhghaisas at gmail.com>
Date: Tue Apr 28 03:50:17 2015 +0530
* Adding support for user cleaned matrix in CF
>---------------------------------------------------------------
b28b2deb137beaca385e3e9a6ef07bcc27652973
src/mlpack/methods/cf/cf.hpp | 24 ++++++++++++++++++--
src/mlpack/methods/cf/cf_impl.hpp | 48 +++++++++++++++++++++++++++++++++++++--
2 files changed, 68 insertions(+), 4 deletions(-)
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 03e953d..61b9135 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -95,6 +95,25 @@ class CF
const size_t numUsersForSimilarity = 5,
const size_t rank = 0);
+ /**
+ * Initialize the CF object using an instantiated factorizer. Store a
+ * reference to the data that we will be using. There are parameters that can
+ * be set; default values are provided for each of them. If the rank is left
+ * unset (or is set to 0), a simple density-based heuristic will be used to
+ * choose a rank. Data will be considered in the format of items vs. users and
+ * will be passed directly to the factorizer without cleaning.
+ *
+ * @param data Sparse rating matrix, already in items vs. users form.
+ * @param factorizer Instantiated factorizer object.
+ * @param numUsersForSimilarity Size of the neighborhood.
+ * @param rank Rank parameter for matrix factorization.
+ * @param isCleaned If the data passed is cleaned for CF
+ */
+ CF(arma::sp_mat& data,
+ FactorizerType factorizer = FactorizerType(),
+ const size_t numUsersForSimilarity = 5,
+ const size_t rank = 0);
+
/*void ApplyFactorizer(arma::mat& data, const typename boost::enable_if_c<
FactorizerTraits<FactorizerType>::IsCleaned == false, int*>::type);
@@ -166,6 +185,9 @@ class CF
arma::Mat<size_t>& recommendations,
arma::Col<size_t>& users);
+ //! Converts the User, Item, Value Matrix to User-Item Table
+ static void CleanData(const arma::mat& data, arma::sp_mat& cleanedData);
+
/**
* Returns a string representation of this object.
*/
@@ -186,8 +208,6 @@ class CF
arma::mat rating;
//! Cleaned data matrix.
arma::sp_mat cleanedData;
- //! Converts the User, Item, Value Matrix to User-Item Table
- void CleanData(const arma::mat& data);
/**
* Helper function to insert a point into the recommendation matrices.
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index d857166..0b0eee4 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -63,6 +63,50 @@ template<typename FactorizerType>
CF<FactorizerType>::CF(arma::mat& data,
FactorizerType factorizer,
const size_t numUsersForSimilarity,
+ const size_t rank,
+ bool isCleaned) :
+ numUsersForSimilarity(numUsersForSimilarity),
+ rank(rank),
+ factorizer(factorizer)
+{
+ // Validate neighbourhood size.
+ if (numUsersForSimilarity < 1)
+ {
+ Log::Warn << "CF::CF(): neighbourhood size should be > 0("
+ << numUsersForSimilarity << " given). Setting value to 5.\n";
+ //Setting Default Value of 5
+ this->numUsersForSimilarity = 5;
+ }
+
+ CleanData(data, cleanedData);
+
+ // Check if the user wanted us to choose a rank for them.
+ if (rank == 0)
+ {
+ // This is a simple heuristic that picks a rank between 5 and 105 based on
+ // the density of the dataset.
+ const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
+ const size_t rankEstimate = size_t(density) + 5;
+
+ // Set to heuristic value.
+ Log::Info << "No rank given for decomposition; using rank of "
+ << rankEstimate << " calculated by density-based heuristic."
+ << std::endl;
+ this->rank = rankEstimate;
+ }
+
+ // Operations independent of the query:
+ // Decompose the sparse data matrix to user and data matrices.
+ ApplyFactorizer<FactorizerType>(data, cleanedData, factorizer, this->rank, w, h);
+}
+
+/**
+ * Construct the CF object using an instantiated factorizer.
+ */
+template<typename FactorizerType>
+CF<FactorizerType>::CF(arma::mat& data,
+ FactorizerType factorizer,
+ const size_t numUsersForSimilarity,
const size_t rank) :
numUsersForSimilarity(numUsersForSimilarity),
rank(rank),
@@ -77,7 +121,7 @@ CF<FactorizerType>::CF(arma::mat& data,
this->numUsersForSimilarity = 5;
}
- CleanData(data);
+ CleanData(data, cleanedData);
// Check if the user wanted us to choose a rank for them.
if (rank == 0)
@@ -200,7 +244,7 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
}
template<typename FactorizerType>
-void CF<FactorizerType>::CleanData(const arma::mat& data)
+void CF<FactorizerType>::CleanData(const arma::mat& data, arma::sp_mat& cleanedData)
{
// Generate list of locations for batch insert constructor for sparse
// matrices.
More information about the mlpack-git
mailing list