[mlpack-git] master: Refactor input a bit. We don't take dense matrices for CF. (802ead7)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Tue May 26 11:12:24 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/d2f2976c7a43f8ab9139064ae33304bcf9f4f884...29ab461472f64f72cfbdb93b0d9045024050cc95
>---------------------------------------------------------------
commit 802ead7170edf34c9e0d982ad57bdca0ed18987f
Author: Ryan Curtin <ryan at ratml.org>
Date: Sat May 23 02:36:56 2015 +0000
Refactor input a bit. We don't take dense matrices for CF.
>---------------------------------------------------------------
802ead7170edf34c9e0d982ad57bdca0ed18987f
src/mlpack/methods/cf/cf.hpp | 24 +++++++++---------------
src/mlpack/methods/cf/cf_impl.hpp | 37 ++++++++++++++++++++++++++++++++-----
src/mlpack/tests/cf_test.cpp | 32 ++++++++++++++++++++++++++------
3 files changed, 67 insertions(+), 26 deletions(-)
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 3f510eb..4e697f6 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -82,22 +82,18 @@ class CF
* reference to the data that we will be using. There are parameters that can
* be set; default values are provided for each of them. If the rank is left
* unset (or is set to 0), a simple density-based heuristic will be used to
- * choose a rank. This overload of the constructor will only be available if
- * the factorizer does not use a corodinate list (i.e. if UsesCoordinateList
- * is false).
+ * choose a rank.
*
- * The U and T template parameters are for SFINAE, so that this overload is
- * only available when the FactorizerType does not use a coordinate list.
+ * The provided dataset should be a coordinate list; that is, a 3-row matrix
+ * where each column corresponds to a (user, item, rating) entry in the
+ * matrix.
*
- * @param data Initial (user, item, rating) matrix.
+ * @param data Data matrix in coordinate list form (user, item, rating).
* @param factorizer Instantiated factorizer object.
* @param numUsersForSimilarity Size of the neighborhood.
* @param rank Rank parameter for matrix factorization.
*/
- template<typename U = FactorizerType,
- typename T = typename boost::enable_if_c<
- FactorizerTraits<U>::UsesCoordinateList>::type*>
- CF(arma::mat& data,
+ CF(const arma::mat& data,
FactorizerType factorizer = FactorizerType(),
const size_t numUsersForSimilarity = 5,
const size_t rank = 0);
@@ -115,17 +111,15 @@ class CF
* The U and T template parameters are for SFINAE, so that this overload is
* only available when the FactorizerType does not use a coordinate list.
*
- * @param data Initial (user, item, rating) matrix.
+ * @param data Sparse matrix data.
* @param factorizer Instantiated factorizer object.
* @param numUsersForSimilarity Size of the neighborhood.
* @param rank Rank parameter for matrix factorization.
- * @param isCleaned If the data passed is cleaned for CF
*/
- template<typename MatType,
- typename U = FactorizerType,
+ template<typename U = FactorizerType,
typename T = typename boost::disable_if_c<
FactorizerTraits<U>::UsesCoordinateList>::type*>
- CF(const MatType& data,
+ CF(const arma::sp_mat& data,
FactorizerType factorizer = FactorizerType(),
const size_t numUsersForSimilarity = 5,
const size_t rank = 0);
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index 4c78331..2e81d16 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -17,12 +17,39 @@
namespace mlpack {
namespace cf {
+// Apply the factorizer when a coordinate list is used.
+template<typename FactorizerType>
+void ApplyFactorizer(FactorizerType& factorizer,
+ const arma::mat& data,
+ const arma::sp_mat& /* cleanedData */,
+ const size_t rank,
+ arma::mat& w,
+ arma::mat& h,
+ const typename boost::enable_if_c<FactorizerTraits<
+ FactorizerType>::UsesCoordinateList>::type* = 0)
+{
+ factorizer.Apply(data, rank, w, h);
+}
+
+// Apply the factorizer when coordinate lists are not used.
+template<typename FactorizerType>
+void ApplyFactorizer(FactorizerType& factorizer,
+ const arma::mat& /* data */,
+ const arma::sp_mat& cleanedData,
+ const size_t rank,
+ arma::mat& w,
+ arma::mat& h,
+ const typename boost::disable_if_c<FactorizerTraits<
+ FactorizerType>::UsesCoordinateList>::type* = 0)
+{
+ factorizer.Apply(cleanedData, rank, w, h);
+}
+
/**
* Construct the CF object using an instantiated factorizer.
*/
template<typename FactorizerType>
-template<typename U, typename T>
-CF<FactorizerType>::CF(arma::mat& data,
+CF<FactorizerType>::CF(const arma::mat& data,
FactorizerType factorizer,
const size_t numUsersForSimilarity,
const size_t rank) :
@@ -58,15 +85,15 @@ CF<FactorizerType>::CF(arma::mat& data,
// Decompose the data matrix (which is in coordinate list form) to user and
// data matrices.
- factorizer.Apply(data, this->rank, w, h);
+ ApplyFactorizer(factorizer, data, cleanedData, this->rank, w, h);
}
/**
* Construct the CF object using an instantiated factorizer.
*/
template<typename FactorizerType>
-template<typename MatType, typename U, typename T>
-CF<FactorizerType>::CF(const MatType& data,
+template<typename U, typename T>
+CF<FactorizerType>::CF(const arma::sp_mat& data,
FactorizerType factorizer,
const size_t numUsersForSimilarity,
const size_t rank) :
diff --git a/src/mlpack/tests/cf_test.cpp b/src/mlpack/tests/cf_test.cpp
index feb4d4b..651733d 100644
--- a/src/mlpack/tests/cf_test.cpp
+++ b/src/mlpack/tests/cf_test.cpp
@@ -36,8 +36,12 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsAllUsersTest)
arma::mat dataset;
data::Load("GroupLens100k.csv", dataset);
- // Creat a CF object
- CF<> c(dataset);
+ // Make data into sparse matrix.
+ arma::sp_mat cleanedData;
+ CF<>::CleanData(dataset, cleanedData);
+
+ // Create a CF object.
+ CF<> c(cleanedData);
// Generate recommendations when query set is not specified.
c.GetRecommendations(numRecs, recommendations);
@@ -72,7 +76,11 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsQueriedUserTest)
arma::mat dataset;
data::Load("GroupLens100k.csv", dataset);
- CF<> c(dataset);
+ // Make data into sparse matrix.
+ arma::sp_mat cleanedData;
+ CF<>::CleanData(dataset, cleanedData);
+
+ CF<> c(cleanedData);
// Generate recommendations when query set is specified.
c.GetRecommendations(numRecsDefault, recommendations, users);
@@ -126,8 +134,12 @@ BOOST_AUTO_TEST_CASE(RecommendationAccuracyTest)
}
}
+ // Make data into sparse matrix.
+ arma::sp_mat cleanedData;
+ CF<>::CleanData(dataset, cleanedData);
+
// Now create the CF object.
- CF<> c(dataset);
+ CF<> c(cleanedData);
// Obtain 150 recommendations for the users in savedCols, and make sure the
// missing item shows up in most of them. First, create the list of users,
@@ -215,8 +227,12 @@ BOOST_AUTO_TEST_CASE(CFPredictTest)
}
}
+ // Make data into sparse matrix.
+ arma::sp_mat cleanedData;
+ CF<>::CleanData(dataset, cleanedData);
+
// Now create the CF object.
- CF<> c(dataset);
+ CF<> c(cleanedData);
// Now, for each removed rating, make sure the prediction is... reasonably
// accurate.
@@ -277,8 +293,12 @@ BOOST_AUTO_TEST_CASE(CFBatchPredictTest)
}
}
+ // Make data into sparse matrix.
+ arma::sp_mat cleanedData;
+ CF<>::CleanData(dataset, cleanedData);
+
// Now create the CF object.
- CF<> c(dataset);
+ CF<> c(cleanedData);
// Get predictions for all user/item pairs we held back.
arma::Mat<size_t> combinations(2, savedCols.n_cols);
More information about the mlpack-git
mailing list