[mlpack-git] master, mlpack-1.0.x: Patch from Siddharth: templatize CF to accept arbitrary types of factorizers. (efda5e0)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:44:59 EST 2015
Repository : https://github.com/mlpack/mlpack
On branches: master,mlpack-1.0.x
Link : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40
>---------------------------------------------------------------
commit efda5e046343e11daa809c32bf8117e27a3c393b
Author: Ryan Curtin <ryan at ratml.org>
Date: Sun Mar 2 04:39:49 2014 +0000
Patch from Siddharth: templatize CF to accept arbitrary types of factorizers.
>---------------------------------------------------------------
efda5e046343e11daa809c32bf8117e27a3c393b
src/mlpack/methods/cf/CMakeLists.txt | 2 +-
src/mlpack/methods/cf/cf.hpp | 38 ++++++++++++++----
src/mlpack/methods/cf/{cf.cpp => cf_impl.hpp} | 58 ++++++++++++++++-----------
src/mlpack/methods/cf/cf_main.cpp | 2 +-
src/mlpack/tests/cf_test.cpp | 8 ++--
src/mlpack/tests/to_string_test.cpp | 2 +-
6 files changed, 72 insertions(+), 38 deletions(-)
diff --git a/src/mlpack/methods/cf/CMakeLists.txt b/src/mlpack/methods/cf/CMakeLists.txt
index af6824f..6413af4 100644
--- a/src/mlpack/methods/cf/CMakeLists.txt
+++ b/src/mlpack/methods/cf/CMakeLists.txt
@@ -2,7 +2,7 @@
# Anything not in this list will not be compiled into MLPACK.
set(SOURCES
cf.hpp
- cf.cpp
+ cf_impl.hpp
)
# Add directory name to sources.
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 798306e..38d1550 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -12,10 +12,14 @@
#include <mlpack/core.hpp>
#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
+#include <mlpack/methods/nmf/nmf.hpp>
+#include <mlpack/methods/nmf/als_update_rules.hpp>
#include <set>
#include <map>
#include <iostream>
+using namespace mlpack::nmf;
+
namespace mlpack {
namespace cf /** Collaborative filtering. */{
@@ -32,7 +36,7 @@ namespace cf /** Collaborative filtering. */{
* arma::Mat<size_t> recommendations; // Recommendations
* size_t numRecommendations = 10;
*
- * CF cf(data); // Default options.
+ * CF<> cf(data); // Default options.
*
* // Generate the default number of recommendations for all users.
* cf.GetRecommendations(recommendations);
@@ -49,8 +53,17 @@ namespace cf /** Collaborative filtering. */{
* should have three rows. The first represents the user; the second represents
* the item; and the third represents the rating. The user and item, while they
* are in a matrix that holds doubles, should hold integer (or size_t) values.
- * The user and item indices are assumed to be starting from 0.
+ * The user and item indices are assumed to start at 0.
+ *
+ * @tparam FactorizerType The type of matrix factorization to use to decompose
+ * the rating matrix (a W and H matrix). This must implement the method
+ * Apply(arma::sp_mat& data, size_t rank, arma::mat& W, arma::mat& H).
*/
+template<
+ typename FactorizerType = NMF<RandomInitialization,
+ WAlternatingLeastSquaresRule,
+ HAlternatingLeastSquaresRule>
+>
class CF
{
public:
@@ -82,13 +95,13 @@ class CF
this->numRecs = recs;
}
- //! Gets numRecs
- size_t NumRecs()
+ //! Gets the number of recommendations.
+ size_t NumRecs() const
{
return numRecs;
}
- //! Sets number of user for calculating similarity.
+ //! Sets number of users for calculating similarity.
void NumUsersForSimilarity(const size_t num)
{
if (num < 1)
@@ -101,7 +114,7 @@ class CF
}
//! Gets number of users for calculating similarity.
- size_t NumUsersForSimilarity()
+ size_t NumUsersForSimilarity() const
{
return numUsersForSimilarity;
}
@@ -113,11 +126,17 @@ class CF
}
//! Gets rank parameter for matrix factorization.
- size_t Rank()
+ size_t Rank() const
{
return rank;
}
+ //! Sets the factorizer object used to decompose the rating matrix.
+ void Factorizer(const FactorizerType& f)
+ {
+ this->factorizer = f;
+ }
+
//! Get the User Matrix.
const arma::mat& W() const { return w; }
//! Get the Item Matrix.
@@ -182,6 +201,8 @@ class CF
size_t numUsersForSimilarity;
//! Rank used for matrix factorization.
size_t rank;
+ //! Instantiated factorizer object.
+ FactorizerType factorizer;
//! User matrix.
arma::mat w;
//! Item matrix.
@@ -214,4 +235,7 @@ class CF
}; // namespace cf
}; // namespace mlpack
+// Include the template implementation.
+#include "cf_impl.hpp"
+
#endif
diff --git a/src/mlpack/methods/cf/cf.cpp b/src/mlpack/methods/cf/cf_impl.hpp
similarity index 81%
rename from src/mlpack/methods/cf/cf.cpp
rename to src/mlpack/methods/cf/cf_impl.hpp
index 2ee382d..89d6c37 100644
--- a/src/mlpack/methods/cf/cf.cpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -22,10 +22,11 @@ namespace cf {
/**
* Construct the CF object.
*/
-CF::CF(arma::mat& data,
- const size_t numRecs,
- const size_t numUsersForSimilarity,
- const size_t rank) :
+template<typename FactorizerType>
+CF<FactorizerType>::CF(arma::mat& data,
+ const size_t numRecs,
+ const size_t numUsersForSimilarity,
+ const size_t rank) :
data(data),
numRecs(numRecs),
numUsersForSimilarity(numUsersForSimilarity),
@@ -49,10 +50,15 @@ CF::CF(arma::mat& data,
this->numUsersForSimilarity = 5;
}
+ // Set default factorizer; FactorizerType must accept (10000, 1e-5) here.
+ FactorizerType f(10000, 1e-5);
+ Factorizer(f);
+
CleanData();
}
-void CF::GetRecommendations(arma::Mat<size_t>& recommendations)
+template<typename FactorizerType>
+void CF<FactorizerType>::GetRecommendations(arma::Mat<size_t>& recommendations)
{
// Used to save user IDs.
arma::Col<size_t> users =
@@ -65,8 +71,9 @@ void CF::GetRecommendations(arma::Mat<size_t>& recommendations)
GetRecommendations(recommendations, users);
}
-void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
- arma::Col<size_t>& users)
+template<typename FactorizerType>
+void CF<FactorizerType>::GetRecommendations(arma::Mat<size_t>& recommendations,
+ arma::Col<size_t>& users)
{
// Base function for calculating recommendations.
@@ -87,11 +94,8 @@ void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
// Operations independent of the query:
// Decompose the sparse data matrix to user and data matrices.
- // Presently only ALS (via NMF) is supported as an optimizer. This should be
- // converted to a template when more optimizers are available.
- NMF<RandomInitialization, WAlternatingLeastSquaresRule,
- HAlternatingLeastSquaresRule> als(10000, 1e-5);
- als.Apply(cleanedData, rank, w, h);
+ // Uses the templated factorizer (by default, NMF with ALS update rules).
+ factorizer.Apply(cleanedData, rank, w, h);
// Generate new table by multiplying approximate values.
rating = w * h;
@@ -171,8 +175,9 @@ void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
}
}
-void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
- arma::Col<size_t>& users,size_t num)
+template<typename FactorizerType>
+void CF<FactorizerType>::GetRecommendations(arma::Mat<size_t>& recommendations,
+ arma::Col<size_t>& users,size_t num)
{
//Setting Number of Recommendations
NumRecs(num);
@@ -180,8 +185,10 @@ void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
GetRecommendations(recommendations,users);
}
-void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
- arma::Col<size_t>& users,size_t num,size_t s)
+template<typename FactorizerType>
+void CF<FactorizerType>::GetRecommendations(arma::Mat<size_t>& recommendations,
+ arma::Col<size_t>& users,size_t num,
+ size_t s)
{
//Setting number of users that should be used for calculating
//neighbours
@@ -192,7 +199,8 @@ void CF::GetRecommendations(arma::Mat<size_t>& recommendations,
GetRecommendations(recommendations,users,num);
}
-void CF::CleanData()
+template<typename FactorizerType>
+void CF<FactorizerType>::CleanData()
{
// Generate list of locations for batch insert constructor for sparse
// matrices.
@@ -222,12 +230,13 @@ void CF::CleanData()
* @param neighbor Index of item being inserted as a recommendation.
* @param value Value of recommendation.
*/
-void CF::InsertNeighbor(const size_t queryIndex,
- const size_t pos,
- const size_t neighbor,
- const double value,
- arma::Mat<size_t>& recommendations,
- arma::mat& values) const
+template<typename FactorizerType>
+void CF<FactorizerType>::InsertNeighbor(const size_t queryIndex,
+ const size_t pos,
+ const size_t neighbor,
+ const double value,
+ arma::Mat<size_t>& recommendations,
+ arma::mat& values) const
{
// We only memmove() if there is actually a need to shift something.
if (pos < (recommendations.n_rows - 1))
@@ -247,7 +256,8 @@ void CF::InsertNeighbor(const size_t queryIndex,
}
// Return string of object.
-std::string CF::ToString() const
+template<typename FactorizerType>
+std::string CF<FactorizerType>::ToString() const
{
std::ostringstream convert;
convert << "Collaborative Filtering [" << this << "]" << std::endl;
diff --git a/src/mlpack/methods/cf/cf_main.cpp b/src/mlpack/methods/cf/cf_main.cpp
index ac18182..4c001cf 100644
--- a/src/mlpack/methods/cf/cf_main.cpp
+++ b/src/mlpack/methods/cf/cf_main.cpp
@@ -70,7 +70,7 @@ int main(int argc, char** argv)
// Perform decomposition to prepare for recommendations.
Log::Info << "Performing CF matrix decomposition on dataset..." << endl;
- CF c(dataset);
+ CF<> c(dataset);
c.NumRecs(numRecs);
c.NumUsersForSimilarity(neighborhood);
diff --git a/src/mlpack/tests/cf_test.cpp b/src/mlpack/tests/cf_test.cpp
index 58289b8..b9cfcd5 100644
--- a/src/mlpack/tests/cf_test.cpp
+++ b/src/mlpack/tests/cf_test.cpp
@@ -33,7 +33,7 @@ BOOST_AUTO_TEST_CASE(CFConstructorTest)
// Number of users for similarity (not the default).
const size_t numUsersForSimilarity = 8;
- CF c(dataset, numRecs, numUsersForSimilarity);
+ CF<> c(dataset, numRecs, numUsersForSimilarity);
// Check parameters.
BOOST_REQUIRE_EQUAL(c.NumRecs(), numRecs);
@@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsAllUsersTest)
data::Load("GroupLens100k.csv", dataset);
// Creat a CF object
- CF c(dataset);
+ CF<> c(dataset);
// Set number of recommendations.
c.NumRecs(numRecs);
@@ -106,7 +106,7 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsQueriedUserTest)
arma::mat dataset;
data::Load("GroupLens100k.csv", dataset);
- CF c(dataset);
+ CF<> c(dataset);
// Generate recommendations when query set is specified.
c.GetRecommendations(recommendations, users);
@@ -161,7 +161,7 @@ BOOST_AUTO_TEST_CASE(RecommendationAccuracyTest)
}
// Now create the CF object.
- CF c(dataset);
+ CF<> c(dataset);
// Obtain 150 recommendations for the users in savedCols, and make sure the
// missing item shows up in most of them. First, create the list of users,
diff --git a/src/mlpack/tests/to_string_test.cpp b/src/mlpack/tests/to_string_test.cpp
index 269e6e1..1a61574 100644
--- a/src/mlpack/tests/to_string_test.cpp
+++ b/src/mlpack/tests/to_string_test.cpp
@@ -292,7 +292,7 @@ BOOST_AUTO_TEST_CASE(CFString)
c(0, 2) = 1;
c(1, 2) = 3;
c(2, 2) = 0.7;
- mlpack::cf::CF d(c, a, a);
+ mlpack::cf::CF<> d(c, a, a);
Log::Debug << d;
std::string s = d.ToString();
BOOST_REQUIRE_NE(s, "");
More information about the mlpack-git
mailing list