[mlpack-git] master: * added NMF, SVDBatch, SVDIncompleteIncremental and SVDCompleteIncremental to CF executable (ff1c084)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:58:29 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40
>---------------------------------------------------------------
commit ff1c08472ae15583d14036bb9ddde79333107294
Author: sumedhghaisas <sumedhghaisas at gmail.com>
Date: Sun Aug 17 21:58:26 2014 +0000
* added NMF, SVDBatch, SVDIncompleteIncremental and SVDCompleteIncremental to CF executable
>---------------------------------------------------------------
ff1c08472ae15583d14036bb9ddde79333107294
src/mlpack/methods/cf/cf.hpp | 16 +-------
src/mlpack/methods/cf/cf_impl.hpp | 46 ++--------------------
src/mlpack/methods/cf/cf_main.cpp | 73 ++++++++++++++++++++++-------------
src/mlpack/methods/cf/svd_wrapper.hpp | 3 ++
src/mlpack/tests/to_string_test.cpp | 3 +-
5 files changed, 55 insertions(+), 86 deletions(-)
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 0312ccb..6f6b7b1 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -79,20 +79,6 @@ class CF
{
public:
/**
- * Initialize the CF object. Store a reference to the data that we
- * will be using. There are parameters that can be set; default values
- * are provided for each of them. If the rank is left unset (or is set to 0),
- * a simple density-based heuristic will be used to choose a rank.
- *
- * @param data Initial (user, item, rating) matrix.
- * @param numUsersForSimilarity Size of the neighborhood.
- * @param rank Rank parameter for matrix factorization.
- */
- CF(arma::mat& data,
- const size_t numUsersForSimilarity = 5,
- const size_t rank = 0);
-
- /**
* Initialize the CF object using an instantiated factorizer. Store a
* reference to the data that we will be using. There are parameters that can
* be set; default values are provided for each of them. If the rank is left
@@ -105,7 +91,7 @@ class CF
* @param rank Rank parameter for matrix factorization.
*/
CF(arma::mat& data,
- FactorizerType& factorizer,
+ FactorizerType factorizer = FactorizerType(),
const size_t numUsersForSimilarity = 5,
const size_t rank = 0);
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index c455018..53b0a05 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -29,6 +29,7 @@ void ApplyFactorizer(arma::mat& data,
FactorizerTraits<FactorizerType>::UsesCoordinateList == false,
int*>::type = 0)
{
+ (void)data;
factorizer.Apply(cleanedData, rank, w, h);
}
@@ -48,57 +49,16 @@ void ApplyFactorizer(arma::mat& data,
FactorizerTraits<FactorizerType>::UsesCoordinateList == true,
int*>::type = 0)
{
+ (void)cleanedData;
factorizer.Apply(data, rank, w, h);
}
/**
- * Construct the CF object.
- */
-template<typename FactorizerType>
-CF<FactorizerType>::CF(arma::mat& data,
- const size_t numUsersForSimilarity,
- const size_t rank) :
- numUsersForSimilarity(numUsersForSimilarity),
- rank(rank),
- factorizer()
-{
- // Validate neighbourhood size.
- if(numUsersForSimilarity < 1)
- {
- Log::Warn << "CF::CF(): neighbourhood size should be > 0("
- << numUsersForSimilarity << " given). Setting value to 5.\n";
- //Setting Default Value of 5
- this->numUsersForSimilarity = 5;
- }
-
- CleanData(data);
-
- // Check if the user wanted us to choose a rank for them.
- if(rank == 0)
- {
- // This is a simple heuristic that picks a rank based on the density of the
- // dataset between 5 and 105.
- const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
- const size_t rankEstimate = size_t(density) + 5;
-
- // Set to heuristic value.
- Log::Info << "No rank given for decomposition; using rank of "
- << rankEstimate << " calculated by density-based heuristic."
- << std::endl;
- this->rank = rankEstimate;
- }
-
- // Operations independent of the query:
- // Decompose the sparse data matrix to user and data matrices.
- ApplyFactorizer<FactorizerType>(data, cleanedData, factorizer, this->rank, w, h);
-}
-
-/**
* Construct the CF object using an instantiated factorizer.
*/
template<typename FactorizerType>
CF<FactorizerType>::CF(arma::mat& data,
- FactorizerType& factorizer,
+ FactorizerType factorizer,
const size_t numUsersForSimilarity,
const size_t rank) :
numUsersForSimilarity(numUsersForSimilarity),
diff --git a/src/mlpack/methods/cf/cf_main.cpp b/src/mlpack/methods/cf/cf_main.cpp
index 7c873d4..7998c79 100644
--- a/src/mlpack/methods/cf/cf_main.cpp
+++ b/src/mlpack/methods/cf/cf_main.cpp
@@ -6,10 +6,13 @@
*/
#include <mlpack/core.hpp>
+
+#include <mlpack/methods/amf/amf.hpp>
#include "cf.hpp"
using namespace mlpack;
using namespace mlpack::cf;
+using namespace mlpack::amf;
using namespace std;
// Document program.
@@ -40,11 +43,8 @@ PARAM_STRING("query_file", "List of users for which recommendations are to "
PARAM_STRING("output_file","File to save output recommendations to.", "o",
"recommendations.csv");
-// These features are not yet available in the CF code.
-//PARAM_STRING("algorithm", "Algorithm used for CF ('als' or 'svd').", "a",
-// "als");
-//PARAM_STRING("nearest_neighbor_algorithm", "Similarity search procedure to "
-// "be used for generating recommendations.", "s", "knn");
+PARAM_STRING("algorithm", "Algorithm used for matrix factorization.", "a",
+ "NMF");
PARAM_INT("recommendations", "Number of recommendations to generate for each "
"query user.", "r", 5);
@@ -53,6 +53,39 @@ PARAM_INT("neighborhood", "Size of the neighborhood of similar users to "
PARAM_INT("rank", "Rank of decomposed matrices.", "R", 2);
+template<typename Factorizer>
+void ComputeRecommendations(Factorizer factorizer,
+ arma::mat& dataset,
+ const size_t numRecs,
+ const size_t neighbourhood,
+ const size_t rank,
+ arma::Mat<size_t>& recommendations)
+{
+ CF<Factorizer> c(dataset, factorizer, neighbourhood, rank);
+
+ // Reading users.
+ const string queryFile = CLI::GetParam<string>("query_file");
+ if (queryFile != "")
+ {
+ // User matrix.
+ arma::Mat<size_t> userTmp;
+ arma::Col<size_t> users;
+ data::Load(queryFile, userTmp, true, false /* Don't transpose. */);
+ users = userTmp.col(0);
+
+ Log::Info << "Generating recommendations for " << users.n_elem << " users "
+ << "in '" << queryFile << "'." << endl;
+ c.GetRecommendations(numRecs, recommendations, users);
+ }
+ else
+ {
+ Log::Info << "Generating recommendations for all users." << endl;
+ c.GetRecommendations(numRecs, recommendations);
+ }
+}
+
+#define CR(x) ComputeRecommendations(x, dataset, numRecs, neighborhood, rank, recommendations)
+
int main(int argc, char** argv)
{
// Parse command line options.
@@ -73,29 +106,17 @@ int main(int argc, char** argv)
// Perform decomposition to prepare for recommendations.
Log::Info << "Performing CF matrix decomposition on dataset..." << endl;
- CF<> c(dataset);
- c.NumUsersForSimilarity(neighborhood);
- c.Rank(rank);
- // Reading users.
- const string queryFile = CLI::GetParam<string>("query_file");
- if (queryFile != "")
- {
- // User matrix.
- arma::Mat<size_t> userTmp;
- arma::Col<size_t> users;
- data::Load(queryFile, userTmp, true, false /* Don't transpose. */);
- users = userTmp.col(0);
+ const string algo = CLI::GetParam<string>("algorithm");
- Log::Info << "Generating recommendations for " << users.n_elem << " users "
- << "in '" << queryFile << "'." << endl;
- c.GetRecommendations(numRecs, recommendations, users);
- }
- else
- {
- Log::Info << "Generating recommendations for all users." << endl;
- c.GetRecommendations(numRecs, recommendations);
- }
+ if(algo == "NMF")
+ CR(NMFALSFactorizer());
+ else if(algo == "SVDBatch")
+ CR(SparseSVDBatchFactorizer());
+ else if(algo == "SVDIncompleteIncremental")
+ CR(SparseSVDIncompleteIncrementalFactorizer());
+ else if(algo == "SVDCompleteIncremental")
+ CR(SparseSVDCompleteIncrementalFactorizer());
const string outputFile = CLI::GetParam<string>("output_file");
data::Save(outputFile, recommendations);
diff --git a/src/mlpack/methods/cf/svd_wrapper.hpp b/src/mlpack/methods/cf/svd_wrapper.hpp
index 3835e89..89b6d5a 100644
--- a/src/mlpack/methods/cf/svd_wrapper.hpp
+++ b/src/mlpack/methods/cf/svd_wrapper.hpp
@@ -74,6 +74,9 @@ class SVDWrapper
Factorizer factorizer;
}; // class SVDWrapper
+//! add simple typedefs
+typedef SVDWrapper<DummyClass> ArmaSVDFactorizer;
+
//! include the implementation
#include "svd_wrapper_impl.hpp"
diff --git a/src/mlpack/tests/to_string_test.cpp b/src/mlpack/tests/to_string_test.cpp
index 0b86d18..d06ccd4 100644
--- a/src/mlpack/tests/to_string_test.cpp
+++ b/src/mlpack/tests/to_string_test.cpp
@@ -281,7 +281,6 @@ BOOST_AUTO_TEST_CASE(MRKDString)
BOOST_AUTO_TEST_CASE(CFString)
{
- size_t a = 1 ;
arma::mat c(3, 3);
c(0, 0) = 1;
c(1, 0) = 2;
@@ -292,7 +291,7 @@ BOOST_AUTO_TEST_CASE(CFString)
c(0, 2) = 1;
c(1, 2) = 3;
c(2, 2) = 0.7;
- mlpack::cf::CF<> d(c, a, a);
+ mlpack::cf::CF<> d(c);
Log::Debug << d;
std::string s = d.ToString();
BOOST_REQUIRE_NE(s, "");
More information about the mlpack-git mailing list