[mlpack-git] master: Only use template parameter for training. (341414b)
gitdub at big.cc.gt.atl.ga.us
Mon Dec 21 15:25:49 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/be72510a765362f86782a8892f0e979aaa4a9f62...51205e0ad285b2cf421546d8876fc63e994f2d73
>---------------------------------------------------------------
commit 341414ba5406ff02c79d9e6e95d334ccd768062c
Author: ryan <ryan at ratml.org>
Date: Mon Dec 21 15:02:13 2015 -0500
Only use template parameter for training.
There's no need for the CF model itself to always know what its FactorizerType
is.
>---------------------------------------------------------------
341414ba5406ff02c79d9e6e95d334ccd768062c
src/mlpack/methods/cf/CMakeLists.txt | 1 +
src/mlpack/methods/cf/{cf_impl.hpp => cf.cpp} | 153 ++----------
src/mlpack/methods/cf/cf.hpp | 63 +++--
src/mlpack/methods/cf/cf_impl.hpp | 326 ++++----------------------
src/mlpack/methods/cf/cf_main.cpp | 8 +-
src/mlpack/tests/cf_test.cpp | 20 +-
6 files changed, 122 insertions(+), 449 deletions(-)
diff --git a/src/mlpack/methods/cf/CMakeLists.txt b/src/mlpack/methods/cf/CMakeLists.txt
index 5238d8c..a758136 100644
--- a/src/mlpack/methods/cf/CMakeLists.txt
+++ b/src/mlpack/methods/cf/CMakeLists.txt
@@ -3,6 +3,7 @@
set(SOURCES
cf.hpp
cf_impl.hpp
+ cf.cpp
svd_wrapper.hpp
svd_wrapper_impl.hpp
)
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf.cpp
similarity index 65%
copy from src/mlpack/methods/cf/cf_impl.hpp
copy to src/mlpack/methods/cf/cf.cpp
index 75a43b9..f229f06 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf.cpp
@@ -1,5 +1,5 @@
/**
- * @file cf_impl.hpp
+ * @file cf.cpp
* @author Mudit Raj Gupta
* @author Sumedh Ghaisas
*
@@ -8,133 +8,29 @@
* Implementation of CF class to perform Collaborative Filtering on the
* specified data set.
*/
-#ifndef __MLPACK_METHODS_CF_CF_IMPL_HPP
-#define __MLPACK_METHODS_CF_CF_IMPL_HPP
-
-// In case it hasn't been included yet.
#include "cf.hpp"
namespace mlpack {
namespace cf {
-// Apply the factorizer when a coordinate list is used.
-template<typename FactorizerType>
-void ApplyFactorizer(FactorizerType& factorizer,
- const arma::mat& data,
- const arma::sp_mat& /* cleanedData */,
- const size_t rank,
- arma::mat& w,
- arma::mat& h,
- const typename boost::enable_if_c<FactorizerTraits<
- FactorizerType>::UsesCoordinateList>::type* = 0)
-{
- factorizer.Apply(data, rank, w, h);
-}
-
-// Apply the factorizer when coordinate lists are not used.
-template<typename FactorizerType>
-void ApplyFactorizer(FactorizerType& factorizer,
- const arma::mat& /* data */,
- const arma::sp_mat& cleanedData,
- const size_t rank,
- arma::mat& w,
- arma::mat& h,
- const typename boost::disable_if_c<FactorizerTraits<
- FactorizerType>::UsesCoordinateList>::type* = 0)
-{
- factorizer.Apply(cleanedData, rank, w, h);
-}
-
-/**
- * Construct the CF object using an instantiated factorizer.
- */
-template<typename FactorizerType>
-CF<FactorizerType>::CF(const arma::mat& data,
- FactorizerType factorizer,
- const size_t numUsersForSimilarity,
- const size_t rank) :
+// Default CF constructor.
+CF::CF(const size_t numUsersForSimilarity,
+ const size_t rank) :
numUsersForSimilarity(numUsersForSimilarity),
- rank(rank),
- factorizer(factorizer)
+ rank(rank)
{
// Validate neighbourhood size.
if (numUsersForSimilarity < 1)
{
- Log::Warn << "CF::CF(): neighbourhood size should be > 0("
+ Log::Warn << "CF::CF(): neighbourhood size should be > 0 ("
<< numUsersForSimilarity << " given). Setting value to 5.\n";
// Set default value of 5.
this->numUsersForSimilarity = 5;
}
-
- CleanData(data, cleanedData);
-
- // Check if the user wanted us to choose a rank for them.
- if (rank == 0)
- {
- // This is a simple heuristic that picks a rank based on the density of the
- // dataset between 5 and 105.
- const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
- const size_t rankEstimate = size_t(density) + 5;
-
- // Set to heuristic value.
- Log::Info << "No rank given for decomposition; using rank of "
- << rankEstimate << " calculated by density-based heuristic."
- << std::endl;
- this->rank = rankEstimate;
- }
-
- // Decompose the data matrix (which is in coordinate list form) to user and
- // data matrices.
- Timer::Start("cf_factorization");
- ApplyFactorizer(factorizer, data, cleanedData, this->rank, w, h);
- Timer::Stop("cf_factorization");
-}
-
-/**
- * Construct the CF object using an instantiated factorizer.
- */
-template<typename FactorizerType>
-template<typename U, typename T>
-CF<FactorizerType>::CF(const arma::sp_mat& data,
- FactorizerType factorizer,
- const size_t numUsersForSimilarity,
- const size_t rank) :
- numUsersForSimilarity(numUsersForSimilarity),
- rank(rank),
- factorizer(factorizer)
-{
- // Validate neighbourhood size.
- if (numUsersForSimilarity < 1)
- {
- Log::Warn << "CF::CF(): neighbourhood size should be > 0("
- << numUsersForSimilarity << " given). Setting value to 5.\n";
- //Setting Default Value of 5
- this->numUsersForSimilarity = 5;
- }
-
- cleanedData = data;
-
- // Check if the user wanted us to choose a rank for them.
- if (rank == 0)
- {
- // This is a simple heuristic that picks a rank based on the density of the
- // dataset between 5 and 105.
- const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
- const size_t rankEstimate = size_t(density) + 5;
-
- // Set to heuristic value.
- Log::Info << "No rank given for decomposition; using rank of "
- << rankEstimate << " calculated by density-based heuristic."
- << std::endl;
- this->rank = rankEstimate;
- }
-
- factorizer.Apply(cleanedData, this->rank, w, h);
}
-template<typename FactorizerType>
-void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
- arma::Mat<size_t>& recommendations)
+void CF::GetRecommendations(const size_t numRecs,
+ arma::Mat<size_t>& recommendations)
{
// Generate list of users. Maybe it would be more efficient to pass an empty
// users list, and then have the other overload of GetRecommendations() assume
@@ -147,10 +43,9 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
GetRecommendations(numRecs, recommendations, users);
}
-template<typename FactorizerType>
-void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
- arma::Mat<size_t>& recommendations,
- arma::Col<size_t>& users)
+void CF::GetRecommendations(const size_t numRecs,
+ arma::Mat<size_t>& recommendations,
+ arma::Col<size_t>& users)
{
// We want to avoid calculating the full rating matrix, so we will do nearest
// neighbor search only on the H matrix, using the observation that if the
@@ -233,8 +128,7 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
}
// Predict the rating for a single user/item combination.
-template<typename FactorizerType>
-double CF<FactorizerType>::Predict(const size_t user, const size_t item) const
+double CF::Predict(const size_t user, const size_t item) const
{
// First, we need to find the nearest neighbors of the given user.
// We'll use the same technique as for GetRecommendations().
@@ -275,9 +169,8 @@ double CF<FactorizerType>::Predict(const size_t user, const size_t item) const
}
// Predict the rating for a group of user/item combinations.
-template<typename FactorizerType>
-void CF<FactorizerType>::Predict(const arma::Mat<size_t>& combinations,
- arma::vec& predictions) const
+void CF::Predict(const arma::Mat<size_t>& combinations,
+ arma::vec& predictions) const
{
// First, for nearest neighbor search, stretch the H matrix.
arma::mat l = arma::chol(w.t() * w);
@@ -329,8 +222,7 @@ void CF<FactorizerType>::Predict(const arma::Mat<size_t>& combinations,
}
}
-template<typename FactorizerType>
-void CF<FactorizerType>::CleanData(const arma::mat& data, arma::sp_mat& cleanedData)
+void CF::CleanData(const arma::mat& data, arma::sp_mat& cleanedData)
{
// Generate list of locations for batch insert constructor for sparse
// matrices.
@@ -363,13 +255,12 @@ void CF<FactorizerType>::CleanData(const arma::mat& data, arma::sp_mat& cleanedD
* @param neighbor Index of item being inserted as a recommendation.
* @param value Value of recommendation.
*/
-template<typename FactorizerType>
-void CF<FactorizerType>::InsertNeighbor(const size_t queryIndex,
- const size_t pos,
- const size_t neighbor,
- const double value,
- arma::Mat<size_t>& recommendations,
- arma::mat& values) const
+void CF::InsertNeighbor(const size_t queryIndex,
+ const size_t pos,
+ const size_t neighbor,
+ const double value,
+ arma::Mat<size_t>& recommendations,
+ arma::mat& values) const
{
// We only memmove() if there is actually a need to shift something.
if (pos < (recommendations.n_rows - 1))
@@ -390,5 +281,3 @@ void CF<FactorizerType>::InsertNeighbor(const size_t queryIndex,
} // namespace mlpack
} // namespace cf
-
-#endif
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index e2165e9..6b9e85e 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -72,14 +72,19 @@ struct FactorizerTraits
* the rating matrix (a W and H matrix). This must implement the method
* Apply(arma::sp_mat& data, size_t rank, arma::mat& W, arma::mat& H).
*/
-template<
- typename FactorizerType = amf::NMFALSFactorizer>
class CF
{
public:
/**
- * Initialize the CF object using an instantiated factorizer. Store a
- * reference to the data that we will be using. There are parameters that can
+ * Initialize the CF object without performing any factorization. Be sure to
+ * call Train() before calling GetRecommendations() or any other functions!
+ */
+ CF(const size_t numUsersForSimilarity = 5,
+ const size_t rank = 0);
+
+ /**
+ * Initialize the CF object using an instantiated factorizer, immediately
+ * factorizing the given data to create a model. There are parameters that can
* be set; default values are provided for each of them. If the rank is left
* unset (or is set to 0), a simple density-based heuristic will be used to
* choose a rank.
@@ -93,14 +98,15 @@ class CF
* @param numUsersForSimilarity Size of the neighborhood.
* @param rank Rank parameter for matrix factorization.
*/
+ template<typename FactorizerType = amf::NMFALSFactorizer>
CF(const arma::mat& data,
FactorizerType factorizer = FactorizerType(),
const size_t numUsersForSimilarity = 5,
const size_t rank = 0);
/**
- * Initialize the CF object using an instantiated factorizer. Store a
- * reference to the data that we will be using. There are parameters that can
+ * Initialize the CF object using an instantiated factorizer, immediately
+ * factorizing the given data to create a model. There are parameters that can
* be set; default values are provided for each of them. If the rank is left
* unset (or is set to 0), a simple density-based heuristic will be used to
* choose a rank. Data will be considered in the format of items vs. users and
@@ -116,13 +122,40 @@ class CF
* @param numUsersForSimilarity Size of the neighborhood.
* @param rank Rank parameter for matrix factorization.
*/
- template<typename U = FactorizerType,
- typename T = typename boost::disable_if_c<
- FactorizerTraits<U>::UsesCoordinateList>::type*>
+ template<typename FactorizerType = amf::NMFALSFactorizer>
CF(const arma::sp_mat& data,
FactorizerType factorizer = FactorizerType(),
const size_t numUsersForSimilarity = 5,
- const size_t rank = 0);
+ const size_t rank = 0,
+ const typename boost::disable_if_c<
+ FactorizerTraits<FactorizerType>::UsesCoordinateList>::type* = 0);
+
+ /**
+ * Train the CF model (i.e. factorize the input matrix) using the parameters
+ * that have already been set for the model (specifically, the rank
+ * parameter), and optionally, using the given FactorizerType.
+ *
+ * @param data Input dataset; coordinate list or dense matrix.
+ * @param factorizer Instantiated factorizer.
+ */
+ template<typename FactorizerType>
+ void Train(const arma::mat& data,
+ FactorizerType factorizer = FactorizerType());
+
+ /**
+ * Train the CF model (i.e. factorize the input matrix) using the parameters
+ * that have already been set for the model (specifically, the rank
+ * parameter), and optionally, using the given FactorizerType.
+ *
+ * @param data Sparse matrix data.
+ * @param factorizer Instantiated factorizer.
+ */
+ template<typename FactorizerType>
+ void Train(const arma::sp_mat& data,
+ FactorizerType factorizer = FactorizerType(),
+ const typename boost::disable_if_c<
+ FactorizerTraits<FactorizerType>::UsesCoordinateList>::type*
+ = 0);
//! Sets number of users for calculating similarity.
void NumUsersForSimilarity(const size_t num)
@@ -154,12 +187,6 @@ class CF
return rank;
}
- //! Sets factorizer for NMF
- void Factorizer(const FactorizerType& f)
- {
- this->factorizer = f;
- }
-
//! Get the User Matrix.
const arma::mat& W() const { return w; }
//! Get the Item Matrix.
@@ -220,8 +247,6 @@ class CF
size_t numUsersForSimilarity;
//! Rank used for matrix factorization.
size_t rank;
- //! Instantiated factorizer object.
- FactorizerType factorizer;
//! User matrix.
arma::mat w;
//! Item matrix.
@@ -252,7 +277,7 @@ class CF
} // namespace cf
} // namespace mlpack
-//Include implementation
+// Include implementation of templated functions.
#include "cf_impl.hpp"
#endif
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index 75a43b9..0781e7b 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -49,23 +49,53 @@ void ApplyFactorizer(FactorizerType& factorizer,
* Construct the CF object using an instantiated factorizer.
*/
template<typename FactorizerType>
-CF<FactorizerType>::CF(const arma::mat& data,
- FactorizerType factorizer,
- const size_t numUsersForSimilarity,
- const size_t rank) :
+CF::CF(const arma::mat& data,
+ FactorizerType factorizer,
+ const size_t numUsersForSimilarity,
+ const size_t rank) :
numUsersForSimilarity(numUsersForSimilarity),
- rank(rank),
- factorizer(factorizer)
+ rank(rank)
{
// Validate neighbourhood size.
if (numUsersForSimilarity < 1)
{
- Log::Warn << "CF::CF(): neighbourhood size should be > 0("
+ Log::Warn << "CF::CF(): neighbourhood size should be > 0 ("
<< numUsersForSimilarity << " given). Setting value to 5.\n";
// Set default value of 5.
this->numUsersForSimilarity = 5;
}
+ Train(data, factorizer);
+}
+
+/**
+ * Construct the CF object using an instantiated factorizer.
+ */
+template<typename FactorizerType>
+CF::CF(const arma::sp_mat& data,
+ FactorizerType factorizer,
+ const size_t numUsersForSimilarity,
+ const size_t rank,
+ const typename boost::disable_if_c<FactorizerTraits<
+ FactorizerType>::UsesCoordinateList>::type*) :
+ numUsersForSimilarity(numUsersForSimilarity),
+ rank(rank)
+{
+ // Validate neighbourhood size.
+ if (numUsersForSimilarity < 1)
+ {
+ Log::Warn << "CF::CF(): neighbourhood size should be > 0("
+ << numUsersForSimilarity << " given). Setting value to 5.\n";
+ //Setting Default Value of 5
+ this->numUsersForSimilarity = 5;
+ }
+
+ Train(data, factorizer);
+}
+
+template<typename FactorizerType>
+void CF::Train(const arma::mat& data, FactorizerType factorizer)
+{
CleanData(data, cleanedData);
// Check if the user wanted us to choose a rank for them.
@@ -90,28 +120,12 @@ CF<FactorizerType>::CF(const arma::mat& data,
Timer::Stop("cf_factorization");
}
-/**
- * Construct the CF object using an instantiated factorizer.
- */
template<typename FactorizerType>
-template<typename U, typename T>
-CF<FactorizerType>::CF(const arma::sp_mat& data,
- FactorizerType factorizer,
- const size_t numUsersForSimilarity,
- const size_t rank) :
- numUsersForSimilarity(numUsersForSimilarity),
- rank(rank),
- factorizer(factorizer)
+void CF::Train(const arma::sp_mat& data,
+ FactorizerType factorizer,
+ const typename boost::disable_if_c<FactorizerTraits<
+ FactorizerType>::UsesCoordinateList>::type*)
{
- // Validate neighbourhood size.
- if (numUsersForSimilarity < 1)
- {
- Log::Warn << "CF::CF(): neighbourhood size should be > 0("
- << numUsersForSimilarity << " given). Setting value to 5.\n";
- //Setting Default Value of 5
- this->numUsersForSimilarity = 5;
- }
-
cleanedData = data;
// Check if the user wanted us to choose a rank for them.
@@ -129,263 +143,9 @@ CF<FactorizerType>::CF(const arma::sp_mat& data,
this->rank = rankEstimate;
}
+ Timer::Start("cf_factorization");
factorizer.Apply(cleanedData, this->rank, w, h);
-}
-
-template<typename FactorizerType>
-void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
- arma::Mat<size_t>& recommendations)
-{
- // Generate list of users. Maybe it would be more efficient to pass an empty
- // users list, and then have the other overload of GetRecommendations() assume
- // that if users is empty, then recommendations should be generated for all
- // users?
- arma::Col<size_t> users = arma::linspace<arma::Col<size_t> >(0,
- cleanedData.n_cols - 1, cleanedData.n_cols);
-
- // Call the main overload for recommendations.
- GetRecommendations(numRecs, recommendations, users);
-}
-
-template<typename FactorizerType>
-void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
- arma::Mat<size_t>& recommendations,
- arma::Col<size_t>& users)
-{
- // We want to avoid calculating the full rating matrix, so we will do nearest
- // neighbor search only on the H matrix, using the observation that if the
- // rating matrix X = W*H, then d(X.col(i), X.col(j)) = d(W H.col(i), W
- // H.col(j)). This can be seen as nearest neighbor search on the H matrix
- // with the Mahalanobis distance where M^{-1} = W^T W. So, we'll decompose
- // M^{-1} = L L^T (the Cholesky decomposition), and then multiply H by L^T.
- // Then we can perform nearest neighbor search.
- arma::mat l = arma::chol(w.t() * w);
- arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T.
-
- // Now, we will use the decomposed w and h matrices to estimate what the user
- // would have rated items as, and then pick the best items.
-
- // Temporarily store feature vector of queried users.
- arma::mat query(stretchedH.n_rows, users.n_elem);
-
- // Select feature vectors of queried users.
- for (size_t i = 0; i < users.n_elem; i++)
- query.col(i) = stretchedH.col(users(i));
-
- // Temporary storage for neighborhood of the queried users.
- arma::Mat<size_t> neighborhood;
-
- // Calculate the neighborhood of the queried users.
- // This should be a templatized option.
- neighbor::AllkNN a(stretchedH);
- arma::mat resultingDistances; // Temporary storage.
- a.Search(query, numUsersForSimilarity, neighborhood, resultingDistances);
-
- // Generate recommendations for each query user by finding the maximum numRecs
- // elements in the averages matrix.
- recommendations.set_size(numRecs, users.n_elem);
- recommendations.fill(cleanedData.n_rows); // Invalid item number.
- arma::mat values(numRecs, users.n_elem);
- values.fill(-DBL_MAX); // The smallest possible value.
- for (size_t i = 0; i < users.n_elem; i++)
- {
- // First, calculate average of neighborhood values.
- arma::vec averages;
- averages.zeros(cleanedData.n_rows);
-
- for (size_t j = 0; j < neighborhood.n_rows; ++j)
- averages += w * h.col(neighborhood(j, i));
- averages /= neighborhood.n_rows;
-
- // Look through the averages column corresponding to the current user.
- for (size_t j = 0; j < averages.n_rows; ++j)
- {
- // Ensure that the user hasn't already rated the item.
- if (cleanedData(j, users(i)) != 0.0)
- continue; // The user already rated the item.
-
- // Is the estimated value better than the worst candidate?
- const double value = averages[j];
- if (value > values(values.n_rows - 1, i))
- {
- // It should be inserted. Which position?
- size_t insertPosition = values.n_rows - 1;
- while (insertPosition > 0)
- {
- if (value <= values(insertPosition - 1, i))
- break; // The current value is the right one.
- insertPosition--;
- }
-
- // Now insert it into the list.
- InsertNeighbor(i, insertPosition, j, value, recommendations,
- values);
- }
- }
-
- // If we were not able to come up with enough recommendations, issue a
- // warning.
- if (recommendations(values.n_rows - 1, i) == cleanedData.n_rows + 1)
- Log::Warn << "Could not provide " << values.n_rows << " recommendations "
- << "for user " << users(i) << " (not enough un-rated items)!"
- << std::endl;
- }
-}
-
-// Predict the rating for a single user/item combination.
-template<typename FactorizerType>
-double CF<FactorizerType>::Predict(const size_t user, const size_t item) const
-{
- // First, we need to find the nearest neighbors of the given user.
- // We'll use the same technique as for GetRecommendations().
-
- // We want to avoid calculating the full rating matrix, so we will do nearest
- // neighbor search only on the H matrix, using the observation that if the
- // rating matrix X = W*H, then d(X.col(i), X.col(j)) = d(W H.col(i), W
- // H.col(j)). This can be seen as nearest neighbor search on the H matrix
- // with the Mahalanobis distance where M^{-1} = W^T W. So, we'll decompose
- // M^{-1} = L L^T (the Cholesky decomposition), and then multiply H by L^T.
- // Then we can perform nearest neighbor search.
- arma::mat l = arma::chol(w.t() * w);
- arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T.
-
- // Now, we will use the decomposed w and h matrices to estimate what the user
- // would have rated items as, and then pick the best items.
-
- // Temporarily store feature vector of queried users.
- arma::mat query = stretchedH.col(user);
-
- // Temporary storage for neighborhood of the queried users.
- arma::Mat<size_t> neighborhood;
-
- // Calculate the neighborhood of the queried users.
- // This should be a templatized option.
- neighbor::AllkNN a(stretchedH, false, true /* single-tree mode */);
- arma::mat resultingDistances; // Temporary storage.
-
- a.Search(query, numUsersForSimilarity, neighborhood, resultingDistances);
-
- double rating = 0; // We'll take the average of neighborhood values.
-
- for (size_t j = 0; j < neighborhood.n_rows; ++j)
- rating += arma::as_scalar(w.row(item) * h.col(neighborhood(j, 0)));
- rating /= neighborhood.n_rows;
-
- return rating;
-}
-
-// Predict the rating for a group of user/item combinations.
-template<typename FactorizerType>
-void CF<FactorizerType>::Predict(const arma::Mat<size_t>& combinations,
- arma::vec& predictions) const
-{
- // First, for nearest neighbor search, stretch the H matrix.
- arma::mat l = arma::chol(w.t() * w);
- arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T.
-
- // Now, we must determine those query indices we need to find the nearest
- // neighbors for. This is easiest if we just sort the combinations matrix.
- arma::Mat<size_t> sortedCombinations(combinations.n_rows,
- combinations.n_cols);
- arma::uvec ordering = arma::sort_index(combinations.row(0).t());
- for (size_t i = 0; i < ordering.n_elem; ++i)
- sortedCombinations.col(i) = combinations.col(ordering[i]);
-
- // Now, we have to get the list of unique users we will be searching for.
- arma::Col<size_t> users = arma::unique(combinations.row(0).t());
-
- // Assemble our query matrix from the stretchedH matrix.
- arma::mat queries(stretchedH.n_rows, users.n_elem);
- for (size_t i = 0; i < queries.n_cols; ++i)
- queries.col(i) = stretchedH.col(users[i]);
-
- // Now calculate the neighborhood of these users.
- neighbor::AllkNN a(stretchedH);
- arma::mat distances;
- arma::Mat<size_t> neighborhood;
-
- a.Search(queries, numUsersForSimilarity, neighborhood, distances);
-
- // Now that we have the neighborhoods we need, calculate the predictions.
- predictions.set_size(combinations.n_cols);
-
- size_t user = 0; // Cumulative user count, because we are doing it in order.
- for (size_t i = 0; i < sortedCombinations.n_cols; ++i)
- {
- // Could this be made faster by calculating dot products for multiple items
- // at once?
- double rating = 0.0;
-
- // Map the combination's user to the user ID used for kNN.
- while (users[user] < sortedCombinations(0, i))
- ++user;
-
- for (size_t j = 0; j < neighborhood.n_rows; ++j)
- rating += arma::as_scalar(w.row(sortedCombinations(1, i)) *
- h.col(neighborhood(j, user)));
- rating /= neighborhood.n_rows;
-
- predictions(ordering[i]) = rating;
- }
-}
-
-template<typename FactorizerType>
-void CF<FactorizerType>::CleanData(const arma::mat& data, arma::sp_mat& cleanedData)
-{
- // Generate list of locations for batch insert constructor for sparse
- // matrices.
- arma::umat locations(2, data.n_cols);
- arma::vec values(data.n_cols);
- for (size_t i = 0; i < data.n_cols; ++i)
- {
- // We have to transpose it because items are rows, and users are columns.
- locations(1, i) = ((arma::uword) data(0, i));
- locations(0, i) = ((arma::uword) data(1, i));
- values(i) = data(2, i);
- if (values(i) == 0)
- Log::Warn << "User rating of 0 ignored for user " << locations(1, i)
- << ", item " << locations(0, i) << "." << std::endl;
- }
-
- // Find maximum user and item IDs.
- const size_t maxItemID = (size_t) max(locations.row(0)) + 1;
- const size_t maxUserID = (size_t) max(locations.row(1)) + 1;
-
- // Fill sparse matrix.
- cleanedData = arma::sp_mat(locations, values, maxItemID, maxUserID);
-}
-
-/**
- * Helper function to insert a point into the recommendation matrices.
- *
- * @param queryIndex Index of point whose recommendations we are inserting into.
- * @param pos Position in list to insert into.
- * @param neighbor Index of item being inserted as a recommendation.
- * @param value Value of recommendation.
- */
-template<typename FactorizerType>
-void CF<FactorizerType>::InsertNeighbor(const size_t queryIndex,
- const size_t pos,
- const size_t neighbor,
- const double value,
- arma::Mat<size_t>& recommendations,
- arma::mat& values) const
-{
- // We only memmove() if there is actually a need to shift something.
- if (pos < (recommendations.n_rows - 1))
- {
- const int len = (values.n_rows - 1) - pos;
- memmove(values.colptr(queryIndex) + (pos + 1),
- values.colptr(queryIndex) + pos,
- sizeof(double) * len);
- memmove(recommendations.colptr(queryIndex) + (pos + 1),
- recommendations.colptr(queryIndex) + pos,
- sizeof(size_t) * len);
- }
-
- // Now put the new information in the right index.
- values(pos, queryIndex) = value;
- recommendations(pos, queryIndex) = neighbor;
+ Timer::Stop("cf_factorization");
}
} // namespace mlpack
diff --git a/src/mlpack/methods/cf/cf_main.cpp b/src/mlpack/methods/cf/cf_main.cpp
index d4b5636..42ef2b6 100644
--- a/src/mlpack/methods/cf/cf_main.cpp
+++ b/src/mlpack/methods/cf/cf_main.cpp
@@ -83,8 +83,7 @@ PARAM_DOUBLE("min_residue", "Residue required to terminate the factorization "
PARAM_INT("seed", "Set the random seed (0 uses std::time(NULL)).", "s", 0);
-template<typename Factorizer>
-void ComputeRecommendations(CF<Factorizer>& cf,
+void ComputeRecommendations(CF& cf,
const size_t numRecs,
arma::Mat<size_t>& recommendations)
{
@@ -109,8 +108,7 @@ void ComputeRecommendations(CF<Factorizer>& cf,
}
}
-template<typename Factorizer>
-void ComputeRMSE(CF<Factorizer>& cf)
+void ComputeRMSE(CF& cf)
{
// Now, compute each test point.
const string testFile = CLI::GetParam<string>("test_file");
@@ -146,7 +144,7 @@ void PerformAction(Factorizer&& factorizer,
{
// Parameters for generating the CF object.
const size_t neighborhood = (size_t) CLI::GetParam<int>("neighborhood");
- CF<Factorizer> c(dataset, factorizer, neighborhood, rank);
+ CF c(dataset, factorizer, neighborhood, rank);
if (CLI::HasParam("query_file") || CLI::HasParam("all_user_recommendations"))
{
diff --git a/src/mlpack/tests/cf_test.cpp b/src/mlpack/tests/cf_test.cpp
index 651733d..9fbce77 100644
--- a/src/mlpack/tests/cf_test.cpp
+++ b/src/mlpack/tests/cf_test.cpp
@@ -38,10 +38,10 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsAllUsersTest)
// Make data into sparse matrix.
arma::sp_mat cleanedData;
- CF<>::CleanData(dataset, cleanedData);
+ CF::CleanData(dataset, cleanedData);
// Create a CF object.
- CF<> c(cleanedData);
+ CF c(cleanedData);
// Generate recommendations when query set is not specified.
c.GetRecommendations(numRecs, recommendations);
@@ -78,9 +78,9 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsQueriedUserTest)
// Make data into sparse matrix.
arma::sp_mat cleanedData;
- CF<>::CleanData(dataset, cleanedData);
+ CF::CleanData(dataset, cleanedData);
- CF<> c(cleanedData);
+ CF c(cleanedData);
// Generate recommendations when query set is specified.
c.GetRecommendations(numRecsDefault, recommendations, users);
@@ -136,10 +136,10 @@ BOOST_AUTO_TEST_CASE(RecommendationAccuracyTest)
// Make data into sparse matrix.
arma::sp_mat cleanedData;
- CF<>::CleanData(dataset, cleanedData);
+ CF::CleanData(dataset, cleanedData);
// Now create the CF object.
- CF<> c(cleanedData);
+ CF c(cleanedData);
// Obtain 150 recommendations for the users in savedCols, and make sure the
// missing item shows up in most of them. First, create the list of users,
@@ -229,10 +229,10 @@ BOOST_AUTO_TEST_CASE(CFPredictTest)
// Make data into sparse matrix.
arma::sp_mat cleanedData;
- CF<>::CleanData(dataset, cleanedData);
+ CF::CleanData(dataset, cleanedData);
// Now create the CF object.
- CF<> c(cleanedData);
+ CF c(cleanedData);
// Now, for each removed rating, make sure the prediction is... reasonably
// accurate.
@@ -295,10 +295,10 @@ BOOST_AUTO_TEST_CASE(CFBatchPredictTest)
// Make data into sparse matrix.
arma::sp_mat cleanedData;
- CF<>::CleanData(dataset, cleanedData);
+ CF::CleanData(dataset, cleanedData);
// Now create the CF object.
- CF<> c(cleanedData);
+ CF c(cleanedData);
// Get predictions for all user/item pairs we held back.
arma::Mat<size_t> combinations(2, savedCols.n_cols);
More information about the mlpack-git
mailing list