[mlpack-git] master: Only use template parameter for training. (341414b)

gitdub at big.cc.gt.atl.ga.us
Mon Dec 21 15:25:49 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/be72510a765362f86782a8892f0e979aaa4a9f62...51205e0ad285b2cf421546d8876fc63e994f2d73

>---------------------------------------------------------------

commit 341414ba5406ff02c79d9e6e95d334ccd768062c
Author: ryan <ryan at ratml.org>
Date:   Mon Dec 21 15:02:13 2015 -0500

    Only use template parameter for training.
    
    There's no need for the CF model itself to always know what its FactorizerType
    is.


>---------------------------------------------------------------

341414ba5406ff02c79d9e6e95d334ccd768062c
 src/mlpack/methods/cf/CMakeLists.txt          |   1 +
 src/mlpack/methods/cf/{cf_impl.hpp => cf.cpp} | 153 ++----------
 src/mlpack/methods/cf/cf.hpp                  |  63 +++--
 src/mlpack/methods/cf/cf_impl.hpp             | 326 ++++----------------------
 src/mlpack/methods/cf/cf_main.cpp             |   8 +-
 src/mlpack/tests/cf_test.cpp                  |  20 +-
 6 files changed, 122 insertions(+), 449 deletions(-)

diff --git a/src/mlpack/methods/cf/CMakeLists.txt b/src/mlpack/methods/cf/CMakeLists.txt
index 5238d8c..a758136 100644
--- a/src/mlpack/methods/cf/CMakeLists.txt
+++ b/src/mlpack/methods/cf/CMakeLists.txt
@@ -3,6 +3,7 @@
 set(SOURCES
   cf.hpp
   cf_impl.hpp
+  cf.cpp
   svd_wrapper.hpp
   svd_wrapper_impl.hpp
 )
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf.cpp
similarity index 65%
copy from src/mlpack/methods/cf/cf_impl.hpp
copy to src/mlpack/methods/cf/cf.cpp
index 75a43b9..f229f06 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf.cpp
@@ -1,5 +1,5 @@
 /**
- * @file cf_impl.hpp
+ * @file cf.cpp
  * @author Mudit Raj Gupta
  * @author Sumedh Ghaisas
  *
@@ -8,133 +8,29 @@
  * Implementation of CF class to perform Collaborative Filtering on the
  * specified data set.
  */
-#ifndef __MLPACK_METHODS_CF_CF_IMPL_HPP
-#define __MLPACK_METHODS_CF_CF_IMPL_HPP
-
-// In case it hasn't been included yet.
 #include "cf.hpp"
 
 namespace mlpack {
 namespace cf {
 
-// Apply the factorizer when a coordinate list is used.
-template<typename FactorizerType>
-void ApplyFactorizer(FactorizerType& factorizer,
-                     const arma::mat& data,
-                     const arma::sp_mat& /* cleanedData */,
-                     const size_t rank,
-                     arma::mat& w,
-                     arma::mat& h,
-                     const typename boost::enable_if_c<FactorizerTraits<
-                         FactorizerType>::UsesCoordinateList>::type* = 0)
-{
-  factorizer.Apply(data, rank, w, h);
-}
-
-// Apply the factorizer when coordinate lists are not used.
-template<typename FactorizerType>
-void ApplyFactorizer(FactorizerType& factorizer,
-                     const arma::mat& /* data */,
-                     const arma::sp_mat& cleanedData,
-                     const size_t rank,
-                     arma::mat& w,
-                     arma::mat& h,
-                     const typename boost::disable_if_c<FactorizerTraits<
-                         FactorizerType>::UsesCoordinateList>::type* = 0)
-{
-  factorizer.Apply(cleanedData, rank, w, h);
-}
-
-/**
- * Construct the CF object using an instantiated factorizer.
- */
-template<typename FactorizerType>
-CF<FactorizerType>::CF(const arma::mat& data,
-                       FactorizerType factorizer,
-                       const size_t numUsersForSimilarity,
-                       const size_t rank) :
+// Default CF constructor.
+CF::CF(const size_t numUsersForSimilarity,
+       const size_t rank) :
     numUsersForSimilarity(numUsersForSimilarity),
-    rank(rank),
-    factorizer(factorizer)
+    rank(rank)
 {
   // Validate neighbourhood size.
   if (numUsersForSimilarity < 1)
   {
-    Log::Warn << "CF::CF(): neighbourhood size should be > 0("
+    Log::Warn << "CF::CF(): neighbourhood size should be > 0 ("
         << numUsersForSimilarity << " given). Setting value to 5.\n";
     // Set default value of 5.
     this->numUsersForSimilarity = 5;
   }
-
-  CleanData(data, cleanedData);
-
-  // Check if the user wanted us to choose a rank for them.
-  if (rank == 0)
-  {
-    // This is a simple heuristic that picks a rank based on the density of the
-    // dataset between 5 and 105.
-    const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
-    const size_t rankEstimate = size_t(density) + 5;
-
-    // Set to heuristic value.
-    Log::Info << "No rank given for decomposition; using rank of "
-        << rankEstimate << " calculated by density-based heuristic."
-        << std::endl;
-    this->rank = rankEstimate;
-  }
-
-  // Decompose the data matrix (which is in coordinate list form) to user and
-  // data matrices.
-  Timer::Start("cf_factorization");
-  ApplyFactorizer(factorizer, data, cleanedData, this->rank, w, h);
-  Timer::Stop("cf_factorization");
-}
-
-/**
- * Construct the CF object using an instantiated factorizer.
- */
-template<typename FactorizerType>
-template<typename U, typename T>
-CF<FactorizerType>::CF(const arma::sp_mat& data,
-                       FactorizerType factorizer,
-                       const size_t numUsersForSimilarity,
-                       const size_t rank) :
-    numUsersForSimilarity(numUsersForSimilarity),
-    rank(rank),
-    factorizer(factorizer)
-{
-  // Validate neighbourhood size.
-  if (numUsersForSimilarity < 1)
-  {
-    Log::Warn << "CF::CF(): neighbourhood size should be > 0("
-        << numUsersForSimilarity << " given). Setting value to 5.\n";
-    //Setting Default Value of 5
-    this->numUsersForSimilarity = 5;
-  }
-
-  cleanedData = data;
-
-  // Check if the user wanted us to choose a rank for them.
-  if (rank == 0)
-  {
-    // This is a simple heuristic that picks a rank based on the density of the
-    // dataset between 5 and 105.
-    const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
-    const size_t rankEstimate = size_t(density) + 5;
-
-    // Set to heuristic value.
-    Log::Info << "No rank given for decomposition; using rank of "
-        << rankEstimate << " calculated by density-based heuristic."
-        << std::endl;
-    this->rank = rankEstimate;
-  }
-
-  factorizer.Apply(cleanedData, this->rank, w, h);
 }
 
-template<typename FactorizerType>
-void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
-                                            arma::Mat<size_t>& recommendations)
+void CF::GetRecommendations(const size_t numRecs,
+                            arma::Mat<size_t>& recommendations)
 {
   // Generate list of users.  Maybe it would be more efficient to pass an empty
   // users list, and then have the other overload of GetRecommendations() assume
@@ -147,10 +43,9 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
   GetRecommendations(numRecs, recommendations, users);
 }
 
-template<typename FactorizerType>
-void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
-                                            arma::Mat<size_t>& recommendations,
-                                            arma::Col<size_t>& users)
+void CF::GetRecommendations(const size_t numRecs,
+                            arma::Mat<size_t>& recommendations,
+                            arma::Col<size_t>& users)
 {
   // We want to avoid calculating the full rating matrix, so we will do nearest
   // neighbor search only on the H matrix, using the observation that if the
@@ -233,8 +128,7 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
 }
 
 // Predict the rating for a single user/item combination.
-template<typename FactorizerType>
-double CF<FactorizerType>::Predict(const size_t user, const size_t item) const
+double CF::Predict(const size_t user, const size_t item) const
 {
   // First, we need to find the nearest neighbors of the given user.
   // We'll use the same technique as for GetRecommendations().
@@ -275,9 +169,8 @@ double CF<FactorizerType>::Predict(const size_t user, const size_t item) const
 }
 
 // Predict the rating for a group of user/item combinations.
-template<typename FactorizerType>
-void CF<FactorizerType>::Predict(const arma::Mat<size_t>& combinations,
-                                 arma::vec& predictions) const
+void CF::Predict(const arma::Mat<size_t>& combinations,
+                 arma::vec& predictions) const
 {
   // First, for nearest neighbor search, stretch the H matrix.
   arma::mat l = arma::chol(w.t() * w);
@@ -329,8 +222,7 @@ void CF<FactorizerType>::Predict(const arma::Mat<size_t>& combinations,
   }
 }
 
-template<typename FactorizerType>
-void CF<FactorizerType>::CleanData(const arma::mat& data, arma::sp_mat& cleanedData)
+void CF::CleanData(const arma::mat& data, arma::sp_mat& cleanedData)
 {
   // Generate list of locations for batch insert constructor for sparse
   // matrices.
@@ -363,13 +255,12 @@ void CF<FactorizerType>::CleanData(const arma::mat& data, arma::sp_mat& cleanedD
  * @param neighbor Index of item being inserted as a recommendation.
  * @param value Value of recommendation.
  */
-template<typename FactorizerType>
-void CF<FactorizerType>::InsertNeighbor(const size_t queryIndex,
-                                        const size_t pos,
-                                        const size_t neighbor,
-                                        const double value,
-                                        arma::Mat<size_t>& recommendations,
-                                        arma::mat& values) const
+void CF::InsertNeighbor(const size_t queryIndex,
+                        const size_t pos,
+                        const size_t neighbor,
+                        const double value,
+                        arma::Mat<size_t>& recommendations,
+                        arma::mat& values) const
 {
   // We only memmove() if there is actually a need to shift something.
   if (pos < (recommendations.n_rows - 1))
@@ -390,5 +281,3 @@ void CF<FactorizerType>::InsertNeighbor(const size_t queryIndex,
 
 } // namespace mlpack
 } // namespace cf
-
-#endif
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index e2165e9..6b9e85e 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -72,14 +72,19 @@ struct FactorizerTraits
  *     the rating matrix (a W and H matrix).  This must implement the method
  *     Apply(arma::sp_mat& data, size_t rank, arma::mat& W, arma::mat& H).
  */
-template<
-    typename FactorizerType = amf::NMFALSFactorizer>
 class CF
 {
  public:
   /**
-   * Initialize the CF object using an instantiated factorizer. Store a
-   * reference to the data that we will be using. There are parameters that can
+   * Initialize the CF object without performing any factorization.  Be sure to
+   * call Train() before calling GetRecommendations() or any other functions!
+   */
+  CF(const size_t numUsersForSimilarity = 5,
+     const size_t rank = 0);
+
+  /**
+   * Initialize the CF object using an instantiated factorizer, immediately
+   * factorizing the given data to create a model. There are parameters that can
    * be set; default values are provided for each of them. If the rank is left
    * unset (or is set to 0), a simple density-based heuristic will be used to
    * choose a rank.
@@ -93,14 +98,15 @@ class CF
    * @param numUsersForSimilarity Size of the neighborhood.
    * @param rank Rank parameter for matrix factorization.
    */
+  template<typename FactorizerType = amf::NMFALSFactorizer>
   CF(const arma::mat& data,
      FactorizerType factorizer = FactorizerType(),
      const size_t numUsersForSimilarity = 5,
      const size_t rank = 0);
 
   /**
-   * Initialize the CF object using an instantiated factorizer. Store a
-   * reference to the data that we will be using. There are parameters that can
+   * Initialize the CF object using an instantiated factorizer, immediately
+   * factorizing the given data to create a model. There are parameters that can
    * be set; default values are provided for each of them. If the rank is left
    * unset (or is set to 0), a simple density-based heuristic will be used to
    * choose a rank. Data will be considered in the format of items vs. users and
@@ -116,13 +122,40 @@ class CF
    * @param numUsersForSimilarity Size of the neighborhood.
    * @param rank Rank parameter for matrix factorization.
    */
-  template<typename U = FactorizerType,
-           typename T = typename boost::disable_if_c<
-               FactorizerTraits<U>::UsesCoordinateList>::type*>
+  template<typename FactorizerType = amf::NMFALSFactorizer>
   CF(const arma::sp_mat& data,
      FactorizerType factorizer = FactorizerType(),
      const size_t numUsersForSimilarity = 5,
-     const size_t rank = 0);
+     const size_t rank = 0,
+     const typename boost::disable_if_c<
+         FactorizerTraits<FactorizerType>::UsesCoordinateList>::type* = 0);
+
+  /**
+   * Train the CF model (i.e. factorize the input matrix) using the parameters
+   * that have already been set for the model (specifically, the rank
+   * parameter), and optionally, using the given FactorizerType.
+   *
+   * @param data Input dataset; coordinate list or dense matrix.
+   * @param factorizer Instantiated factorizer.
+   */
+  template<typename FactorizerType>
+  void Train(const arma::mat& data,
+             FactorizerType factorizer = FactorizerType());
+
+  /**
+   * Train the CF model (i.e. factorize the input matrix) using the parameters
+   * that have already been set for the model (specifically, the rank
+   * parameter), and optionally, using the given FactorizerType.
+   *
+   * @param data Sparse matrix data.
+   * @param factorizer Instantiated factorizer.
+   */
+  template<typename FactorizerType>
+  void Train(const arma::sp_mat& data,
+             FactorizerType factorizer = FactorizerType(),
+             const typename boost::disable_if_c<
+                 FactorizerTraits<FactorizerType>::UsesCoordinateList>::type*
+                 = 0);
 
   //! Sets number of users for calculating similarity.
   void NumUsersForSimilarity(const size_t num)
@@ -154,12 +187,6 @@ class CF
     return rank;
   }
 
-  //! Sets factorizer for NMF
-  void Factorizer(const FactorizerType& f)
-  {
-    this->factorizer = f;
-  }
-
   //! Get the User Matrix.
   const arma::mat& W() const { return w; }
   //! Get the Item Matrix.
@@ -220,8 +247,6 @@ class CF
   size_t numUsersForSimilarity;
   //! Rank used for matrix factorization.
   size_t rank;
-  //! Instantiated factorizer object.
-  FactorizerType factorizer;
   //! User matrix.
   arma::mat w;
   //! Item matrix.
@@ -252,7 +277,7 @@ class CF
 } // namespace cf
 } // namespace mlpack
 
-//Include implementation
+// Include implementation of templated functions.
 #include "cf_impl.hpp"
 
 #endif
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index 75a43b9..0781e7b 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -49,23 +49,53 @@ void ApplyFactorizer(FactorizerType& factorizer,
  * Construct the CF object using an instantiated factorizer.
  */
 template<typename FactorizerType>
-CF<FactorizerType>::CF(const arma::mat& data,
-                       FactorizerType factorizer,
-                       const size_t numUsersForSimilarity,
-                       const size_t rank) :
+CF::CF(const arma::mat& data,
+       FactorizerType factorizer,
+       const size_t numUsersForSimilarity,
+       const size_t rank) :
     numUsersForSimilarity(numUsersForSimilarity),
-    rank(rank),
-    factorizer(factorizer)
+    rank(rank)
 {
   // Validate neighbourhood size.
   if (numUsersForSimilarity < 1)
   {
-    Log::Warn << "CF::CF(): neighbourhood size should be > 0("
+    Log::Warn << "CF::CF(): neighbourhood size should be > 0 ("
         << numUsersForSimilarity << " given). Setting value to 5.\n";
     // Set default value of 5.
     this->numUsersForSimilarity = 5;
   }
 
+  Train(data, factorizer);
+}
+
+/**
+ * Construct the CF object using an instantiated factorizer.
+ */
+template<typename FactorizerType>
+CF::CF(const arma::sp_mat& data,
+       FactorizerType factorizer,
+       const size_t numUsersForSimilarity,
+       const size_t rank,
+       const typename boost::disable_if_c<FactorizerTraits<
+           FactorizerType>::UsesCoordinateList>::type*) :
+    numUsersForSimilarity(numUsersForSimilarity),
+    rank(rank)
+{
+  // Validate neighbourhood size.
+  if (numUsersForSimilarity < 1)
+  {
+    Log::Warn << "CF::CF(): neighbourhood size should be > 0("
+        << numUsersForSimilarity << " given). Setting value to 5.\n";
+    //Setting Default Value of 5
+    this->numUsersForSimilarity = 5;
+  }
+
+  Train(data, factorizer);
+}
+
+template<typename FactorizerType>
+void CF::Train(const arma::mat& data, FactorizerType factorizer)
+{
   CleanData(data, cleanedData);
 
   // Check if the user wanted us to choose a rank for them.
@@ -90,28 +120,12 @@ CF<FactorizerType>::CF(const arma::mat& data,
   Timer::Stop("cf_factorization");
 }
 
-/**
- * Construct the CF object using an instantiated factorizer.
- */
 template<typename FactorizerType>
-template<typename U, typename T>
-CF<FactorizerType>::CF(const arma::sp_mat& data,
-                       FactorizerType factorizer,
-                       const size_t numUsersForSimilarity,
-                       const size_t rank) :
-    numUsersForSimilarity(numUsersForSimilarity),
-    rank(rank),
-    factorizer(factorizer)
+void CF::Train(const arma::sp_mat& data,
+               FactorizerType factorizer,
+               const typename boost::disable_if_c<FactorizerTraits<
+                   FactorizerType>::UsesCoordinateList>::type*)
 {
-  // Validate neighbourhood size.
-  if (numUsersForSimilarity < 1)
-  {
-    Log::Warn << "CF::CF(): neighbourhood size should be > 0("
-        << numUsersForSimilarity << " given). Setting value to 5.\n";
-    //Setting Default Value of 5
-    this->numUsersForSimilarity = 5;
-  }
-
   cleanedData = data;
 
   // Check if the user wanted us to choose a rank for them.
@@ -129,263 +143,9 @@ CF<FactorizerType>::CF(const arma::sp_mat& data,
     this->rank = rankEstimate;
   }
 
+  Timer::Start("cf_factorization");
   factorizer.Apply(cleanedData, this->rank, w, h);
-}
-
-template<typename FactorizerType>
-void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
-                                            arma::Mat<size_t>& recommendations)
-{
-  // Generate list of users.  Maybe it would be more efficient to pass an empty
-  // users list, and then have the other overload of GetRecommendations() assume
-  // that if users is empty, then recommendations should be generated for all
-  // users?
-  arma::Col<size_t> users = arma::linspace<arma::Col<size_t> >(0,
-      cleanedData.n_cols - 1, cleanedData.n_cols);
-
-  // Call the main overload for recommendations.
-  GetRecommendations(numRecs, recommendations, users);
-}
-
-template<typename FactorizerType>
-void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
-                                            arma::Mat<size_t>& recommendations,
-                                            arma::Col<size_t>& users)
-{
-  // We want to avoid calculating the full rating matrix, so we will do nearest
-  // neighbor search only on the H matrix, using the observation that if the
-  // rating matrix X = W*H, then d(X.col(i), X.col(j)) = d(W H.col(i), W
-  // H.col(j)).  This can be seen as nearest neighbor search on the H matrix
-  // with the Mahalanobis distance where M^{-1} = W^T W.  So, we'll decompose
-  // M^{-1} = L L^T (the Cholesky decomposition), and then multiply H by L^T.
-  // Then we can perform nearest neighbor search.
-  arma::mat l = arma::chol(w.t() * w);
-  arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T.
-
-  // Now, we will use the decomposed w and h matrices to estimate what the user
-  // would have rated items as, and then pick the best items.
-
-  // Temporarily store feature vector of queried users.
-  arma::mat query(stretchedH.n_rows, users.n_elem);
-
-  // Select feature vectors of queried users.
-  for (size_t i = 0; i < users.n_elem; i++)
-    query.col(i) = stretchedH.col(users(i));
-
-  // Temporary storage for neighborhood of the queried users.
-  arma::Mat<size_t> neighborhood;
-
-  // Calculate the neighborhood of the queried users.
-  // This should be a templatized option.
-  neighbor::AllkNN a(stretchedH);
-  arma::mat resultingDistances; // Temporary storage.
-  a.Search(query, numUsersForSimilarity, neighborhood, resultingDistances);
-
-  // Generate recommendations for each query user by finding the maximum numRecs
-  // elements in the averages matrix.
-  recommendations.set_size(numRecs, users.n_elem);
-  recommendations.fill(cleanedData.n_rows); // Invalid item number.
-  arma::mat values(numRecs, users.n_elem);
-  values.fill(-DBL_MAX); // The smallest possible value.
-  for (size_t i = 0; i < users.n_elem; i++)
-  {
-    // First, calculate average of neighborhood values.
-    arma::vec averages;
-    averages.zeros(cleanedData.n_rows);
-
-    for (size_t j = 0; j < neighborhood.n_rows; ++j)
-      averages += w * h.col(neighborhood(j, i));
-    averages /= neighborhood.n_rows;
-
-    // Look through the averages column corresponding to the current user.
-    for (size_t j = 0; j < averages.n_rows; ++j)
-    {
-      // Ensure that the user hasn't already rated the item.
-      if (cleanedData(j, users(i)) != 0.0)
-        continue; // The user already rated the item.
-
-      // Is the estimated value better than the worst candidate?
-      const double value = averages[j];
-      if (value > values(values.n_rows - 1, i))
-      {
-        // It should be inserted.  Which position?
-        size_t insertPosition = values.n_rows - 1;
-        while (insertPosition > 0)
-        {
-          if (value <= values(insertPosition - 1, i))
-            break; // The current value is the right one.
-          insertPosition--;
-        }
-
-        // Now insert it into the list.
-        InsertNeighbor(i, insertPosition, j, value, recommendations,
-            values);
-      }
-    }
-
-    // If we were not able to come up with enough recommendations, issue a
-    // warning.
-    if (recommendations(values.n_rows - 1, i) == cleanedData.n_rows + 1)
-      Log::Warn << "Could not provide " << values.n_rows << " recommendations "
-          << "for user " << users(i) << " (not enough un-rated items)!"
-          << std::endl;
-  }
-}
-
-// Predict the rating for a single user/item combination.
-template<typename FactorizerType>
-double CF<FactorizerType>::Predict(const size_t user, const size_t item) const
-{
-  // First, we need to find the nearest neighbors of the given user.
-  // We'll use the same technique as for GetRecommendations().
-
-  // We want to avoid calculating the full rating matrix, so we will do nearest
-  // neighbor search only on the H matrix, using the observation that if the
-  // rating matrix X = W*H, then d(X.col(i), X.col(j)) = d(W H.col(i), W
-  // H.col(j)).  This can be seen as nearest neighbor search on the H matrix
-  // with the Mahalanobis distance where M^{-1} = W^T W.  So, we'll decompose
-  // M^{-1} = L L^T (the Cholesky decomposition), and then multiply H by L^T.
-  // Then we can perform nearest neighbor search.
-  arma::mat l = arma::chol(w.t() * w);
-  arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T.
-
-  // Now, we will use the decomposed w and h matrices to estimate what the user
-  // would have rated items as, and then pick the best items.
-
-  // Temporarily store feature vector of queried users.
-  arma::mat query = stretchedH.col(user);
-
-  // Temporary storage for neighborhood of the queried users.
-  arma::Mat<size_t> neighborhood;
-
-  // Calculate the neighborhood of the queried users.
-  // This should be a templatized option.
-  neighbor::AllkNN a(stretchedH, false, true /* single-tree mode */);
-  arma::mat resultingDistances; // Temporary storage.
-
-  a.Search(query, numUsersForSimilarity, neighborhood, resultingDistances);
-
-  double rating = 0; // We'll take the average of neighborhood values.
-
-  for (size_t j = 0; j < neighborhood.n_rows; ++j)
-    rating += arma::as_scalar(w.row(item) * h.col(neighborhood(j, 0)));
-  rating /= neighborhood.n_rows;
-
-  return rating;
-}
-
-// Predict the rating for a group of user/item combinations.
-template<typename FactorizerType>
-void CF<FactorizerType>::Predict(const arma::Mat<size_t>& combinations,
-                                 arma::vec& predictions) const
-{
-  // First, for nearest neighbor search, stretch the H matrix.
-  arma::mat l = arma::chol(w.t() * w);
-  arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T.
-
-  // Now, we must determine those query indices we need to find the nearest
-  // neighbors for.  This is easiest if we just sort the combinations matrix.
-  arma::Mat<size_t> sortedCombinations(combinations.n_rows,
-                                       combinations.n_cols);
-  arma::uvec ordering = arma::sort_index(combinations.row(0).t());
-  for (size_t i = 0; i < ordering.n_elem; ++i)
-    sortedCombinations.col(i) = combinations.col(ordering[i]);
-
-  // Now, we have to get the list of unique users we will be searching for.
-  arma::Col<size_t> users = arma::unique(combinations.row(0).t());
-
-  // Assemble our query matrix from the stretchedH matrix.
-  arma::mat queries(stretchedH.n_rows, users.n_elem);
-  for (size_t i = 0; i < queries.n_cols; ++i)
-    queries.col(i) = stretchedH.col(users[i]);
-
-  // Now calculate the neighborhood of these users.
-  neighbor::AllkNN a(stretchedH);
-  arma::mat distances;
-  arma::Mat<size_t> neighborhood;
-
-  a.Search(queries, numUsersForSimilarity, neighborhood, distances);
-
-  // Now that we have the neighborhoods we need, calculate the predictions.
-  predictions.set_size(combinations.n_cols);
-
-  size_t user = 0; // Cumulative user count, because we are doing it in order.
-  for (size_t i = 0; i < sortedCombinations.n_cols; ++i)
-  {
-    // Could this be made faster by calculating dot products for multiple items
-    // at once?
-    double rating = 0.0;
-
-    // Map the combination's user to the user ID used for kNN.
-    while (users[user] < sortedCombinations(0, i))
-      ++user;
-
-    for (size_t j = 0; j < neighborhood.n_rows; ++j)
-      rating += arma::as_scalar(w.row(sortedCombinations(1, i)) *
-          h.col(neighborhood(j, user)));
-    rating /= neighborhood.n_rows;
-
-    predictions(ordering[i]) = rating;
-  }
-}
-
-template<typename FactorizerType>
-void CF<FactorizerType>::CleanData(const arma::mat& data, arma::sp_mat& cleanedData)
-{
-  // Generate list of locations for batch insert constructor for sparse
-  // matrices.
-  arma::umat locations(2, data.n_cols);
-  arma::vec values(data.n_cols);
-  for (size_t i = 0; i < data.n_cols; ++i)
-  {
-    // We have to transpose it because items are rows, and users are columns.
-    locations(1, i) = ((arma::uword) data(0, i));
-    locations(0, i) = ((arma::uword) data(1, i));
-    values(i) = data(2, i);
-    if (values(i) == 0)
-      Log::Warn << "User rating of 0 ignored for user " << locations(1, i)
-          << ", item " << locations(0, i) << "." << std::endl;
-  }
-
-  // Find maximum user and item IDs.
-  const size_t maxItemID = (size_t) max(locations.row(0)) + 1;
-  const size_t maxUserID = (size_t) max(locations.row(1)) + 1;
-
-  // Fill sparse matrix.
-  cleanedData = arma::sp_mat(locations, values, maxItemID, maxUserID);
-}
-
-/**
- * Helper function to insert a point into the recommendation matrices.
- *
- * @param queryIndex Index of point whose recommendations we are inserting into.
- * @param pos Position in list to insert into.
- * @param neighbor Index of item being inserted as a recommendation.
- * @param value Value of recommendation.
- */
-template<typename FactorizerType>
-void CF<FactorizerType>::InsertNeighbor(const size_t queryIndex,
-                                        const size_t pos,
-                                        const size_t neighbor,
-                                        const double value,
-                                        arma::Mat<size_t>& recommendations,
-                                        arma::mat& values) const
-{
-  // We only memmove() if there is actually a need to shift something.
-  if (pos < (recommendations.n_rows - 1))
-  {
-    const int len = (values.n_rows - 1) - pos;
-    memmove(values.colptr(queryIndex) + (pos + 1),
-        values.colptr(queryIndex) + pos,
-        sizeof(double) * len);
-    memmove(recommendations.colptr(queryIndex) + (pos + 1),
-        recommendations.colptr(queryIndex) + pos,
-        sizeof(size_t) * len);
-  }
-
-  // Now put the new information in the right index.
-  values(pos, queryIndex) = value;
-  recommendations(pos, queryIndex) = neighbor;
+  Timer::Stop("cf_factorization");
 }
 
 } // namespace mlpack
diff --git a/src/mlpack/methods/cf/cf_main.cpp b/src/mlpack/methods/cf/cf_main.cpp
index d4b5636..42ef2b6 100644
--- a/src/mlpack/methods/cf/cf_main.cpp
+++ b/src/mlpack/methods/cf/cf_main.cpp
@@ -83,8 +83,7 @@ PARAM_DOUBLE("min_residue", "Residue required to terminate the factorization "
 
 PARAM_INT("seed", "Set the random seed (0 uses std::time(NULL)).", "s", 0);
 
-template<typename Factorizer>
-void ComputeRecommendations(CF<Factorizer>& cf,
+void ComputeRecommendations(CF& cf,
                             const size_t numRecs,
                             arma::Mat<size_t>& recommendations)
 {
@@ -109,8 +108,7 @@ void ComputeRecommendations(CF<Factorizer>& cf,
   }
 }
 
-template<typename Factorizer>
-void ComputeRMSE(CF<Factorizer>& cf)
+void ComputeRMSE(CF& cf)
 {
   // Now, compute each test point.
   const string testFile = CLI::GetParam<string>("test_file");
@@ -146,7 +144,7 @@ void PerformAction(Factorizer&& factorizer,
 {
   // Parameters for generating the CF object.
   const size_t neighborhood = (size_t) CLI::GetParam<int>("neighborhood");
-  CF<Factorizer> c(dataset, factorizer, neighborhood, rank);
+  CF c(dataset, factorizer, neighborhood, rank);
 
   if (CLI::HasParam("query_file") || CLI::HasParam("all_user_recommendations"))
   {
diff --git a/src/mlpack/tests/cf_test.cpp b/src/mlpack/tests/cf_test.cpp
index 651733d..9fbce77 100644
--- a/src/mlpack/tests/cf_test.cpp
+++ b/src/mlpack/tests/cf_test.cpp
@@ -38,10 +38,10 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsAllUsersTest)
 
   // Make data into sparse matrix.
   arma::sp_mat cleanedData;
-  CF<>::CleanData(dataset, cleanedData);
+  CF::CleanData(dataset, cleanedData);
 
   // Create a CF object.
-  CF<> c(cleanedData);
+  CF c(cleanedData);
 
   // Generate recommendations when query set is not specified.
   c.GetRecommendations(numRecs, recommendations);
@@ -78,9 +78,9 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsQueriedUserTest)
 
   // Make data into sparse matrix.
   arma::sp_mat cleanedData;
-  CF<>::CleanData(dataset, cleanedData);
+  CF::CleanData(dataset, cleanedData);
 
-  CF<> c(cleanedData);
+  CF c(cleanedData);
 
   // Generate recommendations when query set is specified.
   c.GetRecommendations(numRecsDefault, recommendations, users);
@@ -136,10 +136,10 @@ BOOST_AUTO_TEST_CASE(RecommendationAccuracyTest)
 
   // Make data into sparse matrix.
   arma::sp_mat cleanedData;
-  CF<>::CleanData(dataset, cleanedData);
+  CF::CleanData(dataset, cleanedData);
 
   // Now create the CF object.
-  CF<> c(cleanedData);
+  CF c(cleanedData);
 
   // Obtain 150 recommendations for the users in savedCols, and make sure the
   // missing item shows up in most of them.  First, create the list of users,
@@ -229,10 +229,10 @@ BOOST_AUTO_TEST_CASE(CFPredictTest)
 
   // Make data into sparse matrix.
   arma::sp_mat cleanedData;
-  CF<>::CleanData(dataset, cleanedData);
+  CF::CleanData(dataset, cleanedData);
 
   // Now create the CF object.
-  CF<> c(cleanedData);
+  CF c(cleanedData);
 
   // Now, for each removed rating, make sure the prediction is... reasonably
   // accurate.
@@ -295,10 +295,10 @@ BOOST_AUTO_TEST_CASE(CFBatchPredictTest)
 
   // Make data into sparse matrix.
   arma::sp_mat cleanedData;
-  CF<>::CleanData(dataset, cleanedData);
+  CF::CleanData(dataset, cleanedData);
 
   // Now create the CF object.
-  CF<> c(cleanedData);
+  CF c(cleanedData);
 
   // Get predictions for all user/item pairs we held back.
   arma::Mat<size_t> combinations(2, savedCols.n_cols);



More information about the mlpack-git mailing list