[mlpack-git] master: Refactor input a bit. We don't take dense matrices for CF. (802ead7)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Tue May 26 11:12:24 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/d2f2976c7a43f8ab9139064ae33304bcf9f4f884...29ab461472f64f72cfbdb93b0d9045024050cc95

>---------------------------------------------------------------

commit 802ead7170edf34c9e0d982ad57bdca0ed18987f
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sat May 23 02:36:56 2015 +0000

    Refactor input a bit.  We don't take dense matrices for CF.


>---------------------------------------------------------------

802ead7170edf34c9e0d982ad57bdca0ed18987f
 src/mlpack/methods/cf/cf.hpp      | 24 +++++++++---------------
 src/mlpack/methods/cf/cf_impl.hpp | 37 ++++++++++++++++++++++++++++++++-----
 src/mlpack/tests/cf_test.cpp      | 32 ++++++++++++++++++++++++++------
 3 files changed, 67 insertions(+), 26 deletions(-)

diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 3f510eb..4e697f6 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -82,22 +82,18 @@ class CF
    * reference to the data that we will be using. There are parameters that can
    * be set; default values are provided for each of them. If the rank is left
    * unset (or is set to 0), a simple density-based heuristic will be used to
-   * choose a rank.  This overload of the constructor will only be available if
-   * the factorizer does not use a corodinate list (i.e. if UsesCoordinateList
-   * is false).
+   * choose a rank.
    *
-   * The U and T template parameters are for SFINAE, so that this overload is
-   * only available when the FactorizerType does not use a coordinate list.
+   * The provided dataset should be a coordinate list; that is, a 3-row matrix
+   * where each column corresponds to a (user, item, rating) entry in the
+   * matrix.
    *
-   * @param data Initial (user, item, rating) matrix.
+   * @param data Data matrix in coordinate list form (user, item, rating).
    * @param factorizer Instantiated factorizer object.
    * @param numUsersForSimilarity Size of the neighborhood.
    * @param rank Rank parameter for matrix factorization.
    */
-  template<typename U = FactorizerType,
-           typename T = typename boost::enable_if_c<
-               FactorizerTraits<U>::UsesCoordinateList>::type*>
-  CF(arma::mat& data,
+  CF(const arma::mat& data,
      FactorizerType factorizer = FactorizerType(),
      const size_t numUsersForSimilarity = 5,
      const size_t rank = 0);
@@ -115,17 +111,15 @@ class CF
    * The U and T template parameters are for SFINAE, so that this overload is
    * only available when the FactorizerType does not use a coordinate list.
    *
-   * @param data Initial (user, item, rating) matrix.
+   * @param data Sparse matrix data.
    * @param factorizer Instantiated factorizer object.
    * @param numUsersForSimilarity Size of the neighborhood.
    * @param rank Rank parameter for matrix factorization.
-   * @param isCleaned If the data passed is cleaned for CF
    */
-  template<typename MatType,
-           typename U = FactorizerType,
+  template<typename U = FactorizerType,
            typename T = typename boost::disable_if_c<
                FactorizerTraits<U>::UsesCoordinateList>::type*>
-  CF(const MatType& data,
+  CF(const arma::sp_mat& data,
      FactorizerType factorizer = FactorizerType(),
      const size_t numUsersForSimilarity = 5,
      const size_t rank = 0);
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index 4c78331..2e81d16 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -17,12 +17,39 @@
 namespace mlpack {
 namespace cf {
 
+// Apply the factorizer when a coordinate list is used.
+template<typename FactorizerType>
+void ApplyFactorizer(FactorizerType& factorizer,
+                     const arma::mat& data,
+                     const arma::sp_mat& /* cleanedData */,
+                     const size_t rank,
+                     arma::mat& w,
+                     arma::mat& h,
+                     const typename boost::enable_if_c<FactorizerTraits<
+                         FactorizerType>::UsesCoordinateList>::type* = 0)
+{
+  factorizer.Apply(data, rank, w, h);
+}
+
+// Apply the factorizer when coordinate lists are not used.
+template<typename FactorizerType>
+void ApplyFactorizer(FactorizerType& factorizer,
+                     const arma::mat& /* data */,
+                     const arma::sp_mat& cleanedData,
+                     const size_t rank,
+                     arma::mat& w,
+                     arma::mat& h,
+                     const typename boost::disable_if_c<FactorizerTraits<
+                         FactorizerType>::UsesCoordinateList>::type* = 0)
+{
+  factorizer.Apply(cleanedData, rank, w, h);
+}
+
 /**
  * Construct the CF object using an instantiated factorizer.
  */
 template<typename FactorizerType>
-template<typename U, typename T>
-CF<FactorizerType>::CF(arma::mat& data,
+CF<FactorizerType>::CF(const arma::mat& data,
                        FactorizerType factorizer,
                        const size_t numUsersForSimilarity,
                        const size_t rank) :
@@ -58,15 +85,15 @@ CF<FactorizerType>::CF(arma::mat& data,
 
   // Decompose the data matrix (which is in coordinate list form) to user and
   // data matrices.
-  factorizer.Apply(data, this->rank, w, h);
+  ApplyFactorizer(factorizer, data, cleanedData, this->rank, w, h);
 }
 
 /**
  * Construct the CF object using an instantiated factorizer.
  */
 template<typename FactorizerType>
-template<typename MatType, typename U, typename T>
-CF<FactorizerType>::CF(const MatType& data,
+template<typename U, typename T>
+CF<FactorizerType>::CF(const arma::sp_mat& data,
                        FactorizerType factorizer,
                        const size_t numUsersForSimilarity,
                        const size_t rank) :
diff --git a/src/mlpack/tests/cf_test.cpp b/src/mlpack/tests/cf_test.cpp
index feb4d4b..651733d 100644
--- a/src/mlpack/tests/cf_test.cpp
+++ b/src/mlpack/tests/cf_test.cpp
@@ -36,8 +36,12 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsAllUsersTest)
   arma::mat dataset;
   data::Load("GroupLens100k.csv", dataset);
 
-  // Creat a CF object
-  CF<> c(dataset);
+  // Make data into sparse matrix.
+  arma::sp_mat cleanedData;
+  CF<>::CleanData(dataset, cleanedData);
+
+  // Create a CF object.
+  CF<> c(cleanedData);
 
   // Generate recommendations when query set is not specified.
   c.GetRecommendations(numRecs, recommendations);
@@ -72,7 +76,11 @@ BOOST_AUTO_TEST_CASE(CFGetRecommendationsQueriedUserTest)
   arma::mat dataset;
   data::Load("GroupLens100k.csv", dataset);
 
-  CF<> c(dataset);
+  // Make data into sparse matrix.
+  arma::sp_mat cleanedData;
+  CF<>::CleanData(dataset, cleanedData);
+
+  CF<> c(cleanedData);
 
   // Generate recommendations when query set is specified.
   c.GetRecommendations(numRecsDefault, recommendations, users);
@@ -126,8 +134,12 @@ BOOST_AUTO_TEST_CASE(RecommendationAccuracyTest)
     }
   }
 
+  // Make data into sparse matrix.
+  arma::sp_mat cleanedData;
+  CF<>::CleanData(dataset, cleanedData);
+
   // Now create the CF object.
-  CF<> c(dataset);
+  CF<> c(cleanedData);
 
   // Obtain 150 recommendations for the users in savedCols, and make sure the
   // missing item shows up in most of them.  First, create the list of users,
@@ -215,8 +227,12 @@ BOOST_AUTO_TEST_CASE(CFPredictTest)
     }
   }
 
+  // Make data into sparse matrix.
+  arma::sp_mat cleanedData;
+  CF<>::CleanData(dataset, cleanedData);
+
   // Now create the CF object.
-  CF<> c(dataset);
+  CF<> c(cleanedData);
 
   // Now, for each removed rating, make sure the prediction is... reasonably
   // accurate.
@@ -277,8 +293,12 @@ BOOST_AUTO_TEST_CASE(CFBatchPredictTest)
     }
   }
 
+  // Make data into sparse matrix.
+  arma::sp_mat cleanedData;
+  CF<>::CleanData(dataset, cleanedData);
+
   // Now create the CF object.
-  CF<> c(dataset);
+  CF<> c(cleanedData);
 
   // Get predictions for all user/item pairs we held back.
   arma::Mat<size_t> combinations(2, savedCols.n_cols);



More information about the mlpack-git mailing list