[mlpack-git] master, mlpack-1.0.x: For each random dataset, ensure that the size of the implied user/item matrix is numUsers by numItems by manually setting the last element. Some formatting and const-correctness fixes. Also, increase the number of iterations for the optimization test since it didn't seem to be converging (hopefully the specific number of 10 passes over the data was not chosen for a particular reason). (8579106)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:54:35 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 857910648fca4a00d90e9032aeba8e27a57746ce
Author: Ryan Curtin <ryan at ratml.org>
Date:   Fri Jul 25 20:13:14 2014 +0000

    For each random dataset, ensure that the size of the implied user/item matrix is
    numUsers by numItems by manually setting the last element.  Some formatting and
    const-correctness fixes.  Also, increase the number of iterations for the
    optimization test since it didn't seem to be converging (hopefully the specific
    number of 10 passes over the data was not chosen for a particular reason).


>---------------------------------------------------------------

857910648fca4a00d90e9032aeba8e27a57746ce
 src/mlpack/tests/regularized_svd_test.cpp | 49 +++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/src/mlpack/tests/regularized_svd_test.cpp b/src/mlpack/tests/regularized_svd_test.cpp
index 75fdd20..3d35c76 100644
--- a/src/mlpack/tests/regularized_svd_test.cpp
+++ b/src/mlpack/tests/regularized_svd_test.cpp
@@ -10,6 +10,7 @@
 #include <boost/test/unit_test.hpp>
 #include "old_boost_test_definitions.hpp"
 
+using namespace mlpack;
 using namespace mlpack::svd;
 
 BOOST_AUTO_TEST_SUITE(RegularizedSVDTest);
@@ -30,24 +31,28 @@ BOOST_AUTO_TEST_CASE(RegularizedSVDFunctionRandomEvaluate)
   data.row(1) = floor(data.row(1) * numItems);
   data.row(2) = floor(data.row(2) * maxRating + 0.5);
 
+  // Manually set last column (rating) to maximum user and maximum item.
+  data(0, numRatings - 1) = numUsers - 1;
+  data(1, numRatings - 1) = numItems - 1;
+
   // Make a RegularizedSVDFunction with zero regularization.
   RegularizedSVDFunction rSVDFunc(data, rank, 0);
 
-  for(size_t i = 0; i < numTrials; i++)
+  for (size_t i = 0; i < numTrials; i++)
   {
     arma::mat parameters = arma::randu(rank, numUsers + numItems);
 
     // Calculate cost by summing up cost of each example.
     double cost = 0;
-    for(size_t j = 0; j < numRatings; j++)
+    for (size_t j = 0; j < numRatings; j++)
     {
       const size_t user = data(0, j);
       const size_t item = data(1, j) + numUsers;
 
       const double rating = data(2, j);
-      double ratingError = rating - arma::dot(parameters.col(user),
-                                              parameters.col(item));
-      double ratingErrorSquared = ratingError * ratingError;
+      const double ratingError = rating - arma::dot(parameters.col(user),
+                                                    parameters.col(item));
+      const double ratingErrorSquared = ratingError * ratingError;
 
       cost += ratingErrorSquared;
     }
@@ -73,13 +78,17 @@ BOOST_AUTO_TEST_CASE(RegularizedSVDFunctionRegularizationEvaluate)
   data.row(1) = floor(data.row(1) * numItems);
   data.row(2) = floor(data.row(2) * maxRating + 0.5);
 
+  // Manually set last column (rating) to maximum user and maximum item.
+  data(0, numRatings - 1) = numUsers - 1;
+  data(1, numRatings - 1) = numItems - 1;
+
   // Make three RegularizedSVDFunction objects with different amounts of
   // regularization.
   RegularizedSVDFunction rSVDFuncNoReg(data, rank, 0);
   RegularizedSVDFunction rSVDFuncSmallReg(data, rank, 0.5);
   RegularizedSVDFunction rSVDFuncBigReg(data, rank, 20);
 
-  for(size_t i = 0; i < numTrials; i++)
+  for (size_t i = 0; i < numTrials; i++)
   {
     arma::mat parameters = arma::randu(rank, numUsers + numItems);
 
@@ -87,13 +96,13 @@ BOOST_AUTO_TEST_CASE(RegularizedSVDFunctionRegularizationEvaluate)
     // each rating and sum them up.
     double smallRegTerm = 0;
     double bigRegTerm = 0;
-    for(size_t j = 0; j < numRatings; j++)
+    for (size_t j = 0; j < numRatings; j++)
     {
       const size_t user = data(0, j);
       const size_t item = data(1, j) + numUsers;
 
-      double userVecNorm = arma::norm(parameters.col(user), 2);
-      double itemVecNorm = arma::norm(parameters.col(item), 2);
+      const double userVecNorm = arma::norm(parameters.col(user), 2);
+      const double itemVecNorm = arma::norm(parameters.col(item), 2);
       smallRegTerm += 0.5 * (userVecNorm * userVecNorm +
                              itemVecNorm * itemVecNorm);
       bigRegTerm += 20 * (userVecNorm * userVecNorm +
@@ -124,6 +133,10 @@ BOOST_AUTO_TEST_CASE(RegularizedSVDFunctionGradient)
   data.row(1) = floor(data.row(1) * numItems);
   data.row(2) = floor(data.row(2) * maxRating + 0.5);
 
+  // Manually set last column (rating) to maximum user and maximum item.
+  data(0, numRatings - 1) = numUsers - 1;
+  data(1, numRatings - 1) = numItems - 1;
+
   arma::mat parameters = arma::randu(rank, numUsers + numItems);
 
   // Make two RegularizedSVDFunction objects, one with regularization and one
@@ -141,9 +154,9 @@ BOOST_AUTO_TEST_CASE(RegularizedSVDFunctionGradient)
   double costPlus1, costMinus1, numGradient1;
   double costPlus2, costMinus2, numGradient2;
 
-  for(size_t i = 0; i < rank; i++)
+  for (size_t i = 0; i < rank; i++)
   {
-    for(size_t j = 0; j < numUsers + numItems; j++)
+    for (size_t j = 0; j < numUsers + numItems; j++)
     {
       // Perturb parameter with a positive constant and get costs.
       parameters(i, j) += epsilon;
@@ -175,7 +188,7 @@ BOOST_AUTO_TEST_CASE(RegularizedSVDFunctionOptimize)
   const size_t numUsers = 50;
   const size_t numItems = 50;
   const size_t numRatings = 100;
-  const size_t iterations = 10;
+  const size_t iterations = 30;
   const size_t rank = 10;
   const double alpha = 0.01;
   const double lambda = 0.01;
@@ -188,8 +201,12 @@ BOOST_AUTO_TEST_CASE(RegularizedSVDFunctionOptimize)
   data.row(0) = floor(data.row(0) * numUsers);
   data.row(1) = floor(data.row(1) * numItems);
 
+  // Manually set last column (rating) to maximum user and maximum item.
+  data(0, numRatings - 1) = numUsers - 1;
+  data(1, numRatings - 1) = numItems - 1;
+
   // Make rating entries based on the parameters.
-  for(size_t i = 0; i < numRatings; i++)
+  for (size_t i = 0; i < numRatings; i++)
   {
     data(2, i) = arma::dot(parameters.col(data(0, i)),
                            parameters.col(numUsers + data(1, i)));
@@ -206,15 +223,15 @@ BOOST_AUTO_TEST_CASE(RegularizedSVDFunctionOptimize)
 
   // Get predicted ratings from optimized parameters.
   arma::mat predictedData(1, numRatings);
-  for(size_t i = 0; i < numRatings; i++)
+  for (size_t i = 0; i < numRatings; i++)
   {
     predictedData(0, i) = arma::dot(optParameters.col(data(0, i)),
                                     optParameters.col(numUsers + data(1, i)));
   }
 
   // Calculate relative error.
-  double relativeError = arma::norm(data.row(2) - predictedData, "frob") /
-                         arma::norm(data, "frob");
+  const double relativeError = arma::norm(data.row(2) - predictedData, "frob") /
+                               arma::norm(data, "frob");
 
   // Relative error should be small.
   BOOST_REQUIRE_SMALL(relativeError, 1e-2);



More information about the mlpack-git mailing list