[mlpack-svn] r15892 - mlpack/trunk/src/mlpack/tests
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Oct 1 15:22:03 EDT 2013
Author: rcurtin
Date: Tue Oct 1 15:22:03 2013
New Revision: 15892
Log:
Add a test to make sure the output of the CF object is reasonable. I think this
is a good test...
Modified:
mlpack/trunk/src/mlpack/tests/cf_test.cpp
Modified: mlpack/trunk/src/mlpack/tests/cf_test.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/tests/cf_test.cpp (original)
+++ mlpack/trunk/src/mlpack/tests/cf_test.cpp Tue Oct 1 15:22:03 2013
@@ -118,4 +118,96 @@
BOOST_REQUIRE_EQUAL(recommendations.n_cols, numUsers);
}
+/**
+ * Make sure recommendations that are generated are reasonably accurate.
+ *
+ * One 5-star rating is held out for each of (up to) 300 distinct users, a CF
+ * object is built on the remaining data, and 150 recommendations are requested
+ * for each of those users.  The test passes when the held-out item is missing
+ * from the recommendations of fewer than 100 of the 300 users.  Every
+ * recommended item other than the held-out one must also have a zero entry for
+ * that user in c.CleanedData().
+ *
+ * NOTE(review): the code after the hold-out loop assumes all 300 slots of
+ * savedCols were filled; nothing here checks that currentCol reached 300.
+ */
+BOOST_AUTO_TEST_CASE(RecommendationAccuracyTest)
+{
+ // Load the GroupLens dataset; then, we will remove some values from it.
+ arma::mat dataset;
+ data::Load("GroupLens100k.csv", dataset);
+
+ // Save the columns we've removed.  As used below: row 0 = user, row 1 = item, row 2 = rating.
+ arma::mat savedCols(3, 300); // Remove 300 5-star ratings.
+ size_t currentCol = 0; // Number of held-out columns saved so far.
+ for (size_t i = 0; i < dataset.n_cols; ++i)
+ {
+ if (currentCol == 300) // All 300 slots are filled; stop scanning.
+ break;
+
+ if (dataset(2, i) > 4.5) // 5-star rating.
+ {
+ // Make sure we don't have this user yet, so that each user contributes at most one
+ // held-out rating.  This is a slow way to do this, but it's only in the tests.
+ bool found = false;
+ for (size_t j = 0; j < currentCol; ++j)
+ {
+ if (savedCols(0, j) == dataset(0, i))
+ {
+ found = true;
+ break;
+ }
+ }
+
+ // If this user doesn't already exist in savedCols, add them. Otherwise
+ // ignore this point.
+ if (!found)
+ {
+ savedCols.col(currentCol) = dataset.col(i);
+ dataset.shed_col(i); // NOTE(review): later columns shift left, so the loop's ++i skips one.
+ ++currentCol;
+ }
+ }
+ }
+
+ // Now create the CF object from the dataset with the held-out ratings removed.
+ CF c(dataset);
+
+ // Obtain 150 recommendations for the users in savedCols, and make sure the
+ // missing item shows up in most of them. First, create the list of users,
+ // which requires casting from doubles...
+ arma::Col<size_t> users(300);
+ for (size_t i = 0; i < 300; ++i)
+ users(i) = (size_t) savedCols(0, i);
+ arma::Mat<size_t> recommendations;
+ size_t numRecs = 150;
+ c.NumRecs(numRecs);
+ c.GetRecommendations(recommendations, users);
+
+ BOOST_REQUIRE_EQUAL(recommendations.n_rows, numRecs); // One row per recommendation...
+ BOOST_REQUIRE_EQUAL(recommendations.n_cols, 300); // ...and one column per queried user.
+
+ size_t failures = 0; // Users whose held-out item was not recommended.
+ for (size_t i = 0; i < 300; ++i)
+ {
+ size_t targetItem = (size_t) savedCols(1, i) - 1; // NOTE(review): presumably 1-based ID -> 0-based index; confirm.
+ bool found = false;
+ // Make sure the target item shows up in the recommendations.
+ for (size_t j = 0; j < numRecs; ++j)
+ {
+ const size_t user = users(i) - 1; // Does not depend on j.
+ const size_t item = recommendations(j, i) - 1;
+ if (item == targetItem)
+ {
+ found = true;
+ }
+ else
+ {
+ // Make sure we aren't being recommended an item that the user already
+ // rated.
+ BOOST_REQUIRE_EQUAL((double) c.CleanedData()(item, user), 0.0);
+ }
+ }
+
+ if (!found)
+ ++failures;
+ }
+
+ // Make sure the right item showed up in at least 2/3 of the recommendations.
+ // Random chance (that is, if we selected recommendations randomly) for this
+ // GroupLens dataset would give somewhere around a 10% success rate (failures
+ // would be closer to 270).
+ BOOST_REQUIRE_LT(failures, 100); // I.e. more than 200 of the 300 users must hit.
+}
+
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-svn mailing list