[mlpack-git] master: Use a priority queue (heap) to search for the best valued recommendations. (b64e365)

gitdub at mlpack.org gitdub at mlpack.org
Tue Jul 26 21:22:08 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/ef51b032f275266f781d42b9bd0aa50aa26a3077...8522b04c3d9a82fb7e964bafd72e70f0cd30bf4b

>---------------------------------------------------------------

commit b64e36591fcd53db0bddba6a7c2a9119eb1125f9
Author: MarcosPividori <marcos.pividori at gmail.com>
Date:   Thu Jul 21 16:28:53 2016 -0300

    Use a priority queue (heap) to search for the best valued recommendations.


>---------------------------------------------------------------

b64e36591fcd53db0bddba6a7c2a9119eb1125f9
 src/mlpack/methods/cf/cf.cpp | 75 +++++++++++++++-----------------------------
 src/mlpack/methods/cf/cf.hpp | 34 ++++++++++----------
 2 files changed, 43 insertions(+), 66 deletions(-)

diff --git a/src/mlpack/methods/cf/cf.cpp b/src/mlpack/methods/cf/cf.cpp
index daf121c..121cf13 100644
--- a/src/mlpack/methods/cf/cf.cpp
+++ b/src/mlpack/methods/cf/cf.cpp
@@ -9,6 +9,8 @@
  * specified data set.
  */
 #include "cf.hpp"
+#include <vector>
+#include <queue>
 
 namespace mlpack {
 namespace cf {
@@ -79,9 +81,8 @@ void CF::GetRecommendations(const size_t numRecs,
   // Generate recommendations for each query user by finding the maximum numRecs
   // elements in the averages matrix.
   recommendations.set_size(numRecs, users.n_elem);
-  recommendations.fill(cleanedData.n_rows); // Invalid item number.
   arma::mat values(numRecs, users.n_elem);
-  values.fill(-DBL_MAX); // The smallest possible value.
+
   for (size_t i = 0; i < users.n_elem; i++)
   {
     // First, calculate average of neighborhood values.
@@ -92,6 +93,14 @@ void CF::GetRecommendations(const size_t numRecs,
       averages += w * h.col(neighborhood(j, i));
     averages /= neighborhood.n_rows;
 
+    // Let's build the list of candidate recomendations for the given user.
+    // Default candidate: the smallest possible value and invalid item number.
+    const Candidate def(-DBL_MAX, cleanedData.n_rows);
+    std::vector<Candidate> vect(numRecs, def);
+    typedef std::priority_queue<Candidate, std::vector<Candidate>,
+        std::greater<Candidate>> CandidateList;
+    CandidateList pqueue(std::greater<Candidate>(), std::move(vect));
+
     // Look through the averages column corresponding to the current user.
     for (size_t j = 0; j < averages.n_rows; ++j)
     {
@@ -99,29 +108,27 @@ void CF::GetRecommendations(const size_t numRecs,
       if (cleanedData(j, users(i)) != 0.0)
         continue; // The user already rated the item.
 
+      Candidate c(averages[j], j);
+
       // Is the estimated value better than the worst candidate?
-      const double value = averages[j];
-      if (value > values(values.n_rows - 1, i))
+      if (c > pqueue.top())
       {
-        // It should be inserted.  Which position?
-        size_t insertPosition = values.n_rows - 1;
-        while (insertPosition > 0)
-        {
-          if (value <= values(insertPosition - 1, i))
-            break; // The current value is the right one.
-          insertPosition--;
-        }
-
-        // Now insert it into the list.
-        InsertNeighbor(i, insertPosition, j, value, recommendations,
-            values);
+        pqueue.pop();
+        pqueue.push(c);
       }
     }
 
+    for (size_t p = 1; p <= numRecs; p++)
+    {
+      recommendations(numRecs - p, i) = pqueue.top().item;
+      values(numRecs - p, i) = pqueue.top().value;
+      pqueue.pop();
+    }
+
     // If we were not able to come up with enough recommendations, issue a
     // warning.
-    if (recommendations(values.n_rows - 1, i) == cleanedData.n_rows + 1)
-      Log::Warn << "Could not provide " << values.n_rows << " recommendations "
+    if (recommendations(numRecs - 1, i) == def.item)
+      Log::Warn << "Could not provide " << numRecs << " recommendations "
           << "for user " << users(i) << " (not enough un-rated items)!"
           << std::endl;
   }
@@ -247,37 +254,5 @@ void CF::CleanData(const arma::mat& data, arma::sp_mat& cleanedData)
   cleanedData = arma::sp_mat(locations, values, maxItemID, maxUserID);
 }
 
-/**
- * Helper function to insert a point into the recommendation matrices.
- *
- * @param queryIndex Index of point whose recommendations we are inserting into.
- * @param pos Position in list to insert into.
- * @param neighbor Index of item being inserted as a recommendation.
- * @param value Value of recommendation.
- */
-void CF::InsertNeighbor(const size_t queryIndex,
-                        const size_t pos,
-                        const size_t neighbor,
-                        const double value,
-                        arma::Mat<size_t>& recommendations,
-                        arma::mat& values) const
-{
-  // We only memmove() if there is actually a need to shift something.
-  if (pos < (recommendations.n_rows - 1))
-  {
-    const int len = (values.n_rows - 1) - pos;
-    memmove(values.colptr(queryIndex) + (pos + 1),
-        values.colptr(queryIndex) + pos,
-        sizeof(double) * len);
-    memmove(recommendations.colptr(queryIndex) + (pos + 1),
-        recommendations.colptr(queryIndex) + pos,
-        sizeof(size_t) * len);
-  }
-
-  // Now put the new information in the right index.
-  values(pos, queryIndex) = value;
-  recommendations(pos, queryIndex) = neighbor;
-}
-
 } // namespace mlpack
 } // namespace cf
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 22cb6fc..42b1a4b 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -258,22 +258,24 @@ class CF
   //! Cleaned data matrix.
   arma::sp_mat cleanedData;
 
-  /**
-   * Helper function to insert a point into the recommendation matrices.
-   *
-   * @param queryIndex Index of point whose recommendations we are inserting
-   *     into.
-   * @param pos Position in list to insert into.
-   * @param neighbor Index of item being inserted as a recommendation.
-   * @param value Value of recommendation.
-   */
-  void InsertNeighbor(const size_t queryIndex,
-                      const size_t pos,
-                      const size_t neighbor,
-                      const double value,
-                      arma::Mat<size_t>& recommendations,
-                      arma::mat& values) const;
-
+  //! Candidate represents a possible recommendation.
+  struct Candidate
+  {
+    //! Value of this recommendation.
+    double value;
+    //! Item of this recommendation.
+    size_t item;
+    //! Trivial constructor.
+    Candidate(double value, size_t item) :
+        value(value),
+        item(item)
+    {};
+    //! Compare the value of two candidates.
+    friend bool operator>(const Candidate& l, const Candidate& r)
+    {
+      return l.value > r.value;
+    };
+  };
 }; // class CF
 
 } // namespace cf




More information about the mlpack-git mailing list