[mlpack-git] master: Use a priority queue (heap) to search for the best valued recommendations. (b64e365)
gitdub at mlpack.org
gitdub at mlpack.org
Tue Jul 26 21:22:08 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/ef51b032f275266f781d42b9bd0aa50aa26a3077...8522b04c3d9a82fb7e964bafd72e70f0cd30bf4b
>---------------------------------------------------------------
commit b64e36591fcd53db0bddba6a7c2a9119eb1125f9
Author: MarcosPividori <marcos.pividori at gmail.com>
Date: Thu Jul 21 16:28:53 2016 -0300
Use a priority queue (heap) to search for the best valued recommendations.
>---------------------------------------------------------------
b64e36591fcd53db0bddba6a7c2a9119eb1125f9
src/mlpack/methods/cf/cf.cpp | 75 +++++++++++++++-----------------------------
src/mlpack/methods/cf/cf.hpp | 34 ++++++++++----------
2 files changed, 43 insertions(+), 66 deletions(-)
diff --git a/src/mlpack/methods/cf/cf.cpp b/src/mlpack/methods/cf/cf.cpp
index daf121c..121cf13 100644
--- a/src/mlpack/methods/cf/cf.cpp
+++ b/src/mlpack/methods/cf/cf.cpp
@@ -9,6 +9,8 @@
* specified data set.
*/
#include "cf.hpp"
+#include <vector>
+#include <queue>
namespace mlpack {
namespace cf {
@@ -79,9 +81,8 @@ void CF::GetRecommendations(const size_t numRecs,
// Generate recommendations for each query user by finding the maximum numRecs
// elements in the averages matrix.
recommendations.set_size(numRecs, users.n_elem);
- recommendations.fill(cleanedData.n_rows); // Invalid item number.
arma::mat values(numRecs, users.n_elem);
- values.fill(-DBL_MAX); // The smallest possible value.
+
for (size_t i = 0; i < users.n_elem; i++)
{
// First, calculate average of neighborhood values.
@@ -92,6 +93,14 @@ void CF::GetRecommendations(const size_t numRecs,
averages += w * h.col(neighborhood(j, i));
averages /= neighborhood.n_rows;
+ // Let's build the list of candidate recomendations for the given user.
+ // Default candidate: the smallest possible value and invalid item number.
+ const Candidate def(-DBL_MAX, cleanedData.n_rows);
+ std::vector<Candidate> vect(numRecs, def);
+ typedef std::priority_queue<Candidate, std::vector<Candidate>,
+ std::greater<Candidate>> CandidateList;
+ CandidateList pqueue(std::greater<Candidate>(), std::move(vect));
+
// Look through the averages column corresponding to the current user.
for (size_t j = 0; j < averages.n_rows; ++j)
{
@@ -99,29 +108,27 @@ void CF::GetRecommendations(const size_t numRecs,
if (cleanedData(j, users(i)) != 0.0)
continue; // The user already rated the item.
+ Candidate c(averages[j], j);
+
// Is the estimated value better than the worst candidate?
- const double value = averages[j];
- if (value > values(values.n_rows - 1, i))
+ if (c > pqueue.top())
{
- // It should be inserted. Which position?
- size_t insertPosition = values.n_rows - 1;
- while (insertPosition > 0)
- {
- if (value <= values(insertPosition - 1, i))
- break; // The current value is the right one.
- insertPosition--;
- }
-
- // Now insert it into the list.
- InsertNeighbor(i, insertPosition, j, value, recommendations,
- values);
+ pqueue.pop();
+ pqueue.push(c);
}
}
+ for (size_t p = 1; p <= numRecs; p++)
+ {
+ recommendations(numRecs - p, i) = pqueue.top().item;
+ values(numRecs - p, i) = pqueue.top().value;
+ pqueue.pop();
+ }
+
// If we were not able to come up with enough recommendations, issue a
// warning.
- if (recommendations(values.n_rows - 1, i) == cleanedData.n_rows + 1)
- Log::Warn << "Could not provide " << values.n_rows << " recommendations "
+ if (recommendations(numRecs - 1, i) == def.item)
+ Log::Warn << "Could not provide " << numRecs << " recommendations "
<< "for user " << users(i) << " (not enough un-rated items)!"
<< std::endl;
}
@@ -247,37 +254,5 @@ void CF::CleanData(const arma::mat& data, arma::sp_mat& cleanedData)
cleanedData = arma::sp_mat(locations, values, maxItemID, maxUserID);
}
-/**
- * Helper function to insert a point into the recommendation matrices.
- *
- * @param queryIndex Index of point whose recommendations we are inserting into.
- * @param pos Position in list to insert into.
- * @param neighbor Index of item being inserted as a recommendation.
- * @param value Value of recommendation.
- */
-void CF::InsertNeighbor(const size_t queryIndex,
- const size_t pos,
- const size_t neighbor,
- const double value,
- arma::Mat<size_t>& recommendations,
- arma::mat& values) const
-{
- // We only memmove() if there is actually a need to shift something.
- if (pos < (recommendations.n_rows - 1))
- {
- const int len = (values.n_rows - 1) - pos;
- memmove(values.colptr(queryIndex) + (pos + 1),
- values.colptr(queryIndex) + pos,
- sizeof(double) * len);
- memmove(recommendations.colptr(queryIndex) + (pos + 1),
- recommendations.colptr(queryIndex) + pos,
- sizeof(size_t) * len);
- }
-
- // Now put the new information in the right index.
- values(pos, queryIndex) = value;
- recommendations(pos, queryIndex) = neighbor;
-}
-
} // namespace mlpack
} // namespace cf
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 22cb6fc..42b1a4b 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -258,22 +258,24 @@ class CF
//! Cleaned data matrix.
arma::sp_mat cleanedData;
- /**
- * Helper function to insert a point into the recommendation matrices.
- *
- * @param queryIndex Index of point whose recommendations we are inserting
- * into.
- * @param pos Position in list to insert into.
- * @param neighbor Index of item being inserted as a recommendation.
- * @param value Value of recommendation.
- */
- void InsertNeighbor(const size_t queryIndex,
- const size_t pos,
- const size_t neighbor,
- const double value,
- arma::Mat<size_t>& recommendations,
- arma::mat& values) const;
-
+ //! Candidate represents a possible recommendation.
+ struct Candidate
+ {
+ //! Value of this recommendation.
+ double value;
+ //! Item of this recommendation.
+ size_t item;
+ //! Trivial constructor.
+ Candidate(double value, size_t item) :
+ value(value),
+ item(item)
+ {};
+ //! Compare the value of two candidates.
+ friend bool operator>(const Candidate& l, const Candidate& r)
+ {
+ return l.value > r.value;
+ };
+ };
}; // class CF
} // namespace cf
More information about the mlpack-git
mailing list