[mlpack-git] master: Fix two FastMKS bugs. (49d765c)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Sep 2 10:16:07 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/9e6c0fe30a8ffc9e3e6593dea1e4d40042438649...49d765cc540f0499e14b5d5bfafe5e63bb25c1f5

>---------------------------------------------------------------

commit 49d765cc540f0499e14b5d5bfafe5e63bb25c1f5
Author: ryan <ryan at ratml.org>
Date:   Wed Sep 2 10:15:00 2015 -0400

    Fix two FastMKS bugs.
    
    1. Stop pruning of duplicate points (> -> >=)
    2. Reformulate bound function for when many max-kernel candidates are desired.


>---------------------------------------------------------------

49d765cc540f0499e14b5d5bfafe5e63bb25c1f5
 src/mlpack/methods/fastmks/fastmks_rules_impl.hpp | 41 ++++++++++++++++-------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/src/mlpack/methods/fastmks/fastmks_rules_impl.hpp b/src/mlpack/methods/fastmks/fastmks_rules_impl.hpp
index f85c947..abce9ab 100644
--- a/src/mlpack/methods/fastmks/fastmks_rules_impl.hpp
+++ b/src/mlpack/methods/fastmks/fastmks_rules_impl.hpp
@@ -188,7 +188,7 @@ double FastMKSRules<KernelType, TreeType>::Score(const size_t queryIndex,
 
   // We return the inverse of the maximum kernel so that larger kernels are
   // recursed into first.
-  return (maxKernel > bestKernel) ? (1.0 / maxKernel) : DBL_MAX;
+  return (maxKernel >= bestKernel) ? (1.0 / maxKernel) : DBL_MAX;
 }
 
 template<typename KernelType, typename TreeType>
@@ -377,7 +377,7 @@ double FastMKSRules<KernelType, TreeType>::Score(TreeType& queryNode,
 
   // We return the inverse of the maximum kernel so that larger kernels are
   // recursed into first.
-  return (maxKernel > bestKernel) ? (1.0 / maxKernel) : DBL_MAX;
+  return (maxKernel >= bestKernel) ? (1.0 / maxKernel) : DBL_MAX;
 }
 
 template<typename KernelType, typename TreeType>
@@ -387,7 +387,7 @@ double FastMKSRules<KernelType, TreeType>::Rescore(const size_t queryIndex,
 {
   const double bestKernel = products(products.n_rows - 1, queryIndex);
 
-  return ((1.0 / oldScore) > bestKernel) ? oldScore : DBL_MAX;
+  return ((1.0 / oldScore) >= bestKernel) ? oldScore : DBL_MAX;
 }
 
 template<typename KernelType, typename TreeType>
@@ -398,7 +398,7 @@ double FastMKSRules<KernelType, TreeType>::Rescore(TreeType& queryNode,
   queryNode.Stat().Bound() = CalculateBound(queryNode);
   const double bestKernel = queryNode.Stat().Bound();
 
-  return ((1.0 / oldScore) > bestKernel) ? oldScore : DBL_MAX;
+  return ((1.0 / oldScore) >= bestKernel) ? oldScore : DBL_MAX;
 }
 
 /**
@@ -426,7 +426,9 @@ double FastMKSRules<KernelType, TreeType>::CalculateBound(TreeType& queryNode)
 
   const double queryDescendantDistance = queryNode.FurthestDescendantDistance();
 
-  // Loop over all points in this node to find the best and worst.
+  // Loop over all points in this node to find the worst max-kernel value and
+  // the best possible adjusted max-kernel value that could be held by any
+  // descendant.
   for (size_t i = 0; i < queryNode.NumPoints(); ++i)
   {
     const size_t point = queryNode.Point(i);
@@ -438,12 +440,26 @@ double FastMKSRules<KernelType, TreeType>::CalculateBound(TreeType& queryNode)
 
     // This should be (queryDescendantDistance + centroidDistance) for any tree
     // but it works for cover trees since centroidDistance = 0 for cover trees.
-    const double candidateKernel = products(products.n_rows - 1, point) -
-        queryDescendantDistance *
-        referenceKernels[indices(indices.n_rows - 1, point)];
+    // The formulation here is slightly different than in Equation 43 of
+    // "Dual-tree fast exact max-kernel search".  Because we could be searching
+    // for k max kernels and not just one, the bound for this point must
+    // actually be the minimum adjusted kernel of all k candidate kernels.
+    // So,
+    //   B(N_q) = min_{1 \le j \le k} k_j^*(p_q) -
+    //       \lambda_q \sqrt(K(p_j^*(p_q), p_j^*(p_q)))
+    // where p_j^*(p_q) is the j'th kernel candidate for query point p_q and
+    // k_j^*(p_q) is K(p_q, p_j^*(p_q)).
+    double worstPointCandidateKernel = DBL_MAX;
+    for (size_t j = 0; j < products.n_rows; ++j)
+    {
+      const double candidateKernel = products(j, point) -
+          queryDescendantDistance * referenceKernels[indices(j, point)];
+      if (candidateKernel < worstPointCandidateKernel)
+        worstPointCandidateKernel = candidateKernel;
+    }
 
-    if (candidateKernel > bestAdjustedPointKernel)
-      bestAdjustedPointKernel = candidateKernel;
+    if (worstPointCandidateKernel > bestAdjustedPointKernel)
+      bestAdjustedPointKernel = worstPointCandidateKernel;
   }
 
   // Loop over all the children in the node.
@@ -466,7 +482,6 @@ double FastMKSRules<KernelType, TreeType>::CalculateBound(TreeType& queryNode)
   // Pick the best of these bounds.
   const double interA = (firstBound > bestAdjustedPointKernel) ? firstBound :
       bestAdjustedPointKernel;
-//  const double interA = 0.0;
   const double interB = fourthBound;
 
   return (interA > interB) ? interA : interB;
@@ -503,7 +518,7 @@ void FastMKSRules<KernelType, TreeType>::InsertNeighbor(const size_t queryIndex,
   indices(pos, queryIndex) = neighbor;
 }
 
-}; // namespace fastmks
-}; // namespace mlpack
+} // namespace fastmks
+} // namespace mlpack
 
 #endif



More information about the mlpack-git mailing list