[mlpack-svn] r12954 - mlpack/trunk/src/mlpack/methods/maxip

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Jun 5 15:15:14 EDT 2012


Author: rcurtin
Date: 2012-06-05 15:15:14 -0400 (Tue, 05 Jun 2012)
New Revision: 12954

Modified:
   mlpack/trunk/src/mlpack/methods/maxip/CMakeLists.txt
   mlpack/trunk/src/mlpack/methods/maxip/ip_metric.hpp
   mlpack/trunk/src/mlpack/methods/maxip/ip_metric_impl.hpp
   mlpack/trunk/src/mlpack/methods/maxip/max_ip.hpp
   mlpack/trunk/src/mlpack/methods/maxip/max_ip_impl.hpp
   mlpack/trunk/src/mlpack/methods/maxip/max_ip_main.cpp
Log:
Check in mess of MaxIP code which is being cleaned little by little...


Modified: mlpack/trunk/src/mlpack/methods/maxip/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/CMakeLists.txt	2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/CMakeLists.txt	2012-06-05 19:15:14 UTC (rev 12954)
@@ -27,4 +27,11 @@
   mlpack
 )
 
+#add_executable(maxip_string
+#  max_ip_string_main.cpp
+#)
+#target_link_libraries(maxip_string
+#  mlpack
+#)
+
 install(TARGETS maxip RUNTIME DESTINATION bin)

Modified: mlpack/trunk/src/mlpack/methods/maxip/ip_metric.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/ip_metric.hpp	2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/ip_metric.hpp	2012-06-05 19:15:14 UTC (rev 12954)
@@ -15,18 +15,24 @@
 class IPMetric
 {
  public:
-  typedef KernelType Kernel;
+  IPMetric() : kernel(localKernel) { }
 
   /**
    * Create the IPMetric.
    */
-  IPMetric() { }
+  IPMetric(KernelType& kernel) : kernel(kernel) { }
 
   /**
    * Evaluate the metric.
    */
   template<typename Vec1Type, typename Vec2Type>
-  static double Evaluate(const Vec1Type& a, const Vec2Type& b);
+  double Evaluate(const Vec1Type& a, const Vec2Type& b);
+
+  const KernelType& Kernel() const { return kernel; }
+  KernelType& Kernel() { return kernel; }
+
+  KernelType localKernel;
+  KernelType& kernel;
 };
 
 }; // namespace maxip

Modified: mlpack/trunk/src/mlpack/methods/maxip/ip_metric_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/ip_metric_impl.hpp	2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/ip_metric_impl.hpp	2012-06-05 19:15:14 UTC (rev 12954)
@@ -18,13 +18,14 @@
 
 template<typename KernelType>
 template<typename Vec1Type, typename Vec2Type>
-inline double IPMetric<KernelType>::Evaluate(const Vec1Type& a, const Vec2Type& b)
+inline double IPMetric<KernelType>::Evaluate(const Vec1Type& a,
+                                             const Vec2Type& b)
 {
   // This is the metric induced by the kernel function.
   // Maybe we can do better by caching some of this?
   ++distanceEvaluations;
-  return KernelType::Evaluate(a, a) + KernelType::Evaluate(b, b) -
-      2 * KernelType::Evaluate(a, b);
+  return sqrt(kernel.Evaluate(a, a) + kernel.Evaluate(b, b) -
+      2 * kernel.Evaluate(a, b));
 }
 
 template<>

Modified: mlpack/trunk/src/mlpack/methods/maxip/max_ip.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/max_ip.hpp	2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/max_ip.hpp	2012-06-05 19:15:14 UTC (rev 12954)
@@ -19,13 +19,17 @@
 {
  public:
   MaxIP(const arma::mat& referenceSet,
+        KernelType& kernel,
         bool single = false,
-        bool naive = false);
+        bool naive = false,
+        double expansionConstant = 2.0);
 
   MaxIP(const arma::mat& referenceSet,
         const arma::mat& querySet,
+        KernelType& kernel,
         bool single = false,
-        bool naive = false);
+        bool naive = false,
+        double expansionConstant = 2.0);
 
   ~MaxIP();
 
@@ -46,6 +50,8 @@
 
   bool naive;
 
+  IPMetric<KernelType> metric;
+
   // Utility function.  Copied too many times from too many places.
   void InsertNeighbor(arma::Mat<size_t>& indices,
                       arma::mat& products,

Modified: mlpack/trunk/src/mlpack/methods/maxip/max_ip_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/max_ip_impl.hpp	2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/max_ip_impl.hpp	2012-06-05 19:15:14 UTC (rev 12954)
@@ -12,10 +12,171 @@
 
 #include "max_ip_rules.hpp"
 
+#include <mlpack/core/kernels/gaussian_kernel.hpp>
+
 namespace mlpack {
 namespace maxip {
 
 template<typename TreeType>
+void RecurseTreeCountLeaves(const TreeType& node, arma::vec& counts)
+{
+  for (size_t i = 0; i < node.NumChildren(); ++i)
+  {
+    if (node.Child(i).NumChildren() == 0)
+      counts[node.Child(i).Point()]++;
+    else
+      RecurseTreeCountLeaves<TreeType>(node.Child(i), counts);
+  }
+}
+
+template<typename TreeType>
+void CheckSelfChild(const TreeType& node)
+{
+  if (node.NumChildren() == 0)
+    return; // No self-child applicable here.
+
+  bool found = false;
+  for (size_t i = 0; i < node.NumChildren(); ++i)
+  {
+    if (node.Child(i).Point() == node.Point())
+      found = true;
+
+    // Recursively check the children.
+    CheckSelfChild(node.Child(i));
+  }
+
+  // Ensure this has its own self-child.
+  Log::Assert(found == true);
+}
+
+template<typename TreeType, typename MetricType>
+void CheckCovering(const TreeType& node)
+{
+  // Return if a leaf.  No checking necessary.
+  if (node.NumChildren() == 0)
+    return;
+
+  const arma::mat& dataset = node.Dataset();
+  const size_t nodePoint = node.Point();
+
+  // To ensure that this node satisfies the covering principle, we must ensure
+  // that the distance to each child is less than pow(expansionConstant, scale).
+  double maxDistance = pow(node.ExpansionConstant(), node.Scale());
+  for (size_t i = 0; i < node.NumChildren(); ++i)
+  {
+    const size_t childPoint = node.Child(i).Point();
+
+    double distance = MetricType::Evaluate(dataset.col(nodePoint),
+        dataset.col(childPoint));
+
+    Log::Assert(distance <= maxDistance);
+
+    // Check the child.
+    CheckCovering<TreeType, MetricType>(node.Child(i));
+  }
+}
+
+template<typename TreeType, typename MetricType>
+void CheckIndividualSeparation(const TreeType& constantNode,
+                               const TreeType& node)
+{
+  // Don't check points at a lower scale.
+  if (node.Scale() < constantNode.Scale())
+    return;
+
+  // If at a higher scale, recurse.
+  if (node.Scale() > constantNode.Scale())
+  {
+    for (size_t i = 0; i < node.NumChildren(); ++i)
+    {
+      // Don't recurse into leaves.
+      if (node.Child(i).NumChildren() > 0)
+        CheckIndividualSeparation<TreeType, MetricType>(constantNode,
+            node.Child(i));
+    }
+
+    return;
+  }
+
+  // Don't compare the same point against itself.
+  if (node.Point() == constantNode.Point())
+    return;
+
+  // Now we know we are at the same scale, so make the comparison.
+  const arma::mat& dataset = constantNode.Dataset();
+  const size_t constantPoint = constantNode.Point();
+  const size_t nodePoint = node.Point();
+
+  // Make sure the distance is at least the following value (in accordance with
+  // the separation principle of cover trees).
+  double minDistance = pow(constantNode.ExpansionConstant(),
+      constantNode.Scale());
+
+  double distance = MetricType::Evaluate(dataset.col(constantPoint),
+      dataset.col(nodePoint));
+
+}
+
+template<typename TreeType, typename MetricType>
+void CheckSeparation(const TreeType& node, const TreeType& root)
+{
+  // Check the separation between this point and all other points on this scale.
+  CheckIndividualSeparation<TreeType, MetricType>(node, root);
+
+  // Check the children, but only if they are not leaves.  Leaves don't need to
+  // be checked.
+  for (size_t i = 0; i < node.NumChildren(); ++i)
+    if (node.Child(i).NumChildren() > 0)
+      CheckSeparation<TreeType, MetricType>(node.Child(i), root);
+}
+
+template<typename TreeType, typename MetricType>
+void GetMaxDistance(TreeType& node, TreeType& constantNode, double& best, size_t& index)
+{
+  const arma::mat& dataset = node.Dataset();
+  const double eval = MetricType::Evaluate(dataset.unsafe_col(node.Point()),
+      dataset.unsafe_col(constantNode.Point()));
+  if (eval > best)
+  {
+    best = eval;
+    index = node.Point();
+  }
+
+  // Recurse into children.
+  for (size_t i = 0; i < node.NumChildren(); ++i)
+    GetMaxDistance<TreeType, MetricType>(node.Child(i), constantNode, best, index);
+}
+
+template<typename TreeType, typename MetricType>
+void CheckMaxDistances(TreeType& node)
+{
+  // Check child distances.
+  for (size_t i = 0; i < node.NumChildren(); ++i)
+  {
+    const arma::mat& dataset = node.Dataset();
+    double eval = MetricType::Evaluate(dataset.unsafe_col(node.Point()),
+        dataset.unsafe_col(node.Child(i).Point()));
+
+//    Log::Debug << "Point " << node.Point() << ", child point " << node.Child(i).Point() << "\n";
+//    Log::Debug << "Actual eval " << eval << ", supposed parent distance " << node.Child(i).ParentDistance() << "\n";
+//    Log::Debug << "Diff " << eval - node.Child(i).ParentDistance() << "\n";
+    Log::Assert(std::abs(eval - node.Child(i).ParentDistance()) < 1e-10);
+  }
+
+  // Check all descendants.
+  double maxDescendantDistance = 0;
+  size_t maxIndex = 0;
+  GetMaxDistance<TreeType, MetricType>(node, node, maxDescendantDistance, maxIndex);
+
+//  Log::Debug << "Point " << node.Point() << ", max desc. index " << maxIndex << "\n";
+//  Log::Debug << "Calculated maxDescendantDistance " << maxDescendantDistance << " given " << node.FurthestDescendantDistance() << std::endl;
+  Log::Assert(std::abs(maxDescendantDistance - node.FurthestDescendantDistance()) < 1e-10);
+
+  for (size_t i = 0; i < node.NumChildren(); ++i)
+    CheckMaxDistances<TreeType, MetricType>(node.Child(i));
+}
+
+template<typename TreeType>
 struct SearchFrame
 {
   TreeType* node;
@@ -42,21 +203,26 @@
 
 template<typename KernelType>
 MaxIP<KernelType>::MaxIP(const arma::mat& referenceSet,
+                         KernelType& kernel,
                          bool single,
-                         bool naive) :
+                         bool naive,
+                         double expansionConstant) :
     referenceSet(referenceSet),
     querySet(referenceSet), // Gotta point it somewhere...
     queryTree(NULL),
     single(single),
-    naive(naive)
+    naive(naive),
+    metric(kernel)
 {
+
   Timer::Start("tree_building");
 
   // Build the tree.  Could we do this in the initialization list?
   if (naive)
     referenceTree = NULL;
   else
-    referenceTree = new tree::CoverTree<IPMetric<KernelType> >(referenceSet);
+    referenceTree = new tree::CoverTree<IPMetric<KernelType> >(referenceSet,
+        expansionConstant, &metric);
 
   Timer::Stop("tree_building");
 }
@@ -64,12 +230,15 @@
 template<typename KernelType>
 MaxIP<KernelType>::MaxIP(const arma::mat& referenceSet,
                          const arma::mat& querySet,
+                         KernelType& kernel,
                          bool single,
-                         bool naive) :
+                         bool naive,
+                         double expansionConstant) :
     referenceSet(referenceSet),
     querySet(querySet),
     single(single),
-    naive(naive)
+    naive(naive),
+    metric(kernel)
 {
   Timer::Start("tree_building");
 
@@ -77,13 +246,42 @@
   if (naive)
     referenceTree = NULL;
   else
-    referenceTree = new tree::CoverTree<IPMetric<KernelType> >(referenceSet);
+    referenceTree = new tree::CoverTree<IPMetric<KernelType> >(referenceSet,
+        expansionConstant, &metric);
 
   if (single || naive)
     queryTree = NULL;
   else
-    queryTree = new tree::CoverTree<IPMetric<KernelType> >(querySet);
+    queryTree = new tree::CoverTree<IPMetric<KernelType> >(querySet,
+        expansionConstant, &metric);
 
+/*  if (referenceTree != NULL)
+  {
+    Log::Debug << "Check counts" << std::endl;
+    // Now loop through the tree and ensure that each leaf is only created once.
+    arma::vec counts;
+    counts.zeros(referenceSet.n_elem);
+    RecurseTreeCountLeaves(*referenceTree, counts);
+
+    // Each point should only have one leaf node representing it.
+    for (size_t i = 0; i < 20; ++i)
+      Log::Assert(counts[i] == 1);
+
+    Log::Debug << "Check self child\n";
+    // Each non-leaf should have a self-child.
+    CheckSelfChild<tree::CoverTree<IPMetric<KernelType> > >(*referenceTree);
+
+    Log::Debug << "Check covering\n";
+    // Each node must satisfy the covering principle (its children must be less
+    // than or equal to a certain distance apart).
+    CheckCovering<tree::CoverTree<IPMetric<KernelType> >, IPMetric<KernelType> >(*referenceTree);
+
+    Log::Debug << "Check max distances\n";
+    // Check maximum distance of children and grandchildren.
+    CheckMaxDistances<tree::CoverTree<IPMetric<KernelType> >, IPMetric<KernelType> >(*referenceTree);
+    Log::Debug << "Done\n";
+  }*/
+
   Timer::Stop("tree_building");
 }
 
@@ -104,7 +302,7 @@
   // No remapping will be necessary.
   indices.set_size(k, querySet.n_cols);
   products.set_size(k, querySet.n_cols);
-  products.fill(DBL_MIN);
+  products.fill(-1.0);
 
   Timer::Start("computing_products");
 
@@ -118,8 +316,8 @@
     {
       for (size_t r = 0; r < referenceSet.n_cols; ++r)
       {
-        const double eval = KernelType::Evaluate(querySet.unsafe_col(q),
-                                                 referenceSet.unsafe_col(r));
+        const double eval = metric.Kernel().Evaluate(querySet.unsafe_col(q),
+            referenceSet.unsafe_col(r));
         ++kernelEvaluations;
 
         size_t insertPosition;
@@ -148,7 +346,7 @@
     // Precalculate query products ( || q || for all q).
     arma::vec queryProducts(querySet.n_cols);
     for (size_t queryIndex = 0; queryIndex < querySet.n_cols; ++queryIndex)
-      queryProducts[queryIndex] = sqrt(KernelType::Evaluate(
+      queryProducts[queryIndex] = sqrt(metric.Kernel().Evaluate(
           querySet.unsafe_col(queryIndex), querySet.unsafe_col(queryIndex)));
     kernelEvaluations += querySet.n_cols;
 
@@ -165,7 +363,7 @@
       // Add initial frame.
       SearchFrame<tree::CoverTree<IPMetric<KernelType> > > nextFrame;
       nextFrame.node = referenceTree;
-      nextFrame.eval = KernelType::Evaluate(querySet.unsafe_col(queryIndex),
+      nextFrame.eval = metric.Kernel().Evaluate(querySet.unsafe_col(queryIndex),
           referenceSet.unsafe_col(referenceTree->Point()));
       ++kernelEvaluations;
 
@@ -200,8 +398,11 @@
           childFrame.node = &(referenceNode->Child(0));
           childFrame.eval = eval;
 
-          maxProduct = eval + std::pow(childFrame.node->ExpansionConstant(),
-              childFrame.node->Scale() + 1) * queryProducts[queryIndex];
+//          maxProduct = eval + std::pow(childFrame.node->ExpansionConstant(),
+//              childFrame.node->Scale() + 1) * queryProducts[queryIndex];
+          // Alternate pruning rule.
+          maxProduct = eval + childFrame.node->FurthestDescendantDistance() *
+              queryProducts[queryIndex];
 
           // Add self-child if we can't prune it.
           if (maxProduct > products(products.n_rows - 1, queryIndex))
@@ -215,18 +416,33 @@
 
           for (size_t i = 1; i < referenceNode->NumChildren(); ++i)
           {
+            // Before we evaluate the child, let's see if it can possibly have
+            // a better evaluation.
+            double maxChildEval = eval + queryProducts[queryIndex] *
+                (referenceNode->Child(i).ParentDistance() +
+                referenceNode->Child(i).FurthestDescendantDistance());
+
+            if (maxChildEval <= products(products.n_rows - 1, queryIndex))
+            {
+              ++numPrunes;
+              continue; // Skip this child; it can't be any better.
+            }
+
             // Evaluate child.
             childFrame.node = &(referenceNode->Child(i));
-            childFrame.eval = KernelType::Evaluate(
+            childFrame.eval = metric.Kernel().Evaluate(
                 querySet.unsafe_col(queryIndex),
                 referenceSet.unsafe_col(referenceNode->Child(i).Point()));
             ++kernelEvaluations;
 
             // Can we prune it?  If we can, we can avoid putting it in the queue
             // (saves time).
-            double maxProduct = childFrame.eval +
-                std::pow(childFrame.node->ExpansionConstant(),
-                childFrame.node->Scale() + 1) * queryProducts[queryIndex];
+//            double maxProduct = childFrame.eval +
+//                std::pow(childFrame.node->ExpansionConstant(),
+//                childFrame.node->Scale() + 1) * queryProducts[queryIndex];
+            // Alternate pruning rule.
+            maxProduct = childFrame.eval + queryProducts[queryIndex] *
+                childFrame.node->FurthestDescendantDistance();
 
             if (maxProduct > products(products.n_rows - 1, queryIndex))
             {
@@ -309,6 +525,217 @@
   indices(pos, queryIndex) = neighbor;
 }
 
+// Specialized implementation for tighter bounds for Gaussian.
+template<>
+void MaxIP<kernel::GaussianKernel>::Search(const size_t k,
+                                           arma::Mat<size_t>& indices,
+                                           arma::mat& products)
+{
+  Log::Warn << "Alternate implementation!" << std::endl;
+
+  // Terrible copypasta.  Bad bad bad.
+  // No remapping will be necessary.
+  indices.set_size(k, querySet.n_cols);
+  products.set_size(k, querySet.n_cols);
+  products.fill(-1.0);
+
+  Timer::Start("computing_products");
+
+  size_t kernelEvaluations = 0;
+
+  // Naive implementation.
+  if (naive)
+  {
+    // Simple double loop.  Stupid, slow, but a good benchmark.
+    for (size_t q = 0; q < querySet.n_cols; ++q)
+    {
+      for (size_t r = 0; r < referenceSet.n_cols; ++r)
+      {
+        const double eval = metric.Kernel().Evaluate(querySet.unsafe_col(q),
+            referenceSet.unsafe_col(r));
+        ++kernelEvaluations;
+
+        size_t insertPosition;
+        for (insertPosition = 0; insertPosition < indices.n_rows;
+            ++insertPosition)
+          if (eval > products(insertPosition, q))
+            break;
+
+        if (insertPosition < indices.n_rows)
+          InsertNeighbor(indices, products, q, insertPosition, r, eval);
+      }
+    }
+
+    Timer::Stop("computing_products");
+
+    Log::Info << "Kernel evaluations: " << kernelEvaluations << "." << std::endl;
+    return;
+  }
+
+  // Single-tree implementation.
+  if (single)
+  {
+    // Calculate number of pruned nodes.
+    size_t numPrunes = 0;
+
+    // Precalculate query products ( || q || for all q).
+    arma::vec queryProducts(querySet.n_cols);
+    for (size_t queryIndex = 0; queryIndex < querySet.n_cols; ++queryIndex)
+      queryProducts[queryIndex] = sqrt(metric.Kernel().Evaluate(
+          querySet.unsafe_col(queryIndex), querySet.unsafe_col(queryIndex)));
+    kernelEvaluations += querySet.n_cols;
+
+    // Screw the CoverTreeTraverser, we'll implement it by hand.
+    for (size_t queryIndex = 0; queryIndex < querySet.n_cols; ++queryIndex)
+    {
+      // Use an array of priority queues?
+      std::priority_queue<
+          SearchFrame<tree::CoverTree<IPMetric<kernel::GaussianKernel> > >,
+          std::vector<SearchFrame<tree::CoverTree<IPMetric<kernel::GaussianKernel> > > >,
+          SearchFrameCompare<tree::CoverTree<IPMetric<kernel::GaussianKernel> > > >
+          frameQueue;
+
+      // Add initial frame.
+      SearchFrame<tree::CoverTree<IPMetric<kernel::GaussianKernel> > > nextFrame;
+      nextFrame.node = referenceTree;
+      nextFrame.eval = metric.Kernel().Evaluate(querySet.unsafe_col(queryIndex),
+          referenceSet.unsafe_col(referenceTree->Point()));
+      Log::Assert(nextFrame.eval <= 1);
+      ++kernelEvaluations;
+
+      // The initial evaluation will be the best so far.
+      indices(0, queryIndex) = referenceTree->Point();
+      products(0, queryIndex) = nextFrame.eval;
+
+      frameQueue.push(nextFrame);
+
+      tree::CoverTree<IPMetric<kernel::GaussianKernel> >* referenceNode;
+      double eval;
+      double maxProduct;
+
+      while (!frameQueue.empty())
+      {
+        // Get the information for this node.
+        const SearchFrame<tree::CoverTree<IPMetric<kernel::GaussianKernel> > >&
+            frame = frameQueue.top();
+
+        referenceNode = frame.node;
+        eval = frame.eval;
+
+        // Loop through the children, seeing if we can prune them; if not, add
+        // them to the queue.  The self-child is different -- it has the same
+        // parent (and therefore the same kernel evaluation).
+        if (referenceNode->NumChildren() > 0)
+        {
+          SearchFrame<tree::CoverTree<IPMetric<kernel::GaussianKernel> > >
+              childFrame;
+
+          // We must handle the self-child differently, to avoid adding it to
+          // the results twice.
+          childFrame.node = &(referenceNode->Child(0));
+          childFrame.eval = eval;
+
+          // Alternate pruning rule.
+          const double mdd = childFrame.node->FurthestDescendantDistance();
+          if (eval >= (1 - std::pow(mdd, 2.0) / 2.0))
+            maxProduct = 1;
+          else
+            maxProduct = eval * (1 - std::pow(mdd, 2.0) / 2.0) + sqrt(1 -
+                std::pow(eval, 2.0)) * mdd * sqrt(1 - std::pow(mdd, 2.0) / 4.0);
+
+          // Add self-child if we can't prune it.
+          if (maxProduct > products(products.n_rows - 1, queryIndex))
+          {
+            // But only if it has children of its own.
+            if (childFrame.node->NumChildren() > 0)
+              frameQueue.push(childFrame);
+          }
+          else
+            ++numPrunes;
+
+          for (size_t i = 1; i < referenceNode->NumChildren(); ++i)
+          {
+            // Before we evaluate the child, let's see if it can possibly have
+            // a better evaluation.
+            const double mpdd = std::min(
+                referenceNode->Child(i).ParentDistance() +
+                referenceNode->Child(i).FurthestDescendantDistance(), 2.0);
+            double maxChildEval = 1;
+            if (eval < (1 - std::pow(mpdd, 2.0) / 2.0))
+              maxChildEval = eval * (1 - std::pow(mpdd, 2.0) / 2.0) + sqrt(1 -
+                  std::pow(eval, 2.0)) * mpdd * sqrt(1 - std::pow(mpdd, 2.0)
+                  / 4.0);
+
+            if (maxChildEval > products(products.n_rows - 1, queryIndex))
+            {
+              // Evaluate child.
+              childFrame.node = &(referenceNode->Child(i));
+              childFrame.eval = metric.Kernel().Evaluate(
+                  querySet.unsafe_col(queryIndex),
+                  referenceSet.unsafe_col(referenceNode->Child(i).Point()));
+              ++kernelEvaluations;
+
+              // Can we prune it?  If we can, we can avoid putting it in the
+              // queue (saves time).
+              const double cmdd = childFrame.node->FurthestDescendantDistance();
+              if (childFrame.eval > (1 - std::pow(cmdd, 2.0) / 2.0))
+                maxProduct = 1;
+              else
+                maxProduct = childFrame.eval * (1 - std::pow(cmdd, 2.0) / 2.0)
+                    + sqrt(1 - std::pow(eval, 2.0)) * cmdd * sqrt(1 -
+                    std::pow(cmdd, 2.0) / 4.0);
+
+              if (maxProduct > products(products.n_rows - 1, queryIndex))
+              {
+                // Good enough to recurse into.  While we're at it, check the
+                // actual evaluation and see if it's an improvement.
+                if (childFrame.eval > products(products.n_rows - 1, queryIndex))
+                {
+                  // This is a better result.  Find out where to insert.
+                  size_t insertPosition = 0;
+                  for ( ; insertPosition < products.n_rows - 1;
+                      ++insertPosition)
+                    if (childFrame.eval > products(insertPosition, queryIndex))
+                      break;
+
+                  // Insert into the correct position; we are guaranteed that
+                  // insertPosition is valid.
+                  InsertNeighbor(indices, products, queryIndex, insertPosition,
+                      childFrame.node->Point(), childFrame.eval);
+                }
+
+                // Now add this to the queue (if it has any children which may
+                // need to be recursed into).
+                if (childFrame.node->NumChildren() > 0)
+                  frameQueue.push(childFrame);
+              }
+              else
+                ++numPrunes;
+            }
+            else
+              ++numPrunes;
+          }
+        }
+
+        frameQueue.pop();
+      }
+    }
+
+    Log::Info << "Pruned " << numPrunes << " nodes." << std::endl;
+    Log::Info << "Kernel evaluations: " << kernelEvaluations << "."
+        << std::endl;
+    Log::Info << "Distance evaluations: " << distanceEvaluations << "."
+        << std::endl;
+
+    Timer::Stop("computing_products");
+    return;
+  }
+
+  // Double-tree implementation.
+  Log::Fatal << "Dual-tree search not implemented yet... oops..." << std::endl;
+
+}
+
 }; // namespace maxip
 }; // namespace mlpack
 

Modified: mlpack/trunk/src/mlpack/methods/maxip/max_ip_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/max_ip_main.cpp	2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/max_ip_main.cpp	2012-06-05 19:15:14 UTC (rev 12954)
@@ -13,6 +13,10 @@
 #include <mlpack/core/kernels/polynomial_kernel.hpp>
 #include <mlpack/core/kernels/cosine_distance.hpp>
 #include <mlpack/core/kernels/gaussian_kernel.hpp>
+#include <mlpack/core/kernels/hyperbolic_tangent_kernel.hpp>
+#include <mlpack/core/kernels/triangular_kernel.hpp>
+#include <mlpack/core/kernels/epanechnikov_kernel.hpp>
+#include <mlpack/core/kernels/inverse_multi_quadric_kernel.hpp>
 
 #include "max_ip.hpp"
 
@@ -48,6 +52,51 @@
 PARAM_FLAG("single", "If true, single-tree search is used (as opposed to "
     "dual-tree search.", "s");
 
+PARAM_DOUBLE("base", "Base to use during cover tree construction.", "b", 2.0);
+
+PARAM_DOUBLE("degree", "Degree of polynomial kernel.", "d", 2.0);
+PARAM_DOUBLE("offset", "Offset of kernel (for polynomial and hyptan kernels).",
+    "o", 0.0);
+PARAM_DOUBLE("bandwidth", "Bandwidth (for Gaussian and Epanechnikov kernels).",
+    "w", 1.0);
+
+template<typename KernelType>
+void RunMaxIP(const arma::mat& referenceData,
+              const bool single,
+              const bool naive,
+              const double base,
+              const size_t k,
+              arma::Mat<size_t>& indices,
+              arma::mat& products,
+              KernelType& kernel)
+{
+  // Create MaxIP object.
+  MaxIP<KernelType> maxip(referenceData, kernel, (single && !naive), naive,
+      base);
+
+  // Now search with it.
+  maxip.Search(k, indices, products);
+}
+
+template<typename KernelType>
+void RunMaxIP(const arma::mat& referenceData,
+              const arma::mat& queryData,
+              const bool single,
+              const bool naive,
+              const double base,
+              const size_t k,
+              arma::Mat<size_t>& indices,
+              arma::mat& products,
+              KernelType& kernel)
+{
+  // Create MaxIP object.
+  MaxIP<KernelType> maxip(referenceData, queryData, kernel, (single && !naive),
+      naive, base);
+
+  // Now search with it.
+  maxip.Search(k, indices, products);
+}
+
 int main(int argc, char** argv)
 {
   distanceEvaluations = 0;
@@ -64,6 +113,12 @@
 
   const string kernelType = CLI::GetParam<string>("kernel");
 
+  const double base = CLI::GetParam<double>("base");
+
+  const double degree = CLI::GetParam<double>("degree");
+  const double offset = CLI::GetParam<double>("offset");
+  const double bandwidth = CLI::GetParam<double>("bandwidth");
+
   arma::mat referenceData;
   arma::mat queryData;
 
@@ -82,9 +137,10 @@
 
   // Check on kernel type.
   if ((kernelType != "linear") && (kernelType != "polynomial") &&
-      (kernelType != "cosine") && (kernelType != "polynomial5") &&
-      (kernelType != "polynomial10") && (kernelType != "polynomial0.5") &&
-      (kernelType != "polynomial0.1") && (kernelType != "gaussian"))
+      (kernelType != "cosine") && (kernelType != "gaussian") &&
+      (kernelType != "graph") && (kernelType != "approxGraph") &&
+      (kernelType != "triangular") && (kernelType != "hyptan") &&
+      (kernelType != "inv-mq") && (kernelType != "epanechnikov"))
   {
     Log::Fatal << "Invalid kernel type: '" << kernelType << "'; must be ";
     Log::Fatal << "'linear' or 'polynomial'." << endl;
@@ -120,101 +176,132 @@
   {
     if (kernelType == "linear")
     {
-      MaxIP<LinearKernel> maxip(referenceData, (single && !naive), naive);
-      maxip.Search(k, indices, products);
+      LinearKernel lk;
+      RunMaxIP<LinearKernel>(referenceData, single, naive, base, k, indices,
+          products, lk);
     }
     else if (kernelType == "polynomial")
     {
-      MaxIP<PolynomialNoOffsetKernel<2> > maxip(referenceData,
-          (single && !naive), naive);
-      maxip.Search(k, indices, products);
+
+      PolynomialKernel pk(degree, offset);
+      RunMaxIP<PolynomialKernel>(referenceData, single, naive, base, k, indices,
+          products, pk);
     }
     else if (kernelType == "cosine")
     {
-      MaxIP<CosineDistance> maxip(referenceData, (single && !naive), naive);
-      maxip.Search(k, indices, products);
+      CosineDistance cd;
+      RunMaxIP<CosineDistance>(referenceData, single, naive, base, k, indices,
+          products, cd);
     }
-    else if (kernelType == "polynomial5")
+    else if (kernelType == "gaussian")
     {
-      MaxIP<PolynomialNoOffsetKernel<5> > maxip(referenceData,
-          (single && !naive), naive);
-      maxip.Search(k, indices, products);
+      GaussianKernel gk(bandwidth);
+      RunMaxIP<GaussianKernel>(referenceData, single, naive, base, k, indices,
+          products, gk);
     }
-    else if (kernelType == "polynomial10")
+    else if (kernelType == "epanechnikov")
     {
-      MaxIP<PolynomialNoOffsetKernel<10> > maxip(referenceData,
-          (single && !naive), naive);
-      maxip.Search(k, indices, products);
+      EpanechnikovKernel ek(bandwidth);
+      RunMaxIP<EpanechnikovKernel>(referenceData, single, naive, base, k,
+          indices, products, ek);
     }
-    else if (kernelType == "polynomial0.5")
+    else if (kernelType == "triangular")
     {
-      MaxIP<PolynomialFractionKernel<2> > maxip(referenceData,
-          (single && !naive), naive);
-      maxip.Search(k, indices, products);
+      TriangularKernel tk(bandwidth);
+      RunMaxIP<TriangularKernel>(referenceData, single, naive, base, k, indices,
+          products, tk);
     }
-    else if (kernelType == "polynomial0.1")
+/*    else if (kernelType == "inv-mq")
     {
-      MaxIP<PolynomialFractionKernel<10> > maxip(referenceData,
-          (single && !naive), naive);
-      maxip.Search(k, indices, products);
+      MaxIP<InverseMultiQuadricKernel> maxip(referenceData, (single && !naive),
+        naive, base);
+      Log::Info << "k = 1 search." << std::endl;
+      maxip.Search(1, indices, products);
+      Log::Info << "k = 2 search." << std::endl;
+      maxip.Search(2, indices, products);
+      Log::Info << "k = 5 search." << std::endl;
+      maxip.Search(5, indices, products);
+      Log::Info << "k = 10 search." << std::endl;
+      maxip.Search(10, indices, products);
     }
-    else if (kernelType == "gaussian")
+    else if (kernelType == "hyptan")
     {
-      MaxIP<GaussianStaticKernel> maxip(referenceData, (single && !naive),
-          naive);
-      maxip.Search(k, indices, products);
-    }
+      MaxIP<StaticHypTanKernel> maxip(referenceData, (single && !naive), naive,
+        base);
+      Log::Info << "k = 1 search." << std::endl;
+      maxip.Search(1, indices, products);
+      Log::Info << "k = 2 search." << std::endl;
+      maxip.Search(2, indices, products);
+      Log::Info << "k = 5 search." << std::endl;
+      maxip.Search(5, indices, products);
+      Log::Info << "k = 10 search." << std::endl;
+      maxip.Search(10, indices, products);
+    }*/
   }
   else
   {
     if (kernelType == "linear")
     {
-      MaxIP<LinearKernel> maxip(referenceData, queryData, (single && !naive),
-          naive);
-      maxip.Search(k, indices, products);
+      LinearKernel lk;
+      RunMaxIP<LinearKernel>(referenceData, queryData, single, naive, base, k,
+          indices, products, lk);
     }
     else if (kernelType == "polynomial")
     {
-      MaxIP<PolynomialNoOffsetKernel<2> > maxip(referenceData, queryData,
-          (single && !naive), naive);
-      maxip.Search(k, indices, products);
+      PolynomialKernel pk(degree, offset);
+      RunMaxIP<PolynomialKernel>(referenceData, queryData, single, naive, base,
+          k, indices, products, pk);
     }
     else if (kernelType == "cosine")
     {
-      MaxIP<CosineDistance> maxip(referenceData, queryData, (single && !naive),
-        naive);
-      maxip.Search(k, indices, products);
+      CosineDistance cd;
+      RunMaxIP<CosineDistance>(referenceData, queryData, single, naive, base, k,
+          indices, products, cd);
     }
-    else if (kernelType == "polynomial5")
+    else if (kernelType == "gaussian")
     {
-      MaxIP<PolynomialNoOffsetKernel<5> > maxip(referenceData, queryData,
-          (single && !naive), naive);
-      maxip.Search(k, indices, products);
+      GaussianKernel gk(bandwidth);
+      RunMaxIP<GaussianKernel>(referenceData, queryData, single, naive, base, k,
+          indices, products, gk);
     }
-    else if (kernelType == "polynomial10")
+    else if (kernelType == "epanechnikov")
     {
-      MaxIP<PolynomialNoOffsetKernel<10> > maxip(referenceData, queryData,
-          (single && !naive), naive);
-      maxip.Search(k, indices, products);
+      EpanechnikovKernel ek(bandwidth);
+      RunMaxIP<EpanechnikovKernel>(referenceData, queryData, single, naive,
+          base, k, indices, products, ek);
     }
-    else if (kernelType == "polynomial0.5")
+    else if (kernelType == "triangular")
     {
-      MaxIP<PolynomialFractionKernel<2> > maxip(referenceData, queryData,
-          (single && !naive), naive);
-      maxip.Search(k, indices, products);
+      TriangularKernel tk(bandwidth);
+      RunMaxIP<TriangularKernel>(referenceData, queryData, single, naive, base,
+          k, indices, products, tk);
     }
-    else if (kernelType == "polynomial0.1")
+/*    else if (kernelType == "inv-mq")
     {
-      MaxIP<PolynomialFractionKernel<10> > maxip(referenceData, queryData,
-          (single && !naive), naive);
-      maxip.Search(k, indices, products);
+      MaxIP<InverseMultiQuadricKernel> maxip(referenceData, queryData,
+          (single && !naive), naive, base);
+      Log::Info << "k = 1 search." << std::endl;
+      maxip.Search(1, indices, products);
+      Log::Info << "k = 2 search." << std::endl;
+      maxip.Search(2, indices, products);
+      Log::Info << "k = 5 search." << std::endl;
+      maxip.Search(5, indices, products);
+      Log::Info << "k = 10 search." << std::endl;
+      maxip.Search(10, indices, products);
     }
-    else if (kernelType == "gaussian")
+    else if (kernelType == "hyptan")
     {
-      MaxIP<GaussianStaticKernel> maxip(referenceData, queryData,
-          (single && !naive), naive);
-      maxip.Search(k, indices, products);
-    }
+      MaxIP<StaticHypTanKernel> maxip(referenceData, queryData,
+          (single && !naive), naive, base);
+      Log::Info << "k = 1 search." << std::endl;
+      maxip.Search(1, indices, products);
+      Log::Info << "k = 2 search." << std::endl;
+      maxip.Search(2, indices, products);
+      Log::Info << "k = 5 search." << std::endl;
+      maxip.Search(5, indices, products);
+      Log::Info << "k = 10 search." << std::endl;
+      maxip.Search(10, indices, products);
+    }*/
   }
 
   // Save output, if we were asked to.




More information about the mlpack-svn mailing list