[mlpack-svn] r12954 - mlpack/trunk/src/mlpack/methods/maxip
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Tue Jun 5 15:15:14 EDT 2012
Author: rcurtin
Date: 2012-06-05 15:15:14 -0400 (Tue, 05 Jun 2012)
New Revision: 12954
Modified:
mlpack/trunk/src/mlpack/methods/maxip/CMakeLists.txt
mlpack/trunk/src/mlpack/methods/maxip/ip_metric.hpp
mlpack/trunk/src/mlpack/methods/maxip/ip_metric_impl.hpp
mlpack/trunk/src/mlpack/methods/maxip/max_ip.hpp
mlpack/trunk/src/mlpack/methods/maxip/max_ip_impl.hpp
mlpack/trunk/src/mlpack/methods/maxip/max_ip_main.cpp
Log:
Check in mess of MaxIP code which is being cleaned little by little...
Modified: mlpack/trunk/src/mlpack/methods/maxip/CMakeLists.txt
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/CMakeLists.txt 2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/CMakeLists.txt 2012-06-05 19:15:14 UTC (rev 12954)
@@ -27,4 +27,11 @@
mlpack
)
+#add_executable(maxip_string
+# max_ip_string_main.cpp
+#)
+#target_link_libraries(maxip_string
+# mlpack
+#)
+
install(TARGETS maxip RUNTIME DESTINATION bin)
Modified: mlpack/trunk/src/mlpack/methods/maxip/ip_metric.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/ip_metric.hpp 2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/ip_metric.hpp 2012-06-05 19:15:14 UTC (rev 12954)
@@ -15,18 +15,24 @@
class IPMetric
{
public:
- typedef KernelType Kernel;
+ IPMetric() : kernel(localKernel) { }
/**
* Create the IPMetric.
*/
- IPMetric() { }
+ IPMetric(KernelType& kernel) : kernel(kernel) { }
/**
* Evaluate the metric.
*/
template<typename Vec1Type, typename Vec2Type>
- static double Evaluate(const Vec1Type& a, const Vec2Type& b);
+ double Evaluate(const Vec1Type& a, const Vec2Type& b);
+
+ const KernelType& Kernel() const { return kernel; }
+ KernelType& Kernel() { return kernel; }
+
+ KernelType localKernel;
+ KernelType& kernel;
};
}; // namespace maxip
Modified: mlpack/trunk/src/mlpack/methods/maxip/ip_metric_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/ip_metric_impl.hpp 2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/ip_metric_impl.hpp 2012-06-05 19:15:14 UTC (rev 12954)
@@ -18,13 +18,14 @@
template<typename KernelType>
template<typename Vec1Type, typename Vec2Type>
-inline double IPMetric<KernelType>::Evaluate(const Vec1Type& a, const Vec2Type& b)
+inline double IPMetric<KernelType>::Evaluate(const Vec1Type& a,
+ const Vec2Type& b)
{
// This is the metric induced by the kernel function.
// Maybe we can do better by caching some of this?
++distanceEvaluations;
- return KernelType::Evaluate(a, a) + KernelType::Evaluate(b, b) -
- 2 * KernelType::Evaluate(a, b);
+ return sqrt(kernel.Evaluate(a, a) + kernel.Evaluate(b, b) -
+ 2 * kernel.Evaluate(a, b));
}
template<>
Modified: mlpack/trunk/src/mlpack/methods/maxip/max_ip.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/max_ip.hpp 2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/max_ip.hpp 2012-06-05 19:15:14 UTC (rev 12954)
@@ -19,13 +19,17 @@
{
public:
MaxIP(const arma::mat& referenceSet,
+ KernelType& kernel,
bool single = false,
- bool naive = false);
+ bool naive = false,
+ double expansionConstant = 2.0);
MaxIP(const arma::mat& referenceSet,
const arma::mat& querySet,
+ KernelType& kernel,
bool single = false,
- bool naive = false);
+ bool naive = false,
+ double expansionConstant = 2.0);
~MaxIP();
@@ -46,6 +50,8 @@
bool naive;
+ IPMetric<KernelType> metric;
+
// Utility function. Copied too many times from too many places.
void InsertNeighbor(arma::Mat<size_t>& indices,
arma::mat& products,
Modified: mlpack/trunk/src/mlpack/methods/maxip/max_ip_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/max_ip_impl.hpp 2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/max_ip_impl.hpp 2012-06-05 19:15:14 UTC (rev 12954)
@@ -12,10 +12,171 @@
#include "max_ip_rules.hpp"
+#include <mlpack/core/kernels/gaussian_kernel.hpp>
+
namespace mlpack {
namespace maxip {
template<typename TreeType>
+void RecurseTreeCountLeaves(const TreeType& node, arma::vec& counts)
+{
+ for (size_t i = 0; i < node.NumChildren(); ++i)
+ {
+ if (node.Child(i).NumChildren() == 0)
+ counts[node.Child(i).Point()]++;
+ else
+ RecurseTreeCountLeaves<TreeType>(node.Child(i), counts);
+ }
+}
+
+template<typename TreeType>
+void CheckSelfChild(const TreeType& node)
+{
+ if (node.NumChildren() == 0)
+ return; // No self-child applicable here.
+
+ bool found = false;
+ for (size_t i = 0; i < node.NumChildren(); ++i)
+ {
+ if (node.Child(i).Point() == node.Point())
+ found = true;
+
+ // Recursively check the children.
+ CheckSelfChild(node.Child(i));
+ }
+
+ // Ensure this has its own self-child.
+ Log::Assert(found == true);
+}
+
+template<typename TreeType, typename MetricType>
+void CheckCovering(const TreeType& node)
+{
+ // Return if a leaf. No checking necessary.
+ if (node.NumChildren() == 0)
+ return;
+
+ const arma::mat& dataset = node.Dataset();
+ const size_t nodePoint = node.Point();
+
+ // To ensure that this node satisfies the covering principle, we must ensure
+ // that the distance to each child is less than pow(expansionConstant, scale).
+ double maxDistance = pow(node.ExpansionConstant(), node.Scale());
+ for (size_t i = 0; i < node.NumChildren(); ++i)
+ {
+ const size_t childPoint = node.Child(i).Point();
+
+ double distance = MetricType::Evaluate(dataset.col(nodePoint),
+ dataset.col(childPoint));
+
+ Log::Assert(distance <= maxDistance);
+
+ // Check the child.
+ CheckCovering<TreeType, MetricType>(node.Child(i));
+ }
+}
+
+template<typename TreeType, typename MetricType>
+void CheckIndividualSeparation(const TreeType& constantNode,
+ const TreeType& node)
+{
+ // Don't check points at a lower scale.
+ if (node.Scale() < constantNode.Scale())
+ return;
+
+ // If at a higher scale, recurse.
+ if (node.Scale() > constantNode.Scale())
+ {
+ for (size_t i = 0; i < node.NumChildren(); ++i)
+ {
+ // Don't recurse into leaves.
+ if (node.Child(i).NumChildren() > 0)
+ CheckIndividualSeparation<TreeType, MetricType>(constantNode,
+ node.Child(i));
+ }
+
+ return;
+ }
+
+ // Don't compare the same point against itself.
+ if (node.Point() == constantNode.Point())
+ return;
+
+ // Now we know we are at the same scale, so make the comparison.
+ const arma::mat& dataset = constantNode.Dataset();
+ const size_t constantPoint = constantNode.Point();
+ const size_t nodePoint = node.Point();
+
+ // Make sure the distance is at least the following value (in accordance with
+ // the separation principle of cover trees).
+ double minDistance = pow(constantNode.ExpansionConstant(),
+ constantNode.Scale());
+
+ double distance = MetricType::Evaluate(dataset.col(constantPoint),
+ dataset.col(nodePoint));
+
+}
+
+template<typename TreeType, typename MetricType>
+void CheckSeparation(const TreeType& node, const TreeType& root)
+{
+ // Check the separation between this point and all other points on this scale.
+ CheckIndividualSeparation<TreeType, MetricType>(node, root);
+
+ // Check the children, but only if they are not leaves. Leaves don't need to
+ // be checked.
+ for (size_t i = 0; i < node.NumChildren(); ++i)
+ if (node.Child(i).NumChildren() > 0)
+ CheckSeparation<TreeType, MetricType>(node.Child(i), root);
+}
+
+template<typename TreeType, typename MetricType>
+void GetMaxDistance(TreeType& node, TreeType& constantNode, double& best, size_t& index)
+{
+ const arma::mat& dataset = node.Dataset();
+ const double eval = MetricType::Evaluate(dataset.unsafe_col(node.Point()),
+ dataset.unsafe_col(constantNode.Point()));
+ if (eval > best)
+ {
+ best = eval;
+ index = node.Point();
+ }
+
+ // Recurse into children.
+ for (size_t i = 0; i < node.NumChildren(); ++i)
+ GetMaxDistance<TreeType, MetricType>(node.Child(i), constantNode, best, index);
+}
+
+template<typename TreeType, typename MetricType>
+void CheckMaxDistances(TreeType& node)
+{
+ // Check child distances.
+ for (size_t i = 0; i < node.NumChildren(); ++i)
+ {
+ const arma::mat& dataset = node.Dataset();
+ double eval = MetricType::Evaluate(dataset.unsafe_col(node.Point()),
+ dataset.unsafe_col(node.Child(i).Point()));
+
+// Log::Debug << "Point " << node.Point() << ", child point " << node.Child(i).Point() << "\n";
+// Log::Debug << "Actual eval " << eval << ", supposed parent distance " << node.Child(i).ParentDistance() << "\n";
+// Log::Debug << "Diff " << eval - node.Child(i).ParentDistance() << "\n";
+ Log::Assert(std::abs(eval - node.Child(i).ParentDistance()) < 1e-10);
+ }
+
+ // Check all descendants.
+ double maxDescendantDistance = 0;
+ size_t maxIndex = 0;
+ GetMaxDistance<TreeType, MetricType>(node, node, maxDescendantDistance, maxIndex);
+
+// Log::Debug << "Point " << node.Point() << ", max desc. index " << maxIndex << "\n";
+// Log::Debug << "Calculated maxDescendantDistance " << maxDescendantDistance << " given " << node.FurthestDescendantDistance() << std::endl;
+ Log::Assert(std::abs(maxDescendantDistance - node.FurthestDescendantDistance()) < 1e-10);
+
+ for (size_t i = 0; i < node.NumChildren(); ++i)
+ CheckMaxDistances<TreeType, MetricType>(node.Child(i));
+}
+
+template<typename TreeType>
struct SearchFrame
{
TreeType* node;
@@ -42,21 +203,26 @@
template<typename KernelType>
MaxIP<KernelType>::MaxIP(const arma::mat& referenceSet,
+ KernelType& kernel,
bool single,
- bool naive) :
+ bool naive,
+ double expansionConstant) :
referenceSet(referenceSet),
querySet(referenceSet), // Gotta point it somewhere...
queryTree(NULL),
single(single),
- naive(naive)
+ naive(naive),
+ metric(kernel)
{
+
Timer::Start("tree_building");
// Build the tree. Could we do this in the initialization list?
if (naive)
referenceTree = NULL;
else
- referenceTree = new tree::CoverTree<IPMetric<KernelType> >(referenceSet);
+ referenceTree = new tree::CoverTree<IPMetric<KernelType> >(referenceSet,
+ expansionConstant, &metric);
Timer::Stop("tree_building");
}
@@ -64,12 +230,15 @@
template<typename KernelType>
MaxIP<KernelType>::MaxIP(const arma::mat& referenceSet,
const arma::mat& querySet,
+ KernelType& kernel,
bool single,
- bool naive) :
+ bool naive,
+ double expansionConstant) :
referenceSet(referenceSet),
querySet(querySet),
single(single),
- naive(naive)
+ naive(naive),
+ metric(kernel)
{
Timer::Start("tree_building");
@@ -77,13 +246,42 @@
if (naive)
referenceTree = NULL;
else
- referenceTree = new tree::CoverTree<IPMetric<KernelType> >(referenceSet);
+ referenceTree = new tree::CoverTree<IPMetric<KernelType> >(referenceSet,
+ expansionConstant, &metric);
if (single || naive)
queryTree = NULL;
else
- queryTree = new tree::CoverTree<IPMetric<KernelType> >(querySet);
+ queryTree = new tree::CoverTree<IPMetric<KernelType> >(querySet,
+ expansionConstant, &metric);
+/* if (referenceTree != NULL)
+ {
+ Log::Debug << "Check counts" << std::endl;
+ // Now loop through the tree and ensure that each leaf is only created once.
+ arma::vec counts;
+ counts.zeros(referenceSet.n_elem);
+ RecurseTreeCountLeaves(*referenceTree, counts);
+
+ // Each point should only have one leaf node representing it.
+ for (size_t i = 0; i < 20; ++i)
+ Log::Assert(counts[i] == 1);
+
+ Log::Debug << "Check self child\n";
+ // Each non-leaf should have a self-child.
+ CheckSelfChild<tree::CoverTree<IPMetric<KernelType> > >(*referenceTree);
+
+ Log::Debug << "Check covering\n";
+ // Each node must satisfy the covering principle (its children must be less
+ // than or equal to a certain distance apart).
+ CheckCovering<tree::CoverTree<IPMetric<KernelType> >, IPMetric<KernelType> >(*referenceTree);
+
+ Log::Debug << "Check max distances\n";
+ // Check maximum distance of children and grandchildren.
+ CheckMaxDistances<tree::CoverTree<IPMetric<KernelType> >, IPMetric<KernelType> >(*referenceTree);
+ Log::Debug << "Done\n";
+ }*/
+
Timer::Stop("tree_building");
}
@@ -104,7 +302,7 @@
// No remapping will be necessary.
indices.set_size(k, querySet.n_cols);
products.set_size(k, querySet.n_cols);
- products.fill(DBL_MIN);
+ products.fill(-1.0);
Timer::Start("computing_products");
@@ -118,8 +316,8 @@
{
for (size_t r = 0; r < referenceSet.n_cols; ++r)
{
- const double eval = KernelType::Evaluate(querySet.unsafe_col(q),
- referenceSet.unsafe_col(r));
+ const double eval = metric.Kernel().Evaluate(querySet.unsafe_col(q),
+ referenceSet.unsafe_col(r));
++kernelEvaluations;
size_t insertPosition;
@@ -148,7 +346,7 @@
// Precalculate query products ( || q || for all q).
arma::vec queryProducts(querySet.n_cols);
for (size_t queryIndex = 0; queryIndex < querySet.n_cols; ++queryIndex)
- queryProducts[queryIndex] = sqrt(KernelType::Evaluate(
+ queryProducts[queryIndex] = sqrt(metric.Kernel().Evaluate(
querySet.unsafe_col(queryIndex), querySet.unsafe_col(queryIndex)));
kernelEvaluations += querySet.n_cols;
@@ -165,7 +363,7 @@
// Add initial frame.
SearchFrame<tree::CoverTree<IPMetric<KernelType> > > nextFrame;
nextFrame.node = referenceTree;
- nextFrame.eval = KernelType::Evaluate(querySet.unsafe_col(queryIndex),
+ nextFrame.eval = metric.Kernel().Evaluate(querySet.unsafe_col(queryIndex),
referenceSet.unsafe_col(referenceTree->Point()));
++kernelEvaluations;
@@ -200,8 +398,11 @@
childFrame.node = &(referenceNode->Child(0));
childFrame.eval = eval;
- maxProduct = eval + std::pow(childFrame.node->ExpansionConstant(),
- childFrame.node->Scale() + 1) * queryProducts[queryIndex];
+// maxProduct = eval + std::pow(childFrame.node->ExpansionConstant(),
+// childFrame.node->Scale() + 1) * queryProducts[queryIndex];
+ // Alternate pruning rule.
+ maxProduct = eval + childFrame.node->FurthestDescendantDistance() *
+ queryProducts[queryIndex];
// Add self-child if we can't prune it.
if (maxProduct > products(products.n_rows - 1, queryIndex))
@@ -215,18 +416,33 @@
for (size_t i = 1; i < referenceNode->NumChildren(); ++i)
{
+ // Before we evaluate the child, let's see if it can possibly have
+ // a better evaluation.
+ double maxChildEval = eval + queryProducts[queryIndex] *
+ (referenceNode->Child(i).ParentDistance() +
+ referenceNode->Child(i).FurthestDescendantDistance());
+
+ if (maxChildEval <= products(products.n_rows - 1, queryIndex))
+ {
+ ++numPrunes;
+ continue; // Skip this child; it can't be any better.
+ }
+
// Evaluate child.
childFrame.node = &(referenceNode->Child(i));
- childFrame.eval = KernelType::Evaluate(
+ childFrame.eval = metric.Kernel().Evaluate(
querySet.unsafe_col(queryIndex),
referenceSet.unsafe_col(referenceNode->Child(i).Point()));
++kernelEvaluations;
// Can we prune it? If we can, we can avoid putting it in the queue
// (saves time).
- double maxProduct = childFrame.eval +
- std::pow(childFrame.node->ExpansionConstant(),
- childFrame.node->Scale() + 1) * queryProducts[queryIndex];
+// double maxProduct = childFrame.eval +
+// std::pow(childFrame.node->ExpansionConstant(),
+// childFrame.node->Scale() + 1) * queryProducts[queryIndex];
+ // Alternate pruning rule.
+ maxProduct = childFrame.eval + queryProducts[queryIndex] *
+ childFrame.node->FurthestDescendantDistance();
if (maxProduct > products(products.n_rows - 1, queryIndex))
{
@@ -309,6 +525,217 @@
indices(pos, queryIndex) = neighbor;
}
+// Specialized implementation for tighter bounds for Gaussian.
+template<>
+void MaxIP<kernel::GaussianKernel>::Search(const size_t k,
+ arma::Mat<size_t>& indices,
+ arma::mat& products)
+{
+ Log::Warn << "Alternate implementation!" << std::endl;
+
+ // Terrible copypasta. Bad bad bad.
+ // No remapping will be necessary.
+ indices.set_size(k, querySet.n_cols);
+ products.set_size(k, querySet.n_cols);
+ products.fill(-1.0);
+
+ Timer::Start("computing_products");
+
+ size_t kernelEvaluations = 0;
+
+ // Naive implementation.
+ if (naive)
+ {
+ // Simple double loop. Stupid, slow, but a good benchmark.
+ for (size_t q = 0; q < querySet.n_cols; ++q)
+ {
+ for (size_t r = 0; r < referenceSet.n_cols; ++r)
+ {
+ const double eval = metric.Kernel().Evaluate(querySet.unsafe_col(q),
+ referenceSet.unsafe_col(r));
+ ++kernelEvaluations;
+
+ size_t insertPosition;
+ for (insertPosition = 0; insertPosition < indices.n_rows;
+ ++insertPosition)
+ if (eval > products(insertPosition, q))
+ break;
+
+ if (insertPosition < indices.n_rows)
+ InsertNeighbor(indices, products, q, insertPosition, r, eval);
+ }
+ }
+
+ Timer::Stop("computing_products");
+
+ Log::Info << "Kernel evaluations: " << kernelEvaluations << "." << std::endl;
+ return;
+ }
+
+ // Single-tree implementation.
+ if (single)
+ {
+ // Calculate number of pruned nodes.
+ size_t numPrunes = 0;
+
+ // Precalculate query products ( || q || for all q).
+ arma::vec queryProducts(querySet.n_cols);
+ for (size_t queryIndex = 0; queryIndex < querySet.n_cols; ++queryIndex)
+ queryProducts[queryIndex] = sqrt(metric.Kernel().Evaluate(
+ querySet.unsafe_col(queryIndex), querySet.unsafe_col(queryIndex)));
+ kernelEvaluations += querySet.n_cols;
+
+ // Screw the CoverTreeTraverser, we'll implement it by hand.
+ for (size_t queryIndex = 0; queryIndex < querySet.n_cols; ++queryIndex)
+ {
+ // Use an array of priority queues?
+ std::priority_queue<
+ SearchFrame<tree::CoverTree<IPMetric<kernel::GaussianKernel> > >,
+ std::vector<SearchFrame<tree::CoverTree<IPMetric<kernel::GaussianKernel> > > >,
+ SearchFrameCompare<tree::CoverTree<IPMetric<kernel::GaussianKernel> > > >
+ frameQueue;
+
+ // Add initial frame.
+ SearchFrame<tree::CoverTree<IPMetric<kernel::GaussianKernel> > > nextFrame;
+ nextFrame.node = referenceTree;
+ nextFrame.eval = metric.Kernel().Evaluate(querySet.unsafe_col(queryIndex),
+ referenceSet.unsafe_col(referenceTree->Point()));
+ Log::Assert(nextFrame.eval <= 1);
+ ++kernelEvaluations;
+
+ // The initial evaluation will be the best so far.
+ indices(0, queryIndex) = referenceTree->Point();
+ products(0, queryIndex) = nextFrame.eval;
+
+ frameQueue.push(nextFrame);
+
+ tree::CoverTree<IPMetric<kernel::GaussianKernel> >* referenceNode;
+ double eval;
+ double maxProduct;
+
+ while (!frameQueue.empty())
+ {
+ // Get the information for this node.
+ const SearchFrame<tree::CoverTree<IPMetric<kernel::GaussianKernel> > >&
+ frame = frameQueue.top();
+
+ referenceNode = frame.node;
+ eval = frame.eval;
+
+ // Loop through the children, seeing if we can prune them; if not, add
+ // them to the queue. The self-child is different -- it has the same
+ // parent (and therefore the same kernel evaluation).
+ if (referenceNode->NumChildren() > 0)
+ {
+ SearchFrame<tree::CoverTree<IPMetric<kernel::GaussianKernel> > >
+ childFrame;
+
+ // We must handle the self-child differently, to avoid adding it to
+ // the results twice.
+ childFrame.node = &(referenceNode->Child(0));
+ childFrame.eval = eval;
+
+ // Alternate pruning rule.
+ const double mdd = childFrame.node->FurthestDescendantDistance();
+ if (eval >= (1 - std::pow(mdd, 2.0) / 2.0))
+ maxProduct = 1;
+ else
+ maxProduct = eval * (1 - std::pow(mdd, 2.0) / 2.0) + sqrt(1 -
+ std::pow(eval, 2.0)) * mdd * sqrt(1 - std::pow(mdd, 2.0) / 4.0);
+
+ // Add self-child if we can't prune it.
+ if (maxProduct > products(products.n_rows - 1, queryIndex))
+ {
+ // But only if it has children of its own.
+ if (childFrame.node->NumChildren() > 0)
+ frameQueue.push(childFrame);
+ }
+ else
+ ++numPrunes;
+
+ for (size_t i = 1; i < referenceNode->NumChildren(); ++i)
+ {
+ // Before we evaluate the child, let's see if it can possibly have
+ // a better evaluation.
+ const double mpdd = std::min(
+ referenceNode->Child(i).ParentDistance() +
+ referenceNode->Child(i).FurthestDescendantDistance(), 2.0);
+ double maxChildEval = 1;
+ if (eval < (1 - std::pow(mpdd, 2.0) / 2.0))
+ maxChildEval = eval * (1 - std::pow(mpdd, 2.0) / 2.0) + sqrt(1 -
+ std::pow(eval, 2.0)) * mpdd * sqrt(1 - std::pow(mpdd, 2.0)
+ / 4.0);
+
+ if (maxChildEval > products(products.n_rows - 1, queryIndex))
+ {
+ // Evaluate child.
+ childFrame.node = &(referenceNode->Child(i));
+ childFrame.eval = metric.Kernel().Evaluate(
+ querySet.unsafe_col(queryIndex),
+ referenceSet.unsafe_col(referenceNode->Child(i).Point()));
+ ++kernelEvaluations;
+
+ // Can we prune it? If we can, we can avoid putting it in the
+ // queue (saves time).
+ const double cmdd = childFrame.node->FurthestDescendantDistance();
+ if (childFrame.eval > (1 - std::pow(cmdd, 2.0) / 2.0))
+ maxProduct = 1;
+ else
+ maxProduct = childFrame.eval * (1 - std::pow(cmdd, 2.0) / 2.0)
+ + sqrt(1 - std::pow(eval, 2.0)) * cmdd * sqrt(1 -
+ std::pow(cmdd, 2.0) / 4.0);
+
+ if (maxProduct > products(products.n_rows - 1, queryIndex))
+ {
+ // Good enough to recurse into. While we're at it, check the
+ // actual evaluation and see if it's an improvement.
+ if (childFrame.eval > products(products.n_rows - 1, queryIndex))
+ {
+ // This is a better result. Find out where to insert.
+ size_t insertPosition = 0;
+ for ( ; insertPosition < products.n_rows - 1;
+ ++insertPosition)
+ if (childFrame.eval > products(insertPosition, queryIndex))
+ break;
+
+ // Insert into the correct position; we are guaranteed that
+ // insertPosition is valid.
+ InsertNeighbor(indices, products, queryIndex, insertPosition,
+ childFrame.node->Point(), childFrame.eval);
+ }
+
+ // Now add this to the queue (if it has any children which may
+ // need to be recursed into).
+ if (childFrame.node->NumChildren() > 0)
+ frameQueue.push(childFrame);
+ }
+ else
+ ++numPrunes;
+ }
+ else
+ ++numPrunes;
+ }
+ }
+
+ frameQueue.pop();
+ }
+ }
+
+ Log::Info << "Pruned " << numPrunes << " nodes." << std::endl;
+ Log::Info << "Kernel evaluations: " << kernelEvaluations << "."
+ << std::endl;
+ Log::Info << "Distance evaluations: " << distanceEvaluations << "."
+ << std::endl;
+
+ Timer::Stop("computing_products");
+ return;
+ }
+
+ // Double-tree implementation.
+ Log::Fatal << "Dual-tree search not implemented yet... oops..." << std::endl;
+
+}
+
}; // namespace maxip
}; // namespace mlpack
Modified: mlpack/trunk/src/mlpack/methods/maxip/max_ip_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/maxip/max_ip_main.cpp 2012-06-05 19:14:26 UTC (rev 12953)
+++ mlpack/trunk/src/mlpack/methods/maxip/max_ip_main.cpp 2012-06-05 19:15:14 UTC (rev 12954)
@@ -13,6 +13,10 @@
#include <mlpack/core/kernels/polynomial_kernel.hpp>
#include <mlpack/core/kernels/cosine_distance.hpp>
#include <mlpack/core/kernels/gaussian_kernel.hpp>
+#include <mlpack/core/kernels/hyperbolic_tangent_kernel.hpp>
+#include <mlpack/core/kernels/triangular_kernel.hpp>
+#include <mlpack/core/kernels/epanechnikov_kernel.hpp>
+#include <mlpack/core/kernels/inverse_multi_quadric_kernel.hpp>
#include "max_ip.hpp"
@@ -48,6 +52,51 @@
PARAM_FLAG("single", "If true, single-tree search is used (as opposed to "
"dual-tree search.", "s");
+PARAM_DOUBLE("base", "Base to use during cover tree construction.", "b", 2.0);
+
+PARAM_DOUBLE("degree", "Degree of polynomial kernel.", "d", 2.0);
+PARAM_DOUBLE("offset", "Offset of kernel (for polynomial and hyptan kernels).",
+ "o", 0.0);
+PARAM_DOUBLE("bandwidth", "Bandwidth (for Gaussian and Epanechnikov kernels).",
+ "w", 1.0);
+
+template<typename KernelType>
+void RunMaxIP(const arma::mat& referenceData,
+ const bool single,
+ const bool naive,
+ const double base,
+ const size_t k,
+ arma::Mat<size_t>& indices,
+ arma::mat& products,
+ KernelType& kernel)
+{
+ // Create MaxIP object.
+ MaxIP<KernelType> maxip(referenceData, kernel, (single && !naive), naive,
+ base);
+
+ // Now search with it.
+ maxip.Search(k, indices, products);
+}
+
+template<typename KernelType>
+void RunMaxIP(const arma::mat& referenceData,
+ const arma::mat& queryData,
+ const bool single,
+ const bool naive,
+ const double base,
+ const size_t k,
+ arma::Mat<size_t>& indices,
+ arma::mat& products,
+ KernelType& kernel)
+{
+ // Create MaxIP object.
+ MaxIP<KernelType> maxip(referenceData, queryData, kernel, (single && !naive),
+ naive, base);
+
+ // Now search with it.
+ maxip.Search(k, indices, products);
+}
+
int main(int argc, char** argv)
{
distanceEvaluations = 0;
@@ -64,6 +113,12 @@
const string kernelType = CLI::GetParam<string>("kernel");
+ const double base = CLI::GetParam<double>("base");
+
+ const double degree = CLI::GetParam<double>("degree");
+ const double offset = CLI::GetParam<double>("offset");
+ const double bandwidth = CLI::GetParam<double>("bandwidth");
+
arma::mat referenceData;
arma::mat queryData;
@@ -82,9 +137,10 @@
// Check on kernel type.
if ((kernelType != "linear") && (kernelType != "polynomial") &&
- (kernelType != "cosine") && (kernelType != "polynomial5") &&
- (kernelType != "polynomial10") && (kernelType != "polynomial0.5") &&
- (kernelType != "polynomial0.1") && (kernelType != "gaussian"))
+ (kernelType != "cosine") && (kernelType != "gaussian") &&
+ (kernelType != "graph") && (kernelType != "approxGraph") &&
+ (kernelType != "triangular") && (kernelType != "hyptan") &&
+ (kernelType != "inv-mq") && (kernelType != "epanechnikov"))
{
Log::Fatal << "Invalid kernel type: '" << kernelType << "'; must be ";
Log::Fatal << "'linear' or 'polynomial'." << endl;
@@ -120,101 +176,132 @@
{
if (kernelType == "linear")
{
- MaxIP<LinearKernel> maxip(referenceData, (single && !naive), naive);
- maxip.Search(k, indices, products);
+ LinearKernel lk;
+ RunMaxIP<LinearKernel>(referenceData, single, naive, base, k, indices,
+ products, lk);
}
else if (kernelType == "polynomial")
{
- MaxIP<PolynomialNoOffsetKernel<2> > maxip(referenceData,
- (single && !naive), naive);
- maxip.Search(k, indices, products);
+
+ PolynomialKernel pk(degree, offset);
+ RunMaxIP<PolynomialKernel>(referenceData, single, naive, base, k, indices,
+ products, pk);
}
else if (kernelType == "cosine")
{
- MaxIP<CosineDistance> maxip(referenceData, (single && !naive), naive);
- maxip.Search(k, indices, products);
+ CosineDistance cd;
+ RunMaxIP<CosineDistance>(referenceData, single, naive, base, k, indices,
+ products, cd);
}
- else if (kernelType == "polynomial5")
+ else if (kernelType == "gaussian")
{
- MaxIP<PolynomialNoOffsetKernel<5> > maxip(referenceData,
- (single && !naive), naive);
- maxip.Search(k, indices, products);
+ GaussianKernel gk(bandwidth);
+ RunMaxIP<GaussianKernel>(referenceData, single, naive, base, k, indices,
+ products, gk);
}
- else if (kernelType == "polynomial10")
+ else if (kernelType == "epanechnikov")
{
- MaxIP<PolynomialNoOffsetKernel<10> > maxip(referenceData,
- (single && !naive), naive);
- maxip.Search(k, indices, products);
+ EpanechnikovKernel ek(bandwidth);
+ RunMaxIP<EpanechnikovKernel>(referenceData, single, naive, base, k,
+ indices, products, ek);
}
- else if (kernelType == "polynomial0.5")
+ else if (kernelType == "triangular")
{
- MaxIP<PolynomialFractionKernel<2> > maxip(referenceData,
- (single && !naive), naive);
- maxip.Search(k, indices, products);
+ TriangularKernel tk(bandwidth);
+ RunMaxIP<TriangularKernel>(referenceData, single, naive, base, k, indices,
+ products, tk);
}
- else if (kernelType == "polynomial0.1")
+/* else if (kernelType == "inv-mq")
{
- MaxIP<PolynomialFractionKernel<10> > maxip(referenceData,
- (single && !naive), naive);
- maxip.Search(k, indices, products);
+ MaxIP<InverseMultiQuadricKernel> maxip(referenceData, (single && !naive),
+ naive, base);
+ Log::Info << "k = 1 search." << std::endl;
+ maxip.Search(1, indices, products);
+ Log::Info << "k = 2 search." << std::endl;
+ maxip.Search(2, indices, products);
+ Log::Info << "k = 5 search." << std::endl;
+ maxip.Search(5, indices, products);
+ Log::Info << "k = 10 search." << std::endl;
+ maxip.Search(10, indices, products);
}
- else if (kernelType == "gaussian")
+ else if (kernelType == "hyptan")
{
- MaxIP<GaussianStaticKernel> maxip(referenceData, (single && !naive),
- naive);
- maxip.Search(k, indices, products);
- }
+ MaxIP<StaticHypTanKernel> maxip(referenceData, (single && !naive), naive,
+ base);
+ Log::Info << "k = 1 search." << std::endl;
+ maxip.Search(1, indices, products);
+ Log::Info << "k = 2 search." << std::endl;
+ maxip.Search(2, indices, products);
+ Log::Info << "k = 5 search." << std::endl;
+ maxip.Search(5, indices, products);
+ Log::Info << "k = 10 search." << std::endl;
+ maxip.Search(10, indices, products);
+ }*/
}
else
{
if (kernelType == "linear")
{
- MaxIP<LinearKernel> maxip(referenceData, queryData, (single && !naive),
- naive);
- maxip.Search(k, indices, products);
+ LinearKernel lk;
+ RunMaxIP<LinearKernel>(referenceData, queryData, single, naive, base, k,
+ indices, products, lk);
}
else if (kernelType == "polynomial")
{
- MaxIP<PolynomialNoOffsetKernel<2> > maxip(referenceData, queryData,
- (single && !naive), naive);
- maxip.Search(k, indices, products);
+ PolynomialKernel pk(degree, offset);
+ RunMaxIP<PolynomialKernel>(referenceData, queryData, single, naive, base,
+ k, indices, products, pk);
}
else if (kernelType == "cosine")
{
- MaxIP<CosineDistance> maxip(referenceData, queryData, (single && !naive),
- naive);
- maxip.Search(k, indices, products);
+ CosineDistance cd;
+ RunMaxIP<CosineDistance>(referenceData, queryData, single, naive, base, k,
+ indices, products, cd);
}
- else if (kernelType == "polynomial5")
+ else if (kernelType == "gaussian")
{
- MaxIP<PolynomialNoOffsetKernel<5> > maxip(referenceData, queryData,
- (single && !naive), naive);
- maxip.Search(k, indices, products);
+ GaussianKernel gk(bandwidth);
+ RunMaxIP<GaussianKernel>(referenceData, queryData, single, naive, base, k,
+ indices, products, gk);
}
- else if (kernelType == "polynomial10")
+ else if (kernelType == "epanechnikov")
{
- MaxIP<PolynomialNoOffsetKernel<10> > maxip(referenceData, queryData,
- (single && !naive), naive);
- maxip.Search(k, indices, products);
+ EpanechnikovKernel ek(bandwidth);
+ RunMaxIP<EpanechnikovKernel>(referenceData, queryData, single, naive,
+ base, k, indices, products, ek);
}
- else if (kernelType == "polynomial0.5")
+ else if (kernelType == "triangular")
{
- MaxIP<PolynomialFractionKernel<2> > maxip(referenceData, queryData,
- (single && !naive), naive);
- maxip.Search(k, indices, products);
+ TriangularKernel tk(bandwidth);
+ RunMaxIP<TriangularKernel>(referenceData, queryData, single, naive, base,
+ k, indices, products, tk);
}
- else if (kernelType == "polynomial0.1")
+/* else if (kernelType == "inv-mq")
{
- MaxIP<PolynomialFractionKernel<10> > maxip(referenceData, queryData,
- (single && !naive), naive);
- maxip.Search(k, indices, products);
+ MaxIP<InverseMultiQuadricKernel> maxip(referenceData, queryData,
+ (single && !naive), naive, base);
+ Log::Info << "k = 1 search." << std::endl;
+ maxip.Search(1, indices, products);
+ Log::Info << "k = 2 search." << std::endl;
+ maxip.Search(2, indices, products);
+ Log::Info << "k = 5 search." << std::endl;
+ maxip.Search(5, indices, products);
+ Log::Info << "k = 10 search." << std::endl;
+ maxip.Search(10, indices, products);
}
- else if (kernelType == "gaussian")
+ else if (kernelType == "hyptan")
{
- MaxIP<GaussianStaticKernel> maxip(referenceData, queryData,
- (single && !naive), naive);
- maxip.Search(k, indices, products);
- }
+ MaxIP<StaticHypTanKernel> maxip(referenceData, queryData,
+ (single && !naive), naive, base);
+ Log::Info << "k = 1 search." << std::endl;
+ maxip.Search(1, indices, products);
+ Log::Info << "k = 2 search." << std::endl;
+ maxip.Search(2, indices, products);
+ Log::Info << "k = 5 search." << std::endl;
+ maxip.Search(5, indices, products);
+ Log::Info << "k = 10 search." << std::endl;
+ maxip.Search(10, indices, products);
+ }*/
}
// Save output, if we were asked to.
More information about the mlpack-svn
mailing list