[mlpack-git] master: Add debug output; don't adjust second bound. This provides another minor speedup, but this still is nowhere near as fast as it should be with a properly working Hamerly prune. (772dc74)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:02:30 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit 772dc745c4ed525789cc0aa0a39ffbd2db642862
Author: Ryan Curtin <ryan at ratml.org>
Date: Wed Jan 21 17:08:48 2015 -0500
Add debug output; don't adjust second bound. This provides another minor speedup, but this still is nowhere near as fast as it should be with a properly working Hamerly prune.
>---------------------------------------------------------------
772dc745c4ed525789cc0aa0a39ffbd2db642862
.../methods/kmeans/dual_tree_kmeans_impl.hpp | 10 ++--
.../methods/kmeans/dual_tree_kmeans_rules_impl.hpp | 53 ++++++++++++++++++++--
2 files changed, 57 insertions(+), 6 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 747e69f..ab293af 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -223,6 +223,13 @@ void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
distances(i) = distance;
}
+ // Re-set second closest bound if necessary.
+ if (node->Stat().ClustersPruned() == size_t(-1))
+ {
+// Log::Warn << "Update second closest bound!\n";
+ node->Stat().SecondClosestBound() = node->Parent()->Stat().SecondClosestBound();
+ }
+
if (secondClosestDist < node->Stat().SecondClosestBound() - 1e-15)
{
Log::Warn << "Owner " << node->Stat().Owner() << ", mqnd " <<
@@ -272,9 +279,6 @@ node->Stat().SecondClosestBound() << " is too loose! -- " << secondClosestDist
// We have to set the closest query node to NULL because the cluster tree will
// be rebuilt.
node->Stat().ClosestQueryNode() = NULL;
- node->Stat().SecondClosestBound() -= clusterDistances[clusters];
- if (node->Stat().SecondClosestBound() < 0)
- node->Stat().SecondClosestBound() = 0;
// if (node->Begin() == 37591)
// Log::Warn << "scb for r37591c" << node->Count() << " updated to " <<
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
index 6e8cdb2..f51c380 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
@@ -113,12 +113,24 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
traversalInfo.LastReferenceNode() = &referenceNode;
+// if (referenceNode.Begin() == 37591)
+// Log::Warn << "Visit r37591c" << referenceNode.Count() << ", q" <<
+//queryNode.Begin() << "c" << queryNode.Count() << ".\n";
+
// If there's no closest query node assigned, but the parent has one, take
// that one.
if (referenceNode.Stat().ClosestQueryNode() == NULL &&
referenceNode.Parent() != NULL &&
referenceNode.Parent()->Stat().ClosestQueryNode() != NULL)
{
+// if (referenceNode.Begin() == 37591)
+// Log::Warn << "Update closest query node for r37591c" <<
+//referenceNode.Count() << " to parent's, which is "
+// << ((TreeType*)
+//referenceNode.Parent()->Stat().ClosestQueryNode())->Begin() << "c" <<
+//((TreeType*) referenceNode.Parent()->Stat().ClosestQueryNode())->Count() <<
+//".\n";
+
referenceNode.Stat().ClosestQueryNode() =
referenceNode.Parent()->Stat().ClosestQueryNode();
referenceNode.Stat().MaxQueryNodeDistance() = std::min(
@@ -127,16 +139,25 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
referenceNode.Stat().SecondClosestBound() = std::min(
referenceNode.Parent()->Stat().SecondClosestBound(),
referenceNode.Stat().SecondClosestBound());
+// if (referenceNode.Begin() == 37591)
+// Log::Warn << "Update second closest bound for r37591c" <<
+//referenceNode.Count() << " to parent's, which "
+// << "is " << referenceNode.Stat().SecondClosestBound() << ".\n";
}
double score = HamerlyTypeScore(referenceNode);
if (score == DBL_MAX)
{
+// if (referenceNode.Begin() == 37591)
+// Log::Warn << "Hamerly prune for r37591c" << referenceNode.Count() << ", q" << queryNode.Begin() << "c" <<
+//queryNode.Count() << ".\n";
if (origPruned == size_t(-1))
{
const size_t cluster = referenceNode.Stat().Owner();
newCentroids.col(cluster) += referenceNode.Stat().Centroid() *
referenceNode.NumDescendants();
+// Log::Warn << "Hamerly prune: r" << referenceNode.Begin() << "c" <<
+// referenceNode.Count() << ".\n";
counts(cluster) += referenceNode.NumDescendants();
referenceNode.Stat().ClustersPruned() += queryNode.NumDescendants();
}
@@ -158,19 +179,28 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
if (minDistance < referenceNode.Stat().MinQueryNodeDistance())
{
const double maxDistance = referenceNode.MaxDistance(&queryNode);
- // Only take the previous minimum query node distance in some
- // circumstances.
if (!IsDescendantOf(*((TreeType*)
referenceNode.Stat().ClosestQueryNode()), queryNode) &&
referenceNode.Stat().MinQueryNodeDistance() != DBL_MAX &&
referenceNode.Stat().MinQueryNodeDistance() <
- referenceNode.Stat().SecondClosestBound())
+ referenceNode.Stat().SecondClosestBound())
+ {
referenceNode.Stat().SecondClosestBound() =
referenceNode.Stat().MinQueryNodeDistance();
+// if (referenceNode.Begin() == 37591)
+// Log::Warn << "scb for r37591c" << referenceNode.Count() << " taken "
+// << "from minDistance, which is " <<
+//referenceNode.Stat().MinQueryNodeDistance() << ".\n";
+ }
++distanceCalculations;
referenceNode.Stat().ClosestQueryNode() = (void*) &queryNode;
referenceNode.Stat().MinQueryNodeDistance() = minDistance;
referenceNode.Stat().MaxQueryNodeDistance() = maxDistance;
+
+// if (referenceNode.Begin() == 37591)
+// Log::Warn << "mQND for r37591c" << referenceNode.Count() << " updated to " << minDistance << " and "
+// << "MQND to " << maxDistance << " with furthest query node " <<
+// queryNode.Begin() << "c" << queryNode.Count() << ".\n";
}
else if (IsDescendantOf(*((TreeType*)
referenceNode.Stat().ClosestQueryNode()), queryNode))
@@ -180,10 +210,18 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
referenceNode.Stat().ClosestQueryNode() = (void*) &queryNode;
referenceNode.Stat().MinQueryNodeDistance() = minDistance;
referenceNode.Stat().MaxQueryNodeDistance() = maxDistance;
+
+// if (referenceNode.Begin() == 37591)
+// Log::Warn << "mQND for r37591c" << referenceNode.Count() << " updated to " << minDistance << " and "
+// << "MQND to " << maxDistance << " via descendant with fqn " <<
+// queryNode.Begin() << "c" << queryNode.Count() << ".\n";
}
else if (minDistance < referenceNode.Stat().SecondClosestBound())
{
referenceNode.Stat().SecondClosestBound() = minDistance;
+// if (referenceNode.Begin() == 37591)
+// Log::Warn << "scb for r37591c" << referenceNode.Count() << " updated to " << minDistance << " via "
+// << queryNode.Begin() << "c" << queryNode.Count() << ".\n";
}
}
}
@@ -191,6 +229,11 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
if (score == DBL_MAX)
{
referenceNode.Stat().ClustersPruned() += queryNode.NumDescendants();
+// if (referenceNode.Begin() == 37591)
+// Log::Warn << "For r37591c" << referenceNode.Count() << ", q" <<
+//queryNode.Begin() << "c" << queryNode.Count() << " is pruned. Min distance is"
+// << " " << queryNode.MinDistance(&referenceNode) << " and scb is " <<
+//referenceNode.Stat().SecondClosestBound() << ".\n";
// Have we pruned everything?
if (referenceNode.Stat().ClustersPruned() +
@@ -244,7 +287,11 @@ double DualTreeKMeansRules<MetricType, TreeType>::HamerlyTypeScore(
TreeType& referenceNode)
{
if (referenceNode.Stat().HamerlyPruned())
+ {
+ Log::Warn << "Hamerly prune! r" << referenceNode.Begin() << "c" <<
+referenceNode.Count() << ".\n";
return DBL_MAX;
+ }
return 0.0;
}
More information about the mlpack-git
mailing list