[mlpack-git] master: Um, I made it faster. We're well into the land of confusion. Confusion will reign until eventual refactoring. Hamerly prunes still don't appear to be working properly after a couple iterations. (79379ee)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:02:51 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit 79379ee09ebe22f29047f248fabd4e863646b49e
Author: Ryan Curtin <ryan at ratml.org>
Date: Tue Jan 27 17:18:30 2015 -0500
Um, I made it faster. We're well into the land of confusion. Confusion will reign until eventual refactoring. Hamerly prunes still don't appear to be working properly after a couple iterations.
>---------------------------------------------------------------
79379ee09ebe22f29047f248fabd4e863646b49e
src/mlpack/methods/kmeans/dual_tree_kmeans.hpp | 3 +-
.../methods/kmeans/dual_tree_kmeans_impl.hpp | 247 ++++++++++++++++-----
.../methods/kmeans/dual_tree_kmeans_rules_impl.hpp | 127 +++++------
3 files changed, 261 insertions(+), 116 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
index b7e3c61..9948b10 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
@@ -66,7 +66,8 @@ class DualTreeKMeans
const arma::vec& clusterDistances,
const arma::Col<size_t>& assignments,
const arma::mat& oldCentroids,
- const arma::mat& dataset);
+ const arma::mat& dataset,
+ const std::vector<size_t>& oldFromNew);
};
template<typename MetricType, typename MatType>
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 633151c..16680a8 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -127,7 +127,7 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
// Update the tree with the centroid movement information.
TreeUpdate(tree, centroids.n_cols, clusterDistances, assignments,
- oldCentroids, dataset);
+ oldCentroids, dataset, oldFromNewCentroids);
delete centroidTree;
@@ -177,7 +177,8 @@ void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
const arma::vec& clusterDistances,
const arma::Col<size_t>& assignments,
const arma::mat& centroids,
- const arma::mat& dataset)
+ const arma::mat& dataset,
+ const std::vector<size_t>& oldFromNew)
{
// This is basically IterationUpdate(), but pulled out to be separate from the
// actual dual-tree algorithm.
@@ -186,9 +187,15 @@ void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
node->Stat().Owner() = node->Parent()->Stat().Owner();
const size_t cluster = assignments[node->Descendant(0)];
+ if (node->Begin() == 16954)
+ Log::Info << "r16954c" << node->Count() << ", descendant 0 has cluster "
+ << cluster << ".\n";
bool allSame = true;
for (size_t i = 1; i < node->NumDescendants(); ++i)
{
+ if (node->Begin() == 16954)
+ Log::Info << "Descendant " << i << " has cluster " <<
+ assignments[node->Descendant(i)] << ".\n";
if (assignments[node->Descendant(i)] != cluster)
{
allSame = false;
@@ -198,7 +205,10 @@ void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
if (allSame)
node->Stat().Owner() = cluster;
+ else
+ node->Stat().Owner() = clusters; // No owner.
+ const bool prunedLastIteration = node->Stat().HamerlyPruned();
node->Stat().HamerlyPruned() = false;
// The easy case: this node had an owner.
@@ -221,29 +231,159 @@ void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
if (node->Stat().SecondClosestBound() == DBL_MAX && node->Parent() == NULL)
node->Stat().SecondClosestBound() = 0.0; // Don't prune the root.
- if (node->Begin() == 34654)
- Log::Warn << "r34654c" << node->Count() << " scb " <<
+ if (node->Begin() == 16954)
+ Log::Warn << "r16954c" << node->Count() << " scb " <<
node->Stat().SecondClosestBound() << " and lscb " <<
node->Stat().LastSecondClosestBound() << ".\n";
- if (node->Parent()->Stat().SecondClosestBound() != DBL_MAX &&
-node->Stat().LastSecondClosestBound() != DBL_MAX)
- node->Stat().SecondClosestBound() =
-std::max(node->Parent()->Stat().SecondClosestBound(),
-node->Stat().LastSecondClosestBound());
- else
+ // If both the second closest bound and last second closest bound are valid,
+ // we have the option of taking the better of the two bounds. But if only
+ // one is valid, take the minimum of the two (which will be the valid one).
+ // If neither is valid, then we end up with a second closest bound of
+ // DBL_MAX.
+ const double scb = node->Stat().SecondClosestBound();
+ const double lscb = node->Stat().LastSecondClosestBound();
+ if (scb != DBL_MAX && lscb != DBL_MAX)
+ node->Stat().SecondClosestBound() = std::max(scb, lscb);
+ else
+ node->Stat().SecondClosestBound() = std::min(scb, lscb);
+
+ // But if we were Hamerly pruned last time, we can't trust the second
+ // closest bound and thus have to take last iteration's.
+ if (prunedLastIteration)
+ node->Stat().SecondClosestBound() = lscb;
+ else
+ {
+ // Now, we must ensure that we don't need to take the parent's second
+ // closest bound. We surely do if the current bound is DBL_MAX. We
+ // already took care of the root node earlier so we don't need to check if
+ // Parent() is NULL.
+ if (node->Stat().SecondClosestBound() == DBL_MAX)
+ node->Stat().SecondClosestBound() =
+ node->Parent()->Stat().SecondClosestBound();
+
+ // There may exist a case where the true second closest query node got
+ // pruned by the parent, and was thus never visited with this node. This
+ // situation occurs if the second closest query node is not a descendant
+ // of the second closest query node of the parent.
+ if (node->Stat().SecondClosestQueryNode() != NULL)
+ {
+ if (node->Begin() == 16954)
+ {
+ Log::Warn << "Second closest query node is q" << ((TreeType*)
+node->Stat().SecondClosestQueryNode())->Begin() << "c" << ((TreeType*)
+node->Stat().SecondClosestQueryNode())->Count() << ", with scb " <<
+node->Stat().SecondClosestBound() << ".\n";
+ Log::Warn << "True SCB to this node should be " <<
+node->MinDistance((TreeType*) node->Stat().SecondClosestQueryNode()) << ".\n";
+ }
+ }
+
+ if (node->Stat().ClosestQueryNode() != NULL)
+ if (node->Begin() == 16954)
+ Log::Warn << "Closest query node: q" << ((TreeType*)
+node->Stat().ClosestQueryNode())->Begin() << "c" << ((TreeType*)
+node->Stat().ClosestQueryNode())->Count() << ", with MQND " <<
+node->Stat().MaxQueryNodeDistance() << " and mQND " <<
+node->Stat().MinQueryNodeDistance() << ".\n";
+
+ // If the closest query node contains more than one descendant, we have to
+ // find the closest...
+ TreeType* cqn = (TreeType*) node->Stat().ClosestQueryNode();
+ if (cqn != NULL && cqn->NumDescendants() > 1)
+ {
+ size_t closest = centroids.n_cols;
+ double closestDistance = DBL_MAX;
+ size_t secondClosest = centroids.n_cols;
+ double secondClosestDistance = DBL_MAX;
+ for (size_t i = 0; i < cqn->NumDescendants(); ++i)
+ {
+ const size_t index = cqn->Descendant(i);
+ const double distance =
+ node->MinDistance(centroids.col(oldFromNew[index]));
+// Log::Info << "Index " << index << ", distance " << distance << " (i "
+// << i + cqn->Begin() << ").\n";
+ ++distanceCalculations;
+ if (distance < closestDistance)
+ {
+ secondClosest = closest;
+ secondClosestDistance = closestDistance;
+ closest = index;
+ closestDistance = distance;
+ }
+ else if (distance < secondClosestDistance)
+ {
+ secondClosest = index;
+ secondClosestDistance = distance;
+ }
+ }
+
+ // Recalculate maximum distance.
+ const double maxDistance = node->MaxDistance(centroids.col(closest));
+ ++distanceCalculations;
+
+ node->Stat().MinQueryNodeDistance() = closestDistance;
+ node->Stat().MaxQueryNodeDistance() = maxDistance;
+ if (secondClosestDistance < node->Stat().SecondClosestBound())
+ node->Stat().SecondClosestBound() = secondClosestDistance;
+
+ if (node->Begin() == 16954)
+ Log::Warn << "After recalculation, closest for r" << node->Begin() << "c" << node->Count()
+<< " is " << closest << ", with mQND " << node->Stat().MinQueryNodeDistance() <<
+", MQND" << node->Stat().MaxQueryNodeDistance() << ", and scb " <<
+node->Stat().SecondClosestBound() << ", " << secondClosest << ".\n";
+ }
+
+ if (node->Parent() != NULL &&
+node->Parent()->Stat().SecondClosestQueryNode() != NULL)
+ if (node->Begin() == 16954)
+ Log::Warn << "Parent's (r" << node->Parent()->Begin() << "c"
+<< node->Parent()->Count() << ") second closest query node is q" << ((TreeType*)
+node->Parent()->Stat().SecondClosestQueryNode())->Begin() << "c" << ((TreeType*)
+node->Parent()->Stat().SecondClosestQueryNode())->Count() << ", with scb " <<
+node->Parent()->Stat().SecondClosestBound() << ".\n";
+
+ if (node->Parent() != NULL &&
+ node->Stat().SecondClosestQueryNode() != NULL &&
+ node->Parent()->Stat().SecondClosestQueryNode() != NULL && !IsDescendantOf((*(TreeType*)
+node->Parent()->Stat().SecondClosestQueryNode()), (*(TreeType*)
+node->Stat().SecondClosestQueryNode())))
+ {
+ // Does this even work?
+ const double minDistance = node->MinDistance((TreeType*)
+ node->Parent()->Stat().SecondClosestQueryNode());
+ if (node->Begin() == 16954)
+ Log::Warn << "r16954c" << node->Count() << " has min distance " <<
+minDistance << " to SCQN of parent (q" << ((TreeType*)
+node->Parent()->Stat().SecondClosestQueryNode())->Begin() << "c" << ((TreeType*)
+node->Parent()->Stat().SecondClosestQueryNode())->Count() << ")\n";
+
+ if (minDistance < node->Stat().SecondClosestBound())
+ {
node->Stat().SecondClosestBound() =
-std::min(node->Parent()->Stat().SecondClosestBound(),
-node->Stat().LastSecondClosestBound());
-// if (node->Begin() == 35871)
-// Log::Warn << "Update second closest bound for r35871c" <<
+ node->Parent()->Stat().SecondClosestBound();
+ node->Stat().SecondClosestQueryNode() =
+ node->Parent()->Stat().SecondClosestQueryNode();
+ if (node->Begin() == 16954)
+ Log::Warn << "r16954c" << node->Count() << " has recalculated scb "
+ << node->Stat().SecondClosestBound() << ".\n";
+ }
+ }
+ }
+
+ if (node->Begin() == 16954)
+ Log::Warn << "r16954c" << node->Count() << " now scb " <<
+node->Stat().SecondClosestBound() << " and lscb " <<
+node->Stat().LastSecondClosestBound() << ".\n";
+// if (node->Begin() == 16954)
+// Log::Warn << "Update second closest bound for r16954c" <<
//node->Count() << " to " << node->Stat().SecondClosestBound() << ", which could "
// << "have been parent's (" << node->Parent()->Stat().SecondClosestBound()
//<< ") or adjusted last iteration's (" << node->Stat().LastSecondClosestBound()
//<< ").\n";
-// if (node->Begin() == 35871)
-// Log::Warn << "r35871c" << node->Count() << " has second bound " <<
+// if (node->Begin() == 16954)
+// Log::Warn << "r16954c" << node->Count() << " has second bound " <<
//node->Stat().SecondClosestBound() << " (q" << ((TreeType*)
//node->Stat().SecondClosestQueryNode())->Begin() << "c" << ((TreeType*)
//node->Stat().SecondClosestQueryNode())->Count() << ") and parent has second "
@@ -259,62 +399,62 @@ node->Stat().SecondClosestQueryNode()), *((TreeType*)
node->Parent()->Stat().SecondClosestQueryNode())) &&
node->Parent()->Stat().SecondClosestBound() < node->Stat().SecondClosestBound())
{
-// if (node->Begin() == 35871)
-// Log::Warn << "Take second closest bound for r35871c" <<
+// if (node->Begin() == 16954)
+// Log::Warn << "Take second closest bound for r16954c" <<
//node->Count() << " from parent: " << node->Parent()->Stat().SecondClosestBound()
//<< " (was " << node->Stat().SecondClosestBound() << ").\n";
node->Stat().SecondClosestBound() =
node->Parent()->Stat().SecondClosestBound();
}*/
- if (node->Begin() == 34654)
+ if (node->Begin() == 16954)
{
- Log::Warn << "Attempt Hamerly prune on r34654c" << node->Count() <<
+ Log::Warn << "Attempt Hamerly prune on r16954c" << node->Count() <<
" with MQND " << node->Stat().MaxQueryNodeDistance() << ", scb "
<< node->Stat().SecondClosestBound() << ", owner " <<
node->Stat().Owner() << ", and clusterDistances " << clusterDistances[clusters]
<< ".\n";
}
- if (node->Stat().MaxQueryNodeDistance() < node->Stat().SecondClosestBound()
- - clusterDistances[clusters])
+ // Check the second bound. (This is time-consuming...)
+ for (size_t j = 0; j < node->NumDescendants(); ++j)
{
- node->Stat().HamerlyPruned() = true;
-// if (node->Begin() == 35871)
- Log::Warn << "Mark r" << node->Begin() << "c" << node->Count() << " as "
- << "Hamerly pruned.\n";
-
- // Check the second bound. (This is time-consuming...)
- for (size_t j = 0; j < node->NumDescendants(); ++j)
+ arma::vec distances(centroids.n_cols);
+ double secondClosestDist = DBL_MAX;
+ for (size_t i = 0; i < centroids.n_cols; ++i)
{
- arma::vec distances(centroids.n_cols);
- double secondClosestDist = DBL_MAX;
- for (size_t i = 0; i < centroids.n_cols; ++i)
- {
- const double distance = MetricType::Evaluate(centroids.col(i),
- dataset.col(node->Descendant(j)));
- if (distance < secondClosestDist && i != node->Stat().Owner())
- secondClosestDist = distance;
+ const double distance = MetricType::Evaluate(centroids.col(i),
+ dataset.col(node->Descendant(j)));
+ if (distance < secondClosestDist && i != node->Stat().Owner())
+ secondClosestDist = distance;
- distances(i) = distance;
- }
+ distances(i) = distance;
+ }
- if (secondClosestDist < node->Stat().SecondClosestBound() - 1e-15)
- {
- Log::Warn << "Owner " << node->Stat().Owner() << ", mqnd " <<
+ if (node->Begin() == 16954)
+ Log::Warn << "r16954c" << node->Count() << ": " << distances.t();
+ if (secondClosestDist < node->Stat().SecondClosestBound() - 1e-15)
+ {
+ Log::Warn << "r" << node->Begin() << "c" << node->Count() << ":\n";
+ Log::Warn << "Owner " << node->Stat().Owner() << ", mqnd " <<
node->Stat().MaxQueryNodeDistance() << ", mnqnd " <<
node->Stat().MinQueryNodeDistance() << ".\n";
- Log::Warn << distances.t();
- Log::Fatal << "Second closest bound " <<
+ Log::Warn << distances.t();
+ Log::Fatal << "Second closest bound " <<
node->Stat().SecondClosestBound() << " is too loose! -- " << secondClosestDist
- << "! (" << node->Stat().SecondClosestBound() - secondClosestDist
+ << "! (" << node->Stat().SecondClosestBound() - secondClosestDist
<< ")\n";
-
- }
-// if (node->Begin() == 35871)
-// Log::Warn << "r35871c" << node->Count() << ": " << distances.t();
}
}
+
+ if (node->Stat().MaxQueryNodeDistance() < node->Stat().SecondClosestBound()
+ - clusterDistances[clusters])
+ {
+ node->Stat().HamerlyPruned() = true;
+ if (node->Begin() == 16954)
+ Log::Warn << "Mark r" << node->Begin() << "c" << node->Count() << " as "
+ << "Hamerly pruned.\n";
+ }
// else
// {
// Log::Warn << "Failed Hamerly prune for r" << node->Begin() << "c" <<
@@ -342,6 +482,7 @@ node->Stat().SecondClosestBound() << " is too loose! -- " << secondClosestDist
// Since the node didn't have an owner, it can't be Hamerly pruned.
node->Stat().HamerlyPruned() = false;
+ node->Stat().Owner() = centroids.n_cols;
}
node->Stat().Iteration() = iteration;
@@ -350,14 +491,14 @@ node->Stat().SecondClosestBound() << " is too loose! -- " << secondClosestDist
// be rebuilt.
node->Stat().ClosestQueryNode() = NULL;
-// if (node->Begin() == 35871)
-// Log::Warn << "scb for r35871c" << node->Count() << " updated to " <<
+// if (node->Begin() == 16954)
+// Log::Warn << "scb for r16954c" << node->Count() << " updated to " <<
//node->Stat().SecondClosestBound() << ".\n";
- if (!node->Stat().HamerlyPruned())
+// if (!node->Stat().HamerlyPruned())
for (size_t i = 0; i < node->NumChildren(); ++i)
TreeUpdate(&node->Child(i), clusters, clusterDistances, assignments,
- centroids, dataset);
+ centroids, dataset, oldFromNew);
node->Stat().LastSecondClosestBound() = node->Stat().SecondClosestBound() -
clusterDistances[clusters];
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
index 0a84376..d8e9069 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
@@ -113,9 +113,9 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
traversalInfo.LastReferenceNode() = &referenceNode;
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "Visit r37591c" << referenceNode.Count() << ", q" <<
-//queryNode.Begin() << "c" << queryNode.Count() << ".\n";
+ if (referenceNode.Begin() == 16954)
+ Log::Warn << "Visit r16954c" << referenceNode.Count() << ", q" <<
+queryNode.Begin() << "c" << queryNode.Count() << ".\n";
// If there's no closest query node assigned, but the parent has one, take
// that one.
@@ -123,13 +123,13 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
referenceNode.Parent() != NULL &&
referenceNode.Parent()->Stat().ClosestQueryNode() != NULL)
{
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "Update closest query node for r37591c" <<
-//referenceNode.Count() << " to parent's, which is "
-// << ((TreeType*)
-//referenceNode.Parent()->Stat().ClosestQueryNode())->Begin() << "c" <<
-//((TreeType*) referenceNode.Parent()->Stat().ClosestQueryNode())->Count() <<
-//".\n";
+ if (referenceNode.Begin() == 16954)
+ Log::Warn << "Update closest query node for r16954c" <<
+referenceNode.Count() << " to parent's, which is "
+ << ((TreeType*)
+referenceNode.Parent()->Stat().ClosestQueryNode())->Begin() << "c" <<
+((TreeType*) referenceNode.Parent()->Stat().ClosestQueryNode())->Count() <<
+".\n";
referenceNode.Stat().ClosestQueryNode() =
referenceNode.Parent()->Stat().ClosestQueryNode();
@@ -139,8 +139,8 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
// referenceNode.Stat().SecondClosestBound() = std::min(
// referenceNode.Parent()->Stat().SecondClosestBound(),
// referenceNode.Stat().SecondClosestBound());
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "Update second closest bound for r37591c" <<
+// if (referenceNode.Begin() == 16954)
+// Log::Warn << "Update second closest bound for r16954c" <<
//referenceNode.Count() << " to parent's, which "
// << "is " << referenceNode.Stat().SecondClosestBound() << ".\n";
}
@@ -148,9 +148,9 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
double score = HamerlyTypeScore(referenceNode);
if (score == DBL_MAX)
{
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "Hamerly prune for r37591c" << referenceNode.Count() << ", q" << queryNode.Begin() << "c" <<
-//queryNode.Count() << ".\n";
+ if (referenceNode.Begin() == 16954)
+ Log::Warn << "Hamerly prune for r16954c" << referenceNode.Count() << ", q" << queryNode.Begin() << "c" <<
+queryNode.Count() << ".\n";
if (origPruned == size_t(-1))
{
const size_t cluster = referenceNode.Stat().Owner();
@@ -175,9 +175,11 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
const double minDistance = referenceNode.MinDistance(&queryNode);
++distanceCalculations;
score = PellegMooreScore(queryNode, referenceNode, minDistance);
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "mQND for r37591c" << referenceNode.Count() << " is "
-// << referenceNode.Stat().MinQueryNodeDistance() << ".\n";
+ if (referenceNode.Begin() == 16954)
+ Log::Warn << "mQND for r16954c" << referenceNode.Count() << " is "
+ << referenceNode.Stat().MinQueryNodeDistance() << "; minDistance "
+ << minDistance << ", scb " <<
+referenceNode.Stat().SecondClosestBound() << ".\n";
if (minDistance < referenceNode.Stat().MinQueryNodeDistance())
{
@@ -192,10 +194,10 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
referenceNode.Stat().MinQueryNodeDistance();
referenceNode.Stat().SecondClosestQueryNode() =
referenceNode.Stat().ClosestQueryNode();
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "scb for r37591c" << referenceNode.Count() << " taken "
-// << "from minDistance, which is " <<
-//referenceNode.Stat().MinQueryNodeDistance() << ".\n";
+ if (referenceNode.Begin() == 16954)
+ Log::Warn << "scb for r16954c" << referenceNode.Count() << " taken "
+ << "from minDistance, which is " <<
+referenceNode.Stat().MinQueryNodeDistance() << ".\n";
}
if (referenceNode.Stat().MinQueryNodeDistance() == DBL_MAX &&
@@ -204,10 +206,10 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
{
referenceNode.Stat().SecondClosestBound() = minDistance;
referenceNode.Stat().SecondClosestQueryNode() = &queryNode;
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "scb for r37591c" << referenceNode.Count() << " taken "
-// << "from minDistance for pruned query node, which is " <<
-//minDistance << ".\n";
+ if (referenceNode.Begin() == 16954)
+ Log::Warn << "scb for r16954c" << referenceNode.Count() << " taken "
+ << "from minDistance for pruned query node, which is " <<
+minDistance << ".\n";
}
if (score != DBL_MAX)
@@ -217,60 +219,60 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
referenceNode.Stat().MinQueryNodeDistance() = minDistance;
referenceNode.Stat().MaxQueryNodeDistance() = maxDistance;
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "mQND for r37591c" << referenceNode.Count() << " updated to " << minDistance << " and "
-// << "MQND to " << maxDistance << " with furthest query node " <<
-// queryNode.Begin() << "c" << queryNode.Count() << ".\n";
+ if (referenceNode.Begin() == 16954)
+ Log::Warn << "mQND for r16954c" << referenceNode.Count() << " updated to " << minDistance << " and "
+ << "MQND to " << maxDistance << " with furthest query node " <<
+ queryNode.Begin() << "c" << queryNode.Count() << ".\n";
}
}
else if (IsDescendantOf(*((TreeType*)
referenceNode.Stat().ClosestQueryNode()), queryNode))
{
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "Old closest for r37591c" << referenceNode.Count() <<
-// " is q" << ((TreeType*)
-//referenceNode.Stat().ClosestQueryNode())->Begin() << "c" << ((TreeType*)
-//referenceNode.Stat().ClosestQueryNode())->Count() << " with mQND " <<
-//referenceNode.Stat().MinQueryNodeDistance() << " and MQND " <<
-//referenceNode.Stat().MaxQueryNodeDistance() << ".\n";
+ if (referenceNode.Begin() == 16954)
+ Log::Warn << "Old closest for r16954c" << referenceNode.Count() <<
+ " is q" << ((TreeType*)
+referenceNode.Stat().ClosestQueryNode())->Begin() << "c" << ((TreeType*)
+referenceNode.Stat().ClosestQueryNode())->Count() << " with mQND " <<
+referenceNode.Stat().MinQueryNodeDistance() << " and MQND " <<
+referenceNode.Stat().MaxQueryNodeDistance() << ".\n";
const double maxDistance = referenceNode.MaxDistance(&queryNode);
++distanceCalculations;
referenceNode.Stat().ClosestQueryNode() = (void*) &queryNode;
referenceNode.Stat().MinQueryNodeDistance() = minDistance;
referenceNode.Stat().MaxQueryNodeDistance() = maxDistance;
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "mQND for r37591c" << referenceNode.Count() << " updated to " << minDistance << " and "
-// << "MQND to " << maxDistance << " via descendant with fqn " <<
-// queryNode.Begin() << "c" << queryNode.Count() << ".\n";
+ if (referenceNode.Begin() == 16954)
+ Log::Warn << "mQND for r16954c" << referenceNode.Count() << " updated to " << minDistance << " and "
+ << "MQND to " << maxDistance << " via descendant with fqn " <<
+ queryNode.Begin() << "c" << queryNode.Count() << ".\n";
}
else if (minDistance < referenceNode.Stat().SecondClosestBound())
{
referenceNode.Stat().SecondClosestBound() = minDistance;
referenceNode.Stat().SecondClosestQueryNode() = &queryNode;
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "scb for r37591c" << referenceNode.Count() << " updated to " << minDistance << " via "
-// << queryNode.Begin() << "c" << queryNode.Count() << ".\n";
+ if (referenceNode.Begin() == 16954)
+ Log::Warn << "scb for r16954c" << referenceNode.Count() << " updated to " << minDistance << " via "
+ << queryNode.Begin() << "c" << queryNode.Count() << ".\n";
}
}
}
- if (((TreeType*) referenceNode.Stat().ClosestQueryNode())->NumDescendants() > 1)
- {
- referenceNode.Stat().SecondClosestBound() =
- referenceNode.Stat().MinQueryNodeDistance();
- referenceNode.Stat().SecondClosestQueryNode() =
- referenceNode.Stat().ClosestQueryNode();
- }
+// if (((TreeType*) referenceNode.Stat().ClosestQueryNode())->NumDescendants() > 1)
+// {
+// referenceNode.Stat().SecondClosestBound() =
+// referenceNode.Stat().MinQueryNodeDistance();
+// referenceNode.Stat().SecondClosestQueryNode() =
+// referenceNode.Stat().ClosestQueryNode();
+// }
if (score == DBL_MAX)
{
referenceNode.Stat().ClustersPruned() += queryNode.NumDescendants();
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "For r37591c" << referenceNode.Count() << ", q" <<
-//queryNode.Begin() << "c" << queryNode.Count() << " is pruned. Min distance is"
-// << " " << queryNode.MinDistance(&referenceNode) << " and scb is " <<
-//referenceNode.Stat().SecondClosestBound() << ".\n";
+ if (referenceNode.Begin() == 16954)
+ Log::Warn << "For r16954c" << referenceNode.Count() << ", q" <<
+queryNode.Begin() << "c" << queryNode.Count() << " is pruned. Min distance is"
+ << " " << queryNode.MinDistance(&referenceNode) << " and scb is " <<
+referenceNode.Stat().SecondClosestBound() << ".\n";
// Have we pruned everything?
if (referenceNode.Stat().ClustersPruned() +
@@ -325,7 +327,8 @@ double DualTreeKMeansRules<MetricType, TreeType>::HamerlyTypeScore(
{
if (referenceNode.Stat().HamerlyPruned())
{
- Log::Warn << "Hamerly prune! r" << referenceNode.Begin() << "c" <<
+ if (referenceNode.Begin() == 16954)
+ Log::Info << "Hamerly prune! r" << referenceNode.Begin() << "c" <<
referenceNode.Count() << ".\n";
return DBL_MAX;
}
@@ -372,8 +375,8 @@ double DualTreeKMeansRules<MetricType, TreeType>::ElkanTypeScore(
queryNode)) &&
(&queryNode != (TreeType*) referenceNode.Stat().ClosestQueryNode()))
{
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "Elkan prune r37591c" << referenceNode.Count() << ", q" <<
+// if (referenceNode.Begin() == 16954)
+// Log::Warn << "Elkan prune r16954c" << referenceNode.Count() << ", q" <<
//queryNode.Begin() << "c" << queryNode.Count() << "!\n";
// Then we can conclude d_max(best(N_r), N_r) <= d_min(N_q, N_r) which
// means that N_q cannot possibly hold any clusters that own any points in
@@ -393,14 +396,14 @@ double DualTreeKMeansRules<MetricType, TreeType>::PellegMooreScore(
// If the minimum distance to the node is greater than the bound, then every
// cluster in the query node cannot possibly be the nearest neighbor of any of
// the points in the reference node.
-// if (referenceNode.Begin() == 37591)
-// Log::Warn << "Pelleg-Moore prune attempt r37591c" << referenceNode.Count() << ", "
+// if (referenceNode.Begin() == 16954)
+// Log::Warn << "Pelleg-Moore prune attempt r16954c" << referenceNode.Count() << ", "
// << "q" << queryNode.Begin() << "c" << queryNode.Count() << "; "
// << "minDistance " << minDistance << ", MQND " <<
//referenceNode.Stat().MaxQueryNodeDistance() << ".\n";
if (minDistance > referenceNode.Stat().MaxQueryNodeDistance())
{
-// if (referenceNode.Begin() == 37591)
+// if (referenceNode.Begin() == 16954)
// Log::Warn << "Attempt successful!\n";
return DBL_MAX;
}
More information about the mlpack-git
mailing list