[mlpack-git] master: Remove debug output; implement Pelleg-Moore prune. Now this is faster than the regular dtkm branch. Sometimes starting over is a good thing. (50d4a32)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:02:53 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit 50d4a325c7fc451700f7ac5535b86fe447e3a447
Author: Ryan Curtin <ryan at ratml.org>
Date: Wed Jan 28 16:21:58 2015 -0500
Remove debug output; implement Pelleg-Moore prune. Now this is faster than the regular dtkm branch. Sometimes starting over is a good thing.
>---------------------------------------------------------------
50d4a325c7fc451700f7ac5535b86fe447e3a447
.../methods/kmeans/dual_tree_kmeans_rules_impl.hpp | 40 ++++++++--------------
1 file changed, 14 insertions(+), 26 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
index 4615b80..ced3ffa 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
@@ -50,9 +50,6 @@ inline force_inline double DualTreeKMeansRules<MetricType, TreeType>::BaseCase(
const size_t queryIndex,
const size_t referenceIndex)
{
- if (referenceIndex == 26038)
- Log::Warn << "Visit 26038 with query " << queryIndex << ".\n";
-
// Collect the number of clusters that have been pruned during the traversal.
// The ternary operator may not be necessary.
const size_t traversalPruned = (traversalInfo.LastReferenceNode() != NULL) ?
@@ -75,26 +72,17 @@ inline force_inline double DualTreeKMeansRules<MetricType, TreeType>::BaseCase(
distanceIteration[referenceIndex] = iteration;
distances[referenceIndex] = distance;
assignments[referenceIndex] = mappings[queryIndex];
- if (referenceIndex == 26038)
- Log::Warn << "assignment for point " << referenceIndex << " set to " <<
-mappings[queryIndex] << ".\n";
}
else if (distance < distances[referenceIndex])
{
distances[referenceIndex] = distance;
assignments[referenceIndex] = mappings[queryIndex];
- if (referenceIndex == 26038)
- Log::Warn << "assignment for point " << referenceIndex << " set to " <<
-mappings[queryIndex] << ".\n";
}
++visited[referenceIndex];
if (visited[referenceIndex] + traversalPruned == centroids.n_cols)
{
- if (referenceIndex == 26038)
- Log::Warn << "assignment for point " << referenceIndex << " committed to " <<
-assignments[referenceIndex] << ".\n";
newCentroids.col(assignments[referenceIndex]) +=
dataset.col(referenceIndex);
++counts(assignments[referenceIndex]);
@@ -117,12 +105,7 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
TreeType& queryNode,
TreeType& referenceNode)
{
-// if (referenceNode.Begin() == 2432)
-// Log::Warn << "Visit q" << queryNode.Begin() << "c" << queryNode.Count() <<
-//", r" << referenceNode.Begin() << "c" << referenceNode.Count() << ".\n";
-
// This won't happen with the root since it is explicitly set to 0.
- const size_t origPruned = referenceNode.Stat().ClustersPruned();
if (referenceNode.Stat().ClustersPruned() == size_t(-1))
referenceNode.Stat().ClustersPruned() =
referenceNode.Parent()->Stat().ClustersPruned();
@@ -132,9 +115,6 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
// Add to centroids if necessary.
if (referenceNode.Stat().MinQueryNodeDistance() == DBL_MAX /* hack */)
{
- if (referenceNode.Begin() == 26038)
- Log::Warn << "Add centroid mass for r26038c" << referenceNode.Count() <<
-".\n";
newCentroids.col(referenceNode.Stat().Owner()) +=
referenceNode.NumDescendants() * referenceNode.Stat().Centroid();
counts(referenceNode.Stat().Owner()) += referenceNode.NumDescendants();
@@ -154,9 +134,6 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
// Is this closer than the current best query node?
if (distances.Lo() < referenceNode.Stat().MinQueryNodeDistance())
{
- if (referenceNode.Begin() == 26038)
- Log::Warn << "r26038c" << referenceNode.Count() << ": new CQN " <<
-queryNode.Begin() << "c" << queryNode.Count() << ".\n";
// This is the new closest node.
referenceNode.Stat().SecondClosestQueryNode() =
referenceNode.Stat().ClosestQueryNode();
@@ -170,14 +147,25 @@ queryNode.Begin() << "c" << queryNode.Count() << ".\n";
}
else if (distances.Lo() < referenceNode.Stat().SecondMinQueryNodeDistance())
{
- if (referenceNode.Begin() == 26038)
- Log::Warn << "r26038c" << referenceNode.Count() << ": new SCQN " <<
-queryNode.Begin() << "c" << queryNode.Count() << ".\n";
// This is the new second closest node.
referenceNode.Stat().SecondClosestQueryNode() = (void*) &queryNode;
referenceNode.Stat().SecondMinQueryNodeDistance() = distances.Lo();
referenceNode.Stat().SecondMaxQueryNodeDistance() = distances.Hi();
}
+ else if (distances.Lo() > referenceNode.Stat().SecondMaxQueryNodeDistance())
+ {
+ // This is a Pelleg-Moore type prune.
+// Log::Warn << "Pelleg-Moore prune: " << distances.Lo() << "/" <<
+//distances.Hi() << ", r" << referenceNode.Begin() << "c" << referenceNode.Count()
+//<< ", q" << queryNode.Begin() << "c" << queryNode.Count() << "; mQND " <<
+//referenceNode.Stat().MinQueryNodeDistance() << ", MQND " <<
+//referenceNode.Stat().MaxQueryNodeDistance() << ", smQND " <<
+//referenceNode.Stat().SecondMinQueryNodeDistance() << ", sMQND " <<
+//referenceNode.Stat().SecondMaxQueryNodeDistance() << ".\n";
+
+ referenceNode.Stat().ClustersPruned() += queryNode.NumDescendants();
+ return DBL_MAX;
+ }
return 0.0; // No pruning allowed at this time.
}
More information about the mlpack-git
mailing list