[mlpack-git] master: Prune a node when all its points are pruned. (cc0aec0)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:28 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit cc0aec07640800b2ce0d9bf551cddb420f9b5dae
Author: Ryan Curtin <ryan at ratml.org>
Date: Mon Feb 2 21:39:47 2015 -0500
Prune a node when all its points are pruned.
>---------------------------------------------------------------
cc0aec07640800b2ce0d9bf551cddb420f9b5dae
src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 82 ++++++++++++++++----------
1 file changed, 51 insertions(+), 31 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index 8db18c7..9e26e40 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -369,23 +369,23 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
{
// The node isn't owned by a single cluster. But if it has no points and
// its children are all pruned, we may prune it too.
- if (childrenPruned && node.NumChildren() > 0)
- {
- Log::Warn << "Prune parent node " << node.Point(0) << "c" <<
-node.NumDescendants() << ".\n";
- node.Stat().Pruned() = true;
- }
- if (node.NumChildren() > 0)
- if (node.Child(0).Stat().Pruned() && !node.Child(1).Stat().Pruned())
- Log::Warn << "Node left child pruned but right child not:\n" <<
-node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
- if (node.NumChildren() > 0)
- if (node.Child(1).Stat().Pruned() && !node.Child(0).Stat().Pruned())
- Log::Warn << "Node right child pruned but left child not:\n" <<
-node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
- if (node.NumChildren() > 0)
- Log::Warn << "Node has more than 0 children: " << node << ".l\n" <<
-node.Child(0) << ", r\n" << node.Child(1) << ".\n";
+// if (childrenPruned && node.NumChildren() > 0)
+// {
+// Log::Warn << "Prune parent node " << node.Point(0) << "c" <<
+//node.NumDescendants() << ".\n";
+// node.Stat().Pruned() = true;
+// }
+// if (node.NumChildren() > 0)
+// if (node.Child(0).Stat().Pruned() && !node.Child(1).Stat().Pruned())
+// Log::Warn << "Node left child pruned but right child not:\n" <<
+//node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
+// if (node.NumChildren() > 0)
+// if (node.Child(1).Stat().Pruned() && !node.Child(0).Stat().Pruned())
+// Log::Warn << "Node right child pruned but left child not:\n" <<
+//node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
+// if (node.NumChildren() > 0)
+// Log::Warn << "Node has more than 0 children: " << node << ".l\n" <<
+//node.Child(0) << ", r\n" << node.Child(1) << ".\n";
// Adjust the bounds for next iteration.
node.Stat().MaxClusterDistance() += clusterDistances[centroids.n_cols];
@@ -427,20 +427,18 @@ node.Child(0) << ", r\n" << node.Child(1) << ".\n";
}
}*/
- // Will our bounds still work?
- if (node.Stat().MaxClusterDistance() +
- clusterDistances[node.Stat().Owner()] <
- node.Stat().SecondClusterBound() - clusterDistances[centroids.n_cols])
+ // If it was pruned because all points were pruned, we need to check
+ // individually.
+ if (node.Stat().Owner() == centroids.n_cols)
{
- // The node remains pruned. Adjust the bounds for next iteration.
- node.Stat().MaxClusterDistance() += clusterDistances[node.Stat().Owner()];
- node.Stat().SecondClusterBound() -= clusterDistances[centroids.n_cols];
+ node.Stat().Pruned() = false;
}
else
{
- // Attempt other prune.
- if (node.Stat().MaxClusterDistance() < 0.5 *
- interclusterDistances[newFromOldCentroids[node.Stat().Owner()]])
+ // Will our bounds still work?
+ if (node.Stat().MaxClusterDistance() +
+ clusterDistances[node.Stat().Owner()] <
+ node.Stat().SecondClusterBound() - clusterDistances[centroids.n_cols])
{
// The node remains pruned. Adjust the bounds for next iteration.
node.Stat().MaxClusterDistance() +=
@@ -449,9 +447,21 @@ node.Child(0) << ", r\n" << node.Child(1) << ".\n";
}
else
{
- node.Stat().Pruned() = false;
- node.Stat().MaxClusterDistance() = DBL_MAX;
- node.Stat().SecondClusterBound() = 0.0;
+ // Attempt other prune.
+ if (node.Stat().MaxClusterDistance() < 0.5 *
+ interclusterDistances[newFromOldCentroids[node.Stat().Owner()]])
+ {
+ // The node remains pruned. Adjust the bounds for next iteration.
+ node.Stat().MaxClusterDistance() +=
+ clusterDistances[node.Stat().Owner()];
+ node.Stat().SecondClusterBound() -= clusterDistances[centroids.n_cols];
+ }
+ else
+ {
+ node.Stat().Pruned() = false;
+ node.Stat().MaxClusterDistance() = DBL_MAX;
+ node.Stat().SecondClusterBound() = 0.0;
+ }
}
}
}
@@ -472,6 +482,7 @@ node.Child(0) << ", r\n" << node.Child(1) << ".\n";
// If the node wasn't pruned, try to prune individual points.
if (!node.Stat().Pruned())
{
+ bool allPruned = true;
for (size_t i = 0; i < node.NumPoints(); ++i)
{
const size_t index = node.Point(i);
@@ -485,7 +496,7 @@ node.Child(0) << ", r\n" << node.Child(1) << ".\n";
upperBounds[index] = distances(0, index);
lowerSecondBounds[index] = distances(1, index);
}
- else if (prunedLastIteration)
+ else if (prunedLastIteration && node.Stat().Owner() < centroids.n_cols)
{
owner = node.Stat().Owner();
}
@@ -547,6 +558,7 @@ prunedPoints[index] << ", lastOwner " << lastOwners[index] << ": invalid "
// Attempt to tighten the lower bound.
upperBounds[index] = metric.Evaluate(centroids.col(owner),
dataset.col(index));
+ ++distanceCalculations;
if (upperBounds[index] < lowerSecondBounds[index] -
clusterDistances[centroids.n_cols])
{
@@ -568,9 +580,17 @@ prunedPoints[index] << ", lastOwner " << lastOwners[index] << ": invalid "
else
{
prunedPoints[index] = false;
+ allPruned = false;
}
}
}
+
+ if (allPruned && node.NumPoints() > 0)
+ {
+ // Prune the entire node.
+ node.Stat().Pruned() = true;
+ node.Stat().Owner() = centroids.n_cols;
+ }
}
if (node.Stat().Pruned())
More information about the mlpack-git
mailing list