[mlpack-git] master: Prune parents when children are pruned. (cad4791)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:15 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit cad47917ac95004a12da1d058323d6955a106353
Author: Ryan Curtin <ryan at ratml.org>
Date: Mon Feb 2 20:30:37 2015 -0500
Prune parents when children are pruned.
>---------------------------------------------------------------
cad47917ac95004a12da1d058323d6955a106353
src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 33 ++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index ff56e13..305fbc1 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -240,6 +240,7 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
size_t owner = centroids.n_cols + 1;
if (!node.Stat().Pruned() && childrenPruned)
{
+ // Determine the bounds for the points.
double newMaxClusterDistance = 0.0;
double newSecondClusterBound = DBL_MAX;
for (size_t i = 0; i < node.NumPoints(); ++i)
@@ -292,6 +293,10 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
newSecondClusterBound = node.Child(i).Stat().SecondClusterBound();
}
+// if (node.NumChildren() > 0)
+// Log::Warn << "Node:\n" << node << "single owner: " << singleOwner <<
+//".l\n" << node.Child(0) << ".r\n" << node.Child(1) << ".\n";
+
// What do we do with the new cluster bounds?
if (newMaxClusterDistance > 0.0 && newMaxClusterDistance <
node.Stat().MaxClusterDistance())
@@ -337,8 +342,13 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
}
*/
+ if (node.NumPoints() == 0 && childrenPruned)
+ {
+ // Pruned because its children are all pruned.
+ node.Stat().Pruned() = true;
+ }
// What is the maximum distance to the closest cluster in the node?
- if (node.Stat().MaxClusterDistance() +
+ else if (node.Stat().MaxClusterDistance() +
clusterDistances[node.Stat().Owner()] <
node.Stat().SecondClusterBound() - clusterDistances[centroids.n_cols])
{
@@ -361,7 +371,26 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
}
else
{
- // The node isn't owned by a single cluster.
+ // The node isn't owned by a single cluster. But if it has no points and
+ // its children are all pruned, we may prune it too.
+ if (childrenPruned && node.NumChildren() > 0)
+ {
+ Log::Warn << "Prune parent node " << node.Point(0) << "c" <<
+node.NumDescendants() << ".\n";
+ node.Stat().Pruned() = true;
+ }
+ if (node.NumChildren() > 0)
+ if (node.Child(0).Stat().Pruned() && !node.Child(1).Stat().Pruned())
+ Log::Warn << "Node left child pruned but right child not:\n" <<
+node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
+ if (node.NumChildren() > 0)
+ if (node.Child(1).Stat().Pruned() && !node.Child(0).Stat().Pruned())
+ Log::Warn << "Node right child pruned but left child not:\n" <<
+node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
+ if (node.NumChildren() > 0)
+ Log::Warn << "Node has more than 0 children: " << node << ".l\n" <<
+node.Child(0) << ", r\n" << node.Child(1) << ".\n";
+
// Adjust the bounds for next iteration.
node.Stat().MaxClusterDistance() += clusterDistances[centroids.n_cols];
node.Stat().SecondClusterBound() = std::max(0.0,
More information about the mlpack-git
mailing list