[mlpack-git] master: Prune parents when children are pruned. (cad4791)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:15 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit cad47917ac95004a12da1d058323d6955a106353
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon Feb 2 20:30:37 2015 -0500

    Prune parents when children are pruned.


>---------------------------------------------------------------

cad47917ac95004a12da1d058323d6955a106353
 src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 33 ++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index ff56e13..305fbc1 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -240,6 +240,7 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
   size_t owner = centroids.n_cols + 1;
   if (!node.Stat().Pruned() && childrenPruned)
   {
+    // Determine the bounds for the points.
     double newMaxClusterDistance = 0.0;
     double newSecondClusterBound = DBL_MAX;
     for (size_t i = 0; i < node.NumPoints(); ++i)
@@ -292,6 +293,10 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
         newSecondClusterBound = node.Child(i).Stat().SecondClusterBound();
     }
 
+//    if (node.NumChildren() > 0)
+//      Log::Warn << "Node:\n" << node << "single owner: " << singleOwner <<
+//".l\n" << node.Child(0) << ".r\n" << node.Child(1) << ".\n";
+
     // What do we do with the new cluster bounds?
     if (newMaxClusterDistance > 0.0 && newMaxClusterDistance <
         node.Stat().MaxClusterDistance())
@@ -337,8 +342,13 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
       }
 */
 
+      if (node.NumPoints() == 0 && childrenPruned)
+      {
+        // Pruned because its children are all pruned.
+        node.Stat().Pruned() = true;
+      }
       // What is the maximum distance to the closest cluster in the node?
-      if (node.Stat().MaxClusterDistance() +
+      else if (node.Stat().MaxClusterDistance() +
           clusterDistances[node.Stat().Owner()] <
           node.Stat().SecondClusterBound() - clusterDistances[centroids.n_cols])
       {
@@ -361,7 +371,26 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
     }
     else
     {
-      // The node isn't owned by a single cluster.
+      // The node isn't owned by a single cluster.  But if it has no points and
+      // its children are all pruned, we may prune it too.
+      if (childrenPruned && node.NumChildren() > 0)
+      {
+        Log::Warn << "Prune parent node " << node.Point(0) << "c" <<
+node.NumDescendants() << ".\n";
+        node.Stat().Pruned() = true;
+      }
+      if (node.NumChildren() > 0)
+        if (node.Child(0).Stat().Pruned() && !node.Child(1).Stat().Pruned())
+          Log::Warn << "Node left child pruned but right child not:\n" <<
+node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
+      if (node.NumChildren() > 0)
+        if (node.Child(1).Stat().Pruned() && !node.Child(0).Stat().Pruned())
+          Log::Warn << "Node right child pruned but left child not:\n" <<
+node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
+      if (node.NumChildren() > 0)
+        Log::Warn << "Node has more than 0 children: " << node << ".l\n" <<
+node.Child(0) << ", r\n" << node.Child(1) << ".\n";
+
       // Adjust the bounds for next iteration.
       node.Stat().MaxClusterDistance() += clusterDistances[centroids.n_cols];
       node.Stat().SecondClusterBound() = std::max(0.0,



More information about the mlpack-git mailing list