[mlpack-git] master: Prune a node when all its points are pruned. (cc0aec0)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:28 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit cc0aec07640800b2ce0d9bf551cddb420f9b5dae
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon Feb 2 21:39:47 2015 -0500

    Prune a node when all its points are pruned.


>---------------------------------------------------------------

cc0aec07640800b2ce0d9bf551cddb420f9b5dae
 src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 82 ++++++++++++++++----------
 1 file changed, 51 insertions(+), 31 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index 8db18c7..9e26e40 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -369,23 +369,23 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
     {
       // The node isn't owned by a single cluster.  But if it has no points and
       // its children are all pruned, we may prune it too.
-      if (childrenPruned && node.NumChildren() > 0)
-      {
-        Log::Warn << "Prune parent node " << node.Point(0) << "c" <<
-node.NumDescendants() << ".\n";
-        node.Stat().Pruned() = true;
-      }
-      if (node.NumChildren() > 0)
-        if (node.Child(0).Stat().Pruned() && !node.Child(1).Stat().Pruned())
-          Log::Warn << "Node left child pruned but right child not:\n" <<
-node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
-      if (node.NumChildren() > 0)
-        if (node.Child(1).Stat().Pruned() && !node.Child(0).Stat().Pruned())
-          Log::Warn << "Node right child pruned but left child not:\n" <<
-node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
-      if (node.NumChildren() > 0)
-        Log::Warn << "Node has more than 0 children: " << node << ".l\n" <<
-node.Child(0) << ", r\n" << node.Child(1) << ".\n";
+//      if (childrenPruned && node.NumChildren() > 0)
+//      {
+//        Log::Warn << "Prune parent node " << node.Point(0) << "c" <<
+//node.NumDescendants() << ".\n";
+//        node.Stat().Pruned() = true;
+//      }
+//      if (node.NumChildren() > 0)
+//        if (node.Child(0).Stat().Pruned() && !node.Child(1).Stat().Pruned())
+//          Log::Warn << "Node left child pruned but right child not:\n" <<
+//node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
+//      if (node.NumChildren() > 0)
+//        if (node.Child(1).Stat().Pruned() && !node.Child(0).Stat().Pruned())
+//          Log::Warn << "Node right child pruned but left child not:\n" <<
+//node.Child(0) << ", r\n" << node.Child(1) << ", this:\n" << node;
+//      if (node.NumChildren() > 0)
+//        Log::Warn << "Node has more than 0 children: " << node << ".l\n" <<
+//node.Child(0) << ", r\n" << node.Child(1) << ".\n";
 
       // Adjust the bounds for next iteration.
       node.Stat().MaxClusterDistance() += clusterDistances[centroids.n_cols];
@@ -427,20 +427,18 @@ node.Child(0) << ", r\n" << node.Child(1) << ".\n";
         }
       }*/
 
-    // Will our bounds still work?
-    if (node.Stat().MaxClusterDistance() +
-        clusterDistances[node.Stat().Owner()] <
-        node.Stat().SecondClusterBound() - clusterDistances[centroids.n_cols])
+    // If it was pruned because all points were pruned, we need to check
+    // individually.
+    if (node.Stat().Owner() == centroids.n_cols)
     {
-      // The node remains pruned.  Adjust the bounds for next iteration.
-      node.Stat().MaxClusterDistance() += clusterDistances[node.Stat().Owner()];
-      node.Stat().SecondClusterBound() -= clusterDistances[centroids.n_cols];
+      node.Stat().Pruned() = false;
     }
     else
     { 
-      // Attempt other prune.
-      if (node.Stat().MaxClusterDistance() < 0.5 *
-          interclusterDistances[newFromOldCentroids[node.Stat().Owner()]])
+      // Will our bounds still work?
+      if (node.Stat().MaxClusterDistance() +
+          clusterDistances[node.Stat().Owner()] <
+          node.Stat().SecondClusterBound() - clusterDistances[centroids.n_cols])
       {
         // The node remains pruned.  Adjust the bounds for next iteration.
         node.Stat().MaxClusterDistance() +=
@@ -449,9 +447,21 @@ node.Child(0) << ", r\n" << node.Child(1) << ".\n";
       }
       else
       {
-        node.Stat().Pruned() = false;
-        node.Stat().MaxClusterDistance() = DBL_MAX;
-        node.Stat().SecondClusterBound() = 0.0;
+        // Attempt other prune.
+        if (node.Stat().MaxClusterDistance() < 0.5 *
+            interclusterDistances[newFromOldCentroids[node.Stat().Owner()]])
+        {
+          // The node remains pruned.  Adjust the bounds for next iteration.
+          node.Stat().MaxClusterDistance() +=
+              clusterDistances[node.Stat().Owner()];
+          node.Stat().SecondClusterBound() -= clusterDistances[centroids.n_cols];
+        }
+        else
+        {
+          node.Stat().Pruned() = false;
+          node.Stat().MaxClusterDistance() = DBL_MAX;
+          node.Stat().SecondClusterBound() = 0.0;
+        }
       }
     }
   }
@@ -472,6 +482,7 @@ node.Child(0) << ", r\n" << node.Child(1) << ".\n";
   // If the node wasn't pruned, try to prune individual points.
   if (!node.Stat().Pruned())
   {
+    bool allPruned = true;
     for (size_t i = 0; i < node.NumPoints(); ++i)
     {
       const size_t index = node.Point(i);
@@ -485,7 +496,7 @@ node.Child(0) << ", r\n" << node.Child(1) << ".\n";
         upperBounds[index] = distances(0, index);
         lowerSecondBounds[index] = distances(1, index);
       }
-      else if (prunedLastIteration)
+      else if (prunedLastIteration && node.Stat().Owner() < centroids.n_cols)
       {
         owner = node.Stat().Owner();
       }
@@ -547,6 +558,7 @@ prunedPoints[index] << ", lastOwner " << lastOwners[index] << ": invalid "
         // Attempt to tighten the lower bound.
         upperBounds[index] = metric.Evaluate(centroids.col(owner),
                                              dataset.col(index));
+        ++distanceCalculations;
         if (upperBounds[index] < lowerSecondBounds[index] -
             clusterDistances[centroids.n_cols])
         {
@@ -568,9 +580,17 @@ prunedPoints[index] << ", lastOwner " << lastOwners[index] << ": invalid "
         else
         {
           prunedPoints[index] = false;
+          allPruned = false;
         }
       }
     }
+
+    if (allPruned && node.NumPoints() > 0)
+    {
+      // Prune the entire node.
+      node.Stat().Pruned() = true;
+      node.Stat().Owner() = centroids.n_cols;
+    }
   }
 
   if (node.Stat().Pruned())



More information about the mlpack-git mailing list