[mlpack-git] master: Apply another prune, but it isn't very effective. Maybe that's an artifact of the datasets I'm currently playing with. (2a3f6f1)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:04:13 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit 2a3f6f18907e1566ffd7f988b900ed0f03e37d46
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sun Feb 1 23:35:11 2015 -0500

    Apply another prune, but it isn't very effective. Maybe that's an artifact of the datasets I'm currently playing with.


>---------------------------------------------------------------

2a3f6f18907e1566ffd7f988b900ed0f03e37d46
 src/mlpack/methods/kmeans/dtnn_kmeans.hpp      |  3 +-
 src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 49 +++++++++++++++++++-------
 2 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
index e655e32..74dde6c 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
@@ -91,7 +91,8 @@ class DTNNKMeans
                   const arma::Mat<size_t>& assignments,
                   const arma::mat& distances,
                   const arma::mat& clusterDistances,
-                  const std::vector<size_t>& oldFromNewCentroids);
+                  const std::vector<size_t>& oldFromNewCentroids,
+                  const arma::mat& interclusterDistances);
 
   void PrecalculateCentroids(TreeType& node);
 };
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index f71d791..d0a7d63 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -173,7 +173,7 @@ double DTNNKMeans<MetricType, MatType, TreeType>::Iterate(
   distanceCalculations += centroids.n_cols;
 
   UpdateTree(*tree, maxMovement, oldCentroids, assignments, distances,
-      clusterDistances, oldFromNewCentroids);
+      clusterDistances, oldFromNewCentroids, interclusterDistances);
 
   // Reset centroids and counts for things we will collect during pruning.
   prunedCentroids.zeros(centroids.n_rows, centroids.n_cols);
@@ -195,7 +195,8 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
     const arma::Mat<size_t>& assignments,
     const arma::mat& distances,
     const arma::mat& clusterDistances,
-    const std::vector<size_t>& oldFromNewCentroids)
+    const std::vector<size_t>& oldFromNewCentroids,
+    const arma::mat& interclusterDistances)
 {
   // Update iteration.
 //  node.Stat().Iteration() = iteration;
@@ -208,7 +209,7 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
   for (size_t i = 0; i < node.NumChildren(); ++i)
   {
     UpdateTree(node.Child(i), tolerance, centroids, assignments, distances,
-        clusterDistances, oldFromNewCentroids);
+        clusterDistances, oldFromNewCentroids, interclusterDistances);
     if (!node.Child(i).Stat().Pruned())
       childrenPruned = false; // Not all children are pruned.
   }
@@ -297,9 +298,7 @@ oldFromNewCentroids[assignments(0, node.Point(i))] << " " <<
 oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
           }
         }
-      }
-*/
-
+      }*/
 
       // What is the maximum distance to the closest cluster in the node?
       if (node.Stat().MaxClusterDistance() +
@@ -308,6 +307,17 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
       {
         node.Stat().Pruned() = true;
       }
+      else
+      {
+        // Also do between-cluster prune.
+        size_t newCluster = centroids.n_cols;
+        for (size_t i = 0; i < centroids.n_cols; ++i)
+          if (oldFromNewCentroids[i] == owner)
+            newCluster = i;
+        if (node.Stat().MaxClusterDistance() < 0.5 *
+            interclusterDistances[newCluster])
+          node.Stat().Pruned() = true;
+      }
 
       // Adjust for next iteration.
       node.Stat().MaxClusterDistance() +=
@@ -369,12 +379,27 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
     }
     else
     {
-      node.Stat().Pruned() = false;
-      node.Stat().FirstBound() = DBL_MAX;
-      node.Stat().SecondBound() = DBL_MAX;
-      node.Stat().Bound() = DBL_MAX;
-      node.Stat().MaxClusterDistance() = DBL_MAX;
-      node.Stat().SecondClusterBound() = 0.0;
+      // Attempt other prune.
+      size_t newCluster = centroids.n_cols;
+      for (size_t i = 0; i < centroids.n_cols; ++i)
+        if (oldFromNewCentroids[i] == owner)
+          newCluster = i;
+      if (node.Stat().MaxClusterDistance() < 0.5 *
+          interclusterDistances[newCluster])
+      {
+        // The node remains pruned.  Adjust the bounds for next iteration.
+        node.Stat().MaxClusterDistance() += clusterDistances[node.Stat().Owner()];
+        node.Stat().SecondClusterBound() -= clusterDistances[centroids.n_cols];
+      }
+      else
+      {
+        node.Stat().Pruned() = false;
+        node.Stat().FirstBound() = DBL_MAX;
+        node.Stat().SecondBound() = DBL_MAX;
+        node.Stat().Bound() = DBL_MAX;
+        node.Stat().MaxClusterDistance() = DBL_MAX;
+        node.Stat().SecondClusterBound() = 0.0;
+      }
     }
   }
   else



More information about the mlpack-git mailing list