[mlpack-git] master: Shitty implementation of Elkan-type prune. It makes things faster... a little... (aa36ea4)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:46 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit aa36ea42dc1aa628b7fa1457e197aa97202595ba
Author: Ryan Curtin <ryan at ratml.org>
Date:   Thu Jan 29 17:39:11 2015 -0500

    Shitty implementation of Elkan-type prune. It makes things faster... a little...


>---------------------------------------------------------------

aa36ea42dc1aa628b7fa1457e197aa97202595ba
 src/mlpack/methods/kmeans/dual_tree_kmeans.hpp      |  3 ++-
 src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp | 18 ++++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
index ab036fa..914586f 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
@@ -76,7 +76,8 @@ class DualTreeKMeans
                   const std::vector<size_t>& oldFromNew,
                   size_t& hamerlyPruned,
                   size_t& hamerlyPrunedNodes,
-                  size_t& totalNodes);
+                  size_t& totalNodes,
+                  const arma::mat& interclusterDistances);
 };
 
 template<typename MetricType, typename MatType>
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 8887acb..dbefdc9 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -92,7 +92,7 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
   visited.zeros(dataset.n_cols);
   RulesType rules(dataset, centroids, newCentroids, counts, oldFromNewCentroids,
       iteration, clusterDistances, distances, assignments, visited,
-      distanceIteration, interclusterDistances, metric);
+      distanceIteration, hamerlyBounds, interclusterDistances, metric);
 
   // Use the dual-tree traverser.
 //typename TreeType::template DualTreeTraverser<RulesType> traverser(rules);
@@ -133,7 +133,7 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
   UpdateOwner(tree, centroids.n_cols, assignments);
   TreeUpdate(tree, centroids.n_cols, clusterDistances, assignments,
       oldCentroids, dataset, oldFromNewCentroids, hamerlyPruned,
-      hamerlyPrunedNodes, totalNodes);
+      hamerlyPrunedNodes, totalNodes, interclusterDistances);
 
   delete centroidTree;
 
@@ -230,7 +230,8 @@ void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
     const std::vector<size_t>& oldFromNew,
     size_t& hamerlyPruned,
     size_t& hamerlyPrunedNodes,
-    size_t& totalNodes)
+    size_t& totalNodes,
+    const arma::mat& interclusterDistances)
 {
   // This is basically IterationUpdate(), but pulled out to be separate from the
   // actual dual-tree algorithm.
@@ -308,6 +309,15 @@ closest << "!  It's part of node r" << node->Begin() << "c" << node->Count() <<
         if (!node->Parent()->Stat().HamerlyPruned())
           hamerlyPruned += node->NumDescendants();
       }
+      else if (node->Stat().MaxQueryNodeDistance() < 0.5 *
+          interclusterDistances(0, owner))
+      {
+        Log::Warn << "Secondary Elkan prune! r" << node->Begin() << "c" <<
+node->Count() << ".\n";
+        node->Stat().HamerlyPruned() = true;
+        if (!node->Parent()->Stat().HamerlyPruned())
+          hamerlyPruned += node->NumDescendants();
+      }
     }
     else
     {
@@ -367,7 +377,7 @@ closest << "!  It's part of node r" << node->Begin() << "c" << node->Count() <<
   {
     TreeUpdate(&node->Child(i), clusters, clusterDistances, assignments,
         centroids, dataset, oldFromNew, hamerlyPruned, hamerlyPrunedNodes,
-        totalNodes);
+        totalNodes, interclusterDistances);
     if (!node->Child(i).Stat().HamerlyPruned())
       allPruned = false;
     else if (owner == clusters)



More information about the mlpack-git mailing list