[mlpack-git] master: Shitty implementation of Elkan-type prune. It makes things faster... a little... (aa36ea4)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:46 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit aa36ea42dc1aa628b7fa1457e197aa97202595ba
Author: Ryan Curtin <ryan at ratml.org>
Date: Thu Jan 29 17:39:11 2015 -0500
Shitty implementation of Elkan-type prune. It makes things faster... a little...
>---------------------------------------------------------------
aa36ea42dc1aa628b7fa1457e197aa97202595ba
src/mlpack/methods/kmeans/dual_tree_kmeans.hpp | 3 ++-
src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp | 18 ++++++++++++++----
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
index ab036fa..914586f 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
@@ -76,7 +76,8 @@ class DualTreeKMeans
const std::vector<size_t>& oldFromNew,
size_t& hamerlyPruned,
size_t& hamerlyPrunedNodes,
- size_t& totalNodes);
+ size_t& totalNodes,
+ const arma::mat& interclusterDistances);
};
template<typename MetricType, typename MatType>
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 8887acb..dbefdc9 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -92,7 +92,7 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
visited.zeros(dataset.n_cols);
RulesType rules(dataset, centroids, newCentroids, counts, oldFromNewCentroids,
iteration, clusterDistances, distances, assignments, visited,
- distanceIteration, interclusterDistances, metric);
+ distanceIteration, hamerlyBounds, interclusterDistances, metric);
// Use the dual-tree traverser.
//typename TreeType::template DualTreeTraverser<RulesType> traverser(rules);
@@ -133,7 +133,7 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
UpdateOwner(tree, centroids.n_cols, assignments);
TreeUpdate(tree, centroids.n_cols, clusterDistances, assignments,
oldCentroids, dataset, oldFromNewCentroids, hamerlyPruned,
- hamerlyPrunedNodes, totalNodes);
+ hamerlyPrunedNodes, totalNodes, interclusterDistances);
delete centroidTree;
@@ -230,7 +230,8 @@ void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
const std::vector<size_t>& oldFromNew,
size_t& hamerlyPruned,
size_t& hamerlyPrunedNodes,
- size_t& totalNodes)
+ size_t& totalNodes,
+ const arma::mat& interclusterDistances)
{
// This is basically IterationUpdate(), but pulled out to be separate from the
// actual dual-tree algorithm.
@@ -308,6 +309,15 @@ closest << "! It's part of node r" << node->Begin() << "c" << node->Count() <<
if (!node->Parent()->Stat().HamerlyPruned())
hamerlyPruned += node->NumDescendants();
}
+ else if (node->Stat().MaxQueryNodeDistance() < 0.5 *
+ interclusterDistances(0, owner))
+ {
+ Log::Warn << "Secondary Elkan prune! r" << node->Begin() << "c" <<
+node->Count() << ".\n";
+ node->Stat().HamerlyPruned() = true;
+ if (!node->Parent()->Stat().HamerlyPruned())
+ hamerlyPruned += node->NumDescendants();
+ }
}
else
{
@@ -367,7 +377,7 @@ closest << "! It's part of node r" << node->Begin() << "c" << node->Count() <<
{
TreeUpdate(&node->Child(i), clusters, clusterDistances, assignments,
centroids, dataset, oldFromNew, hamerlyPruned, hamerlyPrunedNodes,
- totalNodes);
+ totalNodes, interclusterDistances);
if (!node->Child(i).Stat().HamerlyPruned())
allPruned = false;
else if (owner == clusters)
More information about the mlpack-git
mailing list