[mlpack-git] master: Count number of points that are Hamerly pruned. (27f3a36)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:02:24 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit 27f3a36e93c29db4e4a0c67f0d63fc3ce22eca0c
Author: Ryan Curtin <ryan at ratml.org>
Date: Tue Jan 27 17:36:01 2015 -0500
Count number of points that are Hamerly pruned.
>---------------------------------------------------------------
27f3a36e93c29db4e4a0c67f0d63fc3ce22eca0c
src/mlpack/methods/kmeans/dual_tree_kmeans.hpp | 3 ++-
src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp | 18 +++++++++++++-----
2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
index 9948b10..9e0c17a 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans.hpp
@@ -67,7 +67,8 @@ class DualTreeKMeans
const arma::Col<size_t>& assignments,
const arma::mat& oldCentroids,
const arma::mat& dataset,
- const std::vector<size_t>& oldFromNew);
+ const std::vector<size_t>& oldFromNew,
+ size_t& hamerlyPruned);
};
template<typename MetricType, typename MatType>
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
index 22dd0ad..083dcdd 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp
@@ -126,8 +126,9 @@ double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate(
}
// Update the tree with the centroid movement information.
+ size_t hamerlyPruned = 0;
TreeUpdate(tree, centroids.n_cols, clusterDistances, assignments,
- oldCentroids, dataset, oldFromNewCentroids);
+ oldCentroids, dataset, oldFromNewCentroids, hamerlyPruned);
delete centroidTree;
@@ -178,7 +179,8 @@ void DualTreeKMeans<MetricType, MatType, TreeType>::TreeUpdate(
const arma::Col<size_t>& assignments,
const arma::mat& centroids,
const arma::mat& dataset,
- const std::vector<size_t>& oldFromNew)
+ const std::vector<size_t>& oldFromNew,
+ size_t& hamerlyPruned)
{
// This is basically IterationUpdate(), but pulled out to be separate from the
// actual dual-tree algorithm.
@@ -453,8 +455,11 @@ node->Stat().SecondClosestBound() << " is too loose! -- " << secondClosestDist
node->Stat().HamerlyPruned() = true;
// if (node->Begin() == 16954)
if (!node->Parent()->Stat().HamerlyPruned())
- Log::Warn << "Mark r" << node->Begin() << "c" << node->Count() << " as "
- << "Hamerly pruned.\n";
+ {
+// Log::Warn << "Mark r" << node->Begin() << "c" << node->Count() << " as "
+// << "Hamerly pruned.\n";
+ hamerlyPruned += node->NumDescendants();
+ }
}
// else
// {
@@ -499,13 +504,16 @@ node->Stat().SecondClosestBound() << " is too loose! -- " << secondClosestDist
// if (!node->Stat().HamerlyPruned())
for (size_t i = 0; i < node->NumChildren(); ++i)
TreeUpdate(&node->Child(i), clusters, clusterDistances, assignments,
- centroids, dataset, oldFromNew);
+ centroids, dataset, oldFromNew, hamerlyPruned);
node->Stat().LastSecondClosestBound() = node->Stat().SecondClosestBound() -
clusterDistances[clusters];
// This should change later, but I'm not yet sure how to do it.
node->Stat().SecondClosestBound() = DBL_MAX;
node->Stat().SecondClosestQueryNode() = NULL;
+
+ if (node->Parent() == NULL)
+ Log::Info << "Total Hamerly pruned points: " << hamerlyPruned << ".\n";
}
} // namespace kmeans
More information about the mlpack-git mailing list