[mlpack-git] master: Recalculate upper bound before giving up on prune. This is done by Hamerly's algorithm and we obtain minor speedup as a result. (e4b3d87)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:32 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit e4b3d877560bd80575ffd841d3657325765c2cc4
Author: Ryan Curtin <ryan at ratml.org>
Date: Mon Feb 2 21:06:51 2015 -0500
Recalculate upper bound before giving up on prune. This is done by Hamerly's algorithm and we obtain minor speedup as a result.
>---------------------------------------------------------------
e4b3d877560bd80575ffd841d3657325765c2cc4
src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 27 ++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index 407b16c..8db18c7 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -124,7 +124,6 @@ double DTNNKMeans<MetricType, MatType, TreeType>::Iterate(
if (iteration != 0)
{
// Do the tree update for the previous iteration.
- Log::Warn << "Performing tree update.\n";
// Reset centroids and counts for things we will collect during pruning.
prunedCentroids.zeros(centroids.n_rows, centroids.n_cols);
@@ -545,7 +544,31 @@ prunedPoints[index] << ", lastOwner " << lastOwners[index] << ": invalid "
}
else
{
- prunedPoints[index] = false;
+ // Attempt to tighten the lower bound.
+ upperBounds[index] = metric.Evaluate(centroids.col(owner),
+ dataset.col(index));
+ if (upperBounds[index] < lowerSecondBounds[index] -
+ clusterDistances[centroids.n_cols])
+ {
+ prunedPoints[index] = true;
+ lastOwners[index] = owner;
+ lowerSecondBounds[index] -= clusterDistances[centroids.n_cols];
+ prunedCentroids.col(owner) += dataset.col(index);
+ prunedCounts(owner)++;
+ }
+ else if (upperBounds[index] < 0.5 *
+ interclusterDistances[newFromOldCentroids[owner]])
+ {
+ prunedPoints[index] = true;
+ lastOwners[index] = owner;
+ lowerSecondBounds[index] -= clusterDistances[centroids.n_cols];
+ prunedCentroids.col(owner) += dataset.col(index);
+ prunedCounts(owner)++;
+ }
+ else
+ {
+ prunedPoints[index] = false;
+ }
}
}
}
More information about the mlpack-git
mailing list