[mlpack-svn] r17239 - mlpack/trunk/src/mlpack/methods/kmeans
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Fri Oct 10 21:43:53 EDT 2014
Author: rcurtin
Date: Fri Oct 10 21:43:52 2014
New Revision: 17239
Log:
Fix a bug; now this algorithm is much faster.
Modified:
mlpack/trunk/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
Modified: mlpack/trunk/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp (original)
+++ mlpack/trunk/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp Fri Oct 10 21:43:52 2014
@@ -45,7 +45,8 @@
{
for (size_t j = i + 1; j < centroids.n_cols; ++j)
{
- const double dist = metric.Evaluate(centroids.col(i), centroids.col(j));
+ const double dist = metric.Evaluate(centroids.col(i), centroids.col(j)) /
+ 2.0;
++distanceCalculations;
// Update bounds, if this intra-cluster distance is smaller.
@@ -58,7 +59,7 @@
for (size_t i = 0; i < dataset.n_cols; ++i)
{
- const double m = std::max(minClusterDistances(assignments[i]) / 2.0,
+ const double m = std::max(minClusterDistances(assignments[i]),
lowerBounds(i));
// First bound test.
@@ -84,13 +85,14 @@
// The bounds failed. So test against all other clusters.
// This is Hamerly's Point-All-Ctrs() function from the paper.
+ // We have to reset the lower bound first.
+ lowerBounds(i) = DBL_MAX;
for (size_t c = 0; c < centroids.n_cols; ++c)
{
if (c == assignments[i])
continue;
const double dist = metric.Evaluate(dataset.col(i), centroids.col(c));
- ++distanceCalculations;
// Is this a better cluster? At this point, upperBounds[i] = d(i, c(i)).
if (dist < upperBounds(i))
@@ -106,6 +108,7 @@
lowerBounds(i) = dist;
}
}
+ distanceCalculations += centroids.n_cols - 1;
// Update new centroids.
newCentroids.col(assignments[i]) += dataset.col(i);
More information about the mlpack-svn mailing list