[mlpack-git] master: Use distances instead of upperBounds. Remove upperBounds entirely. Minor speed improvement (not 100% sure why). (d6d0d78)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:04:07 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit d6d0d78b3843ebdac91121fc0240b10561d29609
Author: Ryan Curtin <ryan at ratml.org>
Date: Tue Feb 3 17:36:04 2015 -0500
Use distances instead of upperBounds. Remove upperBounds entirely. Minor speed improvement (not 100% sure why).
>---------------------------------------------------------------
d6d0d78b3843ebdac91121fc0240b10561d29609
src/mlpack/methods/kmeans/dtnn_kmeans.hpp | 2 --
src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 23 +++++++----------------
2 files changed, 7 insertions(+), 18 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
index f9b35bb..547842b 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
@@ -87,8 +87,6 @@ class DTNNKMeans
//! Distances that the clusters moved last iteration.
arma::vec clusterDistances;
- //! Upper bounds on cluster distances for each point.
- arma::vec upperBounds;
//! Lower bounds on second closest cluster distance for each point.
arma::vec lowerSecondBounds;
//! Indicator of whether or not the point is pruned.
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index 3853837..80a25e7 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -57,8 +57,6 @@ DTNNKMeans<MetricType, MatType, TreeType>::DTNNKMeans(const MatType& dataset,
assignments(2, dataset.n_cols)
{
prunedPoints.resize(dataset.n_cols, false); // Fill with false.
- upperBounds.set_size(dataset.n_cols);
- upperBounds.fill(DBL_MAX);
lowerSecondBounds.zeros(dataset.n_cols);
lastOwners.zeros(dataset.n_cols);
@@ -266,8 +264,8 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
else
{
// Use the cached bounds.
- if (upperBounds[node.Point(i)] > newMaxClusterDistance)
- newMaxClusterDistance = upperBounds[node.Point(i)];
+ if (distances(0, node.Point(i)) > newMaxClusterDistance)
+ newMaxClusterDistance = distances(0, node.Point(i));
if (lowerSecondBounds[node.Point(i)] < newSecondClusterBound)
newSecondClusterBound = lowerSecondBounds[node.Point(i)];
}
@@ -503,7 +501,6 @@ assignments(0, node.Point(i - 1)) << ".\n";
{
owner = assignments(0, index);
// Establish bounds, since these points were searched this iteration.
- upperBounds[index] = distances(0, index);
lowerSecondBounds[index] = distances(1, index);
}
else if (prunedLastIteration && node.Stat().Owner() < centroids.n_cols)
@@ -515,7 +512,7 @@ assignments(0, node.Point(i - 1)) << ".\n";
owner = lastOwners[index];
}
- if (upperBounds[index] + clusterDistances[owner] <
+ if (distances(0, index) + clusterDistances[owner] <
lowerSecondBounds[index] - clusterDistances[centroids.n_cols])
{
/*
@@ -537,7 +534,7 @@ assignments(0, node.Point(i - 1)) << ".\n";
if (trueOwner != owner)
{
- Log::Warn << "Point " << index << ", ub " << upperBounds[index] << ","
+ Log::Warn << "Point " << index << ", ub " << distances(0, index) << ","
<< " lb " << lowerSecondBounds[index] << ", pruned " <<
prunedPoints[index] << ", lastOwner " << lastOwners[index] << ": invalid "
"owner!\n";
@@ -547,7 +544,6 @@ prunedPoints[index] << ", lastOwner " << lastOwners[index] << ": invalid "
}
*/
prunedPoints[index] = true;
- upperBounds[index] += clusterDistances[owner];
distances(0, index) += clusterDistances[owner];
lastOwners[index] = owner;
distances(1, index) += clusterDistances[centroids.n_cols];
@@ -555,11 +551,10 @@ prunedPoints[index] << ", lastOwner " << lastOwners[index] << ": invalid "
prunedCentroids.col(owner) += dataset.col(index);
prunedCounts(owner)++;
}
- else if (upperBounds[index] + clusterDistances[owner] < 0.5 *
+ else if (distances(0, index) + clusterDistances[owner] < 0.5 *
interclusterDistances[newFromOldCentroids[owner]])
{
prunedPoints[index] = true;
- upperBounds[index] += clusterDistances[owner];
distances(0, index) += clusterDistances[owner];
lastOwners[index] = owner;
distances(1, index) += clusterDistances[centroids.n_cols];
@@ -572,25 +567,22 @@ prunedPoints[index] << ", lastOwner " << lastOwners[index] << ": invalid "
// Attempt to tighten the lower bound.
distances(0, index) = metric.Evaluate(centroids.col(owner),
dataset.col(index));
- upperBounds[index] = distances(0, index);
++distanceCalculations;
- if (upperBounds[index] < lowerSecondBounds[index] -
+ if (distances(0, index) < lowerSecondBounds[index] -
clusterDistances[centroids.n_cols])
{
prunedPoints[index] = true;
lastOwners[index] = owner;
- upperBounds[index] += clusterDistances[owner];
lowerSecondBounds[index] -= clusterDistances[centroids.n_cols];
distances(1, index) += clusterDistances[centroids.n_cols];
prunedCentroids.col(owner) += dataset.col(index);
prunedCounts(owner)++;
}
- else if (upperBounds[index] < 0.5 *
+ else if (distances(0, index) < 0.5 *
interclusterDistances[newFromOldCentroids[owner]])
{
prunedPoints[index] = true;
lastOwners[index] = owner;
- upperBounds[index] += clusterDistances[owner];
lowerSecondBounds[index] -= clusterDistances[centroids.n_cols];
distances(1, index) += clusterDistances[centroids.n_cols];
prunedCentroids.col(owner) += dataset.col(index);
@@ -621,7 +613,6 @@ prunedPoints[index] << ", lastOwner " << lastOwners[index] << ": invalid "
for (size_t i = 0; i < node.NumPoints(); ++i)
{
const size_t index = node.Point(i);
- upperBounds[index] += clusterDistances[node.Stat().Owner()];
lowerSecondBounds[index] -= clusterDistances[node.Stat().Owner()];
}
}
More information about the mlpack-git
mailing list