[mlpack-git] master: Apply another prune, but it isn't very effective. Maybe that's an artifact of the datasets I'm currently playing with. (2a3f6f1)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:04:13 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit 2a3f6f18907e1566ffd7f988b900ed0f03e37d46
Author: Ryan Curtin <ryan at ratml.org>
Date: Sun Feb 1 23:35:11 2015 -0500
Apply another prune, but it isn't very effective. Maybe that's an artifact of the datasets I'm currently playing with.
>---------------------------------------------------------------
2a3f6f18907e1566ffd7f988b900ed0f03e37d46
src/mlpack/methods/kmeans/dtnn_kmeans.hpp | 3 +-
src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 49 +++++++++++++++++++-------
2 files changed, 39 insertions(+), 13 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
index e655e32..74dde6c 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
@@ -91,7 +91,8 @@ class DTNNKMeans
const arma::Mat<size_t>& assignments,
const arma::mat& distances,
const arma::mat& clusterDistances,
- const std::vector<size_t>& oldFromNewCentroids);
+ const std::vector<size_t>& oldFromNewCentroids,
+ const arma::mat& interclusterDistances);
void PrecalculateCentroids(TreeType& node);
};
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index f71d791..d0a7d63 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -173,7 +173,7 @@ double DTNNKMeans<MetricType, MatType, TreeType>::Iterate(
distanceCalculations += centroids.n_cols;
UpdateTree(*tree, maxMovement, oldCentroids, assignments, distances,
- clusterDistances, oldFromNewCentroids);
+ clusterDistances, oldFromNewCentroids, interclusterDistances);
// Reset centroids and counts for things we will collect during pruning.
prunedCentroids.zeros(centroids.n_rows, centroids.n_cols);
@@ -195,7 +195,8 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
const arma::Mat<size_t>& assignments,
const arma::mat& distances,
const arma::mat& clusterDistances,
- const std::vector<size_t>& oldFromNewCentroids)
+ const std::vector<size_t>& oldFromNewCentroids,
+ const arma::mat& interclusterDistances)
{
// Update iteration.
// node.Stat().Iteration() = iteration;
@@ -208,7 +209,7 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
for (size_t i = 0; i < node.NumChildren(); ++i)
{
UpdateTree(node.Child(i), tolerance, centroids, assignments, distances,
- clusterDistances, oldFromNewCentroids);
+ clusterDistances, oldFromNewCentroids, interclusterDistances);
if (!node.Child(i).Stat().Pruned())
childrenPruned = false; // Not all children are pruned.
}
@@ -297,9 +298,7 @@ oldFromNewCentroids[assignments(0, node.Point(i))] << " " <<
oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
}
}
- }
-*/
-
+ }*/
// What is the maximum distance to the closest cluster in the node?
if (node.Stat().MaxClusterDistance() +
@@ -308,6 +307,17 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
{
node.Stat().Pruned() = true;
}
+ else
+ {
+ // Also do between-cluster prune.
+ size_t newCluster = centroids.n_cols;
+ for (size_t i = 0; i < centroids.n_cols; ++i)
+ if (oldFromNewCentroids[i] == owner)
+ newCluster = i;
+ if (node.Stat().MaxClusterDistance() < 0.5 *
+ interclusterDistances[newCluster])
+ node.Stat().Pruned() = true;
+ }
// Adjust for next iteration.
node.Stat().MaxClusterDistance() +=
@@ -369,12 +379,27 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
}
else
{
- node.Stat().Pruned() = false;
- node.Stat().FirstBound() = DBL_MAX;
- node.Stat().SecondBound() = DBL_MAX;
- node.Stat().Bound() = DBL_MAX;
- node.Stat().MaxClusterDistance() = DBL_MAX;
- node.Stat().SecondClusterBound() = 0.0;
+ // Attempt other prune.
+ size_t newCluster = centroids.n_cols;
+ for (size_t i = 0; i < centroids.n_cols; ++i)
+ if (oldFromNewCentroids[i] == owner)
+ newCluster = i;
+ if (node.Stat().MaxClusterDistance() < 0.5 *
+ interclusterDistances[newCluster])
+ {
+ // The node remains pruned. Adjust the bounds for next iteration.
+ node.Stat().MaxClusterDistance() += clusterDistances[node.Stat().Owner()];
+ node.Stat().SecondClusterBound() -= clusterDistances[centroids.n_cols];
+ }
+ else
+ {
+ node.Stat().Pruned() = false;
+ node.Stat().FirstBound() = DBL_MAX;
+ node.Stat().SecondBound() = DBL_MAX;
+ node.Stat().Bound() = DBL_MAX;
+ node.Stat().MaxClusterDistance() = DBL_MAX;
+ node.Stat().SecondClusterBound() = 0.0;
+ }
}
}
else
More information about the mlpack-git
mailing list