[mlpack-git] master: Cache mappings from new to old. Slows it down...? Not sure why there's a slowdown. Doesn't make much sense to me. (33ebcd5)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:03:34 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44
>---------------------------------------------------------------
commit 33ebcd5b2c4346b3f9d7a15ae194d815218c885e
Author: Ryan Curtin <ryan at ratml.org>
Date: Mon Feb 2 11:27:04 2015 -0500
Cache mappings from new to old. Slows it down...? Not sure why there's a slowdown. Doesn't make much sense to me.
>---------------------------------------------------------------
33ebcd5b2c4346b3f9d7a15ae194d815218c885e
src/mlpack/methods/kmeans/dtnn_kmeans.hpp | 3 ++-
src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 29 ++++++++++++++------------
2 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
index 74dde6c..211eda3 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
@@ -92,7 +92,8 @@ class DTNNKMeans
const arma::mat& distances,
const arma::mat& clusterDistances,
const std::vector<size_t>& oldFromNewCentroids,
- const arma::mat& interclusterDistances);
+ const arma::mat& interclusterDistances,
+ const std::vector<size_t>& newFromOldCentroids);
void PrecalculateCentroids(TreeType& node);
};
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index d0a7d63..02471be 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -94,6 +94,14 @@ double DTNNKMeans<MetricType, MatType, TreeType>::Iterate(
std::vector<size_t> oldFromNewCentroids;
TreeType* centroidTree = BuildTree<TreeType>(
const_cast<typename TreeType::Mat&>(centroids), oldFromNewCentroids);
+ // Calculate new from old mappings.
+ std::vector<size_t> newFromOldCentroids;
+ if (tree::TreeTraits<TreeType>::RearrangesDataset)
+ {
+ newFromOldCentroids.resize(centroids.n_cols);
+ for (size_t i = 0; i < centroids.n_cols; ++i)
+ newFromOldCentroids[oldFromNewCentroids[i]] = i;
+ }
// Find the nearest neighbors of each of the clusters.
neighbor::NeighborSearch<neighbor::NearestNeighborSort, MetricType, TreeType>
@@ -173,7 +181,8 @@ double DTNNKMeans<MetricType, MatType, TreeType>::Iterate(
distanceCalculations += centroids.n_cols;
UpdateTree(*tree, maxMovement, oldCentroids, assignments, distances,
- clusterDistances, oldFromNewCentroids, interclusterDistances);
+ clusterDistances, oldFromNewCentroids, interclusterDistances,
+ newFromOldCentroids);
// Reset centroids and counts for things we will collect during pruning.
prunedCentroids.zeros(centroids.n_rows, centroids.n_cols);
@@ -196,7 +205,8 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
const arma::mat& distances,
const arma::mat& clusterDistances,
const std::vector<size_t>& oldFromNewCentroids,
- const arma::mat& interclusterDistances)
+ const arma::mat& interclusterDistances,
+ const std::vector<size_t>& newFromOldCentroids)
{
// Update iteration.
// node.Stat().Iteration() = iteration;
@@ -209,7 +219,8 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
for (size_t i = 0; i < node.NumChildren(); ++i)
{
UpdateTree(node.Child(i), tolerance, centroids, assignments, distances,
- clusterDistances, oldFromNewCentroids, interclusterDistances);
+ clusterDistances, oldFromNewCentroids, interclusterDistances,
+ newFromOldCentroids);
if (!node.Child(i).Stat().Pruned())
childrenPruned = false; // Not all children are pruned.
}
@@ -310,12 +321,8 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
else
{
// Also do between-cluster prune.
- size_t newCluster = centroids.n_cols;
- for (size_t i = 0; i < centroids.n_cols; ++i)
- if (oldFromNewCentroids[i] == owner)
- newCluster = i;
if (node.Stat().MaxClusterDistance() < 0.5 *
- interclusterDistances[newCluster])
+ interclusterDistances[newFromOldCentroids[owner]])
node.Stat().Pruned() = true;
}
@@ -380,12 +387,8 @@ oldFromNewCentroids[assignments(0, node.Point(i - 1))] << ".\n";
else
{
// Attempt other prune.
- size_t newCluster = centroids.n_cols;
- for (size_t i = 0; i < centroids.n_cols; ++i)
- if (oldFromNewCentroids[i] == owner)
- newCluster = i;
if (node.Stat().MaxClusterDistance() < 0.5 *
- interclusterDistances[newCluster])
+ interclusterDistances[newFromOldCentroids[node.Stat().Owner()]])
{
// The node remains pruned. Adjust the bounds for next iteration.
node.Stat().MaxClusterDistance() += clusterDistances[node.Stat().Owner()];
More information about the mlpack-git
mailing list