[mlpack-git] master: Don't perform cluster 1-NN unless we have to. (ba7f71b)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:04:37 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit ba7f71b367c2bb46a99390ccea4d8ac8ab555a4d
Author: Ryan Curtin <ryan at ratml.org>
Date:   Thu Feb 19 10:22:48 2015 -0500

    Don't perform cluster 1-NN unless we have to.


>---------------------------------------------------------------

ba7f71b367c2bb46a99390ccea4d8ac8ab555a4d
 src/mlpack/methods/kmeans/dtnn_kmeans.hpp      |  2 ++
 src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 34 +++++++++++++-------------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
index f9c21c7..adf009f 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
@@ -94,6 +94,8 @@ class DTNNKMeans
 
   arma::vec clusterDistances; // The amount the clusters moved last iteration.
 
+  arma::vec interclusterDistances; // Static storage for intercluster distances.
+
   //! Update the bounds in the tree before the next iteration.
   //! centroids is the current (not yet searched) centroids.
   void UpdateTree(TreeType& node,
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index a9c64a1..a04a930 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -100,26 +100,25 @@ double DTNNKMeans<MetricType, MatType, TreeType>::Iterate(
   TreeType* centroidTree = BuildTree<TreeType>(
       const_cast<typename TreeType::Mat&>(centroids), oldFromNewCentroids);
 
-  Timer::Start("knn");
-  // Find the nearest neighbors of each of the clusters.
-  neighbor::NeighborSearch<neighbor::NearestNeighborSort, MetricType, TreeType>
-      nns(centroidTree, centroids);
-  arma::mat interclusterDistancesTemp;
-  arma::Mat<size_t> closestClusters; // We don't actually care about these.
-  nns.Search(1, closestClusters, interclusterDistancesTemp);
-  distanceCalculations += nns.BaseCases() + nns.Scores();
-
-  // We need to do the unmapping ourselves.
-  arma::vec interclusterDistances(centroids.n_cols);
-  for (size_t i = 0; i < interclusterDistances.n_elem; ++i)
-    interclusterDistances[oldFromNewCentroids[i]] =
-        interclusterDistancesTemp[i];
-
-  Timer::Stop("knn");
-
   // Reset information in the tree, if we need to.
   if (iteration > 0)
   {
+    Timer::Start("knn");
+    // Find the nearest neighbors of each of the clusters.
+    neighbor::NeighborSearch<neighbor::NearestNeighborSort, MetricType,
+        TreeType> nns(centroidTree, centroids);
+    arma::mat interclusterDistancesTemp;
+    arma::Mat<size_t> closestClusters; // We don't actually care about these.
+    nns.Search(1, closestClusters, interclusterDistancesTemp);
+    distanceCalculations += nns.BaseCases() + nns.Scores();
+
+    // We need to do the unmapping ourselves.
+    for (size_t i = 0; i < interclusterDistances.n_elem; ++i)
+      interclusterDistances[oldFromNewCentroids[i]] =
+          interclusterDistancesTemp[i];
+
+    Timer::Stop("knn");
+
     UpdateTree(*tree, oldCentroids, interclusterDistances);
 
     for (size_t i = 0; i < dataset.n_cols; ++i)
@@ -129,6 +128,7 @@ double DTNNKMeans<MetricType, MatType, TreeType>::Iterate(
   {
     // Not initialized yet.
     clusterDistances.set_size(centroids.n_cols + 1);
+    interclusterDistances.set_size(centroids.n_cols);
   }
 
   // We won't use the AllkNN class here because we have our own set of rules.



More information about the mlpack-git mailing list