[mlpack-git] master: Unmap intercluster distances (oops). Minor speedup. (e650149)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 12 16:04:54 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eddd7167d69b6c88b271ef2e51d1c20e13f1acd8...70342dd8e5c17e0c164cfb8189748671e9c0dd44

>---------------------------------------------------------------

commit e650149355a889c04ac6b52698a3a09c211b7d64
Author: Ryan Curtin <ryan at ratml.org>
Date:   Thu Feb 19 10:20:06 2015 -0500

    Unmap intercluster distances (oops). Minor speedup.


>---------------------------------------------------------------

e650149355a889c04ac6b52698a3a09c211b7d64
 src/mlpack/methods/kmeans/dtnn_kmeans.hpp      |  2 +-
 src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp | 32 ++++++++++++++++----------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
index ec141db..f9c21c7 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans.hpp
@@ -98,7 +98,7 @@ class DTNNKMeans
   //! centroids is the current (not yet searched) centroids.
   void UpdateTree(TreeType& node,
                   const arma::mat& centroids,
-                  const arma::mat& interclusterDistances);
+                  const arma::vec& interclusterDistances);
 
   //! Extract the centroids of the clusters.
   void ExtractCentroids(TreeType& node,
diff --git a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
index ef6cc2f..a9c64a1 100644
--- a/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/dtnn_kmeans_impl.hpp
@@ -104,10 +104,17 @@ double DTNNKMeans<MetricType, MatType, TreeType>::Iterate(
   // Find the nearest neighbors of each of the clusters.
   neighbor::NeighborSearch<neighbor::NearestNeighborSort, MetricType, TreeType>
       nns(centroidTree, centroids);
-  arma::mat interclusterDistances;
+  arma::mat interclusterDistancesTemp;
   arma::Mat<size_t> closestClusters; // We don't actually care about these.
-  nns.Search(1, closestClusters, interclusterDistances);
-//  distanceCalculations += nns.BaseCases() + nns.Scores();
+  nns.Search(1, closestClusters, interclusterDistancesTemp);
+  distanceCalculations += nns.BaseCases() + nns.Scores();
+
+  // We need to do the unmapping ourselves.
+  arma::vec interclusterDistances(centroids.n_cols);
+  for (size_t i = 0; i < interclusterDistances.n_elem; ++i)
+    interclusterDistances[oldFromNewCentroids[i]] =
+        interclusterDistancesTemp[i];
+
   Timer::Stop("knn");
 
   // Reset information in the tree, if we need to.
@@ -188,7 +195,7 @@ template<typename MetricType, typename MatType, typename TreeType>
 void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
     TreeType& node,
     const arma::mat& centroids,
-    const arma::mat& interclusterDistances)
+    const arma::vec& interclusterDistances)
 {
   const bool prunedLastIteration = node.Stat().StaticPruned();
   node.Stat().StaticPruned() = false;
@@ -251,12 +258,12 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
       }
 
       prunedPoints[index] = false;
-      const size_t owner = assignments[node.Point(i)];
+      const size_t owner = assignments[index];
       const double lowerBound = std::min(lowerBounds[index] -
           clusterDistances[centroids.n_cols], node.Stat().LowerBound());
-//      const double pruningLowerBound = std::max(lowerBound,
-//          interclusterDistances[owner] / 2.0);
-      if (upperBounds[index] + clusterDistances[owner] < lowerBound)
+      const double pruningLowerBound = std::max(lowerBound,
+          interclusterDistances[owner] / 2.0);
+      if (upperBounds[index] + clusterDistances[owner] < pruningLowerBound)
       {
         prunedPoints[index] = true;
         upperBounds[index] += clusterDistances[owner];
@@ -268,7 +275,7 @@ void DTNNKMeans<MetricType, MatType, TreeType>::UpdateTree(
         upperBounds[index] = metric.Evaluate(dataset.col(index),
                                              centroids.col(owner));
         ++distanceCalculations;
-        if (upperBounds[index] < lowerBound)
+        if (upperBounds[index] < pruningLowerBound)
         {
           prunedPoints[index] = true;
           lowerBounds[index] = lowerBound;
@@ -383,9 +390,10 @@ node.Stat().UpperBound() << " and owner " << node.Stat().Owner() << ".\n";
           Log::Fatal << "Point " << index << " of node " << node.Point(0) << "c"
   << node.NumDescendants() << " has true minimum cluster " << minIndex << " with "
         << "distance " << minDist << " but was assigned to cluster " <<
-assignments[node.Point(0)] << " with ub " << upperBounds[node.Point(0)] <<
-" and lb " << lowerBounds[node.Point(0)] << "; pp " <<
-(prunedPoints[node.Point(0)] ? "true" : "false") << ", visited " << (visited[node.Point(0)] ? "true"
+assignments[node.Point(i)] << " with ub " << upperBounds[node.Point(i)] <<
+" and lb " << lowerBounds[node.Point(i)] << "; pp " <<
+(prunedPoints[node.Point(i)] ? "true" : "false") << ", visited " <<
+(visited[node.Point(i)] ? "true"
 : "false") << ".\n";
         }
 */



More information about the mlpack-git mailing list