[mlpack-git] master: Refactor: only track distanceCalculations, not scores and baseCases. Also remove traversalInfo because it's not used, and count distance calculations during cluster domination calculation. (7123314)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 22:01:14 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 7123314702d6e223540876c433997ea467ae50a6
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sun Oct 12 20:31:01 2014 +0000

    Refactor: only track distanceCalculations, not scores and baseCases.  Also
    remove traversalInfo because it's not used, and count distance calculations
    during cluster domination calculation.


>---------------------------------------------------------------

7123314702d6e223540876c433997ea467ae50a6
 .../methods/kmeans/pelleg_moore_kmeans_impl.hpp    |  2 +-
 .../methods/kmeans/pelleg_moore_kmeans_rules.hpp   | 30 +++++-----------------
 .../kmeans/pelleg_moore_kmeans_rules_impl.hpp      |  9 ++++---
 3 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
index 51dbd66..e7fd385 100644
--- a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
@@ -63,7 +63,7 @@ double PellegMooreKMeans<MetricType, MatType>::Iterate(
   // irrelevant; we are checking each node with all clusters.
   traverser.Traverse(0, *tree);
 
-  distanceCalculations += rules.BaseCases() + rules.Scores();
+  distanceCalculations += rules.DistanceCalculations();
 
   // Now, calculate how far the clusters moved, after normalizing them.
   double residual = 0.0;
diff --git a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
index 9bb808d..874723d 100644
--- a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
+++ b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
@@ -32,23 +32,10 @@ class PellegMooreKMeansRules
                  TreeType& referenceNode,
                  const double oldScore);
 
-  //! Get the number of base cases that have been performed.
-  size_t BaseCases() const { return baseCases; }
-  //! Modify the number of base cases that have been performed.
-  size_t& BaseCases() { return baseCases; }
-
-  //! Get the number of scores that have been performed.
-  size_t Scores() const { return scores; }
-  //! Modify the number of scores that have been performed.
-  size_t& Scores() { return scores; }
-
-  //! Convenience typedef.
-  typedef neighbor::NeighborSearchTraversalInfo<TreeType> TraversalInfoType;
-
-  //! Get the traversal info.
-  const TraversalInfoType& TraversalInfo() const { return traversalInfo; }
-  //! Modify the traversal info.
-  TraversalInfoType& TraversalInfo() { return traversalInfo; }
+  //! Get the number of distance calculations that have been performed.
+  size_t DistanceCalculations() const { return distanceCalculations; }
+  //! Modify the number of distance calculations that have been performed.
+  size_t& DistanceCalculations() { return distanceCalculations; }
 
  private:
   //! The dataset.
@@ -62,13 +49,10 @@ class PellegMooreKMeansRules
   //! Instantiated metric.
   MetricType& metric;
 
-  //! The number of base cases that have been performed.
-  size_t baseCases;
-  //! The number of scores that have been performed.
-  size_t scores;
-
-  TraversalInfoType traversalInfo;
+  //! The number of O(d) distance calculations that have been performed.
+  size_t distanceCalculations;
 
+  //! Spare blacklist; I think it's only used by the root node.
   arma::uvec spareBlacklist;
 };
 
diff --git a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
index 1ad6fd4..d0cced5 100644
--- a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
+++ b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
@@ -24,8 +24,7 @@ PellegMooreKMeansRules<MetricType, TreeType>::PellegMooreKMeansRules(
     newCentroids(newCentroids),
     counts(counts),
     metric(metric),
-    baseCases(0),
-    scores(0),
+    distanceCalculations(0),
     spareBlacklist(centroids.n_cols)
 {
   // Nothing to do.
@@ -66,7 +65,7 @@ double PellegMooreKMeansRules<MetricType, TreeType>::Score(
   // or not this node is dominated by a single cluster.
   const size_t whitelisted = centroids.n_cols - arma::accu(*blacklistPtr);
 
-  scores += whitelisted;
+  distanceCalculations += whitelisted;
 
   arma::vec minDistances(whitelisted);
   minDistances.fill(DBL_MAX);
@@ -117,6 +116,8 @@ double PellegMooreKMeansRules<MetricType, TreeType>::Score(
         centroids.col(closestCluster));
     const double otherDist = metric.Evaluate(cornerPoint, centroids.col(c));
 
+    distanceCalculations += 3; // One for cornerPoint, then two distances.
+
     if (closestDist < otherDist)
     {
       // The closest cluster dominates the node with respect to the cluster c.
@@ -159,7 +160,7 @@ double PellegMooreKMeansRules<MetricType, TreeType>::Score(
       if (referenceNode.Stat().Blacklist()[c] == 1)
         continue;
 
-      ++baseCases;
+      ++distanceCalculations;
 
       // The reference index is the index of the data point.
       const double distance = metric.Evaluate(centroids.col(c),



More information about the mlpack-git mailing list