[mlpack-svn] r17244 - mlpack/trunk/src/mlpack/methods/kmeans

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Sun Oct 12 16:31:01 EDT 2014


Author: rcurtin
Date: Sun Oct 12 16:31:01 2014
New Revision: 17244

Log:
Refactor: track only distanceCalculations instead of separate scores and
baseCases counters. Also remove traversalInfo because it is unused, and count
the distance calculations performed during the cluster domination check.


Modified:
   mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
   mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
   mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp

Modified: mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp	Sun Oct 12 16:31:01 2014
@@ -63,7 +63,7 @@
   // irrelevant; we are checking each node with all clusters.
   traverser.Traverse(0, *tree);
 
-  distanceCalculations += rules.BaseCases() + rules.Scores();
+  distanceCalculations += rules.DistanceCalculations();
 
   // Now, calculate how far the clusters moved, after normalizing them.
   double residual = 0.0;

Modified: mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp	Sun Oct 12 16:31:01 2014
@@ -32,23 +32,10 @@
                  TreeType& referenceNode,
                  const double oldScore);
 
-  //! Get the number of base cases that have been performed.
-  size_t BaseCases() const { return baseCases; }
-  //! Modify the number of base cases that have been performed.
-  size_t& BaseCases() { return baseCases; }
-
-  //! Get the number of scores that have been performed.
-  size_t Scores() const { return scores; }
-  //! Modify the number of scores that have been performed.
-  size_t& Scores() { return scores; }
-
-  //! Convenience typedef.
-  typedef neighbor::NeighborSearchTraversalInfo<TreeType> TraversalInfoType;
-
-  //! Get the traversal info.
-  const TraversalInfoType& TraversalInfo() const { return traversalInfo; }
-  //! Modify the traversal info.
-  TraversalInfoType& TraversalInfo() { return traversalInfo; }
+  //! Get the number of distance calculations that have been performed.
+  size_t DistanceCalculations() const { return distanceCalculations; }
+  //! Modify the number of distance calculations that have been performed.
+  size_t& DistanceCalculations() { return distanceCalculations; }
 
  private:
   //! The dataset.
@@ -62,13 +49,10 @@
   //! Instantiated metric.
   MetricType& metric;
 
-  //! The number of base cases that have been performed.
-  size_t baseCases;
-  //! The number of scores that have been performed.
-  size_t scores;
-
-  TraversalInfoType traversalInfo;
+  //! The number of O(d) distance calculations that have been performed.
+  size_t distanceCalculations;
 
+  //! Spare blacklist; I think it's only used by the root node.
   arma::uvec spareBlacklist;
 };
 

Modified: mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp	Sun Oct 12 16:31:01 2014
@@ -24,8 +24,7 @@
     newCentroids(newCentroids),
     counts(counts),
     metric(metric),
-    baseCases(0),
-    scores(0),
+    distanceCalculations(0),
     spareBlacklist(centroids.n_cols)
 {
   // Nothing to do.
@@ -66,7 +65,7 @@
   // or not this node is dominated by a single cluster.
   const size_t whitelisted = centroids.n_cols - arma::accu(*blacklistPtr);
 
-  scores += whitelisted;
+  distanceCalculations += whitelisted;
 
   arma::vec minDistances(whitelisted);
   minDistances.fill(DBL_MAX);
@@ -117,6 +116,8 @@
         centroids.col(closestCluster));
     const double otherDist = metric.Evaluate(cornerPoint, centroids.col(c));
 
+    distanceCalculations += 3; // One for cornerPoint, then two distances.
+
     if (closestDist < otherDist)
     {
       // The closest cluster dominates the node with respect to the cluster c.
@@ -159,7 +160,7 @@
       if (referenceNode.Stat().Blacklist()[c] == 1)
         continue;
 
-      ++baseCases;
+      ++distanceCalculations;
 
       // The reference index is the index of the data point.
       const double distance = metric.Evaluate(centroids.col(c),



More information about the mlpack-svn mailing list