[mlpack-svn] r17244 - mlpack/trunk/src/mlpack/methods/kmeans
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Sun Oct 12 16:31:01 EDT 2014
Author: rcurtin
Date: Sun Oct 12 16:31:01 2014
New Revision: 17244
Log:
Refactor: track a single distanceCalculations counter instead of separate
scores and baseCases counters. Also remove traversalInfo because it is unused,
and count the distance calculations made during the cluster domination check.
Modified:
mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
Modified: mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp (original)
+++ mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp Sun Oct 12 16:31:01 2014
@@ -63,7 +63,7 @@
// irrelevant; we are checking each node with all clusters.
traverser.Traverse(0, *tree);
- distanceCalculations += rules.BaseCases() + rules.Scores();
+ distanceCalculations += rules.DistanceCalculations();
// Now, calculate how far the clusters moved, after normalizing them.
double residual = 0.0;
Modified: mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp (original)
+++ mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp Sun Oct 12 16:31:01 2014
@@ -32,23 +32,10 @@
TreeType& referenceNode,
const double oldScore);
- //! Get the number of base cases that have been performed.
- size_t BaseCases() const { return baseCases; }
- //! Modify the number of base cases that have been performed.
- size_t& BaseCases() { return baseCases; }
-
- //! Get the number of scores that have been performed.
- size_t Scores() const { return scores; }
- //! Modify the number of scores that have been performed.
- size_t& Scores() { return scores; }
-
- //! Convenience typedef.
- typedef neighbor::NeighborSearchTraversalInfo<TreeType> TraversalInfoType;
-
- //! Get the traversal info.
- const TraversalInfoType& TraversalInfo() const { return traversalInfo; }
- //! Modify the traversal info.
- TraversalInfoType& TraversalInfo() { return traversalInfo; }
+ //! Get the number of distance calculations that have been performed.
+ size_t DistanceCalculations() const { return distanceCalculations; }
+ //! Modify the number of distance calculations that have been performed.
+ size_t& DistanceCalculations() { return distanceCalculations; }
private:
//! The dataset.
@@ -62,13 +49,10 @@
//! Instantiated metric.
MetricType& metric;
- //! The number of base cases that have been performed.
- size_t baseCases;
- //! The number of scores that have been performed.
- size_t scores;
-
- TraversalInfoType traversalInfo;
+ //! The number of O(d) distance calculations that have been performed.
+ size_t distanceCalculations;
+ //! Spare blacklist; I think it's only used by the root node.
arma::uvec spareBlacklist;
};
Modified: mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp (original)
+++ mlpack/trunk/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp Sun Oct 12 16:31:01 2014
@@ -24,8 +24,7 @@
newCentroids(newCentroids),
counts(counts),
metric(metric),
- baseCases(0),
- scores(0),
+ distanceCalculations(0),
spareBlacklist(centroids.n_cols)
{
// Nothing to do.
@@ -66,7 +65,7 @@
// or not this node is dominated by a single cluster.
const size_t whitelisted = centroids.n_cols - arma::accu(*blacklistPtr);
- scores += whitelisted;
+ distanceCalculations += whitelisted;
arma::vec minDistances(whitelisted);
minDistances.fill(DBL_MAX);
@@ -117,6 +116,8 @@
centroids.col(closestCluster));
const double otherDist = metric.Evaluate(cornerPoint, centroids.col(c));
+ distanceCalculations += 3; // One for cornerPoint, then two distances.
+
if (closestDist < otherDist)
{
// The closest cluster dominates the node with respect to the cluster c.
@@ -159,7 +160,7 @@
if (referenceNode.Stat().Blacklist()[c] == 1)
continue;
- ++baseCases;
+ ++distanceCalculations;
// The reference index is the index of the data point.
const double distance = metric.Evaluate(centroids.col(c),
More information about the mlpack-svn mailing list