[mlpack-svn] r13564 - mlpack/trunk/src/mlpack/methods/kmeans
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Sep 17 17:11:43 EDT 2012
Author: rcurtin
Date: 2012-09-17 17:11:43 -0400 (Mon, 17 Sep 2012)
New Revision: 13564
Modified:
mlpack/trunk/src/mlpack/methods/kmeans/kmeans_impl.hpp
Log:
Fix grammar in comments (utter pedantry) and a few style issues.
Modified: mlpack/trunk/src/mlpack/methods/kmeans/kmeans_impl.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/kmeans_impl.hpp 2012-09-17 20:59:49 UTC (rev 13563)
+++ mlpack/trunk/src/mlpack/methods/kmeans/kmeans_impl.hpp 2012-09-17 21:11:43 UTC (rev 13564)
@@ -79,11 +79,13 @@
arma::Col<size_t> counts(actualClusters);
counts.zeros();
- // Build the mrkd-tree on this dataset
- tree::BinarySpaceTree<typename bound::HRectBound<2>, tree::MRKDStatistic> tree(data, 1);
+ // Build the mrkd-tree on this dataset.
+ tree::BinarySpaceTree<typename bound::HRectBound<2>, tree::MRKDStatistic>
+ tree(data, 1);
Log::Debug << "Tree Built." << std::endl;
- // A pointer for traversing the mrkd-tree
- tree::BinarySpaceTree<typename bound::HRectBound<2>, tree::MRKDStatistic>* node;
+ // A pointer for traversing the mrkd-tree.
+ tree::BinarySpaceTree<typename bound::HRectBound<2>, tree::MRKDStatistic>*
+ node;
// Now, the initial assignments. First determine if they are necessary.
if (assignments.n_elem != data.n_cols)
@@ -105,30 +107,30 @@
for (size_t i = 0; i < actualClusters; i++)
centroids.col(i) /= counts[i];
- // Instead of retraversing the tree after an iteration, we will update centroid
- // positions in this matrix, which also prevents clobbering our centroids from
- // the previous iteration.
+ // Instead of retraversing the tree after an iteration, we will update
+ // centroid positions in this matrix, which also prevents clobbering our
+ // centroids from the previous iteration.
MatType newCentroids(dimensionality, centroids.n_cols);
- // Create a stack for traversing the mrkd-tree
+ // Create a stack for traversing the mrkd-tree.
std::stack<typename tree::BinarySpaceTree<typename bound::HRectBound<2>,
tree::MRKDStatistic>* > stack;
- // A variable to keep track of how many kmeans iterations we have made
+ // A variable to keep track of how many kmeans iterations we have made.
size_t iteration = 0;
// A variable to keep track of how many nodes assignments have changed in
- // each kmeans iteration
+ // each kmeans iteration.
size_t changedAssignments = 0;
// A variable to keep track of the number of times something is skipped due
- // to the blacklist
+ // to the blacklist.
size_t skip = 0;
- // A variable to keep track of the number of distances calculated
+ // A variable to keep track of the number of distances calculated.
size_t comps = 0;
- // A variable to keep track of how often we stop at a parent node
+ // A variable to keep track of how often we stop at a parent node.
size_t dominations = 0;
do
{
@@ -137,30 +139,30 @@
changedAssignments = 0;
// Reset the newCentroids so that we can store the newly calculated ones
- // here
+ // here.
newCentroids.zeros();
- // Reset the counts
+ // Reset the counts.
counts.zeros();
- // Add the root node of the tree to the stack
+ // Add the root node of the tree to the stack.
stack.push(&tree);
- // Set the top level whitelist
+ // Set the top level whitelist.
tree.Stat().whiteList.resize(centroids.n_cols, true);
- // Traverse the tree
+ // Traverse the tree.
while (!stack.empty())
{
- // Get the next node in the tree
+ // Get the next node in the tree.
node = stack.top();
- // Remove the node from the stack
+ // Remove the node from the stack.
stack.pop();
- // Get a reference to the mrkd statistic for this hyperrectangle
+ // Get a reference to the mrkd statistic for this hyperrectangle.
tree::MRKDStatistic& mrkd = node->Stat();
// We use this to store the index of the centroid with the minimum
- // distance from this hyperrectangle or point
+ // distance from this hyperrectangle or point.
size_t minIndex = 0;
// If this node is a leaf, then we calculate the distance from
@@ -176,7 +178,7 @@
// Find the minimal distance centroid for this point.
for (size_t j = 1; j < centroids.n_cols; ++j)
{
- // If this centroid is not in the whitelist, skip it
+ // If this centroid is not in the whitelist, skip it.
if (!mrkd.whiteList[j])
{
++skip;
@@ -186,22 +188,22 @@
++comps;
double distance = metric::SquaredEuclideanDistance::Evaluate(
data.col(i), centroids.col(j));
- if ( minDistance > distance )
+ if (minDistance > distance)
{
minIndex = j;
minDistance = distance;
}
}
- // Add this point to the undivided center of mass summation for
- // it's assigned centroid
+ // Add this point to the undivided center of mass summation for its
+ // assigned centroid.
newCentroids.col(minIndex) += data.col(i);
- // Increment the count for the minimum distance centroid
+ // Increment the count for the minimum distance centroid.
++counts(minIndex);
// If we actually changed assignments, increment changedAssignments
- // and modify the assignment vector for this point
+ // and modify the assignment vector for this point.
if (assignments(i) != minIndex)
{
++changedAssignments;
@@ -210,7 +212,7 @@
}
}
// If this node is not a leaf, then we continue trying to find dominant
- // centroids
+ // centroids.
else
{
bound::HRectBound<2>& bound = node->Bound();
@@ -219,24 +221,24 @@
// to all points in this hyperrectangle than any other centroid.
bool noDomination = false;
- // Calculate the center of mass of this hyperrectangle
+ // Calculate the center of mass of this hyperrectangle.
arma::vec center = mrkd.centerOfMass / mrkd.count;
// Set the minDistance to the maximum value of a double so any value
- // must be smaller than this
+ // must be smaller than this.
double minDistance = std::numeric_limits<double>::max();
- // The candidate distance we calculate for each centroid
+ // The candidate distance we calculate for each centroid.
double distance = 0.0;
// How many points are inside this hyperrectangle, we stop if we
- // see more than 1
+ // see more than 1.
size_t contains = 0;
- // Find the "owner" of this hyperrectangle, if one exists
+ // Find the "owner" of this hyperrectangle, if one exists.
for (size_t i = 0; i < centroids.n_cols; ++i)
{
- // If this centroid is not in the whitelist, skip it
+ // If this centroid is not in the whitelist, skip it.
if (!mrkd.whiteList[i])
{
++skip;
@@ -244,10 +246,10 @@
}
// Increment the number of distance calculations for what we are
- // about to do
+ // about to do.
comps += 2;
- // Reinitialize the distance so += works right
+ // Reinitialize the distance so += works right.
distance = 0.0;
// We keep track of how many dimensions have nonzero distance,
@@ -307,7 +309,7 @@
}
}
- // The centroid is inside the hyperrectangle
+ // The centroid is inside the hyperrectangle.
if (nonZero == 0)
{
++contains;
@@ -338,7 +340,7 @@
distance = minDistance;
// Determine if the owner dominates this centroid only if there was
- // exactly one owner
+ // exactly one owner.
if (!noDomination)
{
for (size_t i = 0; i < centroids.n_cols; ++i)
@@ -346,7 +348,7 @@
if (i == minIndex)
continue;
// If this centroid is blacklisted for this hyperrectangle, then
- // we skip it
+ // we skip it.
if (!mrkd.whiteList[i])
{
++skip;
@@ -355,8 +357,8 @@
/*
Compute the dominating centroid for this hyperrectangle, if one
exists. We do this by calculating the point which is furthest
- from the min'th centroid in the direction of c_k - c_min. We do this
- as outlined in the Pelleg and Moore paper.
+ from the min'th centroid in the direction of c_k - c_min. We do
+ this as outlined in the Pelleg and Moore paper.
This following code is equivalent to, but faster than:
@@ -423,21 +425,21 @@
}
// If we did find a centroid that was closer to every point in the
- // hyperrectangle than every other centroid, then update that centroid
+ // hyperrectangle than every other centroid, then update that centroid.
if (!noDomination)
{
// Adjust the new centroid sum for the min distance point to this
- // hyperrectangle by the center of mass of this hyperrectangle
+ // hyperrectangle by the center of mass of this hyperrectangle.
newCentroids.col(minIndex) += mrkd.centerOfMass;
- // Increment the counts for this centroid
+ // Increment the counts for this centroid.
counts(minIndex) += mrkd.count;
- // Update all assignments for this node
+ // Update all assignments for this node.
const size_t begin = node->Begin();
const size_t end = node->End();
- // TODO: Do this outside of the kmeans iterations
+ // TODO: Do this outside of the kmeans iterations.
for (size_t j = begin; j < end; ++j)
{
if (assignments(j) != minIndex)
@@ -448,7 +450,7 @@
}
mrkd.dominatingCentroid = minIndex;
- // Keep track of the number of times we found a dominating centroid
+ // Keep track of the number of times we found a dominating centroid.
++dominations;
}
@@ -456,11 +458,11 @@
// default case, where we add the children of this node to the stack.
else
{
- // Add this hyperrectangle's children to our stack
+ // Add this hyperrectangle's children to our stack.
stack.push(node->Left());
stack.push(node->Right());
- // (Re)Initialize the whiteList for the children
+ // (Re)Initialize the whiteList for the children.
node->Left()->Stat().whiteList = mrkd.whiteList;
node->Right()->Stat().whiteList = mrkd.whiteList;
}
@@ -471,17 +473,14 @@
// Divide by the number of points assigned to the centroids so that we
// have the actual center of mass and update centroids' positions.
for (size_t i = 0; i < centroids.n_cols; ++i)
- {
- if (counts(i)) {
+ if (counts(i))
centroids.col(i) = newCentroids.col(i) / counts(i);
- }
- }
// Stop when we reach max iterations or we changed no assignments
- // assignments
+ // assignments.
} while (changedAssignments > 0 && iteration != maxIterations);
- Log::Debug << "Iterations: " << iteration << std::endl
+ Log::Info << "Iterations: " << iteration << std::endl
<< "Skips: " << skip << std::endl
<< "Comparisons: " << comps << std::endl
<< "Dominations: " << dominations << std::endl;
More information about the mlpack-svn
mailing list