[mlpack-svn] r15502 - mlpack/trunk/src/mlpack/methods/kmeans
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Jul 18 15:16:40 EDT 2013
Author: rcurtin
Date: Thu Jul 18 15:16:40 2013
New Revision: 15502
Log:
Minor formatting fixes; make things const, because that's what's cool to do; issue
a warning if -r and -I are both passed (because in that case -I is ignored).
Modified:
mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp
Modified: mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp (original)
+++ mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp Thu Jul 18 15:16:40 2013
@@ -52,7 +52,7 @@
PARAM_INT("max_iterations", "Maximum number of iterations before K-Means "
"terminates.", "m", 1000);
PARAM_INT("seed", "Random seed. If 0, 'std::time(NULL)' is used.", "s", 0);
-PARAM_STRING("initial_centroid", "Start with the specified initial centroids.",
+PARAM_STRING("initial_centroids", "Start with the specified initial centroids.",
"I", "");
// This is known to not work (#251).
@@ -79,50 +79,57 @@
math::RandomSeed((size_t) std::time(NULL));
// Now do validation of options.
- string inputFile = CLI::GetParam<string>("inputFile");
- int clusters = CLI::GetParam<int>("clusters");
+ const string inputFile = CLI::GetParam<string>("inputFile");
+ const int clusters = CLI::GetParam<int>("clusters");
if (clusters < 1)
{
Log::Fatal << "Invalid number of clusters requested (" << clusters << ")! "
- << "Must be greater than or equal to 1." << std::endl;
+ << "Must be greater than or equal to 1." << endl;
}
- int maxIterations = CLI::GetParam<int>("max_iterations");
+ const int maxIterations = CLI::GetParam<int>("max_iterations");
if (maxIterations < 0)
{
Log::Fatal << "Invalid value for maximum iterations (" << maxIterations <<
- ")! Must be greater than or equal to 0." << std::endl;
+ ")! Must be greater than or equal to 0." << endl;
}
- double overclustering = CLI::GetParam<double>("overclustering");
+ const double overclustering = CLI::GetParam<double>("overclustering");
if (overclustering < 1)
{
Log::Fatal << "Invalid value for overclustering (" << overclustering <<
- ")! Must be greater than or equal to 1." << std::endl;
+ ")! Must be greater than or equal to 1." << endl;
}
// Make sure we have an output file if we're not doing the work in-place.
if (!CLI::HasParam("in_place") && !CLI::HasParam("output_file"))
{
Log::Fatal << "--outputFile not specified (and --in_place not set)."
- << std::endl;
+ << endl;
}
// Load our dataset.
arma::mat dataset;
- data::Load(inputFile.c_str(), dataset, true); // Fatal upon failure.
+ data::Load(inputFile, dataset, true); // Fatal upon failure.
// Now create the KMeans object. Because we could be using different types,
// it gets a little weird...
arma::Col<size_t> assignments;
arma::mat centroids;
-
- bool initialCentroidGuess = CLI::HasParam("initial_centroid");
+
+ const bool initialCentroidGuess = CLI::HasParam("initial_centroid");
// Load initial centroids if the user asked for it.
if (initialCentroidGuess)
{
- string initialCentroidsFile = CLI::GetParam<string>("initial_centroid");
- data::Load(initialCentroidsFile.c_str(), centroids, true);
+ string initialCentroidsFile = CLI::GetParam<string>("initial_centroids");
+ data::Load(initialCentroidsFile, centroids, true);
+
+ if (CLI::HasParam("refined_start"))
+ Log::Warn << "Initial centroids are specified, but will be ignored "
+ << "because --refined_start is also specified!" << endl;
+ else
+ Log::Info << "Using initial centroid guesses from '" <<
+ initialCentroidsFile << "'." << endl;
}
if (CLI::HasParam("allow_empty_clusters"))
@@ -134,10 +141,10 @@
if (samplings < 0)
Log::Fatal << "Number of samplings (" << samplings << ") must be "
- << "greater than 0!" << std::endl;
+ << "greater than 0!" << endl;
if (percentage <= 0.0 || percentage > 1.0)
Log::Fatal << "Percentage for sampling (" << percentage << ") must be "
- << "greater than 0.0 and less than or equal to 1.0!" << std::endl;
+ << "greater than 0.0 and less than or equal to 1.0!" << endl;
KMeans<metric::SquaredEuclideanDistance, RefinedStart, AllowEmptyClusters>
k(maxIterations, overclustering, metric::SquaredEuclideanDistance(),
@@ -147,7 +154,7 @@
// if (CLI::HasParam("fast_kmeans"))
// k.FastCluster(dataset, clusters, assignments);
// else
- k.Cluster(dataset, clusters, assignments, centroids);
+ k.Cluster(dataset, clusters, assignments, centroids);
Timer::Stop("clustering");
}
else
@@ -159,8 +166,8 @@
// if (CLI::HasParam("fast_kmeans"))
// k.FastCluster(dataset, clusters, assignments);
// else
- k.Cluster(dataset, clusters, assignments, centroids, false,
- initialCentroidGuess);
+ k.Cluster(dataset, clusters, assignments, centroids, false,
+ initialCentroidGuess);
Timer::Stop("clustering");
}
}
@@ -173,10 +180,10 @@
if (samplings < 0)
Log::Fatal << "Number of samplings (" << samplings << ") must be "
- << "greater than 0!" << std::endl;
+ << "greater than 0!" << endl;
if (percentage <= 0.0 || percentage > 1.0)
Log::Fatal << "Percentage for sampling (" << percentage << ") must be "
- << "greater than 0.0 and less than or equal to 1.0!" << std::endl;
+ << "greater than 0.0 and less than or equal to 1.0!" << endl;
KMeans<metric::SquaredEuclideanDistance, RefinedStart, AllowEmptyClusters>
k(maxIterations, overclustering, metric::SquaredEuclideanDistance(),
More information about the mlpack-svn
mailing list