[mlpack-svn] r15502 - mlpack/trunk/src/mlpack/methods/kmeans

fastlab-svn at coffeetalk-1.cc.gatech.edu fastlab-svn at coffeetalk-1.cc.gatech.edu
Thu Jul 18 15:16:40 EDT 2013


Author: rcurtin
Date: Thu Jul 18 15:16:40 2013
New Revision: 15502

Log:
Minor formatting fixes, make things const 'cause that's what's cool to do, and
issue a warning if -r (--refined_start) and -I (--initial_centroids) are both
passed (because in that case -I is ignored).


Modified:
   mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp

Modified: mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp	(original)
+++ mlpack/trunk/src/mlpack/methods/kmeans/kmeans_main.cpp	Thu Jul 18 15:16:40 2013
@@ -52,7 +52,7 @@
 PARAM_INT("max_iterations", "Maximum number of iterations before K-Means "
     "terminates.", "m", 1000);
 PARAM_INT("seed", "Random seed.  If 0, 'std::time(NULL)' is used.", "s", 0);
-PARAM_STRING("initial_centroid", "Start with the specified initial centroids.",
+PARAM_STRING("initial_centroids", "Start with the specified initial centroids.",
              "I", "");
 
 // This is known to not work (#251).
@@ -79,50 +79,57 @@
     math::RandomSeed((size_t) std::time(NULL));
 
   // Now do validation of options.
-  string inputFile = CLI::GetParam<string>("inputFile");
-  int clusters = CLI::GetParam<int>("clusters");
+  const string inputFile = CLI::GetParam<string>("inputFile");
+  const int clusters = CLI::GetParam<int>("clusters");
   if (clusters < 1)
   {
     Log::Fatal << "Invalid number of clusters requested (" << clusters << ")! "
-        << "Must be greater than or equal to 1." << std::endl;
+        << "Must be greater than or equal to 1." << endl;
   }
 
-  int maxIterations = CLI::GetParam<int>("max_iterations");
+  const int maxIterations = CLI::GetParam<int>("max_iterations");
   if (maxIterations < 0)
   {
     Log::Fatal << "Invalid value for maximum iterations (" << maxIterations <<
-        ")! Must be greater than or equal to 0." << std::endl;
+        ")! Must be greater than or equal to 0." << endl;
   }
 
-  double overclustering = CLI::GetParam<double>("overclustering");
+  const double overclustering = CLI::GetParam<double>("overclustering");
   if (overclustering < 1)
   {
     Log::Fatal << "Invalid value for overclustering (" << overclustering <<
-        ")! Must be greater than or equal to 1." << std::endl;
+        ")! Must be greater than or equal to 1." << endl;
   }
 
   // Make sure we have an output file if we're not doing the work in-place.
   if (!CLI::HasParam("in_place") && !CLI::HasParam("output_file"))
   {
     Log::Fatal << "--outputFile not specified (and --in_place not set)."
-        << std::endl;
+        << endl;
   }
 
   // Load our dataset.
   arma::mat dataset;
-  data::Load(inputFile.c_str(), dataset, true); // Fatal upon failure.
+  data::Load(inputFile, dataset, true); // Fatal upon failure.
 
   // Now create the KMeans object.  Because we could be using different types,
   // it gets a little weird...
   arma::Col<size_t> assignments;
   arma::mat centroids;
-  
-  bool initialCentroidGuess = CLI::HasParam("initial_centroid");
+
+  const bool initialCentroidGuess = CLI::HasParam("initial_centroid");
   // Load initial centroids if the user asked for it.
   if (initialCentroidGuess)
   {
-    string initialCentroidsFile = CLI::GetParam<string>("initial_centroid");
-    data::Load(initialCentroidsFile.c_str(), centroids, true);
+    string initialCentroidsFile = CLI::GetParam<string>("initial_centroids");
+    data::Load(initialCentroidsFile, centroids, true);
+
+    if (CLI::HasParam("refined_start"))
+      Log::Warn << "Initial centroids are specified, but will be ignored "
+          << "because --refined_start is also specified!" << endl;
+    else
+      Log::Info << "Using initial centroid guesses from '" <<
+          initialCentroidsFile << "'." << endl;
   }
 
   if (CLI::HasParam("allow_empty_clusters"))
@@ -134,10 +141,10 @@
 
       if (samplings < 0)
         Log::Fatal << "Number of samplings (" << samplings << ") must be "
-            << "greater than 0!" << std::endl;
+            << "greater than 0!" << endl;
       if (percentage <= 0.0 || percentage > 1.0)
         Log::Fatal << "Percentage for sampling (" << percentage << ") must be "
-            << "greater than 0.0 and less than or equal to 1.0!" << std::endl;
+            << "greater than 0.0 and less than or equal to 1.0!" << endl;
 
       KMeans<metric::SquaredEuclideanDistance, RefinedStart, AllowEmptyClusters>
           k(maxIterations, overclustering, metric::SquaredEuclideanDistance(),
@@ -147,7 +154,7 @@
 //      if (CLI::HasParam("fast_kmeans"))
 //        k.FastCluster(dataset, clusters, assignments);
 //      else
-        k.Cluster(dataset, clusters, assignments, centroids);
+      k.Cluster(dataset, clusters, assignments, centroids);
       Timer::Stop("clustering");
     }
     else
@@ -159,8 +166,8 @@
 //      if (CLI::HasParam("fast_kmeans"))
 //        k.FastCluster(dataset, clusters, assignments);
 //      else
-        k.Cluster(dataset, clusters, assignments, centroids, false,
-            initialCentroidGuess);
+      k.Cluster(dataset, clusters, assignments, centroids, false,
+          initialCentroidGuess);
       Timer::Stop("clustering");
     }
   }
@@ -173,10 +180,10 @@
 
       if (samplings < 0)
         Log::Fatal << "Number of samplings (" << samplings << ") must be "
-            << "greater than 0!" << std::endl;
+            << "greater than 0!" << endl;
       if (percentage <= 0.0 || percentage > 1.0)
         Log::Fatal << "Percentage for sampling (" << percentage << ") must be "
-            << "greater than 0.0 and less than or equal to 1.0!" << std::endl;
+            << "greater than 0.0 and less than or equal to 1.0!" << endl;
 
       KMeans<metric::SquaredEuclideanDistance, RefinedStart, AllowEmptyClusters>
           k(maxIterations, overclustering, metric::SquaredEuclideanDistance(),



More information about the mlpack-svn mailing list