[mlpack-git] master: Parameter name change and style fixes. (4a2f63b)

gitdub at mlpack.org gitdub at mlpack.org
Mon Aug 8 15:09:15 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/50aeb6d229c9a82160ff5c951df19bcd7d6e2274...1a702c583fff0319bed163f2e15bb112f1941720

>---------------------------------------------------------------

commit 4a2f63b7beeea122c94503f96708633692cdcd5c
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon Aug 8 15:09:15 2016 -0400

    Parameter name change and style fixes.


>---------------------------------------------------------------

4a2f63b7beeea122c94503f96708633692cdcd5c
 HISTORY.md                                         |  3 ++
 .../preprocess/preprocess_describe_main.cpp        | 42 +++++++++++-----------
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index d8c9881..58f083a 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -5,6 +5,9 @@
   * Fixed a bug in CosineTree (and thus QUIC-SVD) that caused split failures for
     some datasets (#717).
 
+  * Added mlpack_preprocess_describe program, which can be used to print
+    statistics on a given dataset (#742).
+
 ### mlpack 2.0.3
 ###### 2016-07-21
   * Added multiprobe LSH (#691).  The parameter 'T' to LSHSearch::Search() can
diff --git a/src/mlpack/methods/preprocess/preprocess_describe_main.cpp b/src/mlpack/methods/preprocess/preprocess_describe_main.cpp
index ee59b8f..952d5c5 100644
--- a/src/mlpack/methods/preprocess/preprocess_describe_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_describe_main.cpp
@@ -46,9 +46,9 @@ PARAM_INT_IN("width", "Width of the output table.", "w", 8);
 PARAM_FLAG("population", "If specified, the program will calculate statistics "
     "assuming the dataset is the population. By default, the program will "
     "assume the dataset as a sample.", "P");
-PARAM_FLAG("rowMajor", "If specified, the program will calculate statistics "
-    "assuming the dataset is organized in row major. By default, the program "
-    "will assume the dataset is a column major.", "r");
+PARAM_FLAG("row_major", "If specified, the program will calculate statistics "
+    "across rows, not across columns.  (Remember that in mlpack, a column "
+    "represents a point, so this option is generally not necessary.)", "r");
 
 /**
 * Calculates the sum of deviations to the Nth Power.
@@ -84,12 +84,12 @@ double Skewness(const arma::rowvec& input,
   const double n = input.n_elem;
   if (population)
   {
-    // Calculate Population Skewness
+    // Calculate population skewness.
     skewness = M3 / (n * S3);
   }
   else
   {
-    // Calculate Sample Skewness
+    // Calculate sample skewness.
     skewness = n * M3 / ((n - 1) * (n - 2) * S3);
   }
   return skewness;
@@ -113,13 +113,13 @@ double Kurtosis(const arma::rowvec& input,
   const double n = input.n_elem;
   if (population)
   {
-    // Calculate Population Excess Kurtosis
+    // Calculate population excess kurtosis.
     const double M2 = SumNthPowerDeviations(input, fMean, 2);
     kurtosis = n * (M4 / pow(M2, 2)) - 3;
   }
   else
   {
-    // Calculate Sample Excess Kurtosis
+    // Calculate sample excess kurtosis.
     const double S4 = pow(fStd, 4);
     const double norm3 = (3 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3));
     const double normC = (n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3));
@@ -150,18 +150,16 @@ int main(int argc, char** argv)
   const size_t precision = static_cast<size_t>(CLI::GetParam<int>("precision"));
   const size_t width = static_cast<size_t>(CLI::GetParam<int>("width"));
   const bool population = CLI::HasParam("population");
-  const bool rowMajor = CLI::HasParam("rowMajor");
+  const bool rowMajor = CLI::HasParam("row_major");
 
-  // Load the data
+  // Load the data.
   arma::mat data;
   data::Load(inputFile, data);
 
   // Generate boost format recipe.
-  const string widthPrecision("%-"+
-      to_string(width)+ "." +
+  const string widthPrecision("%-" + to_string(width) + "." +
       to_string(precision));
-  const string widthOnly("%-"+
-      to_string(width)+ ".");
+  const string widthOnly("%-" + to_string(width) + ".");
   string stringFormat = "";
   string numberFormat = "";
 
@@ -173,13 +171,13 @@ int main(int argc, char** argv)
   }
 
   Timer::Start("statistics");
-  // Headers
+  // Print the headers.
   Log::Info << boost::format(stringFormat)
       % "dim" % "var" % "mean" % "std" % "median" % "min" % "max"
       % "range" % "skew" % "kurt" % "SE" << endl;
 
   // Lambda function to print out the results.
-  auto printStatResults = [&](size_t dim, bool rowMajor)
+  auto PrintStatResults = [&](size_t dim, bool rowMajor)
   {
     arma::rowvec feature;
     if (rowMajor)
@@ -187,13 +185,13 @@ int main(int argc, char** argv)
     else
       feature = data.row(dim);
 
-    // f at the front means "feature"
+    // f at the front of the variable names means "feature".
     const double fMax = arma::max(feature);
     const double fMin = arma::min(feature);
     const double fMean = arma::mean(feature);
     const double fStd = arma::stddev(feature, population);
 
-    // Print statistics of the given fension.
+    // Print statistics of the given dimension.
     Log::Info << boost::format(numberFormat)
         % dim
         % arma::var(feature, population)
@@ -210,17 +208,17 @@ int main(int argc, char** argv)
   };
 
   // If the user specified dimension, describe statistics of the given
-  // dimension. If it dimension not specified, describe all dimensions.
-  if(CLI::HasParam("dimension"))
+  // dimension. If a dimension is not specified, describe all dimensions.
+  if (CLI::HasParam("dimension"))
   {
-    printStatResults(dimension, rowMajor);
+    PrintStatResults(dimension, rowMajor);
   }
   else
   {
     const size_t dimensions = rowMajor ? data.n_cols : data.n_rows;
-    for(size_t i = 0; i < dimensions; ++i)
+    for (size_t i = 0; i < dimensions; ++i)
     {
-      printStatResults(i, rowMajor);
+      PrintStatResults(i, rowMajor);
     }
   }
   Timer::Stop("statistics");




More information about the mlpack-git mailing list