[mlpack-git] master: polish describe executable program (4cf1dde)
gitdub at mlpack.org
gitdub at mlpack.org
Sat Aug 6 02:56:32 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/44a2b52f4d9ece563a5b9850db43ab60f71e5ec7...acd81e11579f69e75aa8406b2982328c88cf1fde
>---------------------------------------------------------------
commit 4cf1ddea7a9955b66c357a3da1ff66b46fa87030
Author: Keon Kim <kwk236 at gmail.com>
Date: Sat Aug 6 15:56:32 2016 +0900
polish describe executable program
>---------------------------------------------------------------
4cf1ddea7a9955b66c357a3da1ff66b46fa87030
.../preprocess/preprocess_describe_main.cpp | 137 ++++++++++-----------
1 file changed, 66 insertions(+), 71 deletions(-)
diff --git a/src/mlpack/methods/preprocess/preprocess_describe_main.cpp b/src/mlpack/methods/preprocess/preprocess_describe_main.cpp
index 2e56c86..2f3cdd9 100644
--- a/src/mlpack/methods/preprocess/preprocess_describe_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_describe_main.cpp
@@ -46,25 +46,25 @@ PARAM_INT_IN("width", "Width of the output table.", "w", 8);
PARAM_FLAG("population", "If specified, the program will calculate statistics "
"assuming the dataset is the population. By default, the program will "
"assume the dataset as a sample.", "P");
+PARAM_FLAG("rowMajor", "If specified, the program will calculate statistics "
+ "assuming the dataset is organized in row major. By default, the program "
+ "will assume the dataset is a column major.", "r");
/**
-* Calculates the sum of deviations to the Nth Power
+* Calculates the sum of deviations to the Nth Power.
*
-* @param input Vector that captures a dimension of a dataset
+* @param input Vector that captures a dimension of a dataset.
* @param rowMean Mean of the given vector.
-* @return sum of nth power deviations
+* @param n Degree of power.
+* @return sum of nth power deviations.
*/
double SumNthPowerDeviations(const arma::rowvec& input,
- const double& rowMean,
- const size_t Nth) // Degree of Power
+ const double& fMean,
+ size_t n) // Degree of Power
{
- double sum = 0;
- for (size_t i = 0; i < input.n_elem; ++i)
- {
- sum += pow(input(i) - rowMean, Nth);
- }
- return sum;
+ return arma::sum(arma::pow(input - fMean, static_cast<double>(n)));
}
+
/**
* Calculates Skewness of the given vector.
*
@@ -74,18 +74,18 @@ double SumNthPowerDeviations(const arma::rowvec& input,
* @return Skewness of the given vector.
*/
double Skewness(const arma::rowvec& input,
- const double& rowStd,
- const double& rowMean,
+ const double& fStd,
+ const double& fMean,
const bool population)
{
double skewness = 0;
- double S3 = pow(rowStd, 3);
- double M3 = SumNthPowerDeviations(input, rowMean, 3);
- double n = input.n_elem;
+ const double S3 = pow(fStd, 3);
+ const double M3 = SumNthPowerDeviations(input, fMean, 3);
+ const double n = input.n_elem;
if (population)
{
// Calculate Population Skewness
- skewness = n * M3 / (n * n * S3);
+ skewness = M3 / (n * S3);
}
else
{
@@ -94,8 +94,9 @@ double Skewness(const arma::rowvec& input,
}
return skewness;
}
+
/**
- * Calculates kurtosis of the given vector.
+ * Calculates excess kurtosis of the given vector.
*
* @param input Vector that captures a dimension of a dataset
* @param rowStd Standard Deviation of the given vector.
@@ -103,23 +104,23 @@ double Skewness(const arma::rowvec& input,
* @return Kurtosis of the given vector.
*/
double Kurtosis(const arma::rowvec& input,
- const double& rowStd,
- const double& rowMean,
+ const double& fStd,
+ const double& fMean,
const bool population)
{
double kurtosis = 0;
- double M4 = SumNthPowerDeviations(input, rowMean, 4);
- double n = input.n_elem;
+ const double M4 = SumNthPowerDeviations(input, fMean, 4);
+ const double n = input.n_elem;
if (population)
{
// Calculate Population Excess Kurtosis
- double M2 = SumNthPowerDeviations(input, rowMean, 2);
+ double M2 = SumNthPowerDeviations(input, fMean, 2);
kurtosis = n * (M4 / pow(M2, 2)) - 3;
}
else
{
// Calculate Sample Excess Kurtosis
- double S4 = pow(rowStd, 4);
+ double S4 = pow(fStd, 4);
double norm3 = (3 * (n-1) * (n-1)) / ((n-2) * (n-3));
double normC = (n * (n+1))/((n-1) * (n-2) * (n-3));
double normM = M4 / S4;
@@ -127,6 +128,7 @@ double Kurtosis(const arma::rowvec& input,
}
return kurtosis;
}
+
/**
* Calculates standard error of standard deviation.
*
@@ -134,9 +136,9 @@ double Kurtosis(const arma::rowvec& input,
* @param rowStd Standard Deviation of the given vector.
* @return Standard error of the standard deviation of the given vector.
*/
-double StandardError(const arma::rowvec& input, const double rowStd)
+double StandardError(const size_t size, const double& fStd)
{
- return rowStd / sqrt(input.n_elem);
+ return fStd / sqrt(size);
}
int main(int argc, char** argv)
@@ -148,10 +150,11 @@ int main(int argc, char** argv)
const size_t precision = static_cast<size_t>(CLI::GetParam<int>("precision"));
const size_t width = static_cast<size_t>(CLI::GetParam<int>("width"));
const bool population = CLI::HasParam("population");
+ const bool rowMajor = CLI::HasParam("rowMajor");
// Load the data
arma::mat data;
- data::Load(inputFile, data, false, true /*transpose*/);
+ data::Load(inputFile, data);
// Generate boost format recipe.
const string widthPrecision("%-"+
@@ -173,57 +176,49 @@ int main(int argc, char** argv)
% "dim" % "var" % "mean" % "std" % "median" % "min" % "max"
% "range" % "skew" % "kurt" % "SE" << endl;
+ // Lambda function to print out the results.
+ auto printStatResults = [&](size_t dim, bool rowMajor)
+ {
+ arma::rowvec feature;
+ if (rowMajor)
+ feature = arma::conv_to<arma::rowvec>::from(data.col(dim));
+ else
+ feature = data.row(dim);
+
+ // f at the front means "feature"
+ const double fMax = arma::max(feature);
+ const double fMin = arma::min(feature);
+ const double fMean = arma::mean(feature);
+ const double fStd = arma::stddev(feature, population);
+
+ // Print statistics of the given dimension.
+ Log::Info << boost::format(numberFormat)
+ % dim
+ % arma::var(feature, population)
+ % fMean
+ % fStd
+ % arma::median(feature)
+ % fMin
+ % fMax
+ % (fMax - fMin) // range
+ % Skewness(feature, fStd, fMean, population)
+ % Kurtosis(feature, fStd, fMean, population)
+ % StandardError(feature.n_elem, fStd)
+ << endl;
+ };
+
// If the user specified dimension, describe statistics of the given
// dimension. If the dimension is not specified, describe all dimensions.
- if (CLI::HasParam("dimension"))
+ if(CLI::HasParam("dimension"))
{
- // Extract row of the data with the given dimension.
- arma::rowvec row = data.row(dimension);
- // These variables are kept for future calculations.
- double rowMax = arma::max(row);
- double rowMin = arma::min(row);
- double rowMean = arma::mean(row);
- double rowStd = arma::stddev(row, population);
-
- // Print statistics of the given dimension.
- Log::Info << boost::format(numberFormat)
- % dimension
- % arma::var(row, population)
- % rowMean
- % rowStd
- % arma::median(row)
- % rowMin
- % rowMax
- % (rowMax - rowMin) // range
- % Skewness(row, rowStd, rowMean, population)
- % Kurtosis(row, rowStd, rowMean, population)
- % StandardError(row, rowStd) << endl;
+ printStatResults(dimension, rowMajor);
}
else
{
- for (size_t i = 0; i < data.n_rows; ++i)
+ const size_t dimensions = rowMajor ? data.n_cols : data.n_rows;
+ for(size_t i = 0; i < dimensions; ++i)
{
- // Extract each dimension of the data.
- arma::rowvec row = data.row(i);
- // These variables are kept for future calculations.
- double rowMax = arma::max(row);
- double rowMin = arma::min(row);
- double rowMean = arma::mean(row);
- double rowStd = arma::stddev(row, population);
-
- // Print statistics of the row i.
- Log::Info << boost::format(numberFormat)
- % i
- % arma::var(row, population)
- % rowMean
- % rowStd
- % arma::median(row)
- % rowMin
- % rowMax
- % (rowMax - rowMin) // range
- % Skewness(row, rowStd, rowMean, population)
- % Kurtosis(row, rowStd, rowMean, population)
- % StandardError(row, rowStd) << endl;
+ printStatResults(i, rowMajor);
}
}
Timer::Stop("statistics");
More information about the mlpack-git
mailing list