[mlpack-git] master: fix transpose problem (3b8ffd0)
gitdub at mlpack.org
gitdub at mlpack.org
Mon Jul 25 12:18:42 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2
>---------------------------------------------------------------
commit 3b8ffd0766cca7b60d5a6c552b8d464ae7ac3920
Author: Keon Kim <kwk236 at gmail.com>
Date: Tue Jun 28 06:38:44 2016 +0900
fix transpose problem
>---------------------------------------------------------------
3b8ffd0766cca7b60d5a6c552b8d464ae7ac3920
.../data/imputation_methods/custom_imputation.hpp | 4 +-
.../data/imputation_methods/listwise_deletion.hpp | 8 ++--
.../data/imputation_methods/mean_imputation.hpp | 29 ++++++------
.../data/imputation_methods/median_imputation.hpp | 4 +-
.../methods/preprocess/preprocess_imputer_main.cpp | 55 +++++++++++++++-------
5 files changed, 61 insertions(+), 39 deletions(-)
diff --git a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
index c8197d6..73100e2 100644
--- a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
@@ -31,7 +31,7 @@ class CustomImputation
// replace the target value to custom value
if (transpose)
{
- for (size_t i = 0; i < input.n_rows; ++i)
+ for (size_t i = 0; i < input.n_cols; ++i)
{
if (input(dimension, i) == mappedValue)
{
@@ -41,7 +41,7 @@ class CustomImputation
}
else
{
- for (size_t i = 0; i < input.n_cols; ++i)
+ for (size_t i = 0; i < input.n_rows; ++i)
{
if (input(i, dimension) == mappedValue)
{
diff --git a/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp b/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp
index f089da1..a2de05d 100644
--- a/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp
+++ b/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp
@@ -33,7 +33,7 @@ class ListwiseDeletion
if (transpose)
{
- for (size_t i = 0; i < input.n_rows; ++i)
+ for (size_t i = 0; i < input.n_cols; ++i)
{
if (input(dimension, i) == mappedValue)
{
@@ -43,11 +43,11 @@ class ListwiseDeletion
}
else
{
- for (size_t i = 0; i < input.n_cols; ++i)\
+ for (size_t i = 0; i < input.n_rows; ++i)\
{
- if (input(dimension, i) == mappedValue)
+ if (input(i, dimension) == mappedValue)
{
- output.shed_col(i);
+ output.shed_col(dimension);
}
}
}
diff --git a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
index 7b9c935..b827405 100644
--- a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
@@ -30,37 +30,40 @@ class MeanImputation
// initiate output
output = input;
- double sum;
+ double sum = 0;
size_t elems = 0; // excluding nan or missing target
using PairType = std::pair<size_t, size_t>;
// dimensions and indexes are saved as pairs inside this vector.
std::vector<PairType> targets;
+
// calculate number of elements and sum of them excluding mapped value or
// nan. while doing that, remember where mappedValue or NaN exists.
if (transpose)
{
- for (size_t i = 0; i < input.n_rows; ++i)
+ Log::Debug << "transpose mean imputation" << std::endl;
+ for (size_t i = 0; i < input.n_cols; ++i)
{
- if (input(i, dimension) == mappedValue)
+ if (input(dimension, i) == mappedValue)
{
- targets.push_back(std::make_pair(i, dimension));
+ targets.emplace_back(dimension, i);
}
else
{
elems++;
- sum += input(i, dimension);
+ sum += input(dimension, i);
}
}
}
else
{
- for (size_t i = 0; i < input.n_cols; ++i)
+ Log::Debug << "un-transpose mean imputation" << std::endl;
+ for (size_t i = 0; i < input.n_rows; ++i)
{
- if (input(dimension, i) == mappedValue)
+ if (input(i, dimension) == mappedValue)
{
- targets.push_back(std::make_pair(dimension, i));
+ targets.emplace_back(i, dimension);
}
else
{
@@ -69,18 +72,16 @@ class MeanImputation
}
}
}
-
+ Log::Debug << "sum: " << sum << std::endl;
+ Log::Debug << "elems: " << elems << std::endl;
// calculate mean;
- double mean = sum / elems;
+ const double mean = sum / elems;
// Now replace the calculated mean to the missing variables
// It only needs to loop through targets vector, not the whole matrix.
for (const PairType& target : targets)
{
- if (input(target.first, target.second) == mappedValue)
- {
- output(target.first, target.second) = mean;
- }
+ output(target.first, target.second) = mean;
}
}
}; // class MeanImputation
diff --git a/src/mlpack/core/data/imputation_methods/median_imputation.hpp b/src/mlpack/core/data/imputation_methods/median_imputation.hpp
index 84c5425..0a59103 100644
--- a/src/mlpack/core/data/imputation_methods/median_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/median_imputation.hpp
@@ -34,7 +34,7 @@ class MedianImputation
if (transpose)
{
arma::Mat<T> medianMat = arma::median(input, 1);
- for (size_t i = 0; i < input.n_rows; ++i)
+ for (size_t i = 0; i < input.n_cols; ++i)
{
if (input(dimension, i) == mappedValue)
{
@@ -45,7 +45,7 @@ class MedianImputation
else
{
arma::Mat<T> medianMat = arma::median(input, 0);
- for (size_t i = 0; i < input.n_cols; ++i)
+ for (size_t i = 0; i < input.n_rows; ++i)
{
if (input(i, dimension) == mappedValue)
{
diff --git a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
index f423036..b833ab1 100644
--- a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
@@ -22,9 +22,9 @@ PARAM_STRING_REQ("input_file", "File containing data,", "i");
PARAM_STRING("output_file", "File to save output", "o", "");
PARAM_STRING("missing_value", "User defined missing value", "m", "")
PARAM_STRING("map_policy", "mapping policy to be used while loading", "p", "")
-PARAM_STRING("impute_strategy", "imputation strategy to be applied", "s", "")
+PARAM_STRING("strategy", "imputation strategy to be applied", "s", "")
PARAM_DOUBLE("custom_value", "user_defined custom value", "c", 0.0)
-PARAM_INT("feature", "the feature to apply imputation", "f", 0);
+PARAM_INT("dimension", "the dimension to apply imputation", "d", 0);
using namespace mlpack;
using namespace arma;
@@ -41,8 +41,8 @@ int main(int argc, char** argv)
const string missingValue = CLI::GetParam<string>("missing_value");
const string mapPolicy = CLI::GetParam<string>("map_policy");
const double customValue = CLI::GetParam<double>("custom_value");
- const size_t feature = (size_t) CLI::GetParam<int>("feature");
- string imputeStrategy = CLI::GetParam<string>("impute_strategy");
+ const size_t dimension = (size_t) CLI::GetParam<int>("dimension");
+ string strategy = CLI::GetParam<string>("strategy");
// missing value should be specified
if (!CLI::HasParam("missing_value"))
@@ -54,11 +54,15 @@ int main(int argc, char** argv)
Log::Warn << "--output_file is not specified, no "
<< "results from this program will be saved!" << endl;
+ // warn if user did not specify dimension
+ if (!CLI::HasParam("dimension"))
+ Log::Warn << "--dimension is required to be specified!" << endl;
+
// if custom value is specified, and imputation strategy is not,
// set imputation strategy to "custom"
if (CLI::HasParam("custom_value") && !CLI::HasParam("impute_strategy"))
{
- imputeStrategy = "custom";
+ strategy = "custom";
Log::Warn << "--custom_value is specified without --impute_strategy, "
<< "--impute_strategy is automatically set to 'custom'." << endl;
}
@@ -66,12 +70,12 @@ int main(int argc, char** argv)
// custom value and any other impute strategies cannot be specified at
// the same time.
if (CLI::HasParam("custom_value") && CLI::HasParam("impute_strategy") &&
- imputeStrategy != "custom")
+ strategy != "custom")
Log::Fatal << "--custom_value cannot be specified with "
<< "impute strategies excluding 'custom' strategy" << endl;
// custom_value must be specified when using "custom" imputation strategy
- if ((imputeStrategy == "custom") && !CLI::HasParam("custom_value"))
+ if ((strategy == "custom") && !CLI::HasParam("custom_value"))
Log::Fatal << "--custom_value must be specified when using "
<< "'custom' strategy" << endl;
@@ -87,34 +91,51 @@ int main(int argc, char** argv)
// for testing purpose
Log::Info << input << endl;
- // print how many mapping exist in each features
+ // print how many mapping exist in each dimensions
for (size_t i = 0; i < input.n_rows; ++i)
{
- Log::Info << info.NumMappings(i) << " mappings in feature " << i << "."
+ Log::Info << info.NumMappings(i) << " mappings in dimension " << i << "."
<< endl;
}
arma::Mat<double> output(input);
- Log::Info << "Performing '" << imputeStrategy << "' imputation strategy "
- << "on feature '" << feature << endl;
+ Log::Info << "Performing '" << strategy << "' imputation strategy "
+ << "on dimension '" << dimension << endl;
// custom strategy only
- if (imputeStrategy == "custom")
+ if (strategy == "custom")
{
Log::Info << "Replacing all '" << missingValue << "' with '" << customValue
<< "'." << endl;
Imputer<double, MapperType, CustomImputation<double>> impu(info);
- impu.Impute(input, output, missingValue, customValue, feature);
+ impu.Impute(input, output, missingValue, customValue, dimension);
}
else
{
Log::Info << "Replacing all '" << missingValue << "' with '"
- << imputeStrategy << "'." << endl;
-
- Imputer<double, MapperType, MeanImputation<double>> impu(info);
- impu.Impute(input, output, missingValue, feature);
+ << strategy << "' strategy." << endl;
+
+ if (strategy == "mean")
+ {
+ Imputer<double, MapperType, MeanImputation<double>> impu(info);
+ impu.Impute(input, output, missingValue, dimension);
+ }
+ else if (strategy == "median")
+ {
+ Imputer<double, MapperType, MedianImputation<double>> impu(info);
+ impu.Impute(input, output, missingValue, dimension);
+ }
+ else if (strategy == "listwise")
+ {
+ Imputer<double, MapperType, ListwiseDeletion<double>> impu(info);
+ impu.Impute(input, output, missingValue, dimension);
+ }
+ else
+ {
+ Log::Warn << "You did not choose any imputation strategy" << endl;
+ }
}
// for testing purpose
More information about the mlpack-git mailing list