[mlpack-git] master: optimize imputation methods with output overloads (787fd82)
gitdub at mlpack.org
gitdub at mlpack.org
Sun Jul 17 22:26:43 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2
>---------------------------------------------------------------
commit 787fd8245b70dca571411f9b2bedbadf218d7dd6
Author: Keon Kim <kwk236 at gmail.com>
Date: Mon Jul 18 11:26:43 2016 +0900
optimize imputation methods with output overloads
>---------------------------------------------------------------
787fd8245b70dca571411f9b2bedbadf218d7dd6
.../data/imputation_methods/custom_imputation.hpp | 48 +++++++++++++----
.../data/imputation_methods/mean_imputation.hpp | 62 +++++++++++++++-------
.../data/imputation_methods/median_imputation.hpp | 58 +++++++++++++-------
src/mlpack/core/data/imputer.hpp | 1 -
4 files changed, 120 insertions(+), 49 deletions(-)
diff --git a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
index 35326a7..f7d8bde 100644
--- a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
@@ -43,29 +43,57 @@ class CustomImputation
const size_t dimension,
const bool columnMajor = true)
{
- // initiate output
- output = input;
+ // set size of the output
+ output.set_size(input.n_rows, input.n_cols);
// replace the target value to custom value
if (columnMajor)
{
- for (size_t i = 0; i < input.n_cols; ++i)
+ for (size_t row = 0; row < input.n_rows; ++row)
{
- if (input(dimension, i) == mappedValue ||
- std::isnan(input(dimension, i)))
+ for (size_t col = 0; col < input.n_cols; ++col)
{
- output(dimension, i) = customValue;
+ if (row == dimension)
+ {
+ if (input(row, col) == mappedValue ||
+ std::isnan(input(row, col)))
+ {
+ output(row, col) = customValue;
+ }
+ else
+ {
+ output(row, col) = input(row, col);
+ }
+ }
+ else
+ {
+ output(row, col) = input(row, col);
+ }
}
}
}
else
{
- for (size_t i = 0; i < input.n_rows; ++i)
+ for (size_t col = 0; col < input.n_cols; ++ col)
{
- if (input(i, dimension) == mappedValue ||
- std::isnan(input(i, dimension)))
+ for (size_t row = 0; row < input.n_rows; ++row)
{
- output(i, dimension) = customValue;
+ if (col == dimension)
+ {
+ if (input(row, col) == mappedValue ||
+ std::isnan(input(row, col)))
+ {
+ output(row, col) = customValue;
+ }
+ else
+ {
+ output(row, col) = input(row, col);
+ }
+ }
+ else
+ {
+ output(row, col) = input(row, col);
+ }
}
}
}
diff --git a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
index 6c6a7e4..b276ca8 100644
--- a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
@@ -36,8 +36,8 @@ class MeanImputation
const size_t dimension,
const bool columnMajor = true)
{
- // initiate output
- output = input;
+ // set size of the output
+ output.set_size(input.n_rows, input.n_cols);
double sum = 0;
size_t elems = 0; // excluding nan or missing target
@@ -50,33 +50,55 @@ class MeanImputation
// nan. while doing that, remember where mappedValue or NaN exists.
if (columnMajor)
{
- for (size_t i = 0; i < input.n_cols; ++i)
+ for (size_t row = 0; row < input.n_rows; ++row)
{
- if (input(dimension, i) == mappedValue ||
- std::isnan(input(dimension, i)))
+ for (size_t col = 0; col < input.n_cols; ++col)
{
- targets.emplace_back(dimension, i);
- }
- else
- {
- elems++;
- sum += input(dimension, i);
+ if (row == dimension)
+ {
+ if (input(row, col) == mappedValue ||
+ std::isnan(input(row, col)))
+ {
+ targets.emplace_back(row, col);
+ }
+ else
+ {
+ elems++;
+ sum += input(row, col);
+ output(row, col) = input(row, col);
+ }
+ }
+ else
+ {
+ output(row, col) = input(row, col);
+ }
}
}
}
else
{
- for (size_t i = 0; i < input.n_rows; ++i)
+ for (size_t col = 0; col < input.n_cols; ++col)
{
- if (input(i, dimension) == mappedValue ||
- std::isnan(input(i, dimension)))
+ for (size_t row = 0; row < input.n_rows; ++row)
{
- targets.emplace_back(i, dimension);
- }
- else
- {
- elems++;
- sum += input(i, dimension);
+ if (col == dimension)
+ {
+ if (input(row, col) == mappedValue ||
+ std::isnan(input(row, col)))
+ {
+ targets.emplace_back(row, col);
+ }
+ else
+ {
+ elems++;
+ sum += input(row, col);
+ output(row, col) = input(row, col);
+ }
+ }
+ else
+ {
+ output(row, col) = input(row, col);
+ }
}
}
}
diff --git a/src/mlpack/core/data/imputation_methods/median_imputation.hpp b/src/mlpack/core/data/imputation_methods/median_imputation.hpp
index 5c03bc2..658816e 100644
--- a/src/mlpack/core/data/imputation_methods/median_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/median_imputation.hpp
@@ -37,8 +37,8 @@ class MedianImputation
const size_t dimension,
const bool columnMajor = true)
{
- //initiate output
- output = input;
+ // set size of the output
+ output.set_size(input.n_rows, input.n_cols);
using PairType = std::pair<size_t, size_t>;
// dimensions and indexes are saved as pairs inside this vector.
@@ -48,31 +48,53 @@ class MedianImputation
if (columnMajor)
{
- for (size_t i = 0; i < input.n_cols; ++i)
+ for (size_t row = 0; row < input.n_rows; ++row)
{
- if (input(dimension, i) == mappedValue ||
- std::isnan(input(dimension, i)))
- {
- targets.emplace_back(dimension, i);
- }
- else
+ for (size_t col = 0; col < input.n_cols; ++col)
{
- elemsToKeep.push_back(input(dimension, i));
+ if (row == dimension)
+ {
+ if (input(row, col) == mappedValue ||
+ std::isnan(input(row, col)))
+ {
+ targets.emplace_back(row, col);
+ }
+ else
+ {
+ elemsToKeep.push_back(input(row, col));
+ output(row, col) = input(row, col);
+ }
+ }
+ else
+ {
+ output(row, col) = input(row, col);
+ }
}
}
}
else
{
- for (size_t i = 0; i < input.n_rows; ++i)
+ for (size_t col = 0; col < input.n_cols; ++col)
{
- if (input(i, dimension) == mappedValue ||
- std::isnan(input(i, dimension)))
- {
- targets.emplace_back(i, dimension);
- }
- else
+ for (size_t row = 0; row < input.n_rows; ++row)
{
- elemsToKeep.push_back(input(i, dimension));
+ if (col == dimension)
+ {
+ if (input(row, col) == mappedValue ||
+ std::isnan(input(row, col)))
+ {
+ targets.emplace_back(row, col);
+ }
+ else
+ {
+ elemsToKeep.push_back(input(row, col));
+ output(row, col) = input(row, col);
+ }
+ }
+ else
+ {
+ output(row, col) = input(row, col);
+ }
}
}
}
diff --git a/src/mlpack/core/data/imputer.hpp b/src/mlpack/core/data/imputer.hpp
index 4787343..f6134a7 100644
--- a/src/mlpack/core/data/imputer.hpp
+++ b/src/mlpack/core/data/imputer.hpp
@@ -79,7 +79,6 @@ class Imputer
strategy.Impute(input, mappedValue, dimension, columnMajor);
}
-
//! Get the strategy
const StrategyType& Strategy() const { return strategy; }
More information about the mlpack-git mailing list