[mlpack-git] master: updates and fixes on imputation methods (e09d9bc)
gitdub at mlpack.org
gitdub at mlpack.org
Mon Jul 25 12:18:48 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2
>---------------------------------------------------------------
commit e09d9bc713ebc66a95a2ed824f60ed3f1b09ceff
Author: Keon Kim <kwk236 at gmail.com>
Date: Tue Jun 28 14:14:06 2016 +0900
updates and fixes on imputation methods
>---------------------------------------------------------------
e09d9bc713ebc66a95a2ed824f60ed3f1b09ceff
.../data/imputation_methods/mean_imputation.hpp | 9 ++---
src/mlpack/core/data/imputer.hpp | 41 +++++++++++++---------
src/mlpack/tests/imputation_test.cpp | 2 +-
3 files changed, 30 insertions(+), 22 deletions(-)
diff --git a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
index b827405..43f14a0 100644
--- a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
@@ -42,7 +42,6 @@ class MeanImputation
// nan. while doing that, remember where mappedValue or NaN exists.
if (transpose)
{
- Log::Debug << "transpose mean imputation" << std::endl;
for (size_t i = 0; i < input.n_cols; ++i)
{
if (input(dimension, i) == mappedValue)
@@ -58,7 +57,6 @@ class MeanImputation
}
else
{
- Log::Debug << "un-transpose mean imputation" << std::endl;
for (size_t i = 0; i < input.n_rows; ++i)
{
if (input(i, dimension) == mappedValue)
@@ -72,8 +70,11 @@ class MeanImputation
}
}
}
- Log::Debug << "sum: " << sum << std::endl;
- Log::Debug << "elems: " << elems << std::endl;
+
+ if (elems == 0)
+ Log::Fatal << "it is impossible to calculate mean; no valid elements in "
+ << "the dimension" << std::endl;
+
// calculate mean;
const double mean = sum / elems;
diff --git a/src/mlpack/core/data/imputer.hpp b/src/mlpack/core/data/imputer.hpp
index c298d05..b7f4bee 100644
--- a/src/mlpack/core/data/imputer.hpp
+++ b/src/mlpack/core/data/imputer.hpp
@@ -2,8 +2,8 @@
* @file imputer.hpp
* @author Keon Kim
*
- * Defines Imputer(), a utility function to replace missing variables
- * in a dataset.
+ * Defines the Imputer class, a utility to replace missing values in a
+ * dataset.
*/
#ifndef MLPACK_CORE_DATA_IMPUTER_HPP
#define MLPACK_CORE_DATA_IMPUTER_HPP
@@ -14,12 +14,12 @@ namespace mlpack {
namespace data {
/**
- * This class implements a way to replace target values. It is dependent on the
- * user defined StrategyType and MapperType used to hold dataset's information.
+ * Given a dataset of a particular datatype, replace the user-specified missing
+ * value with a value that depends on the StrategyType and MapperType.
*
- * @tparam Option of imputation strategy.
- * @tparam MapperType that is used to hold dataset information.
- * @tparam primitive type of input and output's armadillo matrix.
+ * @tparam T Type of armadillo matrix used for imputation strategy.
+ * @tparam MapperType DatasetMapper that is used to hold dataset information.
+ * @tparam StrategyType Imputation strategy used.
*/
template<typename T, typename MapperType, typename StrategyType>
class Imputer
@@ -29,7 +29,9 @@ class Imputer
mapper(std::move(mapper)),
transpose(transpose)
{
- // nothing to initialize here
+ //static_assert(std::is_same<typename std::decay<MapperType>::type,
+ //data::IncrementPolicy>::value, "The type of MapperType must be "
+ //"IncrementPolicy");
}
Imputer(MapperType mapper, StrategyType strategy, bool transpose = true):
@@ -37,7 +39,9 @@ class Imputer
mapper(std::move(mapper)),
transpose(transpose)
{
- // nothing to initialize here
+ //static_assert(std::is_same<typename std::decay<MapperType>::type,
+ //data::IncrementPolicy>::value, "The type of MapperType must be "
+ //"IncrementPolicy");
}
/**
@@ -45,11 +49,9 @@ class Imputer
* strategy.
*
* @param input Input dataset to apply imputation.
- * @param output
- * @oaran targetValue
- * @param mapper DatasetInfo object that holds informations about the dataset.
- * @param dimension.
- * @param transpose.
+ * @param output Armadillo matrix to save the results
+ * @param missingValue User defined missing value; it can be anything.
+ * @param dimension Dimension to apply the imputation.
*/
void Impute(const arma::Mat<T>& input,
arma::Mat<T>& output,
@@ -61,8 +63,8 @@ class Imputer
}
/**
- * This overload of Impute() lets users to define custom value that
- * can be replaced with the target value.
+ * This overload of Impute() lets users define a custom value that can be
+ * replaced with the target value.
*/
void Impute(const arma::Mat<T>& input,
arma::Mat<T>& output,
@@ -71,7 +73,12 @@ class Imputer
const size_t dimension)
{
T mappedValue = static_cast<T>(mapper.UnmapValue(missingValue, dimension));
- strategy.Apply(input, output, mappedValue, customValue, dimension, transpose);
+ strategy.Apply(input,
+ output,
+ mappedValue,
+ customValue,
+ dimension,
+ transpose);
}
//! Get the strategy
diff --git a/src/mlpack/tests/imputation_test.cpp b/src/mlpack/tests/imputation_test.cpp
index 02f56b6..ed776c7 100644
--- a/src/mlpack/tests/imputation_test.cpp
+++ b/src/mlpack/tests/imputation_test.cpp
@@ -17,7 +17,7 @@
#include <mlpack/core/data/imputation_methods/median_imputation.hpp>
#include <boost/test/unit_test.hpp>
-#include "old_boost_test_definitions.hpp"
+#include "test_tools.hpp"
using namespace mlpack;
using namespace mlpack::data;
More information about the mlpack-git
mailing list