[mlpack-git] master: updates and fixes on imputation methods (e09d9bc)

gitdub at mlpack.org gitdub at mlpack.org
Mon Jul 25 12:18:48 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2

>---------------------------------------------------------------

commit e09d9bc713ebc66a95a2ed824f60ed3f1b09ceff
Author: Keon Kim <kwk236 at gmail.com>
Date:   Tue Jun 28 14:14:06 2016 +0900

    updates and fixes on imputation methods


>---------------------------------------------------------------

e09d9bc713ebc66a95a2ed824f60ed3f1b09ceff
 .../data/imputation_methods/mean_imputation.hpp    |  9 ++---
 src/mlpack/core/data/imputer.hpp                   | 41 +++++++++++++---------
 src/mlpack/tests/imputation_test.cpp               |  2 +-
 3 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
index b827405..43f14a0 100644
--- a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
@@ -42,7 +42,6 @@ class MeanImputation
     // nan. while doing that, remember where mappedValue or NaN exists.
     if (transpose)
     {
-      Log::Debug << "transpose mean imputation" << std::endl;
       for (size_t i = 0; i < input.n_cols; ++i)
       {
         if (input(dimension, i) == mappedValue)
@@ -58,7 +57,6 @@ class MeanImputation
     }
     else
     {
-      Log::Debug << "un-transpose mean imputation" << std::endl;
       for (size_t i = 0; i < input.n_rows; ++i)
       {
         if (input(i, dimension) == mappedValue)
@@ -72,8 +70,11 @@ class MeanImputation
         }
       }
     }
-    Log::Debug << "sum: " << sum << std::endl;
-    Log::Debug << "elems: " << elems << std::endl;
+
+    if (elems == 0)
+      Log::Fatal << "it is impossible to calculate mean; no valid elements in "
+          << "the dimension" << std::endl;
+
     // calculate mean;
     const double mean = sum / elems;
 
diff --git a/src/mlpack/core/data/imputer.hpp b/src/mlpack/core/data/imputer.hpp
index c298d05..b7f4bee 100644
--- a/src/mlpack/core/data/imputer.hpp
+++ b/src/mlpack/core/data/imputer.hpp
@@ -2,8 +2,8 @@
  * @file imputer.hpp
  * @author Keon Kim
  *
- * Defines Imputer(), a utility function to replace missing variables
- * in a dataset.
+ * Defines the Imputer class, a utility to replace missing values in a
+ * dataset.
  */
 #ifndef MLPACK_CORE_DATA_IMPUTER_HPP
 #define MLPACK_CORE_DATA_IMPUTER_HPP
@@ -14,12 +14,12 @@ namespace mlpack {
 namespace data {
 
 /**
- * This class implements a way to replace target values. It is dependent on the
- * user defined StrategyType and MapperType used to hold dataset's information.
+ * Given a dataset of a particular datatype, replace each user-specified
+ * missing value with a value determined by the StrategyType and MapperType.
  *
- * @tparam Option of imputation strategy.
- * @tparam MapperType that is used to hold dataset information.
- * @tparam primitive type of input and output's armadillo matrix.
+ * @tparam T Element type of the armadillo matrices used as input and output.
+ * @tparam MapperType DatasetMapper that is used to hold dataset information.
+ * @tparam StrategyType Imputation strategy used.
  */
 template<typename T, typename MapperType, typename StrategyType>
 class Imputer
@@ -29,7 +29,9 @@ class Imputer
     mapper(std::move(mapper)),
     transpose(transpose)
   {
-  // nothing to initialize here
+    //static_assert(std::is_same<typename std::decay<MapperType>::type,
+        //data::IncrementPolicy>::value, "The type of MapperType must be "
+        //"IncrementPolicy");
   }
 
   Imputer(MapperType mapper, StrategyType strategy, bool transpose = true):
@@ -37,7 +39,9 @@ class Imputer
     mapper(std::move(mapper)),
     transpose(transpose)
   {
-  // nothing to initialize here
+    //static_assert(std::is_same<typename std::decay<MapperType>::type,
+        //data::IncrementPolicy>::value, "The type of MapperType must be "
+        //"IncrementPolicy");
   }
 
   /**
@@ -45,11 +49,9 @@ class Imputer
   * strategy.
   *
   * @param input Input dataset to apply imputation.
-  * @param output
-  * @oaran targetValue
-  * @param mapper DatasetInfo object that holds informations about the dataset.
-  * @param dimension.
-  * @param transpose.
+  * @param output Armadillo matrix to save the results
+  * @param missingValue User-defined missing value; it can be anything.
+  * @param dimension Dimension to apply the imputation.
   */
   void Impute(const arma::Mat<T>& input,
               arma::Mat<T>& output,
@@ -61,8 +63,8 @@ class Imputer
   }
 
   /**
-  * This overload of Impute() lets users to define custom value that
-  * can be replaced with the target value.
+  * This overload of Impute() lets users define a custom value to substitute
+  * for the target value.
   */
   void Impute(const arma::Mat<T>& input,
               arma::Mat<T>& output,
@@ -71,7 +73,12 @@ class Imputer
               const size_t dimension)
   {
     T mappedValue = static_cast<T>(mapper.UnmapValue(missingValue, dimension));
-    strategy.Apply(input, output, mappedValue, customValue, dimension, transpose);
+    strategy.Apply(input,
+                   output,
+                   mappedValue,
+                   customValue,
+                   dimension,
+                   transpose);
   }
 
   //! Get the strategy
diff --git a/src/mlpack/tests/imputation_test.cpp b/src/mlpack/tests/imputation_test.cpp
index 02f56b6..ed776c7 100644
--- a/src/mlpack/tests/imputation_test.cpp
+++ b/src/mlpack/tests/imputation_test.cpp
@@ -17,7 +17,7 @@
 #include <mlpack/core/data/imputation_methods/median_imputation.hpp>
 
 #include <boost/test/unit_test.hpp>
-#include "old_boost_test_definitions.hpp"
+#include "test_tools.hpp"
 
 using namespace mlpack;
 using namespace mlpack::data;




More information about the mlpack-git mailing list