[mlpack-git] master: fix transpose problem (3b8ffd0)

gitdub at mlpack.org gitdub at mlpack.org
Mon Jul 25 12:18:42 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2

>---------------------------------------------------------------

commit 3b8ffd0766cca7b60d5a6c552b8d464ae7ac3920
Author: Keon Kim <kwk236 at gmail.com>
Date:   Tue Jun 28 06:38:44 2016 +0900

    fix transpose problem


>---------------------------------------------------------------

3b8ffd0766cca7b60d5a6c552b8d464ae7ac3920
 .../data/imputation_methods/custom_imputation.hpp  |  4 +-
 .../data/imputation_methods/listwise_deletion.hpp  |  8 ++--
 .../data/imputation_methods/mean_imputation.hpp    | 29 ++++++------
 .../data/imputation_methods/median_imputation.hpp  |  4 +-
 .../methods/preprocess/preprocess_imputer_main.cpp | 55 +++++++++++++++-------
 5 files changed, 61 insertions(+), 39 deletions(-)

diff --git a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
index c8197d6..73100e2 100644
--- a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
@@ -31,7 +31,7 @@ class CustomImputation
     // replace the target value to custom value
     if (transpose)
     {
-      for (size_t i = 0; i < input.n_rows; ++i)
+      for (size_t i = 0; i < input.n_cols; ++i)
       {
         if (input(dimension, i) == mappedValue)
         {
@@ -41,7 +41,7 @@ class CustomImputation
     }
     else
     {
-      for (size_t i = 0; i < input.n_cols; ++i)
+      for (size_t i = 0; i < input.n_rows; ++i)
       {
         if (input(i, dimension) == mappedValue)
         {
diff --git a/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp b/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp
index f089da1..a2de05d 100644
--- a/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp
+++ b/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp
@@ -33,7 +33,7 @@ class ListwiseDeletion
 
     if (transpose)
     {
-      for (size_t i = 0; i < input.n_rows; ++i)
+      for (size_t i = 0; i < input.n_cols; ++i)
       {
          if (input(dimension, i) == mappedValue)
          {
@@ -43,11 +43,11 @@ class ListwiseDeletion
     }
     else
     {
-      for (size_t i = 0; i < input.n_cols; ++i)\
+      for (size_t i = 0; i < input.n_rows; ++i)\
       {
-        if (input(dimension, i) == mappedValue)
+        if (input(i, dimension) == mappedValue)
         {
-          output.shed_col(i);
+          output.shed_col(dimension);
         }
       }
     }
diff --git a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
index 7b9c935..b827405 100644
--- a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
@@ -30,37 +30,40 @@ class MeanImputation
     // initiate output
     output = input;
 
-    double sum;
+    double sum = 0;
     size_t elems = 0; // excluding nan or missing target
 
     using PairType = std::pair<size_t, size_t>;
     // dimensions and indexes are saved as pairs inside this vector.
     std::vector<PairType> targets;
 
+
     // calculate number of elements and sum of them excluding mapped value or
     // nan. while doing that, remember where mappedValue or NaN exists.
     if (transpose)
     {
-      for (size_t i = 0; i < input.n_rows; ++i)
+      Log::Debug << "transpose mean imputation" << std::endl;
+      for (size_t i = 0; i < input.n_cols; ++i)
       {
-        if (input(i, dimension) == mappedValue)
+        if (input(dimension, i) == mappedValue)
         {
-          targets.push_back(std::make_pair(i, dimension));
+          targets.emplace_back(dimension, i);
         }
         else
         {
           elems++;
-          sum += input(i, dimension);
+          sum += input(dimension, i);
         }
       }
     }
     else
     {
-      for (size_t i = 0; i < input.n_cols; ++i)
+      Log::Debug << "un-transpose mean imputation" << std::endl;
+      for (size_t i = 0; i < input.n_rows; ++i)
       {
-        if (input(dimension, i) == mappedValue)
+        if (input(i, dimension) == mappedValue)
         {
-          targets.push_back(std::make_pair(dimension, i));
+          targets.emplace_back(i, dimension);
         }
         else
         {
@@ -69,18 +72,16 @@ class MeanImputation
         }
       }
     }
-
+    Log::Debug << "sum: " << sum << std::endl;
+    Log::Debug << "elems: " << elems << std::endl;
     // calculate mean;
-    double mean = sum / elems;
+    const double mean = sum / elems;
 
     // Now replace the calculated mean to the missing variables
     // It only needs to loop through targets vector, not the whole matrix.
     for (const PairType& target : targets)
     {
-      if (input(target.first, target.second) == mappedValue)
-      {
-        output(target.first, target.second) = mean;
-      }
+      output(target.first, target.second) = mean;
     }
   }
 }; // class MeanImputation
diff --git a/src/mlpack/core/data/imputation_methods/median_imputation.hpp b/src/mlpack/core/data/imputation_methods/median_imputation.hpp
index 84c5425..0a59103 100644
--- a/src/mlpack/core/data/imputation_methods/median_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/median_imputation.hpp
@@ -34,7 +34,7 @@ class MedianImputation
     if (transpose)
     {
       arma::Mat<T> medianMat = arma::median(input, 1);
-      for (size_t i = 0; i < input.n_rows; ++i)
+      for (size_t i = 0; i < input.n_cols; ++i)
       {
         if (input(dimension, i) == mappedValue)
         {
@@ -45,7 +45,7 @@ class MedianImputation
     else
     {
       arma::Mat<T> medianMat = arma::median(input, 0);
-      for (size_t i = 0; i < input.n_cols; ++i)
+      for (size_t i = 0; i < input.n_rows; ++i)
       {
         if (input(i, dimension) == mappedValue)
         {
diff --git a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
index f423036..b833ab1 100644
--- a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
@@ -22,9 +22,9 @@ PARAM_STRING_REQ("input_file", "File containing data,", "i");
 PARAM_STRING("output_file", "File to save output", "o", "");
 PARAM_STRING("missing_value", "User defined missing value", "m", "")
 PARAM_STRING("map_policy", "mapping policy to be used while loading", "p", "")
-PARAM_STRING("impute_strategy", "imputation strategy to be applied", "s", "")
+PARAM_STRING("strategy", "imputation strategy to be applied", "s", "")
 PARAM_DOUBLE("custom_value", "user_defined custom value", "c", 0.0)
-PARAM_INT("feature", "the feature to apply imputation", "f", 0);
+PARAM_INT("dimension", "the dimension to apply imputation", "d", 0);
 
 using namespace mlpack;
 using namespace arma;
@@ -41,8 +41,8 @@ int main(int argc, char** argv)
   const string missingValue = CLI::GetParam<string>("missing_value");
   const string mapPolicy = CLI::GetParam<string>("map_policy");
   const double customValue = CLI::GetParam<double>("custom_value");
-  const size_t feature = (size_t) CLI::GetParam<int>("feature");
-  string imputeStrategy = CLI::GetParam<string>("impute_strategy");
+  const size_t dimension = (size_t) CLI::GetParam<int>("dimension");
+  string strategy = CLI::GetParam<string>("strategy");
 
   // missing value should be specified
   if (!CLI::HasParam("missing_value"))
@@ -54,11 +54,15 @@ int main(int argc, char** argv)
     Log::Warn << "--output_file is not specified, no "
         << "results from this program will be saved!" << endl;
 
+  // warn if user did not specify dimension
+  if (!CLI::HasParam("dimension"))
+    Log::Warn << "--dimension is required to be specified!" << endl;
+
   // if custom value is specified, and imputation strategy is not,
   // set imputation strategy to "custom"
   if (CLI::HasParam("custom_value") && !CLI::HasParam("impute_strategy"))
   {
-    imputeStrategy = "custom";
+    strategy = "custom";
     Log::Warn << "--custom_value is specified without --impute_strategy, "
         << "--impute_strategy is automatically set to 'custom'." << endl;
   }
@@ -66,12 +70,12 @@ int main(int argc, char** argv)
   // custom value and any other impute strategies cannot be specified at
   // the same time.
   if (CLI::HasParam("custom_value") && CLI::HasParam("impute_strategy") &&
-      imputeStrategy != "custom")
+      strategy != "custom")
     Log::Fatal << "--custom_value cannot be specified with "
         << "impute strategies excluding 'custom' strategy" << endl;
 
   // custom_value must be specified when using "custom" imputation strategy
-  if ((imputeStrategy == "custom") && !CLI::HasParam("custom_value"))
+  if ((strategy == "custom") && !CLI::HasParam("custom_value"))
     Log::Fatal << "--custom_value must be specified when using "
         << "'custom' strategy" << endl;
 
@@ -87,34 +91,51 @@ int main(int argc, char** argv)
   // for testing purpose
   Log::Info << input << endl;
 
-  // print how many mapping exist in each features
+  // print how many mapping exist in each dimensions
   for (size_t i = 0; i < input.n_rows; ++i)
   {
-    Log::Info << info.NumMappings(i) << " mappings in feature " << i << "."
+    Log::Info << info.NumMappings(i) << " mappings in dimension " << i << "."
         << endl;
   }
 
   arma::Mat<double> output(input);
 
 
-  Log::Info << "Performing '" << imputeStrategy << "' imputation strategy "
-      << "on feature '" << feature << endl;
+  Log::Info << "Performing '" << strategy << "' imputation strategy "
+      << "on dimension '" << dimension << endl;
 
   // custom strategy only
-  if (imputeStrategy == "custom")
+  if (strategy == "custom")
   {
     Log::Info << "Replacing all '" << missingValue << "' with '" << customValue
         << "'." << endl;
     Imputer<double, MapperType, CustomImputation<double>> impu(info);
-    impu.Impute(input, output, missingValue, customValue, feature);
+    impu.Impute(input, output, missingValue, customValue, dimension);
   }
   else
   {
     Log::Info << "Replacing all '" << missingValue << "' with '"
-        << imputeStrategy << "'." << endl;
-
-    Imputer<double, MapperType, MeanImputation<double>> impu(info);
-    impu.Impute(input, output, missingValue, feature);
+        << strategy << "' strategy." << endl;
+
+    if (strategy == "mean")
+    {
+      Imputer<double, MapperType, MeanImputation<double>> impu(info);
+      impu.Impute(input, output, missingValue, dimension);
+    }
+    else if (strategy == "median")
+    {
+      Imputer<double, MapperType, MedianImputation<double>> impu(info);
+      impu.Impute(input, output, missingValue, dimension);
+    }
+    else if (strategy == "listwise")
+    {
+      Imputer<double, MapperType, ListwiseDeletion<double>> impu(info);
+      impu.Impute(input, output, missingValue, dimension);
+    }
+    else
+    {
+      Log::Warn << "You did not choose any imputation strategy" << endl;
+    }
   }
 
   // for testing purpose




More information about the mlpack-git mailing list