[mlpack-git] master: optimize imputation methods with output overloads (787fd82)

gitdub at mlpack.org gitdub at mlpack.org
Sun Jul 17 22:26:43 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2

>---------------------------------------------------------------

commit 787fd8245b70dca571411f9b2bedbadf218d7dd6
Author: Keon Kim <kwk236 at gmail.com>
Date:   Mon Jul 18 11:26:43 2016 +0900

    optimize imputation methods with output overloads


>---------------------------------------------------------------

787fd8245b70dca571411f9b2bedbadf218d7dd6
 .../data/imputation_methods/custom_imputation.hpp  | 48 +++++++++++++----
 .../data/imputation_methods/mean_imputation.hpp    | 62 +++++++++++++++-------
 .../data/imputation_methods/median_imputation.hpp  | 58 +++++++++++++-------
 src/mlpack/core/data/imputer.hpp                   |  1 -
 4 files changed, 120 insertions(+), 49 deletions(-)

diff --git a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
index 35326a7..f7d8bde 100644
--- a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
@@ -43,29 +43,57 @@ class CustomImputation
               const size_t dimension,
               const bool columnMajor = true)
   {
-    // initiate output
-    output = input;
+    // set size of the output
+    output.set_size(input.n_rows, input.n_cols);
 
     // replace the target value to custom value
     if (columnMajor)
     {
-      for (size_t i = 0; i < input.n_cols; ++i)
+      for (size_t row = 0; row < input.n_rows; ++row)
       {
-        if (input(dimension, i) == mappedValue ||
-            std::isnan(input(dimension, i)))
+        for (size_t col = 0; col < input.n_cols; ++col)
         {
-          output(dimension, i) = customValue;
+          if (row == dimension)
+          {
+            if (input(row, col) == mappedValue ||
+                std::isnan(input(row, col)))
+            {
+              output(row, col) = customValue;
+            }
+            else
+            {
+              output(row, col) = input(row, col);
+            }
+          }
+          else
+          {
+            output(row, col) = input(row, col);
+          }
         }
       }
     }
     else
     {
-      for (size_t i = 0; i < input.n_rows; ++i)
+      for (size_t col = 0; col < input.n_cols; ++ col)
       {
-        if (input(i, dimension) == mappedValue ||
-            std::isnan(input(i, dimension)))
+        for (size_t row = 0; row < input.n_rows; ++row)
         {
-          output(i, dimension) = customValue;
+          if (col == dimension)
+          {
+            if (input(row, col) == mappedValue ||
+                std::isnan(input(row, col)))
+            {
+              output(row, col) = customValue;
+            }
+            else
+            {
+              output(row, col) = input(row, col);
+            }
+          }
+          else
+          {
+            output(row, col) = input(row, col);
+          }
         }
       }
     }
diff --git a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
index 6c6a7e4..b276ca8 100644
--- a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
@@ -36,8 +36,8 @@ class MeanImputation
               const size_t dimension,
               const bool columnMajor = true)
   {
-    // initiate output
-    output = input;
+    // set size of the output
+    output.set_size(input.n_rows, input.n_cols);
 
     double sum = 0;
     size_t elems = 0; // excluding nan or missing target
@@ -50,33 +50,55 @@ class MeanImputation
     // nan. while doing that, remember where mappedValue or NaN exists.
     if (columnMajor)
     {
-      for (size_t i = 0; i < input.n_cols; ++i)
+      for (size_t row = 0; row < input.n_rows; ++row)
       {
-        if (input(dimension, i) == mappedValue ||
-            std::isnan(input(dimension, i)))
+        for (size_t col = 0; col < input.n_cols; ++col)
         {
-          targets.emplace_back(dimension, i);
-        }
-        else
-        {
-          elems++;
-          sum += input(dimension, i);
+          if (row == dimension)
+          {
+            if (input(row, col) == mappedValue ||
+                std::isnan(input(row, col)))
+            {
+              targets.emplace_back(row, col);
+            }
+            else
+            {
+              elems++;
+              sum += input(row, col);
+              output(row, col) = input(row, col);
+            }
+          }
+          else
+          {
+            output(row, col) = input(row, col);
+          }
         }
       }
     }
     else
     {
-      for (size_t i = 0; i < input.n_rows; ++i)
+      for (size_t col = 0; col < input.n_cols; ++col)
       {
-        if (input(i, dimension) == mappedValue ||
-            std::isnan(input(i, dimension)))
+        for (size_t row = 0; row < input.n_rows; ++row)
         {
-          targets.emplace_back(i, dimension);
-        }
-        else
-        {
-          elems++;
-          sum += input(i, dimension);
+          if (col == dimension)
+          {
+            if (input(row, col) == mappedValue ||
+                std::isnan(input(row, col)))
+            {
+              targets.emplace_back(row, col);
+            }
+            else
+            {
+              elems++;
+              sum += input(row, col);
+              output(row, col) = input(row, col);
+            }
+          }
+          else
+          {
+            output(row, col) = input(row, col);
+          }
         }
       }
     }
diff --git a/src/mlpack/core/data/imputation_methods/median_imputation.hpp b/src/mlpack/core/data/imputation_methods/median_imputation.hpp
index 5c03bc2..658816e 100644
--- a/src/mlpack/core/data/imputation_methods/median_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/median_imputation.hpp
@@ -37,8 +37,8 @@ class MedianImputation
               const size_t dimension,
               const bool columnMajor = true)
   {
-    //initiate output
-    output = input;
+    // set size of the output
+    output.set_size(input.n_rows, input.n_cols);
 
     using PairType = std::pair<size_t, size_t>;
     // dimensions and indexes are saved as pairs inside this vector.
@@ -48,31 +48,53 @@ class MedianImputation
 
     if (columnMajor)
     {
-      for (size_t i = 0; i < input.n_cols; ++i)
+      for (size_t row = 0; row < input.n_rows; ++row)
       {
-        if (input(dimension, i) == mappedValue ||
-            std::isnan(input(dimension, i)))
-        {
-          targets.emplace_back(dimension, i);
-        }
-        else
+        for (size_t col = 0; col < input.n_cols; ++col)
         {
-          elemsToKeep.push_back(input(dimension, i));
+          if (row == dimension)
+          {
+            if (input(row, col) == mappedValue ||
+                std::isnan(input(row, col)))
+            {
+              targets.emplace_back(row, col);
+            }
+            else
+            {
+              elemsToKeep.push_back(input(row, col));
+              output(row, col) = input(row, col);
+            }
+          }
+          else
+          {
+            output(row, col) = input(row, col);
+          }
         }
       }
     }
     else
     {
-      for (size_t i = 0; i < input.n_rows; ++i)
+      for (size_t col = 0; col < input.n_cols; ++col)
       {
-        if (input(i, dimension) == mappedValue ||
-            std::isnan(input(i, dimension)))
-        {
-          targets.emplace_back(i, dimension);
-        }
-        else
+        for (size_t row = 0; row < input.n_rows; ++row)
         {
-           elemsToKeep.push_back(input(i, dimension));
+          if (col == dimension)
+          {
+            if (input(row, col) == mappedValue ||
+                std::isnan(input(row, col)))
+            {
+              targets.emplace_back(row, col);
+            }
+            else
+            {
+              elemsToKeep.push_back(input(row, col));
+              output(row, col) = input(row, col);
+            }
+          }
+          else
+          {
+            output(row, col) = input(row, col);
+          }
         }
       }
     }
diff --git a/src/mlpack/core/data/imputer.hpp b/src/mlpack/core/data/imputer.hpp
index 4787343..f6134a7 100644
--- a/src/mlpack/core/data/imputer.hpp
+++ b/src/mlpack/core/data/imputer.hpp
@@ -79,7 +79,6 @@ class Imputer
     strategy.Impute(input, mappedValue, dimension, columnMajor);
   }
 
-
   //! Get the strategy
   const StrategyType& Strategy() const { return strategy; }
 




More information about the mlpack-git mailing list