[mlpack-git] master: optimize preprocess imputer executable (c3aeba1)

gitdub at mlpack.org gitdub at mlpack.org
Mon Jul 18 02:00:05 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2

>---------------------------------------------------------------

commit c3aeba1fc8481ff08e5c689907e421f747b913ad
Author: Keon Kim <kwk236 at gmail.com>
Date:   Mon Jul 18 13:40:41 2016 +0900

    optimize preprocess imputer executable


>---------------------------------------------------------------

c3aeba1fc8481ff08e5c689907e421f747b913ad
 .../methods/preprocess/preprocess_imputer_main.cpp     | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
index bacc040..c25f3a9 100644
--- a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
@@ -99,14 +99,20 @@ int main(int argc, char** argv)
   MissingPolicy policy(missingSet);
   using MapperType = DatasetMapper<MissingPolicy>;
   DatasetMapper<MissingPolicy> info(policy);
+  std::vector<size_t> dirtyDimensions;
 
   Load(inputFile, input, info, true, true);
 
   // print how many mapping exist in each dimensions
   for (size_t i = 0; i < input.n_rows; ++i)
   {
-    Log::Info << info.NumMappings(i) << " mappings in dimension " << i << "."
+    size_t numMappings = info.NumMappings(i);
+    Log::Info << numMappings << " mappings in dimension " << i << "."
         << endl;
+    if (numMappings > 0)
+    {
+      dirtyDimensions.push_back(i);
+    }
   }
 
   // Initialize imputer class
@@ -134,6 +140,7 @@ int main(int argc, char** argv)
         << endl;
   }
 
+  Timer::Start("imputation");
   if (CLI::HasParam("dimension"))
   {
     // when --dimension is specified,
@@ -142,7 +149,7 @@ int main(int argc, char** argv)
         << "to replace '" << missingValue << "' on dimension " << dimension
         << "." << endl;
 
-    imputer.Impute(input, output, missingValue, dimension);
+    imputer.Impute(input, missingValue, dimension);
   }
   else
   {
@@ -151,16 +158,17 @@ int main(int argc, char** argv)
     Log::Info << "Performing '" << strategy << "' imputation strategy "
         << "to replace '" << missingValue << "' on all dimensions." << endl;
 
-    for (size_t i = 0; i < input.n_rows; ++i)
+    for (size_t i : dirtyDimensions)
     {
-      imputer.Impute(input, output, missingValue, i);
+      imputer.Impute(input, missingValue, i);
     }
   }
+  Timer::Stop("imputation");
 
   if (!outputFile.empty())
   {
     Log::Info << "Saving results to '" << outputFile << "'." << endl;
-    Save(outputFile, output, false);
+    Save(outputFile, input, false);
   }
 }
 




More information about the mlpack-git mailing list