[mlpack-git] master: support transpose mapping (56a0361)

gitdub at mlpack.org gitdub at mlpack.org
Tue May 31 11:29:03 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/1f562a1aba7ae55475afcc95659511c2b7f694e5...5b8fdce471328f722fcd8c0f22a6d995ce22c98b

>---------------------------------------------------------------

commit 56a036116f07e76692766b9b1f1410887f2deb96
Author: stereomatchingkiss <stereomatchingkiss at gmail.com>
Date:   Tue May 31 23:29:03 2016 +0800

    support transpose mapping


>---------------------------------------------------------------

56a036116f07e76692766b9b1f1410887f2deb96
 src/mlpack/core/data/load_impl.hpp | 139 +++++++++++++++++++++++++------------
 1 file changed, 95 insertions(+), 44 deletions(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index f6fc93e..cab577e 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -29,6 +29,74 @@
 namespace mlpack {
 namespace data {
 
+namespace details{
+
+template<typename Tokenizer>
+std::vector<std::string> ToTokens(Tokenizer &lineTok)
+{
+  std::vector<std::string> tokens;
+  std::transform(std::begin(lineTok), std::end(lineTok),
+                 std::back_inserter(tokens),
+                 [&tokens](std::string const &str)
+  {
+    std::string trimmedToken(str);
+    boost::trim(trimmedToken);
+    return std::move(trimmedToken);
+  });
+
+  return tokens;
+}
+
+void TransPoseTokens(std::vector<std::vector<std::string>> const &input,
+                     std::vector<std::string> &output,
+                     size_t index)
+{
+  output.clear();
+  for(size_t i = 0; i != input.size(); ++i)
+  {
+    output.emplace_back(input[i][index]);
+  }
+}
+
+template<typename eT>
+void MapToNumerical(std::vector<std::string> const &tokens,
+                    size_t &row,
+                    DatasetInfo &info,
+                    arma::Mat<eT> &matrix)
+{
+  auto notNumber = [](std::string const &str)
+  {
+    eT val(0);
+    std::stringstream token;
+    token.str(str);
+    token>>val;
+    return token.fail();
+  };
+
+  bool const notNumeric = std::any_of(std::begin(tokens),
+                                     std::end(tokens), notNumber);
+  if(notNumeric)
+  {
+    for(size_t i = 0; i != tokens.size(); ++i)
+    {
+      eT const val = static_cast<eT>(info.MapString(tokens[i], row));
+      matrix.at(row, i) = val;
+    }
+  }
+  else
+  {
+    std::stringstream token;
+    for(size_t i = 0; i != tokens.size(); ++i)
+    {
+      token.str(tokens[i]);
+      token>>matrix.at(row, i);
+      token.clear();
+    }
+  }
+}
+
+}
+
 template<typename eT>
 bool inline inplace_transpose(arma::Mat<eT>& X)
 {
@@ -37,7 +105,7 @@ bool inline inplace_transpose(arma::Mat<eT>& X)
     X = arma::trans(X);
     return false;
   }
-  catch (std::bad_alloc& exception)
+  catch (std::bad_alloc&)
   {
 #if (ARMA_VERSION_MAJOR >= 4) || \
     ((ARMA_VERSION_MAJOR == 3) && (ARMA_VERSION_MINOR >= 930))
@@ -388,53 +456,36 @@ bool Load(const std::string& filename,
     stream.close();
     stream.open(filename, std::fstream::in);    
 
-    auto notNumber = [](std::string const &str)
-    {
-      eT val(0);
-      std::stringstream token;
-      token.str(str);
-      token>>val;
-      return token.fail();
-    };
-    size_t row = 0;
-    while (!stream.bad() && !stream.fail() && !stream.eof())
+    if(transpose)
     {
-      // Extract line by line.
-      std::getline(stream, buffer, '\n');
-      Tokenizer lineTok(buffer, sep);
+      std::vector<std::vector<std::string>> tokensArray;
+      while (!stream.bad() && !stream.fail() && !stream.eof())
+      {
+        // Extract line by line.
+        std::getline(stream, buffer, '\n');
+        Tokenizer lineTok(buffer, sep);
+        tokensArray.emplace_back(details::ToTokens(lineTok));
+      }
       std::vector<std::string> tokens;
-      if(!transpose)
+      for(size_t i = 0; i != cols; ++i)
       {
-        std::transform(std::begin(lineTok), std::end(lineTok),
-                       std::back_inserter(tokens),
-                       [&tokens](std::string const &str)
-        {
-          std::string trimmedToken(str);
-          boost::trim(trimmedToken);
-          return std::move(trimmedToken);
-        });
-        bool const notNumeric = std::any_of(std::begin(tokens),
-                                           std::end(tokens), notNumber);
-        if(notNumeric)
-        {
-          for(size_t i = 0; i != tokens.size(); ++i)
-          {
-            eT const val = static_cast<eT>(info.MapString(tokens[i], row));
-            matrix.at(row, i) = val;
-          }
-        }
-        else
-        {          
-          std::stringstream token;
-          for(size_t i = 0; i != tokens.size(); ++i)
-          {            
-            token.str(tokens[i]);
-            token>>matrix.at(row, i);
-            token.clear();
-          }
-        }
+        details::TransPoseTokens(tokensArray, tokens, i);
+        details::MapToNumerical(tokens, i,
+                                info, matrix);
+      }
+    }
+    else
+    {
+      size_t row = 0;
+      while (!stream.bad() && !stream.fail() && !stream.eof())
+      {
+        // Extract line by line.
+        std::getline(stream, buffer, '\n');
+        Tokenizer lineTok(buffer, sep);
+        details::MapToNumerical(details::ToTokens(lineTok), row,
+                                info, matrix);
+        ++row;
       }
-      ++row;
     }
   }
   else if (extension == "arff")




More information about the mlpack-git mailing list