[mlpack-git] master: 1 : fixing issue #658; works with non-transposed data, does not work with transposed data yet (0e61fe2)

gitdub at mlpack.org gitdub at mlpack.org
Tue May 31 03:52:38 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/1f562a1aba7ae55475afcc95659511c2b7f694e5...5b8fdce471328f722fcd8c0f22a6d995ce22c98b

>---------------------------------------------------------------

commit 0e61fe24ce2fc4a79064fd16112c4f918e6cc121
Author: stereomatchingkiss <stereomatchingkiss at gmail.com>
Date:   Tue May 31 15:52:38 2016 +0800

    1 : fixing issue #658; works with non-transposed data, does not work
    with transposed data yet


>---------------------------------------------------------------

0e61fe24ce2fc4a79064fd16112c4f918e6cc121
 src/mlpack/core/data/load_impl.hpp | 93 ++++++++++++--------------------------
 1 file changed, 30 insertions(+), 63 deletions(-)

diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 947b360..c02fa5e 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -389,81 +389,48 @@ bool Load(const std::string& filename,
     stream.open(filename, std::fstream::in);
 
     // Extract line by line.
-    std::stringstream token;
+    auto notNumber = [](std::string const &str)
+    {
+      return std::any_of(std::begin(str), std::end(str),
+                         [](char c){ return !std::isdigit(c);});
+    };
     size_t row = 0;
     while (!stream.bad() && !stream.fail() && !stream.eof())
     {
       std::getline(stream, buffer, '\n');
-
-      // Look at each token.  Unfortunately we have to do this character by
-      // character, because things may be escaped in quotes.
       Tokenizer lineTok(buffer, sep);
-      size_t col = 0;
-      for (Tokenizer::iterator it = lineTok.begin(); it != lineTok.end(); ++it)
+      std::vector<std::string> tokens;
+      if(!transpose)
       {
-        // Attempt to extract as type eT.  If that fails, we'll assume it's a
-        // string and map it (which may involve retroactively mapping everything
-        // we've seen so far).
-        token.clear();
-        token.str(*it);
-
-        eT val = eT(0);
-        token >> val;
-
-        if (token.fail())
+        for (Tokenizer::iterator it = lineTok.begin(); it != lineTok.end(); ++it)
         {
-          // Conversion failed; but it may be a NaN or inf.  Armadillo has
-          // convenient functions to check.
-          if (!arma::diskio::convert_naninf(val, token.str()))
+          std::string trimmedToken(*it);
+          boost::trim(trimmedToken);
+          tokens.emplace_back(std::move(trimmedToken));
+        }
+        bool const notNumeric = std::any_of(std::begin(tokens),
+                                           std::end(tokens), notNumber);
+        if(notNumeric)
+        {
+          for(size_t i = 0; i != tokens.size(); ++i)
           {
-            // We need to perform a mapping.
-            const size_t dim = (transpose) ? col : row;
-            if (info.Type(dim) == Datatype::numeric)
-            {
-              // We must map everything we have seen up to this point and change
-              // the values in the matrix.
-              if (transpose)
-              {
-                // Whatever we've seen so far has successfully mapped to an eT.
-                // So we need to print it back to a string.  We'll use
-                // Armadillo's functionality for that.
-                for (size_t i = 0; i < row; ++i)
-                {
-                  std::stringstream sstr;
-                  arma::arma_ostream::print_elem(sstr, matrix.at(i, col),
-                      false);
-                  eT newVal = info.MapString(sstr.str(), col);
-                  matrix.at(i, col) = newVal;
-                }
-              }
-              else
-              {
-                for (size_t i = 0; i < col; ++i)
-                {
-                  std::stringstream sstr;
-                  arma::arma_ostream::print_elem(sstr, matrix.at(row, i),
-                      false);
-                  eT newVal = info.MapString(sstr.str(), row);
-                  matrix.at(row, i) = newVal;
-                }
-              }
-            }
-
-            // Strip whitespace from either side of the string.
-            std::string trimmedToken(token.str());
-            boost::trim(trimmedToken);
-            val = info.MapString(trimmedToken, dim);
+            eT const val = static_cast<eT>(info.MapString(tokens[i], row));
+            matrix.at(row, i) = val;
           }
         }
-
-        if (transpose)
-          matrix(col, row) = val;
         else
-          matrix(row, col) = val;
-
-        ++col;
+        {
+          std::stringstream sstream;
+          for(size_t i = 0; i != tokens.size(); ++i)
+          {
+            eT val(0);
+            sstream<<tokens[i];
+            sstream>>val;
+            matrix.at(row, i) = val;
+            sstream.clear();
+          }
+        }
       }
-
       ++row;
     }
   }




More information about the mlpack-git mailing list