[mlpack-git] master: 1 : fixing issue #658, work with non-transpose data, do not work with transpose data yet (0e61fe2)
gitdub at mlpack.org
gitdub at mlpack.org
Tue May 31 03:52:38 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/1f562a1aba7ae55475afcc95659511c2b7f694e5...5b8fdce471328f722fcd8c0f22a6d995ce22c98b
>---------------------------------------------------------------
commit 0e61fe24ce2fc4a79064fd16112c4f918e6cc121
Author: stereomatchingkiss <stereomatchingkiss at gmail.com>
Date: Tue May 31 15:52:38 2016 +0800
1 : fixing issue #658, work with non-transpose data, do not work with
transpose data yet
>---------------------------------------------------------------
0e61fe24ce2fc4a79064fd16112c4f918e6cc121
src/mlpack/core/data/load_impl.hpp | 93 ++++++++++++--------------------------
1 file changed, 30 insertions(+), 63 deletions(-)
diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 947b360..c02fa5e 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -389,81 +389,48 @@ bool Load(const std::string& filename,
stream.open(filename, std::fstream::in);
// Extract line by line.
- std::stringstream token;
+ auto notNumber = [](std::string const &str)
+ {
+ return std::any_of(std::begin(str), std::end(str),
+ [](char c){ return !std::isdigit(c);});
+ };
size_t row = 0;
while (!stream.bad() && !stream.fail() && !stream.eof())
{
std::getline(stream, buffer, '\n');
-
- // Look at each token. Unfortunately we have to do this character by
- // character, because things may be escaped in quotes.
Tokenizer lineTok(buffer, sep);
- size_t col = 0;
- for (Tokenizer::iterator it = lineTok.begin(); it != lineTok.end(); ++it)
+ std::vector<std::string> tokens;
+ if(!transpose)
{
- // Attempt to extract as type eT. If that fails, we'll assume it's a
- // string and map it (which may involve retroactively mapping everything
- // we've seen so far).
- token.clear();
- token.str(*it);
-
- eT val = eT(0);
- token >> val;
-
- if (token.fail())
+ for (Tokenizer::iterator it = lineTok.begin(); it != lineTok.end(); ++it)
{
- // Conversion failed; but it may be a NaN or inf. Armadillo has
- // convenient functions to check.
- if (!arma::diskio::convert_naninf(val, token.str()))
+ std::string trimmedToken(*it);
+ boost::trim(trimmedToken);
+ tokens.emplace_back(std::move(trimmedToken));
+ }
+ bool const notNumeric = std::any_of(std::begin(tokens),
+ std::end(tokens), notNumber);
+ if(notNumeric)
+ {
+ for(size_t i = 0; i != tokens.size(); ++i)
{
- // We need to perform a mapping.
- const size_t dim = (transpose) ? col : row;
- if (info.Type(dim) == Datatype::numeric)
- {
- // We must map everything we have seen up to this point and change
- // the values in the matrix.
- if (transpose)
- {
- // Whatever we've seen so far has successfully mapped to an eT.
- // So we need to print it back to a string. We'll use
- // Armadillo's functionality for that.
- for (size_t i = 0; i < row; ++i)
- {
- std::stringstream sstr;
- arma::arma_ostream::print_elem(sstr, matrix.at(i, col),
- false);
- eT newVal = info.MapString(sstr.str(), col);
- matrix.at(i, col) = newVal;
- }
- }
- else
- {
- for (size_t i = 0; i < col; ++i)
- {
- std::stringstream sstr;
- arma::arma_ostream::print_elem(sstr, matrix.at(row, i),
- false);
- eT newVal = info.MapString(sstr.str(), row);
- matrix.at(row, i) = newVal;
- }
- }
- }
-
- // Strip whitespace from either side of the string.
- std::string trimmedToken(token.str());
- boost::trim(trimmedToken);
- val = info.MapString(trimmedToken, dim);
+ eT const val = static_cast<eT>(info.MapString(tokens[i], row));
+ matrix.at(row, i) = val;
}
}
-
- if (transpose)
- matrix(col, row) = val;
else
- matrix(row, col) = val;
-
- ++col;
+ {
+ std::stringstream sstream;
+ for(size_t i = 0; i != tokens.size(); ++i)
+ {
+ eT val(0);
+ sstream<<tokens[i];
+ sstream>>val;
+ matrix.at(row, i) = val;
+ sstream.clear();
+ }
+ }
}
-
++row;
}
}
More information about the mlpack-git mailing list