[mlpack-git] master: Count lines by hand; and trim whitespace from tokens. (da12bda)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:41:45 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit da12bdabe41e2cf154635e8680c0fabf7d8b7a10
Author: ryan <ryan at ratml.org>
Date: Thu Sep 10 17:07:35 2015 -0400
Count lines by hand; and trim whitespace from tokens.
The std::count() idea for counting lines is elegant, but it stumbles when
presented with files that don't end with a newline. Simply counting the lines
with getline() handles that case and works fine.
>---------------------------------------------------------------
da12bdabe41e2cf154635e8680c0fabf7d8b7a10
src/mlpack/core/data/load_impl.hpp | 28 +++++++++-------------------
1 file changed, 9 insertions(+), 19 deletions(-)
diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 85badd4..4f953b5 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -19,6 +19,7 @@
#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <boost/tokenizer.hpp>
+#include <boost/algorithm/string.hpp>
#include "serialization_shim.hpp"
@@ -330,21 +331,11 @@ bool Load(const std::string& filename,
// Now count the number of lines in the file. We've already counted the
// first one.
size_t rows = 1;
- stream.unsetf(std::ios_base::skipws);
- rows += std::count(std::istream_iterator<char>(stream),
- std::istream_iterator<char>(), '\n');
-
- // Back up to see if the last character in the file is an empty line.
- stream.unget();
- std::cout << "last character is " << int(stream.peek()) << ".\n";
- while (isspace(stream.peek()))
+ while (!stream.eof() && !stream.bad() && !stream.fail())
{
- if (stream.peek() == '\n')
- {
- --rows;
- break;
- }
- stream.unget();
+ std::getline(stream, buffer, '\n');
+ if (!stream.fail())
+ ++rows;
}
// Now we have the size. So resize our matrix.
@@ -380,7 +371,6 @@ bool Load(const std::string& filename,
if (token.fail())
{
- std::cout << "conversion failed\n";
// Conversion failed; but it may be a NaN or inf. Armadillo has
// convenient functions to check.
if (!arma::diskio::convert_naninf(val, token.str()))
@@ -418,7 +408,10 @@ bool Load(const std::string& filename,
}
}
- val = info.MapString(token.str(), dim);
+ // Strip whitespace from either side of the string.
+ std::string trimmedToken(token.str());
+ boost::trim(trimmedToken);
+ val = info.MapString(trimmedToken, dim);
}
}
@@ -432,9 +425,6 @@ bool Load(const std::string& filename,
++row;
}
-
- if (stream.bad() || stream.fail())
- Log::Warn << "Failure reading file '" << filename << "'." << std::endl;
}
else
{
More information about the mlpack-git mailing list