[mlpack-git] master: support transpose mapping (56a0361)
gitdub at mlpack.org
gitdub at mlpack.org
Tue May 31 11:29:03 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/2629a4fd65684e2bd4544d5ef8ea07fa9ad594a7...ee95e2030ddd526368e377b4e1c13484d471e307
>---------------------------------------------------------------
commit 56a036116f07e76692766b9b1f1410887f2deb96
Author: stereomatchingkiss <stereomatchingkiss at gmail.com>
Date: Tue May 31 23:29:03 2016 +0800
support transpose mapping
>---------------------------------------------------------------
56a036116f07e76692766b9b1f1410887f2deb96
src/mlpack/core/data/load_impl.hpp | 139 +++++++++++++++++++++++++------------
1 file changed, 95 insertions(+), 44 deletions(-)
diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index f6fc93e..cab577e 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -29,6 +29,74 @@
namespace mlpack {
namespace data {
+namespace details {
+
+/**
+ * Collect every token produced by a tokenizer into a vector of strings,
+ * stripping leading and trailing whitespace from each token.
+ *
+ * @param lineTok Tokenizer to iterate over; it must work with std::begin()
+ *     and std::end(), and its elements must be convertible to std::string.
+ * @return The trimmed tokens, in iteration order.
+ */
+template<typename Tokenizer>
+std::vector<std::string> ToTokens(Tokenizer &lineTok)
+{
+  std::vector<std::string> tokens;
+  std::transform(std::begin(lineTok), std::end(lineTok),
+                 std::back_inserter(tokens),
+                 [](std::string token)
+                 {
+                   // The parameter is already a private copy, so it can be
+                   // trimmed in place; returning it by name lets the
+                   // compiler move it (no explicit std::move needed).
+                   boost::trim(token);
+                   return token;
+                 });
+
+  return tokens;
+}
+
+/**
+ * Gather the token at position `index` of every row of `input` into `output`,
+ * i.e. extract one column of a row-major table of tokens.
+ *
+ * Rows that contain no tokens at all are skipped instead of being indexed
+ * (the line-reading loop appends one token vector per std::getline() call, so
+ * a blank or trailing line may plausibly yield such a row).  A non-empty row
+ * that has too few tokens is reported through std::out_of_range rather than
+ * read out of bounds.
+ *
+ * The function is declared inline because it is a non-template function
+ * defined in a header; without it, including the header from several
+ * translation units produces multiple-definition link errors.
+ *
+ * @param input Table of tokens, one inner vector per input line.
+ * @param output Receives the extracted column; previous contents are
+ *     discarded.
+ * @param index Position of the token to take from each row.
+ * @throws std::out_of_range if a non-empty row has `index` or fewer tokens.
+ */
+inline void TransPoseTokens(std::vector<std::vector<std::string>> const &input,
+                            std::vector<std::string> &output,
+                            size_t index)
+{
+  output.clear();
+  output.reserve(input.size());
+  for (std::vector<std::string> const &row : input)
+  {
+    if (row.empty())
+      continue;
+
+    output.push_back(row.at(index));
+  }
+}
+
+/**
+ * Store one list of tokens into matrix(row, 0), matrix(row, 1), ...
+ *
+ * If every token can be extracted as an eT through a std::stringstream, the
+ * extracted values are stored directly.  If at least one token cannot, then
+ * every token (including the ones that look numeric) is passed to
+ * info.MapString(token, row) and the result, cast to eT, is stored instead.
+ *
+ * The matrix is not resized here and is written through at(), so it must
+ * already be large enough to hold tokens.size() entries in the given row.
+ *
+ * @param tokens Tokens to convert.
+ * @param row Row of `matrix` to fill; also forwarded to info.MapString().
+ * @param info Mapping object used for the non-numeric case.
+ * @param matrix Destination matrix.
+ */
+template<typename eT>
+void MapToNumerical(std::vector<std::string> const &tokens,
+                    size_t &row,
+                    DatasetInfo &info,
+                    arma::Mat<eT> &matrix)
+{
+  // True when `str` cannot be extracted as an eT.
+  // NOTE(review): extraction succeeds on a numeric prefix, so a token such
+  // as "12abc" is accepted as a number here -- confirm this is intended.
+  auto notNumber = [](std::string const &str)
+  {
+    eT val(0);
+    std::stringstream token;
+    token.str(str);
+    token >> val;
+    return token.fail();
+  };
+
+  bool const notNumeric = std::any_of(std::begin(tokens),
+                                      std::end(tokens), notNumber);
+  if (notNumeric)
+  {
+    // One unparsable token is enough to map the whole list through `info`.
+    for (size_t i = 0; i != tokens.size(); ++i)
+    {
+      eT const val = static_cast<eT>(info.MapString(tokens[i], row));
+      matrix.at(row, i) = val;
+    }
+  }
+  else
+  {
+    // A single stream is reused for all tokens; clear() resets the state
+    // flags (e.g. eofbit) left by the previous extraction.
+    std::stringstream token;
+    for (size_t i = 0; i != tokens.size(); ++i)
+    {
+      token.str(tokens[i]);
+      token >> matrix.at(row, i);
+      token.clear();
+    }
+  }
+}
+
+} // namespace details
+
template<typename eT>
bool inline inplace_transpose(arma::Mat<eT>& X)
{
@@ -37,7 +105,7 @@ bool inline inplace_transpose(arma::Mat<eT>& X)
X = arma::trans(X);
return false;
}
- catch (std::bad_alloc& exception)
+ catch (std::bad_alloc&)
{
#if (ARMA_VERSION_MAJOR >= 4) || \
((ARMA_VERSION_MAJOR == 3) && (ARMA_VERSION_MINOR >= 930))
@@ -388,53 +456,36 @@ bool Load(const std::string& filename,
stream.close();
stream.open(filename, std::fstream::in);
- auto notNumber = [](std::string const &str)
- {
- eT val(0);
- std::stringstream token;
- token.str(str);
- token>>val;
- return token.fail();
- };
- size_t row = 0;
- while (!stream.bad() && !stream.fail() && !stream.eof())
+ if(transpose)
{
- // Extract line by line.
- std::getline(stream, buffer, '\n');
- Tokenizer lineTok(buffer, sep);
+ std::vector<std::vector<std::string>> tokensArray;
+ while (!stream.bad() && !stream.fail() && !stream.eof())
+ {
+ // Extract line by line.
+ std::getline(stream, buffer, '\n');
+ Tokenizer lineTok(buffer, sep);
+ tokensArray.emplace_back(details::ToTokens(lineTok));
+ }
std::vector<std::string> tokens;
- if(!transpose)
+ for(size_t i = 0; i != cols; ++i)
{
- std::transform(std::begin(lineTok), std::end(lineTok),
- std::back_inserter(tokens),
- [&tokens](std::string const &str)
- {
- std::string trimmedToken(str);
- boost::trim(trimmedToken);
- return std::move(trimmedToken);
- });
- bool const notNumeric = std::any_of(std::begin(tokens),
- std::end(tokens), notNumber);
- if(notNumeric)
- {
- for(size_t i = 0; i != tokens.size(); ++i)
- {
- eT const val = static_cast<eT>(info.MapString(tokens[i], row));
- matrix.at(row, i) = val;
- }
- }
- else
- {
- std::stringstream token;
- for(size_t i = 0; i != tokens.size(); ++i)
- {
- token.str(tokens[i]);
- token>>matrix.at(row, i);
- token.clear();
- }
- }
+ details::TransPoseTokens(tokensArray, tokens, i);
+ details::MapToNumerical(tokens, i,
+ info, matrix);
+ }
+ }
+ else
+ {
+ size_t row = 0;
+ while (!stream.bad() && !stream.fail() && !stream.eof())
+ {
+ // Extract line by line.
+ std::getline(stream, buffer, '\n');
+ Tokenizer lineTok(buffer, sep);
+ details::MapToNumerical(details::ToTokens(lineTok), row,
+ info, matrix);
+ ++row;
}
- ++row;
}
}
else if (extension == "arff")
More information about the mlpack-git
mailing list