[mlpack-git] master: Move MapToNumerical(MapTokens) to Policy class (2edbc40)
gitdub at mlpack.org
gitdub at mlpack.org
Mon Jul 25 12:19:04 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2
>---------------------------------------------------------------
commit 2edbc400adac676fa2909724a608adf96a70e024
Author: Keon Kim <kwk236 at gmail.com>
Date: Wed Jul 6 03:52:13 2016 +0900
Move MapToNumerical(MapTokens) to Policy class
>---------------------------------------------------------------
2edbc400adac676fa2909724a608adf96a70e024
src/mlpack/core/data/dataset_info.hpp | 5 +++
src/mlpack/core/data/dataset_info_impl.hpp | 11 ++++++
src/mlpack/core/data/load_impl.hpp | 26 ++------------
.../core/data/map_policies/increment_policy.hpp | 40 ++++++++++++++++++++++
.../core/data/map_policies/missing_policy.hpp | 23 +++++++++++++
5 files changed, 81 insertions(+), 24 deletions(-)
diff --git a/src/mlpack/core/data/dataset_info.hpp b/src/mlpack/core/data/dataset_info.hpp
index f9aac15..c504540 100644
--- a/src/mlpack/core/data/dataset_info.hpp
+++ b/src/mlpack/core/data/dataset_info.hpp
@@ -78,6 +78,11 @@ class DatasetMapper
typename PolicyType::mapped_type UnmapValue(const std::string& string,
const size_t dimension);
+ template <typename eT>
+ void MapTokens(const std::vector<std::string>& tokens,
+ size_t& row,
+ arma::Mat<eT>& matrix);
+
//! Return the type of a given dimension (numeric or categorical).
Datatype Type(const size_t dimension) const;
//! Modify the type of a given dimension (be careful!).
diff --git a/src/mlpack/core/data/dataset_info_impl.hpp b/src/mlpack/core/data/dataset_info_impl.hpp
index d1bd1cf..015a03a 100644
--- a/src/mlpack/core/data/dataset_info_impl.hpp
+++ b/src/mlpack/core/data/dataset_info_impl.hpp
@@ -76,6 +76,17 @@ inline typename PolicyType::mapped_type DatasetMapper<PolicyType>::UnmapValue(
return maps[dimension].first.left.at(string);
}
+template<typename PolicyType>
+template<typename eT>
+inline void DatasetMapper<PolicyType>::MapTokens(
+ const std::vector<std::string>& tokens,
+ size_t& row,
+ arma::Mat<eT>& matrix)
+{
+ return policy.template MapTokens<eT, MapType>(tokens, row, matrix, maps,
+ types);
+}
+
// Get the type of a particular dimension.
template<typename PolicyType>
inline Datatype DatasetMapper<PolicyType>::Type(const size_t dimension) const
diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index f521be4..45266b5 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -59,26 +59,6 @@ void TransPoseTokens(std::vector<std::vector<std::string>> const &input,
}
}
-template<typename eT, typename PolicyType>
-void MapToNumerical(const std::vector<std::string>& tokens,
- size_t& row,
- DatasetMapper<PolicyType>& info,
- arma::Mat<eT>& matrix)
-{
- std::stringstream token;
- for (size_t i = 0; i != tokens.size(); ++i)
- {
- token.str(tokens[i]);
- token>>matrix.at(row, i);
- if (token.fail()) // if not number, map it to datasetmapper
- {
- const eT val = static_cast<eT>(info.MapString(tokens[i], row));
- matrix.at(row, i) = val;
- }
- token.clear();
- }
-}
-
}
template<typename eT>
@@ -458,8 +438,7 @@ bool Load(const std::string& filename,
for(size_t i = 0; i != cols; ++i)
{
details::TransPoseTokens(tokensArray, tokens, i);
- details::MapToNumerical(tokens, i,
- info, matrix);
+ info.MapTokens(tokens, i, matrix);
}
}
else
@@ -470,8 +449,7 @@ bool Load(const std::string& filename,
// Extract line by line.
std::getline(stream, buffer, '\n');
Tokenizer lineTok(buffer, sep);
- details::MapToNumerical(details::ToTokens(lineTok), row,
- info, matrix);
+ info.MapTokens(details::ToTokens(lineTok), row, matrix);
++row;
}
}
diff --git a/src/mlpack/core/data/map_policies/increment_policy.hpp b/src/mlpack/core/data/map_policies/increment_policy.hpp
index 3aa0956..68a474f 100644
--- a/src/mlpack/core/data/map_policies/increment_policy.hpp
+++ b/src/mlpack/core/data/map_policies/increment_policy.hpp
@@ -56,6 +56,46 @@ class IncrementPolicy
return maps[dimension].first.left.at(string);
}
}
+
+ template <typename eT, typename MapType>
+ void MapTokens(const std::vector<std::string>& tokens,
+ size_t& row,
+ arma::Mat<eT>& matrix,
+ MapType& maps,
+ std::vector<Datatype>& types)
+ {
+ auto notNumber = [](const std::string& str)
+ {
+ eT val(0);
+ std::stringstream token;
+ token.str(str);
+ token >> val;
+ return token.fail();
+ };
+
+ const bool notNumeric = std::any_of(std::begin(tokens),
+ std::end(tokens), notNumber);
+ if (notNumeric)
+ {
+ for (size_t i = 0; i != tokens.size(); ++i)
+ {
+ const eT val = static_cast<eT>(this->MapString(maps, types, tokens[i],
+ row));
+ double temp = (double) val;
+ matrix.at(row, i) = val;
+ }
+ }
+ else
+ {
+ std::stringstream token;
+ for (size_t i = 0; i != tokens.size(); ++i)
+ {
+ token.str(tokens[i]);
+ token >> matrix.at(row, i);
+ token.clear();
+ }
+ }
+ }
}; // class IncrementPolicy
} // namespace data
diff --git a/src/mlpack/core/data/map_policies/missing_policy.hpp b/src/mlpack/core/data/map_policies/missing_policy.hpp
index 6b1fee9..59be010 100644
--- a/src/mlpack/core/data/map_policies/missing_policy.hpp
+++ b/src/mlpack/core/data/map_policies/missing_policy.hpp
@@ -69,6 +69,29 @@ class MissingPolicy
return NaN;
}
}
+
+ template <typename eT, typename MapType>
+ void MapTokens(const std::vector<std::string>& tokens,
+ size_t& row,
+ arma::Mat<eT>& matrix,
+ MapType& maps,
+ std::vector<Datatype>& types)
+ {
+ std::stringstream token;
+ for (size_t i = 0; i != tokens.size(); ++i)
+ {
+ token.str(tokens[i]);
+ token>>matrix.at(row, i);
+ if (token.fail()) // if not number, map it to datasetmapper
+ {
+ const eT val = static_cast<eT>(this->MapString(maps, types, tokens[i],
+ row));
+ matrix.at(row, i) = val;
+ }
+ token.clear();
+ }
+ }
+
private:
std::set<std::string> missingSet;
}; // class MissingPolicy
More information about the mlpack-git mailing list