[mlpack-git] master: Move MapToNumerical(MapTokens) to Policy class (2edbc40)

gitdub at mlpack.org gitdub at mlpack.org
Mon Jul 25 12:19:04 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2

>---------------------------------------------------------------

commit 2edbc400adac676fa2909724a608adf96a70e024
Author: Keon Kim <kwk236 at gmail.com>
Date:   Wed Jul 6 03:52:13 2016 +0900

    Move MapToNumerical(MapTokens) to Policy class


>---------------------------------------------------------------

2edbc400adac676fa2909724a608adf96a70e024
 src/mlpack/core/data/dataset_info.hpp              |  5 +++
 src/mlpack/core/data/dataset_info_impl.hpp         | 11 ++++++
 src/mlpack/core/data/load_impl.hpp                 | 26 ++------------
 .../core/data/map_policies/increment_policy.hpp    | 40 ++++++++++++++++++++++
 .../core/data/map_policies/missing_policy.hpp      | 23 +++++++++++++
 5 files changed, 81 insertions(+), 24 deletions(-)

diff --git a/src/mlpack/core/data/dataset_info.hpp b/src/mlpack/core/data/dataset_info.hpp
index f9aac15..c504540 100644
--- a/src/mlpack/core/data/dataset_info.hpp
+++ b/src/mlpack/core/data/dataset_info.hpp
@@ -78,6 +78,11 @@ class DatasetMapper
   typename PolicyType::mapped_type UnmapValue(const std::string& string,
                                             const size_t dimension);
 
+  template <typename eT>
+  void MapTokens(const std::vector<std::string>& tokens,
+                 size_t& row,
+                 arma::Mat<eT>& matrix);
+
   //! Return the type of a given dimension (numeric or categorical).
   Datatype Type(const size_t dimension) const;
   //! Modify the type of a given dimension (be careful!).
diff --git a/src/mlpack/core/data/dataset_info_impl.hpp b/src/mlpack/core/data/dataset_info_impl.hpp
index d1bd1cf..015a03a 100644
--- a/src/mlpack/core/data/dataset_info_impl.hpp
+++ b/src/mlpack/core/data/dataset_info_impl.hpp
@@ -76,6 +76,17 @@ inline typename PolicyType::mapped_type DatasetMapper<PolicyType>::UnmapValue(
   return maps[dimension].first.left.at(string);
 }
 
+template<typename PolicyType>
+template<typename eT>
+inline void DatasetMapper<PolicyType>::MapTokens(
+                                        const std::vector<std::string>& tokens,
+                                        size_t& row,
+                                        arma::Mat<eT>& matrix)
+{
+  return policy.template MapTokens<eT, MapType>(tokens, row, matrix, maps,
+                                                types);
+}
+
 // Get the type of a particular dimension.
 template<typename PolicyType>
 inline Datatype DatasetMapper<PolicyType>::Type(const size_t dimension) const
diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index f521be4..45266b5 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -59,26 +59,6 @@ void TransPoseTokens(std::vector<std::vector<std::string>> const &input,
   }
 }
 
-template<typename eT, typename PolicyType>
-void MapToNumerical(const std::vector<std::string>& tokens,
-                    size_t& row,
-                    DatasetMapper<PolicyType>& info,
-                    arma::Mat<eT>& matrix)
-{
-  std::stringstream token;
-  for (size_t i = 0; i != tokens.size(); ++i)
-  {
-    token.str(tokens[i]);
-    token>>matrix.at(row, i);
-    if (token.fail()) // if not number, map it to datasetmapper
-    {
-      const eT val = static_cast<eT>(info.MapString(tokens[i], row));
-      matrix.at(row, i) = val;
-    }
-    token.clear();
-  }
-}
-
 }
 
 template<typename eT>
@@ -458,8 +438,7 @@ bool Load(const std::string& filename,
       for(size_t i = 0; i != cols; ++i)
       {
         details::TransPoseTokens(tokensArray, tokens, i);
-        details::MapToNumerical(tokens, i,
-                                info, matrix);
+        info.MapTokens(tokens, i, matrix);
       }
     }
     else
@@ -470,8 +449,7 @@ bool Load(const std::string& filename,
         // Extract line by line.
         std::getline(stream, buffer, '\n');
         Tokenizer lineTok(buffer, sep);
-        details::MapToNumerical(details::ToTokens(lineTok), row,
-                                info, matrix);
+        info.MapTokens(details::ToTokens(lineTok), row, matrix);
         ++row;
       }
     }
diff --git a/src/mlpack/core/data/map_policies/increment_policy.hpp b/src/mlpack/core/data/map_policies/increment_policy.hpp
index 3aa0956..68a474f 100644
--- a/src/mlpack/core/data/map_policies/increment_policy.hpp
+++ b/src/mlpack/core/data/map_policies/increment_policy.hpp
@@ -56,6 +56,46 @@ class IncrementPolicy
       return maps[dimension].first.left.at(string);
     }
   }
+
+  template <typename eT, typename MapType>
+  void MapTokens(const std::vector<std::string>& tokens,
+                 size_t& row,
+                 arma::Mat<eT>& matrix,
+                 MapType& maps,
+                 std::vector<Datatype>& types)
+  {
+    auto notNumber = [](const std::string& str)
+    {
+      eT val(0);
+      std::stringstream token;
+      token.str(str);
+      token >> val;
+      return token.fail();
+    };
+
+    const bool notNumeric = std::any_of(std::begin(tokens),
+                                        std::end(tokens), notNumber);
+    if (notNumeric)
+    {
+       for (size_t i = 0; i != tokens.size(); ++i)
+       {
+         const eT val = static_cast<eT>(this->MapString(maps, types, tokens[i],
+                                                        row));
+         double temp = (double) val;
+         matrix.at(row, i) = val;
+       }
+    }
+    else
+    {
+      std::stringstream token;
+      for (size_t i = 0; i != tokens.size(); ++i)
+      {
+         token.str(tokens[i]);
+         token >> matrix.at(row, i);
+         token.clear();
+      }
+    }
+  }
 }; // class IncrementPolicy
 
 } // namespace data
diff --git a/src/mlpack/core/data/map_policies/missing_policy.hpp b/src/mlpack/core/data/map_policies/missing_policy.hpp
index 6b1fee9..59be010 100644
--- a/src/mlpack/core/data/map_policies/missing_policy.hpp
+++ b/src/mlpack/core/data/map_policies/missing_policy.hpp
@@ -69,6 +69,29 @@ class MissingPolicy
       return NaN;
     }
   }
+
+  template <typename eT, typename MapType>
+  void MapTokens(const std::vector<std::string>& tokens,
+                 size_t& row,
+                 arma::Mat<eT>& matrix,
+                 MapType& maps,
+                 std::vector<Datatype>& types)
+  {
+    std::stringstream token;
+    for (size_t i = 0; i != tokens.size(); ++i)
+    {
+      token.str(tokens[i]);
+      token>>matrix.at(row, i);
+      if (token.fail()) // if not number, map it to datasetmapper
+      {
+        const eT val = static_cast<eT>(this->MapString(maps, types, tokens[i],
+                                                       row));
+        matrix.at(row, i) = val;
+      }
+      token.clear();
+    }
+  }
+
  private:
   std::set<std::string> missingSet;
 }; // class MissingPolicy




More information about the mlpack-git mailing list