[mlpack-git] master: update data::load to accept different mappertypes (87d8d46)

gitdub at mlpack.org gitdub at mlpack.org
Mon Jul 25 12:19:03 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2

>---------------------------------------------------------------

commit 87d8d46396a42a4cd491b32be4f17e8582c9223d
Author: Keon Kim <kwk236 at gmail.com>
Date:   Fri Jul 1 17:48:28 2016 +0900

    update data::load to accept different mappertypes


>---------------------------------------------------------------

87d8d46396a42a4cd491b32be4f17e8582c9223d
 src/mlpack/core/data/dataset_info.hpp                |  1 +
 src/mlpack/core/data/dataset_info_impl.hpp           | 10 +++++++++-
 src/mlpack/core/data/load.hpp                        |  4 ++--
 src/mlpack/core/data/load_arff.hpp                   |  4 ++--
 src/mlpack/core/data/load_arff_impl.hpp              |  6 +++---
 src/mlpack/core/data/load_impl.hpp                   | 20 ++++++++++----------
 src/mlpack/core/data/map_policies/missing_policy.hpp | 20 +++++++++++++++-----
 .../methods/preprocess/preprocess_imputer_main.cpp   |  6 +++---
 8 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/src/mlpack/core/data/dataset_info.hpp b/src/mlpack/core/data/dataset_info.hpp
index 8eea1c8..bfd5b70 100644
--- a/src/mlpack/core/data/dataset_info.hpp
+++ b/src/mlpack/core/data/dataset_info.hpp
@@ -37,6 +37,7 @@ class DatasetMapper
    */
   DatasetMapper(const size_t dimensionality = 0);
 
+  DatasetMapper(MapPolicy policy, const size_t dimensionality = 0);
   /**
    * Given the string and the dimension to which it belongs, return its numeric
    * mapping.  If no mapping yet exists, the string is added to the list of
diff --git a/src/mlpack/core/data/dataset_info_impl.hpp b/src/mlpack/core/data/dataset_info_impl.hpp
index b8e09f7..de543ab 100644
--- a/src/mlpack/core/data/dataset_info_impl.hpp
+++ b/src/mlpack/core/data/dataset_info_impl.hpp
@@ -18,9 +18,17 @@ template<typename MapPolicy>
 inline DatasetMapper<MapPolicy>::DatasetMapper(const size_t dimensionality) :
     types(dimensionality, Datatype::numeric)
 {
-  // Nothing to initialize.
+  // Nothing to initialize here.
 }
 
+template<typename MapPolicy>
+inline DatasetMapper<MapPolicy>::DatasetMapper(MapPolicy policy,
+                                               const size_t dimensionality) :
+    types(dimensionality, Datatype::numeric),
+    policy(std::move(policy))
+{
+  // Nothing to initialize here.
+}
 
 // When we want to insert value into the map,
 // we could use the policy to map the string
diff --git a/src/mlpack/core/data/load.hpp b/src/mlpack/core/data/load.hpp
index 19e238a..b2009d8 100644
--- a/src/mlpack/core/data/load.hpp
+++ b/src/mlpack/core/data/load.hpp
@@ -91,10 +91,10 @@ bool Load(const std::string& filename,
  * @param transpose If true, transpose the matrix after loading.
  * @return Boolean value indicating success or failure of load.
  */
-template<typename eT>
+template<typename eT, typename MapperType>
 bool Load(const std::string& filename,
           arma::Mat<eT>& matrix,
-          DatasetInfo& info,
+          MapperType& info,
           const bool fatal = false,
           const bool transpose = true);
 
diff --git a/src/mlpack/core/data/load_arff.hpp b/src/mlpack/core/data/load_arff.hpp
index f04e38a..60579ca 100644
--- a/src/mlpack/core/data/load_arff.hpp
+++ b/src/mlpack/core/data/load_arff.hpp
@@ -42,10 +42,10 @@ void LoadARFF(const std::string& filename, arma::Mat<eT>& matrix);
  * @param info DatasetInfo object; can be default-constructed or pre-existing
  *     from another call to LoadARFF().
  */
-template<typename eT>
+template<typename eT, typename MapperType>
 void LoadARFF(const std::string& filename,
               arma::Mat<eT>& matrix,
-              DatasetInfo& info);
+              MapperType& info);
 
 } // namespace data
 } // namespace mlpack
diff --git a/src/mlpack/core/data/load_arff_impl.hpp b/src/mlpack/core/data/load_arff_impl.hpp
index 68c9184..edb9057 100644
--- a/src/mlpack/core/data/load_arff_impl.hpp
+++ b/src/mlpack/core/data/load_arff_impl.hpp
@@ -15,10 +15,10 @@
 namespace mlpack {
 namespace data {
 
-template<typename eT>
+template<typename eT, typename MapperType>
 void LoadARFF(const std::string& filename,
               arma::Mat<eT>& matrix,
-              DatasetInfo& info)
+              MapperType& info)
 {
   // First, open the file.
   std::ifstream ifs;
@@ -98,7 +98,7 @@ void LoadARFF(const std::string& filename,
   // Reset the DatasetInfo object, if needed.
   if (info.Dimensionality() == 0)
   {
-    info = DatasetInfo(dimensionality);
+    info = MapperType(dimensionality);
   }
   else if (info.Dimensionality() != dimensionality)
   {
diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 5479bab..8349f4c 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -59,13 +59,13 @@ void TransPoseTokens(std::vector<std::vector<std::string>> const &input,
   }
 }
 
-template<typename eT>
-void MapToNumerical(const std::vector<std::string> &tokens,
-                    size_t &row,
-                    DatasetInfo &info,
-                    arma::Mat<eT> &matrix)
+template<typename eT, typename MapperType>
+void MapToNumerical(const std::vector<std::string>& tokens,
+                    size_t& row,
+                    MapperType& info,
+                    arma::Mat<eT>& matrix)
 {
-  auto notNumber = [](const std::string &str)
+  auto notNumber = [](const std::string& str)
   {
     eT val(0);
     std::stringstream token;
@@ -370,10 +370,10 @@ bool Load(const std::string& filename,
 }
 
 // Load with mappings.  Unfortunately we have to implement this ourselves.
-template<typename eT>
+template<typename eT, typename MapperType>
 bool Load(const std::string& filename,
           arma::Mat<eT>& matrix,
-          DatasetInfo& info,
+          MapperType& info,
           const bool fatal,
           const bool transpose)
 {
@@ -446,12 +446,12 @@ bool Load(const std::string& filename,
     if (transpose)
     {
       matrix.set_size(cols, rows);
-      info = DatasetInfo(cols);
+      info = MapperType(cols);
     }
     else
     {
       matrix.set_size(rows, cols);
-      info = DatasetInfo(rows);
+      info = MapperType(rows);
     }
 
     stream.close();
diff --git a/src/mlpack/core/data/map_policies/missing_policy.hpp b/src/mlpack/core/data/map_policies/missing_policy.hpp
index b7e0630..4cc8a96 100644
--- a/src/mlpack/core/data/map_policies/missing_policy.hpp
+++ b/src/mlpack/core/data/map_policies/missing_policy.hpp
@@ -26,19 +26,27 @@ class MissingPolicy
  public:
   typedef size_t mapped_type;
 
+  //explicit MissingPolicy(std::set<std::string> specificString) :
+    //specificString(std::move(specificString))
+  //{
+    //// Nothing to initialize here.
+  //}
+
+
   template <typename MapType>
   mapped_type MapString(MapType& maps,
-                       std::vector<Datatype>& types,
-                       const std::string& string,
-                       const size_t dimension)
+                        std::vector<Datatype>& types,
+                        const std::string& string,
+                        const size_t dimension)
   {
     // If this condition is true, either we have no mapping for the given string
     // or we have no mappings for the given dimension at all.  In either case,
     // we create a mapping.
-    if (maps.count(dimension) == 0 ||
+    if (//specificString.count(string) != 0 &&
+        maps.count(dimension) == 0 ||
         maps[dimension].first.left.count(string) == 0)
     {
-      // This string does not exist yet.
+       // This string does not exist yet.
       size_t& numMappings = maps[dimension].second;
 
       typedef boost::bimap<std::string, size_t>::value_type PairType;
@@ -51,6 +59,8 @@ class MissingPolicy
       return maps[dimension].first.left.at(string);
     }
   }
+ private:
+  //std::set<std::string> specificString;
 }; // class MissingPolicy
 
 } // namespace data
diff --git a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
index b833ab1..6a290b9 100644
--- a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
@@ -9,6 +9,7 @@
 #include <mlpack/core/data/imputer.hpp>
 #include <mlpack/core/data/dataset_info.hpp>
 #include <mlpack/core/data/map_policies/increment_policy.hpp>
+#include <mlpack/core/data/map_policies/missing_policy.hpp>
 #include <mlpack/core/data/imputation_methods/mean_imputation.hpp>
 #include <mlpack/core/data/imputation_methods/median_imputation.hpp>
 #include <mlpack/core/data/imputation_methods/custom_imputation.hpp>
@@ -83,10 +84,10 @@ int main(int argc, char** argv)
   // DatasetInfo holds how the DatasetMapper should map the values.
   // can be specified by passing map_policy classes as template parameters
   // ex) DatasetMapper<IncrementPolicy> info;
-  using MapperType = DatasetMapper<IncrementPolicy>;
+  using MapperType = DatasetMapper<MissingPolicy>;
   MapperType info;
 
-  Load(inputFile, input, info,  true, true);
+  Load<double, MapperType>(inputFile, input, info,  true, true);
 
   // for testing purpose
   Log::Info << input << endl;
@@ -100,7 +101,6 @@ int main(int argc, char** argv)
 
   arma::Mat<double> output(input);
 
-
   Log::Info << "Performing '" << strategy << "' imputation strategy "
       << "on dimension '" << dimension << endl;
 




More information about the mlpack-git mailing list