[mlpack-git] master: update data::load to accept different mappertypes (87d8d46)
gitdub at mlpack.org
gitdub at mlpack.org
Mon Jul 25 12:19:03 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2
>---------------------------------------------------------------
commit 87d8d46396a42a4cd491b32be4f17e8582c9223d
Author: Keon Kim <kwk236 at gmail.com>
Date: Fri Jul 1 17:48:28 2016 +0900
update data::load to accept different mappertypes
>---------------------------------------------------------------
87d8d46396a42a4cd491b32be4f17e8582c9223d
src/mlpack/core/data/dataset_info.hpp | 1 +
src/mlpack/core/data/dataset_info_impl.hpp | 10 +++++++++-
src/mlpack/core/data/load.hpp | 4 ++--
src/mlpack/core/data/load_arff.hpp | 4 ++--
src/mlpack/core/data/load_arff_impl.hpp | 6 +++---
src/mlpack/core/data/load_impl.hpp | 20 ++++++++++----------
src/mlpack/core/data/map_policies/missing_policy.hpp | 20 +++++++++++++++-----
.../methods/preprocess/preprocess_imputer_main.cpp | 6 +++---
8 files changed, 45 insertions(+), 26 deletions(-)
diff --git a/src/mlpack/core/data/dataset_info.hpp b/src/mlpack/core/data/dataset_info.hpp
index 8eea1c8..bfd5b70 100644
--- a/src/mlpack/core/data/dataset_info.hpp
+++ b/src/mlpack/core/data/dataset_info.hpp
@@ -37,6 +37,7 @@ class DatasetMapper
*/
DatasetMapper(const size_t dimensionality = 0);
+ DatasetMapper(MapPolicy policy, const size_t dimensionality = 0);
/**
* Given the string and the dimension to which it belongs, return its numeric
* mapping. If no mapping yet exists, the string is added to the list of
diff --git a/src/mlpack/core/data/dataset_info_impl.hpp b/src/mlpack/core/data/dataset_info_impl.hpp
index b8e09f7..de543ab 100644
--- a/src/mlpack/core/data/dataset_info_impl.hpp
+++ b/src/mlpack/core/data/dataset_info_impl.hpp
@@ -18,9 +18,17 @@ template<typename MapPolicy>
inline DatasetMapper<MapPolicy>::DatasetMapper(const size_t dimensionality) :
types(dimensionality, Datatype::numeric)
{
- // Nothing to initialize.
+ // Nothing to initialize here.
}
+template<typename MapPolicy>
+inline DatasetMapper<MapPolicy>::DatasetMapper(MapPolicy policy,
+ const size_t dimensionality) :
+ types(dimensionality, Datatype::numeric),
+ policy(std::move(policy))
+{
+ // Nothing to initialize here.
+}
// When we want to insert value into the map,
// we could use the policy to map the string
diff --git a/src/mlpack/core/data/load.hpp b/src/mlpack/core/data/load.hpp
index 19e238a..b2009d8 100644
--- a/src/mlpack/core/data/load.hpp
+++ b/src/mlpack/core/data/load.hpp
@@ -91,10 +91,10 @@ bool Load(const std::string& filename,
* @param transpose If true, transpose the matrix after loading.
* @return Boolean value indicating success or failure of load.
*/
-template<typename eT>
+template<typename eT, typename MapperType>
bool Load(const std::string& filename,
arma::Mat<eT>& matrix,
- DatasetInfo& info,
+ MapperType& info,
const bool fatal = false,
const bool transpose = true);
diff --git a/src/mlpack/core/data/load_arff.hpp b/src/mlpack/core/data/load_arff.hpp
index f04e38a..60579ca 100644
--- a/src/mlpack/core/data/load_arff.hpp
+++ b/src/mlpack/core/data/load_arff.hpp
@@ -42,10 +42,10 @@ void LoadARFF(const std::string& filename, arma::Mat<eT>& matrix);
* @param info DatasetInfo object; can be default-constructed or pre-existing
* from another call to LoadARFF().
*/
-template<typename eT>
+template<typename eT, typename MapperType>
void LoadARFF(const std::string& filename,
arma::Mat<eT>& matrix,
- DatasetInfo& info);
+ MapperType& info);
} // namespace data
} // namespace mlpack
diff --git a/src/mlpack/core/data/load_arff_impl.hpp b/src/mlpack/core/data/load_arff_impl.hpp
index 68c9184..edb9057 100644
--- a/src/mlpack/core/data/load_arff_impl.hpp
+++ b/src/mlpack/core/data/load_arff_impl.hpp
@@ -15,10 +15,10 @@
namespace mlpack {
namespace data {
-template<typename eT>
+template<typename eT, typename MapperType>
void LoadARFF(const std::string& filename,
arma::Mat<eT>& matrix,
- DatasetInfo& info)
+ MapperType& info)
{
// First, open the file.
std::ifstream ifs;
@@ -98,7 +98,7 @@ void LoadARFF(const std::string& filename,
// Reset the DatasetInfo object, if needed.
if (info.Dimensionality() == 0)
{
- info = DatasetInfo(dimensionality);
+ info = MapperType(dimensionality);
}
else if (info.Dimensionality() != dimensionality)
{
diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 5479bab..8349f4c 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -59,13 +59,13 @@ void TransPoseTokens(std::vector<std::vector<std::string>> const &input,
}
}
-template<typename eT>
-void MapToNumerical(const std::vector<std::string> &tokens,
- size_t &row,
- DatasetInfo &info,
- arma::Mat<eT> &matrix)
+template<typename eT, typename MapperType>
+void MapToNumerical(const std::vector<std::string>& tokens,
+ size_t& row,
+ MapperType& info,
+ arma::Mat<eT>& matrix)
{
- auto notNumber = [](const std::string &str)
+ auto notNumber = [](const std::string& str)
{
eT val(0);
std::stringstream token;
@@ -370,10 +370,10 @@ bool Load(const std::string& filename,
}
// Load with mappings. Unfortunately we have to implement this ourselves.
-template<typename eT>
+template<typename eT, typename MapperType>
bool Load(const std::string& filename,
arma::Mat<eT>& matrix,
- DatasetInfo& info,
+ MapperType& info,
const bool fatal,
const bool transpose)
{
@@ -446,12 +446,12 @@ bool Load(const std::string& filename,
if (transpose)
{
matrix.set_size(cols, rows);
- info = DatasetInfo(cols);
+ info = MapperType(cols);
}
else
{
matrix.set_size(rows, cols);
- info = DatasetInfo(rows);
+ info = MapperType(rows);
}
stream.close();
diff --git a/src/mlpack/core/data/map_policies/missing_policy.hpp b/src/mlpack/core/data/map_policies/missing_policy.hpp
index b7e0630..4cc8a96 100644
--- a/src/mlpack/core/data/map_policies/missing_policy.hpp
+++ b/src/mlpack/core/data/map_policies/missing_policy.hpp
@@ -26,19 +26,27 @@ class MissingPolicy
public:
typedef size_t mapped_type;
+ //explicit MissingPolicy(std::set<std::string> specificString) :
+ //specificString(std::move(specificString))
+ //{
+ //// Nothing to initialize here.
+ //}
+
+
template <typename MapType>
mapped_type MapString(MapType& maps,
- std::vector<Datatype>& types,
- const std::string& string,
- const size_t dimension)
+ std::vector<Datatype>& types,
+ const std::string& string,
+ const size_t dimension)
{
// If this condition is true, either we have no mapping for the given string
// or we have no mappings for the given dimension at all. In either case,
// we create a mapping.
- if (maps.count(dimension) == 0 ||
+ if (//specificString.count(string) != 0 &&
+ maps.count(dimension) == 0 ||
maps[dimension].first.left.count(string) == 0)
{
- // This string does not exist yet.
+ // This string does not exist yet.
size_t& numMappings = maps[dimension].second;
typedef boost::bimap<std::string, size_t>::value_type PairType;
@@ -51,6 +59,8 @@ class MissingPolicy
return maps[dimension].first.left.at(string);
}
}
+ private:
+ //std::set<std::string> specificString;
}; // class MissingPolicy
} // namespace data
diff --git a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
index b833ab1..6a290b9 100644
--- a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
@@ -9,6 +9,7 @@
#include <mlpack/core/data/imputer.hpp>
#include <mlpack/core/data/dataset_info.hpp>
#include <mlpack/core/data/map_policies/increment_policy.hpp>
+#include <mlpack/core/data/map_policies/missing_policy.hpp>
#include <mlpack/core/data/imputation_methods/mean_imputation.hpp>
#include <mlpack/core/data/imputation_methods/median_imputation.hpp>
#include <mlpack/core/data/imputation_methods/custom_imputation.hpp>
@@ -83,10 +84,10 @@ int main(int argc, char** argv)
// DatasetInfo holds how the DatasetMapper should map the values.
// can be specified by passing map_policy classes as template parameters
// ex) DatasetMapper<IncrementPolicy> info;
- using MapperType = DatasetMapper<IncrementPolicy>;
+ using MapperType = DatasetMapper<MissingPolicy>;
MapperType info;
- Load(inputFile, input, info, true, true);
+ Load<double, MapperType>(inputFile, input, info, true, true);
// for testing purpose
Log::Info << input << endl;
@@ -100,7 +101,6 @@ int main(int argc, char** argv)
arma::Mat<double> output(input);
-
Log::Info << "Performing '" << strategy << "' imputation strategy "
<< "on dimension '" << dimension << endl;
More information about the mlpack-git mailing list