[mlpack-git] master: remove duplicate code in load function (21d94c0)
gitdub at mlpack.org
gitdub at mlpack.org
Mon Jul 25 12:19:06 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2
>---------------------------------------------------------------
commit 21d94c04652e8faadd5e8991103a3b73b4c81033
Author: Keon Kim <kwk236 at gmail.com>
Date: Sun Jul 3 03:01:54 2016 +0900
remove duplicate code in load function
>---------------------------------------------------------------
21d94c04652e8faadd5e8991103a3b73b4c81033
src/mlpack/core/data/dataset_info.hpp | 6 +-
src/mlpack/core/data/dataset_info_impl.hpp | 6 ++
src/mlpack/core/data/load.hpp | 6 +-
src/mlpack/core/data/load_impl.hpp | 167 -----------------------------
4 files changed, 15 insertions(+), 170 deletions(-)
diff --git a/src/mlpack/core/data/dataset_info.hpp b/src/mlpack/core/data/dataset_info.hpp
index eaa6882..91e150b 100644
--- a/src/mlpack/core/data/dataset_info.hpp
+++ b/src/mlpack/core/data/dataset_info.hpp
@@ -35,9 +35,9 @@ class DatasetMapper
* the dimensionality cannot be changed later; you will have to create a new
* DatasetMapper object.
*/
- DatasetMapper(const size_t dimensionality = 0);
+ explicit DatasetMapper(const size_t dimensionality = 0);
- DatasetMapper(PolicyType& policy, const size_t dimensionality = 0);
+ explicit DatasetMapper(PolicyType& policy, const size_t dimensionality = 0);
/**
* Given the string and the dimension to which it belongs, return its numeric
* mapping. If no mapping yet exists, the string is added to the list of
@@ -101,6 +101,8 @@ class DatasetMapper
ar & data::CreateNVP(maps, "maps");
}
+ PolicyType& Policy() const;
+
private:
//! Types of each dimension.
std::vector<Datatype> types;
diff --git a/src/mlpack/core/data/dataset_info_impl.hpp b/src/mlpack/core/data/dataset_info_impl.hpp
index 93e2a13..c95fa1a 100644
--- a/src/mlpack/core/data/dataset_info_impl.hpp
+++ b/src/mlpack/core/data/dataset_info_impl.hpp
@@ -115,6 +115,12 @@ inline size_t DatasetMapper<PolicyType>::Dimensionality() const
return types.size();
}
+template<typename PolicyType>
+inline PolicyType& DatasetMapper<PolicyType>::Policy() const
+{
+ return this->policy;
+}
+
} // namespace data
} // namespace mlpack
diff --git a/src/mlpack/core/data/load.hpp b/src/mlpack/core/data/load.hpp
index da770b4..8694cc2 100644
--- a/src/mlpack/core/data/load.hpp
+++ b/src/mlpack/core/data/load.hpp
@@ -96,7 +96,11 @@ bool Load(const std::string& filename,
arma::Mat<eT>& matrix,
DatasetMapper<PolicyType>& info,
const bool fatal = false,
- const bool transpose = true);
+ const bool transpose = true)
+{
+ PolicyType policy;
+ return Load(filename, matrix, info, policy, fatal, transpose);
+}
/**
* Loads a matrix from a file, guessing the filetype from the extension and
diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index 4f03221..c44b77f 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -538,173 +538,6 @@ bool Load(const std::string& filename,
return true;
}
-
-// Load with mappings. Unfortunately we have to implement this ourselves.
-template<typename eT, typename PolicyType>
-bool Load(const std::string& filename,
- arma::Mat<eT>& matrix,
- DatasetMapper<PolicyType>& info,
- const bool fatal,
- const bool transpose)
-{
- // Get the extension and load as necessary.
- Timer::Start("loading_data");
-
- // Get the extension.
- std::string extension = Extension(filename);
-
- // Catch nonexistent files by opening the stream ourselves.
- std::fstream stream;
- stream.open(filename.c_str(), std::fstream::in);
-
- if (!stream.is_open())
- {
- Timer::Stop("loading_data");
- if (fatal)
- Log::Fatal << "Cannot open file '" << filename << "'. " << std::endl;
- else
- Log::Warn << "Cannot open file '" << filename << "'; load failed."
- << std::endl;
-
- return false;
- }
-
- if (extension == "csv" || extension == "tsv" || extension == "txt")
- {
- // True if we're looking for commas; if false, we're looking for spaces.
- bool commas = (extension == "csv");
-
- std::string type;
- if (extension == "csv")
- type = "CSV data";
- else
- type = "raw ASCII-formatted data";
-
- Log::Info << "Loading '" << filename << "' as " << type << ". "
- << std::flush;
- std::string separators;
- if (commas)
- separators = ",";
- else
- separators = " \t";
-
- // We'll load this as CSV (or CSV with spaces or tabs) according to
- // RFC4180. So the first thing to do is determine the size of the matrix.
- std::string buffer;
- size_t cols = 0;
-
- std::getline(stream, buffer, '\n');
- // Count commas and whitespace in the line, ignoring anything inside
- // quotes.
- typedef boost::tokenizer<boost::escaped_list_separator<char>> Tokenizer;
- boost::escaped_list_separator<char> sep("\\", separators, "\"");
- Tokenizer tok(buffer, sep);
- for (Tokenizer::iterator i = tok.begin(); i != tok.end(); ++i)
- ++cols;
-
- // Now count the number of lines in the file. We've already counted the
- // first one.
- size_t rows = 1;
- while (!stream.eof() && !stream.bad() && !stream.fail())
- {
- std::getline(stream, buffer, '\n');
- if (!stream.fail())
- ++rows;
- }
-
- // Now we have the size. So resize our matrix.
- if (transpose)
- {
- matrix.set_size(cols, rows);
- info = DatasetMapper<PolicyType>(cols);
- }
- else
- {
- matrix.set_size(rows, cols);
- info = DatasetMapper<PolicyType>(rows);
- }
-
- stream.close();
- stream.open(filename, std::fstream::in);
-
- if(transpose)
- {
- std::vector<std::vector<std::string>> tokensArray;
- std::vector<std::string> tokens;
- while (!stream.bad() && !stream.fail() && !stream.eof())
- {
- // Extract line by line.
- std::getline(stream, buffer, '\n');
- Tokenizer lineTok(buffer, sep);
- tokens = details::ToTokens(lineTok);
- if(tokens.size() == cols)
- {
- tokensArray.emplace_back(std::move(tokens));
- }
- }
- for(size_t i = 0; i != cols; ++i)
- {
- details::TransPoseTokens(tokensArray, tokens, i);
- details::MapToNumerical(tokens, i,
- info, matrix);
- }
- }
- else
- {
- size_t row = 0;
- while (!stream.bad() && !stream.fail() && !stream.eof())
- {
- // Extract line by line.
- std::getline(stream, buffer, '\n');
- Tokenizer lineTok(buffer, sep);
- details::MapToNumerical(details::ToTokens(lineTok), row,
- info, matrix);
- ++row;
- }
- }
- }
- else if (extension == "arff")
- {
- Log::Info << "Loading '" << filename << "' as ARFF dataset. "
- << std::flush;
- try
- {
- LoadARFF(filename, matrix, info);
-
- // We transpose by default. So, un-transpose if necessary...
- if (!transpose)
- inplace_transpose(matrix);
- }
- catch (std::exception& e)
- {
- if (fatal)
- Log::Fatal << e.what() << std::endl;
- else
- Log::Warn << e.what() << std::endl;
- }
- }
- else
- {
- // The type is unknown.
- Timer::Stop("loading_data");
- if (fatal)
- Log::Fatal << "Unable to detect type of '" << filename << "'; "
- << "incorrect extension?" << std::endl;
- else
- Log::Warn << "Unable to detect type of '" << filename << "'; load failed."
- << " Incorrect extension?" << std::endl;
-
- return false;
- }
-
- Log::Info << "Size is " << (transpose ? matrix.n_cols : matrix.n_rows)
- << " x " << (transpose ? matrix.n_rows : matrix.n_cols) << ".\n";
-
- Timer::Stop("loading_data");
-
- return true;
-}
-
// Load a model from file.
template<typename T>
bool Load(const std::string& filename,
More information about the mlpack-git mailing list