[mlpack-git] master: delete load overload (a92afaa)
gitdub at mlpack.org
gitdub at mlpack.org
Mon Jul 25 12:19:01 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2
>---------------------------------------------------------------
commit a92afaaafb1af3deede31c1a5ef0b508bfbfe105
Author: Keon Kim <kwk236 at gmail.com>
Date: Mon Jul 4 07:07:29 2016 +0900
delete load overload
>---------------------------------------------------------------
a92afaaafb1af3deede31c1a5ef0b508bfbfe105
src/mlpack/core/data/dataset_info.hpp | 10 +++++
src/mlpack/core/data/dataset_info_impl.hpp | 7 ++++
src/mlpack/core/data/load.hpp | 46 ----------------------
src/mlpack/core/data/load_impl.hpp | 15 +++----
.../core/data/map_policies/increment_policy.hpp | 3 +-
.../core/data/map_policies/missing_policy.hpp | 9 +++--
.../methods/preprocess/preprocess_imputer_main.cpp | 2 +-
src/mlpack/tests/imputation_test.cpp | 28 +++++++++----
8 files changed, 52 insertions(+), 68 deletions(-)
diff --git a/src/mlpack/core/data/dataset_info.hpp b/src/mlpack/core/data/dataset_info.hpp
index 91e150b..da9f3ce 100644
--- a/src/mlpack/core/data/dataset_info.hpp
+++ b/src/mlpack/core/data/dataset_info.hpp
@@ -37,7 +37,13 @@ class DatasetMapper
*/
explicit DatasetMapper(const size_t dimensionality = 0);
+ /**
+ * Create the DatasetMapper object with the given policy and dimensionality.
+ * Note that the dimensionality cannot be changed later; you will have to
+ * create a new DatasetMapper object. Policy can be modified by the modifier.
+ */
explicit DatasetMapper(PolicyType& policy, const size_t dimensionality = 0);
+
/**
* Given the string and the dimension to which it belongs, return its numeric
* mapping. If no mapping yet exists, the string is added to the list of
@@ -101,8 +107,12 @@ class DatasetMapper
ar & data::CreateNVP(maps, "maps");
}
+ //! Return the policy of the mapper.
PolicyType& Policy() const;
+ //! Modify the policy of the mapper (be careful!).
+ PolicyType& Policy();
+
private:
//! Types of each dimension.
std::vector<Datatype> types;
diff --git a/src/mlpack/core/data/dataset_info_impl.hpp b/src/mlpack/core/data/dataset_info_impl.hpp
index c95fa1a..4eed4a9 100644
--- a/src/mlpack/core/data/dataset_info_impl.hpp
+++ b/src/mlpack/core/data/dataset_info_impl.hpp
@@ -121,6 +121,13 @@ inline PolicyType& DatasetMapper<PolicyType>::Policy() const
return this->policy;
}
+template<typename PolicyType>
+inline PolicyType& DatasetMapper<PolicyType>::Policy()
+{
+ return this->policy;
+}
+
+
} // namespace data
} // namespace mlpack
diff --git a/src/mlpack/core/data/load.hpp b/src/mlpack/core/data/load.hpp
index 8694cc2..4b5debe 100644
--- a/src/mlpack/core/data/load.hpp
+++ b/src/mlpack/core/data/load.hpp
@@ -96,52 +96,6 @@ bool Load(const std::string& filename,
arma::Mat<eT>& matrix,
DatasetMapper<PolicyType>& info,
const bool fatal = false,
- const bool transpose = true)
-{
- PolicyType policy;
- return Load(filename, matrix, info, policy, fatal, transpose);
-}
-
-/**
- * Loads a matrix from a file, guessing the filetype from the extension and
- * mapping categorical features with a DatasetMapper object. This will
- * transpose the matrix (unless the transpose parameter is set to false).
- * This particular overload of Load() can only load text-based formats, such as
- * those given below:
- *
- * - CSV (csv_ascii), denoted by .csv, or optionally .txt
- * - TSV (raw_ascii), denoted by .tsv, .csv, or .txt
- * - ASCII (raw_ascii), denoted by .txt
- *
- * If the file extension is not one of those types, an error will be given.
- * This is preferable to Armadillo's default behavior of loading an unknown
- * filetype as raw_binary, which can have very confusing effects.
- *
- * If the parameter 'fatal' is set to true, a std::runtime_error exception will
- * be thrown if the matrix does not load successfully. The parameter
- * 'transpose' controls whether or not the matrix is transposed after loading.
- * In most cases, because data is generally stored in a row-major format and
- * mlpack requires column-major matrices, this should be left at its default
- * value of 'true'.
- *
- * The DatasetMapper object passed to this function will be re-created, so any
- * mappings from previous loads will be lost. policy is passed to the
- * constructor of DatasetMapper to create a new instance.
- *
- * @param filename Name of file to load.
- * @param matrix Matrix to load contents of file into.
- * @param info DatasetMapper object to populate with mappings and data types.
- * @param policy Policy class that decides how the DatasetMapper should map.
- * @param fatal If an error should be reported as fatal (default false).
- * @param transpose If true, transpose the matrix after loading.
- * @return Boolean value indicating success or failure of load.
- */
-template<typename eT, typename PolicyType>
-bool Load(const std::string& filename,
- arma::Mat<eT>& matrix,
- DatasetMapper<PolicyType>& info,
- PolicyType& policy,
- const bool fatal = false,
const bool transpose = true);
/**
diff --git a/src/mlpack/core/data/load_impl.hpp b/src/mlpack/core/data/load_impl.hpp
index c44b77f..419b090 100644
--- a/src/mlpack/core/data/load_impl.hpp
+++ b/src/mlpack/core/data/load_impl.hpp
@@ -369,18 +369,17 @@ bool Load(const std::string& filename,
return success;
}
-// Load with mappings and policy.
+// Load with mappings. Unfortunately we have to implement this ourselves.
template<typename eT, typename PolicyType>
bool Load(const std::string& filename,
arma::Mat<eT>& matrix,
DatasetMapper<PolicyType>& info,
- PolicyType& policy,
const bool fatal,
const bool transpose)
{
// Get the extension and load as necessary.
Timer::Start("loading_data");
- Log::Debug << "Load with Policy" << std::endl;
+
// Get the extension.
std::string extension = Extension(filename);
@@ -412,7 +411,7 @@ bool Load(const std::string& filename,
type = "raw ASCII-formatted data";
Log::Info << "Loading '" << filename << "' as " << type << ". "
- << std::flush;
+ << std::endl;
std::string separators;
if (commas)
separators = ",";
@@ -447,14 +446,12 @@ bool Load(const std::string& filename,
if (transpose)
{
matrix.set_size(cols, rows);
- Log::Debug << "initialize datasetmapper with policy" << std::endl;
- info = DatasetMapper<PolicyType>(policy, cols);
+ info = DatasetMapper<PolicyType>(info.Policy(), cols);
}
else
{
matrix.set_size(rows, cols);
- Log::Debug << "initialize datasetmapper with policy" << std::endl;
- info = DatasetMapper<PolicyType>(policy, rows);
+ info = DatasetMapper<PolicyType>(info.Policy(), rows);
}
stream.close();
@@ -499,7 +496,7 @@ bool Load(const std::string& filename,
else if (extension == "arff")
{
Log::Info << "Loading '" << filename << "' as ARFF dataset. "
- << std::flush;
+ << std::endl;
try
{
LoadARFF(filename, matrix, info);
diff --git a/src/mlpack/core/data/map_policies/increment_policy.hpp b/src/mlpack/core/data/map_policies/increment_policy.hpp
index f0b1d70..d4b104b 100644
--- a/src/mlpack/core/data/map_policies/increment_policy.hpp
+++ b/src/mlpack/core/data/map_policies/increment_policy.hpp
@@ -24,7 +24,8 @@ namespace data {
class IncrementPolicy
{
public:
- typedef size_t mapped_type;
+ // typedef of mapped_type
+ using mapped_type = size_t;
template <typename MapType>
mapped_type MapString(MapType& maps,
diff --git a/src/mlpack/core/data/map_policies/missing_policy.hpp b/src/mlpack/core/data/map_policies/missing_policy.hpp
index 970a0ee..6c3d1d1 100644
--- a/src/mlpack/core/data/map_policies/missing_policy.hpp
+++ b/src/mlpack/core/data/map_policies/missing_policy.hpp
@@ -24,7 +24,8 @@ namespace data {
class MissingPolicy
{
public:
- typedef size_t mapped_type;
+ // typedef of mapped_type
+ using mapped_type = size_t;
MissingPolicy()
{
@@ -48,9 +49,10 @@ class MissingPolicy
// If this condition is true, either we have no mapping for the given string
// or we have no mappings for the given dimension at all. In either case,
// we create a mapping.
+ Log::Debug << "missingSet has: " << missingSet.count(string) << std::endl;
if (missingSet.count(string) != 0 &&
- maps.count(dimension) == 0 ||
- maps[dimension].first.left.count(string) == 0)
+ (maps.count(dimension) == 0 ||
+ maps[dimension].first.left.count(string) == 0))
{
// This string does not exist yet.
size_t& numMappings = maps[dimension].second;
@@ -62,6 +64,7 @@ class MissingPolicy
else
{
// This string already exists in the mapping.
+ Log::Debug << "string already exists in the mapping" << std::endl;
return maps[dimension].first.left.at(string);
}
}
diff --git a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
index 015ad96..6857352 100644
--- a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
@@ -98,7 +98,7 @@ int main(int argc, char** argv)
Log::Debug << "initalize info(policy)" << endl;
DatasetMapper<MissingPolicy> info(policy);
- Load<double, MissingPolicy>(inputFile, input, info, policy, true, true);
+ Load(inputFile, input, info, true, true);
// for testing purpose
Log::Info << input << endl;
diff --git a/src/mlpack/tests/imputation_test.cpp b/src/mlpack/tests/imputation_test.cpp
index 6abbe1d..8746945 100644
--- a/src/mlpack/tests/imputation_test.cpp
+++ b/src/mlpack/tests/imputation_test.cpp
@@ -39,11 +39,12 @@ BOOST_AUTO_TEST_CASE(DatasetMapperImputerTest)
arma::mat input;
arma::mat output;
- string missingValue = "a";
- double customValue = 99;
- size_t feature = 0;
+ size_t dimension = 0;
- DatasetInfo info;
+ std::set<string> mset;
+ mset.insert("a");
+ MissingPolicy miss(mset);
+ DatasetMapper<MissingPolicy> info(miss);
BOOST_REQUIRE(data::Load("test_file.csv", input, info) == true);
BOOST_REQUIRE_EQUAL(input.n_rows, 3);
@@ -51,10 +52,21 @@ BOOST_AUTO_TEST_CASE(DatasetMapperImputerTest)
/* TODO: Connect Load with the new DatasetMapper instead of DatasetInfo*/
- //Imputer<double,
- //DatasetInfo,
- //CustomImputation<double>> impu(info);
- //impu.Impute(input, output, missingValue, customValue, feature);
+ Imputer<double,
+ DatasetMapper<MissingPolicy>,
+ CustomImputation<double>> imputer(info);
+ imputer.Impute(input, output, "a", 99, dimension); // convert a -> 99
+
+ BOOST_REQUIRE_CLOSE(output(0, 0), 99.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(output(0, 1), 2.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(output(0, 2), 3.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(output(1, 0), 5.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(output(1, 1), 6.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(output(1, 2), 7.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(output(2, 0), 8.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(output(2, 1), 9.0, 1e-5);
+ BOOST_REQUIRE_CLOSE(output(2, 2), 10.0, 1e-5);
+
// Remove the file.
remove("test_file.csv");
}
More information about the mlpack-git mailing list