[mlpack-git] master: add more comments and delete impute_test.csv (03e19a4)

gitdub at mlpack.org gitdub at mlpack.org
Fri Jul 22 08:33:50 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/ecbfd24defe31d9f39708c0b4c6ad352cd46ed5c...7eec0609aa21cb12aeed3cbcaa1e411dad0359f2

>---------------------------------------------------------------

commit 03e19a43f28e743485e128ff19056ac5e4b71017
Author: Keon Kim <kwk236 at gmail.com>
Date:   Fri Jul 22 21:33:50 2016 +0900

    add more comments and delete impute_test.csv


>---------------------------------------------------------------

03e19a43f28e743485e128ff19056ac5e4b71017
 src/mlpack/core/data/dataset_mapper.hpp                | 18 ++++++++++++++++--
 .../core/data/imputation_methods/custom_imputation.hpp |  1 +
 src/mlpack/tests/data/impute_test.csv                  |  3 ---
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/mlpack/core/data/dataset_mapper.hpp b/src/mlpack/core/data/dataset_mapper.hpp
index 0001438..f7f12d2 100644
--- a/src/mlpack/core/data/dataset_mapper.hpp
+++ b/src/mlpack/core/data/dataset_mapper.hpp
@@ -79,9 +79,20 @@ class DatasetMapper
   typename PolicyType::MappedType UnmapValue(const std::string& string,
                                             const size_t dimension);
 
+  /**
+   * MapTokens turns vector of strings into numeric variables and puts them
+   * into a given matrix. It uses the mapping policy to store categorical values
+   * to maps. How it determines whether a value is categorical and how it
+   * stores the categorical value into map and replaces with the numerical value
+   * all depends on the mapping policy object's MapTokens() function.
+   *
+   * @tparam eT Type of armadillo matrix.
+   * @param tokens Vector of variables inside a dimension.
+   * @param row Position of the given tokens.
+   * @param matrix Matrix to save the data into.
+   */
   template <typename eT>
-  void MapTokens(const std::vector<std::string>& tokens,
-      size_t& row,
+  void MapTokens(const std::vector<std::string>& tokens, size_t& row,
       arma::Mat<eT>& matrix);
 
   //! Return the type of a given dimension (numeric or categorical).
@@ -134,8 +145,11 @@ class DatasetMapper
   // MapType = map<dimension, pair<bimap<string, MappedType>, numMappings>>
   using MapType = std::unordered_map<size_t, std::pair<BiMapType, size_t>>;
 
+  //! maps object stores string and numerical pairs.
   MapType maps;
 
+  //! policy object tells dataset mapper how the categorical values should be
+  //  mapped to the maps object. It is used in MapString() and MapTokens().
   PolicyType policy;
 };
 
diff --git a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
index f7d8bde..03b9d77 100644
--- a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
@@ -141,6 +141,7 @@ class CustomImputation
   }
 
  private:
+  //! A user-defined value that the user wants to replace missing values with.
   T customValue;
 }; // class CustomImputation
 
diff --git a/src/mlpack/tests/data/impute_test.csv b/src/mlpack/tests/data/impute_test.csv
deleted file mode 100644
index 06256a4..0000000
--- a/src/mlpack/tests/data/impute_test.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-a, 2, 3
-5, 6, a
-1, 9, 1




More information about the mlpack-git mailing list