[mlpack-git] master: Document the DatasetInfo class more comprehensively. (243ace7)

gitdub at mlpack.org gitdub at mlpack.org
Tue Apr 12 10:43:52 EDT 2016


Repository : https://github.com/mlpack/mlpack
On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/eeba6bdc50ad4d785cb6880edbaba78173036ca6...8d77f4231046703d5c0c05ed4795458f98267968

>---------------------------------------------------------------

commit 243ace7886e7c9c3bd53379b910cb0c54b4db01e
Author: Ryan Curtin <ryan at ratml.org>
Date:   Fri Apr 8 18:58:00 2016 +0000

    Document the DatasetInfo class more comprehensively.


>---------------------------------------------------------------

243ace7886e7c9c3bd53379b910cb0c54b4db01e
 src/mlpack/core/data/dataset_info.hpp | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/src/mlpack/core/data/dataset_info.hpp b/src/mlpack/core/data/dataset_info.hpp
index 5ad688d..85287a6 100644
--- a/src/mlpack/core/data/dataset_info.hpp
+++ b/src/mlpack/core/data/dataset_info.hpp
@@ -31,8 +31,8 @@ enum Datatype : bool /* bool is all the precision we need for two types */
 /**
  * Auxiliary information for a dataset, including mappings to/from strings and
  * the datatype of each dimension.  DatasetInfo objects are optionally produced
- * by data::Load(), and store the type of each dimension (Datatype::NUMERIC or
- * Datatype::CATEGORICAL) as well as mappings from strings to unsigned integers
+ * by data::Load(), and store the type of each dimension (Datatype::numeric or
+ * Datatype::categorical) as well as mappings from strings to unsigned integers
  * and vice versa.
  */
 class DatasetInfo
@@ -56,15 +56,38 @@ class DatasetInfo
    */
   size_t MapString(const std::string& string, const size_t dimension);
 
+  /**
+   * Return the string that corresponds to a given value in a given dimension.
+   * If the value is not a valid mapping in the given dimension, a
+   * std::invalid_argument is thrown.
+   *
+   * @param value Mapped value to convert back to its string.
+   * @param dimension Dimension to unmap string from.
+   */
   const std::string& UnmapString(const size_t value, const size_t dimension);
 
+  //! Return the type of a given dimension (numeric or categorical).
   Datatype Type(const size_t dimension) const;
+  //! Modify the type of a given dimension (be careful!).
   Datatype& Type(const size_t dimension);
 
+  /**
+   * Get the number of mappings for a particular dimension.  If the dimension
+   * is numeric, then this will return 0.
+   */
   size_t NumMappings(const size_t dimension) const;
 
+  /**
+   * Get the dimensionality of the DatasetInfo object (that is, how many
+   * dimensions it has information for).  If this object was created by a call
+   * to mlpack::data::Load(), then the dimensionality will be the same as the
+   * number of rows (dimensions) in the dataset.
+   */
   size_t Dimensionality() const;
 
+  /**
+   * Serialize the dataset information.
+   */
   template<typename Archive>
   void Serialize(Archive& ar, const unsigned int /* version */)
   {
@@ -73,9 +96,11 @@ class DatasetInfo
   }
 
  private:
+  //! Types of each dimension.
   std::vector<Datatype> types;
 
-  // Map entries will only exist for dimensions that are categorical.
+  //! Mappings from strings to integers.  Map entries will only exist for
+  //! dimensions that are categorical.
   std::unordered_map<size_t, std::pair<boost::bimap<std::string, size_t>,
       size_t>> maps;
 




More information about the mlpack-git mailing list