[mlpack-git] master: Allow specification of dimensionality. (ad904dc)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:44:21 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit ad904dc7772c6bb26dc7f82451006cc7888b435f
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sat Oct 17 09:40:46 2015 -0400

    Allow specification of dimensionality.


>---------------------------------------------------------------

ad904dc7772c6bb26dc7f82451006cc7888b435f
 src/mlpack/core/data/dataset_info.hpp      |  9 ++++++++-
 src/mlpack/core/data/dataset_info_impl.hpp | 29 ++++++++++++++++++++++++++---
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/core/data/dataset_info.hpp b/src/mlpack/core/data/dataset_info.hpp
index 1f2110f..0387877 100644
--- a/src/mlpack/core/data/dataset_info.hpp
+++ b/src/mlpack/core/data/dataset_info.hpp
@@ -38,7 +38,7 @@ enum Datatype : bool /* bool is all the precision we need for two types */
 class DatasetInfo
 {
  public:
-  DatasetInfo();
+  DatasetInfo(const size_t dimensionality = 0);
 
   /**
    * Given the string and the dimension to which it belongs, return its numeric
@@ -54,19 +54,26 @@ class DatasetInfo
   const std::string& UnmapString(const size_t value, const size_t dimension);
 
   Datatype Type(const size_t dimension) const;
+  Datatype& Type(const size_t dimension);
 
   size_t NumMappings(const size_t dimension) const;
 
+  size_t Dimensionality() const;
+
   template<typename Archive>
   void Serialize(Archive& ar, const unsigned int /* version */)
   {
+    ar & data::CreateNVP(types, "types");
     ar & data::CreateNVP(maps, "maps");
   }
 
  private:
+  std::vector<Datatype> types;
+
   // Map entries will only exist for dimensions that are categorical.
   std::unordered_map<size_t, std::pair<boost::bimap<std::string, size_t>,
       size_t>> maps;
+
 };
 
 } // namespace data
diff --git a/src/mlpack/core/data/dataset_info_impl.hpp b/src/mlpack/core/data/dataset_info_impl.hpp
index 080aaec..ebd2dc1 100644
--- a/src/mlpack/core/data/dataset_info_impl.hpp
+++ b/src/mlpack/core/data/dataset_info_impl.hpp
@@ -13,7 +13,8 @@
 namespace mlpack {
 namespace data {
 
-inline DatasetInfo::DatasetInfo()
+inline DatasetInfo::DatasetInfo(const size_t dimensionality) :
+    types(dimensionality, Datatype::numeric)
 {
   // Nothing to initialize.
 }
@@ -30,6 +31,8 @@ inline size_t DatasetInfo::MapString(const std::string& string,
   {
     // This string does not exist yet.
     size_t& numMappings = maps[dimension].second;
+    if (numMappings == 0)
+      types[dimension] = Datatype::categorical;
     typedef boost::bimap<std::string, size_t>::value_type PairType;
     maps[dimension].first.insert(PairType(string, numMappings));
     return numMappings++;
@@ -61,8 +64,23 @@ inline const std::string& DatasetInfo::UnmapString(
 // Get the type of a particular dimension.
 inline Datatype DatasetInfo::Type(const size_t dimension) const
 {
-  return (maps.count(dimension) == 0) ? Datatype::numeric :
-      Datatype::categorical;
+  if (dimension >= types.size())
+  {
+    std::ostringstream oss;
+    oss << "requested type of dimension " << dimension << ", but dataset only "
+        << "has " << types.size() << " dimensions";
+    throw std::invalid_argument(oss.str());
+  }
+
+  return types[dimension];
+}
+
+inline Datatype& DatasetInfo::Type(const size_t dimension)
+{
+  if (dimension >= types.size())
+    types.resize(dimension + 1, Datatype::numeric);
+
+  return types[dimension];
 }
 
 inline size_t DatasetInfo::NumMappings(const size_t dimension) const
@@ -70,6 +88,11 @@ inline size_t DatasetInfo::NumMappings(const size_t dimension) const
   return (maps.count(dimension) == 0) ? 0 : maps.at(dimension).second;
 }
 
+inline size_t DatasetInfo::Dimensionality() const
+{
+  return types.size();
+}
+
 } // namespace data
 } // namespace mlpack
 



More information about the mlpack-git mailing list