[mlpack-git] master: Allow specification of dimensionality. (ad904dc)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:44:21 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit ad904dc7772c6bb26dc7f82451006cc7888b435f
Author: Ryan Curtin <ryan at ratml.org>
Date: Sat Oct 17 09:40:46 2015 -0400
Allow specification of dimensionality.
>---------------------------------------------------------------
ad904dc7772c6bb26dc7f82451006cc7888b435f
src/mlpack/core/data/dataset_info.hpp | 9 ++++++++-
src/mlpack/core/data/dataset_info_impl.hpp | 29 ++++++++++++++++++++++++++---
2 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/src/mlpack/core/data/dataset_info.hpp b/src/mlpack/core/data/dataset_info.hpp
index 1f2110f..0387877 100644
--- a/src/mlpack/core/data/dataset_info.hpp
+++ b/src/mlpack/core/data/dataset_info.hpp
@@ -38,7 +38,7 @@ enum Datatype : bool /* bool is all the precision we need for two types */
class DatasetInfo
{
public:
- DatasetInfo();
+ DatasetInfo(const size_t dimensionality = 0);
/**
* Given the string and the dimension to which it belongs, return its numeric
@@ -54,19 +54,26 @@ class DatasetInfo
const std::string& UnmapString(const size_t value, const size_t dimension);
Datatype Type(const size_t dimension) const;
+ Datatype& Type(const size_t dimension);
size_t NumMappings(const size_t dimension) const;
+ size_t Dimensionality() const;
+
template<typename Archive>
void Serialize(Archive& ar, const unsigned int /* version */)
{
+ ar & data::CreateNVP(types, "types");
ar & data::CreateNVP(maps, "maps");
}
private:
+ std::vector<Datatype> types;
+
// Map entries will only exist for dimensions that are categorical.
std::unordered_map<size_t, std::pair<boost::bimap<std::string, size_t>,
size_t>> maps;
+
};
} // namespace data
diff --git a/src/mlpack/core/data/dataset_info_impl.hpp b/src/mlpack/core/data/dataset_info_impl.hpp
index 080aaec..ebd2dc1 100644
--- a/src/mlpack/core/data/dataset_info_impl.hpp
+++ b/src/mlpack/core/data/dataset_info_impl.hpp
@@ -13,7 +13,8 @@
namespace mlpack {
namespace data {
-inline DatasetInfo::DatasetInfo()
+inline DatasetInfo::DatasetInfo(const size_t dimensionality) :
+ types(dimensionality, Datatype::numeric)
{
// Nothing to initialize.
}
@@ -30,6 +31,8 @@ inline size_t DatasetInfo::MapString(const std::string& string,
{
// This string does not exist yet.
size_t& numMappings = maps[dimension].second;
+ if (numMappings == 0)
+ types[dimension] = Datatype::categorical;
typedef boost::bimap<std::string, size_t>::value_type PairType;
maps[dimension].first.insert(PairType(string, numMappings));
return numMappings++;
@@ -61,8 +64,23 @@ inline const std::string& DatasetInfo::UnmapString(
// Get the type of a particular dimension.
inline Datatype DatasetInfo::Type(const size_t dimension) const
{
- return (maps.count(dimension) == 0) ? Datatype::numeric :
- Datatype::categorical;
+ if (dimension >= types.size())
+ {
+ std::ostringstream oss;
+ oss << "requested type of dimension " << dimension << ", but dataset only "
+ << "has " << types.size() << " dimensions";
+ throw std::invalid_argument(oss.str());
+ }
+
+ return types[dimension];
+}
+
+inline Datatype& DatasetInfo::Type(const size_t dimension)
+{
+ if (dimension >= types.size())
+ types.resize(dimension + 1, Datatype::numeric);
+
+ return types[dimension];
}
inline size_t DatasetInfo::NumMappings(const size_t dimension) const
@@ -70,6 +88,11 @@ inline size_t DatasetInfo::NumMappings(const size_t dimension) const
return (maps.count(dimension) == 0) ? 0 : maps.at(dimension).second;
}
+inline size_t DatasetInfo::Dimensionality() const
+{
+ return types.size();
+}
+
} // namespace data
} // namespace mlpack
More information about the mlpack-git mailing list