[mlpack-git] master: Handle when we have both categorical and numeric features. (4e212e1)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:44:45 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit 4e212e1cd1c5e2fa04594c7ceba62105e680beb8
Author: ryan <ryan at ratml.org>
Date: Tue Oct 20 11:26:10 2015 -0400
Handle when we have both categorical and numeric features.
This wasn't handled properly before.
>---------------------------------------------------------------
4e212e1cd1c5e2fa04594c7ceba62105e680beb8
.../methods/hoeffding_trees/hoeffding_categorical_split.hpp | 2 ++
src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp | 12 +++++++-----
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
index 847e10c..9b1c46a 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
@@ -52,6 +52,8 @@ class HoeffdingCategoricalSplit
size_t MajorityClass() const;
double MajorityProbability() const;
+ size_t NumChildren() const { return sufficientStatistics.n_cols; }
+
//! Serialize the categorical split.
template<typename Archive>
void Serialize(Archive& ar, const unsigned int /* version */)
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
index 80b7548..e086ac3 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
@@ -180,16 +180,18 @@ size_t HoeffdingSplit<
{
// Split!
splitDimension = largestIndex;
- if (datasetInfo->Type(largestIndex) == data::Datatype::categorical)
+ const size_t type = dimensionMappings->at(largestIndex).first;
+ const size_t index = dimensionMappings->at(largestIndex).second;
+ if (type == data::Datatype::categorical)
{
// I don't know if this should be here.
- majorityClass = categoricalSplits[largestIndex].MajorityClass();
- return datasetInfo->NumMappings(largestIndex);
+ majorityClass = categoricalSplits[index].MajorityClass();
+ return categoricalSplits[index].NumChildren();
}
else
{
- majorityClass = numericSplits[largestIndex].MajorityClass();
- return numericSplits[largestIndex].NumChildren();
+ majorityClass = numericSplits[index].MajorityClass();
+ return numericSplits[index].NumChildren();
}
}
else
More information about the mlpack-git mailing list