[mlpack-git] master: Handle when we have both categorical and numeric features. (4e212e1)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:44:45 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit 4e212e1cd1c5e2fa04594c7ceba62105e680beb8
Author: ryan <ryan at ratml.org>
Date:   Tue Oct 20 11:26:10 2015 -0400

    Handle when we have both categorical and numeric features.
    
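    This wasn't handled properly before: the categorical and numeric split
    vectors were indexed by the raw dimension index instead of the per-type
    index stored in dimensionMappings.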


>---------------------------------------------------------------

4e212e1cd1c5e2fa04594c7ceba62105e680beb8
 .../methods/hoeffding_trees/hoeffding_categorical_split.hpp  |  2 ++
 src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp  | 12 +++++++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
index 847e10c..9b1c46a 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
@@ -52,6 +52,8 @@ class HoeffdingCategoricalSplit
   size_t MajorityClass() const;
   double MajorityProbability() const;
 
+  size_t NumChildren() const { return sufficientStatistics.n_cols; }
+
   //! Serialize the categorical split.
   template<typename Archive>
   void Serialize(Archive& ar, const unsigned int /* version */)
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
index 80b7548..e086ac3 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
@@ -180,16 +180,18 @@ size_t HoeffdingSplit<
   {
     // Split!
     splitDimension = largestIndex;
-    if (datasetInfo->Type(largestIndex) == data::Datatype::categorical)
+    const size_t type = dimensionMappings->at(largestIndex).first;
+    const size_t index = dimensionMappings->at(largestIndex).second;
+    if (type == data::Datatype::categorical)
     {
       // I don't know if this should be here.
-      majorityClass = categoricalSplits[largestIndex].MajorityClass();
-      return datasetInfo->NumMappings(largestIndex);
+      majorityClass = categoricalSplits[index].MajorityClass();
+      return categoricalSplits[index].NumChildren();
     }
     else
     {
-      majorityClass = numericSplits[largestIndex].MajorityClass();
-      return numericSplits[largestIndex].NumChildren();
+      majorityClass = numericSplits[index].MajorityClass();
+      return numericSplits[index].NumChildren();
     }
   }
   else



More information about the mlpack-git mailing list