[mlpack-git] master: Add flag for using information gain. (1196eaa)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:46:12 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit 1196eaa0ffbe2c624d0b9f165baa2fecb05170c8
Author: Ryan Curtin <ryan at ratml.org>
Date:   Thu Nov 12 11:43:44 2015 -0500

    Add flag for using information gain.


>---------------------------------------------------------------

1196eaa0ffbe2c624d0b9f165baa2fecb05170c8
 .../hoeffding_trees/hoeffding_tree_main.cpp        | 37 ++++++++++++++++------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
index 32e465e..86f22cf 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
@@ -7,6 +7,7 @@
 #include <mlpack/core.hpp>
 #include <mlpack/methods/hoeffding_trees/hoeffding_tree.hpp>
 #include <mlpack/methods/hoeffding_trees/binary_numeric_split.hpp>
+#include <mlpack/methods/hoeffding_trees/information_gain.hpp>
 
 using namespace std;
 using namespace mlpack;
@@ -35,6 +36,8 @@ PARAM_STRING("numeric_split_strategy", "The splitting strategy to use for "
 PARAM_FLAG("batch_mode", "If true, samples will be considered in batch instead "
     "of as a stream.  This generally results in better trees but at the cost of"
     " memory usage and runtime.", "b");
+PARAM_FLAG("info_gain", "If set, information gain is used instead of Gini "
+    "impurity for calculating Hoeffding bounds.", "i");
 
 // Helper function for once we have chosen a tree type.
 template<typename TreeType>
@@ -70,16 +73,32 @@ int main(int argc, char** argv)
   if (trainingFile.empty() && CLI::HasParam("batch_mode"))
     Log::Warn << "--batch_mode (-b) ignored; no training set provided." << endl;
 
-  if (numericSplitStrategy == "domingos")
-    PerformActions<HoeffdingTree<GiniImpurity, HoeffdingDoubleNumericSplit,
-        HoeffdingCategoricalSplit>>();
-  else if (numericSplitStrategy == "binary")
-    PerformActions<HoeffdingTree<GiniImpurity, BinaryDoubleNumericSplit,
-        HoeffdingCategoricalSplit>>();
+  if (CLI::HasParam("info_gain"))
+  {
+    if (numericSplitStrategy == "domingos")
+      PerformActions<HoeffdingTree<InformationGain, HoeffdingDoubleNumericSplit,
+          HoeffdingCategoricalSplit>>();
+    else if (numericSplitStrategy == "binary")
+      PerformActions<HoeffdingTree<InformationGain, BinaryDoubleNumericSplit,
+          HoeffdingCategoricalSplit>>();
+    else
+      Log::Fatal << "Unrecognized numeric split strategy ("
+          << numericSplitStrategy << ")!  Must be 'domingos' or 'binary'."
+          << endl;
+  }
   else
-    Log::Fatal << "Unrecognized numeric split strategy ("
-        << numericSplitStrategy << ")!  Must be 'domingos' or 'binary'."
-        << endl;
+  {
+    if (numericSplitStrategy == "domingos")
+      PerformActions<HoeffdingTree<GiniImpurity, HoeffdingDoubleNumericSplit,
+          HoeffdingCategoricalSplit>>();
+    else if (numericSplitStrategy == "binary")
+      PerformActions<HoeffdingTree<GiniImpurity, BinaryDoubleNumericSplit,
+          HoeffdingCategoricalSplit>>();
+    else
+      Log::Fatal << "Unrecognized numeric split strategy ("
+          << numericSplitStrategy << ")!  Must be 'domingos' or 'binary'."
+          << endl;
+  }
 }
 
 template<typename TreeType>



More information about the mlpack-git mailing list