[mlpack-git] master: Incremental check-in so I can work from a different system. (4713e20)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:42:05 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit 4713e207e241a1854c80f556705c6f72be3e1bb9
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon Sep 21 17:13:28 2015 +0000

    Incremental check-in so I can work from a different system.


>---------------------------------------------------------------

4713e207e241a1854c80f556705c6f72be3e1bb9
 .../hoeffding_trees/categorical_split_info.hpp     |  4 +-
 src/mlpack/tests/hoeffding_tree_test.cpp           | 43 ++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp b/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
index 965f128..04791c2 100644
--- a/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
+++ b/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
@@ -18,11 +18,11 @@ class CategoricalSplitInfo
   CategoricalSplitInfo(const size_t categories) : categories(categories) { }
 
   template<typename eT>
-  void CalculateDirection(const eT& value)
+  size_t CalculateDirection(const eT& value)
   {
     // We have a child for each categorical value, and value should be in the
     // range [0, categories).
-    return value;
+    return size_t(value);
   }
 
  private:
diff --git a/src/mlpack/tests/hoeffding_tree_test.cpp b/src/mlpack/tests/hoeffding_tree_test.cpp
index 91a76bf..5bf877e 100644
--- a/src/mlpack/tests/hoeffding_tree_test.cpp
+++ b/src/mlpack/tests/hoeffding_tree_test.cpp
@@ -7,6 +7,7 @@
 #include <mlpack/core.hpp>
 #include <mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp>
 #include <mlpack/methods/hoeffding_trees/gini_impurity.hpp>
+#include <mlpack/methods/hoeffding_trees/hoeffding_split.hpp>
 #include <mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp>
 
 #include <boost/test/unit_test.hpp>
@@ -153,4 +154,46 @@ BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitEasyFitnessCheck)
   BOOST_REQUIRE_GT(split.EvaluateFitnessFunction(), 0.0);
 }
 
+/**
+ * Ensure that the fitness function returns (approximately) 0 when a split
+ * would yield no improvement.
+ */
+BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitNoImprovementFitnessTest)
+{
+  HoeffdingCategoricalSplit<GiniImpurity> split(2, 2);
+
+  // No training has yet happened, so a split would get us nothing.
+  BOOST_REQUIRE_SMALL(split.EvaluateFitnessFunction(), 1e-10);
+
+  split.Train(0, 0);
+  split.Train(1, 0);
+  split.Train(0, 1);
+  split.Train(1, 1);
+
+  // Now, a split still gets us only 50% accuracy in each split bin.
+  BOOST_REQUIRE_SMALL(split.EvaluateFitnessFunction(), 1e-10);
+}
+
+/**
+ * Test that when we do split, we get reasonable split information.
+ */
+BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitSplitTest)
+{
+  HoeffdingCategoricalSplit<GiniImpurity> split(3, 3); // 3 categories.
+
+  // No training is necessary because we can just call CreateChildren().
+  std::vector<StreamingDecisionTree<HoeffdingSplit>> children;
+  data::DatasetInfo info;
+  info.MapString("hello", 0); // Make dimension 0 categorical.
+  HoeffdingCategoricalSplit<GiniImpurity>::SplitInfo splitInfo;
+
+  // Create the children.
+  split.CreateChildren(children, info, splitInfo);
+
+  BOOST_REQUIRE_EQUAL(children.size(), 3);
+  BOOST_REQUIRE_EQUAL(splitInfo.CalculateDirection(0), 0);
+  BOOST_REQUIRE_EQUAL(splitInfo.CalculateDirection(1), 1);
+  BOOST_REQUIRE_EQUAL(splitInfo.CalculateDirection(2), 2);
+}
+
 BOOST_AUTO_TEST_SUITE_END();



More information about the mlpack-git mailing list