[mlpack-git] master: Incremental check-in so I can work from a different system. (4713e20)
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:42:05 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit 4713e207e241a1854c80f556705c6f72be3e1bb9
Author: Ryan Curtin <ryan at ratml.org>
Date: Mon Sep 21 17:13:28 2015 +0000
Incremental check-in so I can work from a different system.
>---------------------------------------------------------------
4713e207e241a1854c80f556705c6f72be3e1bb9
.../hoeffding_trees/categorical_split_info.hpp | 4 +-
src/mlpack/tests/hoeffding_tree_test.cpp | 43 ++++++++++++++++++++++
2 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp b/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
index 965f128..04791c2 100644
--- a/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
+++ b/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
@@ -18,11 +18,11 @@ class CategoricalSplitInfo
CategoricalSplitInfo(const size_t categories) : categories(categories) { }
template<typename eT>
- void CalculateDirection(const eT& value)
+ size_t CalculateDirection(const eT& value)
{
// We have a child for each categorical value, and value should be in the
// range [0, categories).
- return value;
+ return size_t(value);
}
private:
diff --git a/src/mlpack/tests/hoeffding_tree_test.cpp b/src/mlpack/tests/hoeffding_tree_test.cpp
index 91a76bf..5bf877e 100644
--- a/src/mlpack/tests/hoeffding_tree_test.cpp
+++ b/src/mlpack/tests/hoeffding_tree_test.cpp
@@ -7,6 +7,7 @@
#include <mlpack/core.hpp>
#include <mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp>
#include <mlpack/methods/hoeffding_trees/gini_impurity.hpp>
+#include <mlpack/methods/hoeffding_trees/hoeffding_split.hpp>
#include <mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp>
#include <boost/test/unit_test.hpp>
@@ -153,4 +154,46 @@ BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitEasyFitnessCheck)
BOOST_REQUIRE_GT(split.EvaluateFitnessFunction(), 0.0);
}
+/**
+ * Ensure that the fitness function returns 0 (no improvement) when a split
+ * would not get us any improvement.
+ */
+BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitNoImprovementFitnessTest)
+{
+ HoeffdingCategoricalSplit<GiniImpurity> split(2, 2);
+
+ // No training has yet happened, so a split would get us nothing.
+ BOOST_REQUIRE_SMALL(split.EvaluateFitnessFunction(), 1e-10);
+
+ split.Train(0, 0);
+ split.Train(1, 0);
+ split.Train(0, 1);
+ split.Train(1, 1);
+
+ // Now, a split still gets us only 50% accuracy in each split bin.
+ BOOST_REQUIRE_SMALL(split.EvaluateFitnessFunction(), 1e-10);
+}
+
+/**
+ * Test that when we do split, we get reasonable split information.
+ */
+BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitSplitTest)
+{
+ HoeffdingCategoricalSplit<GiniImpurity> split(3, 3); // 3 categories.
+
+ // No training is necessary because we can just call CreateChildren().
+ std::vector<StreamingDecisionTree<HoeffdingSplit>> children;
+ data::DatasetInfo info;
+ info.MapString("hello", 0); // Make dimension 0 categorical.
+ HoeffdingCategoricalSplit<GiniImpurity>::SplitInfo splitInfo;
+
+ // Create the children.
+ split.CreateChildren(children, info, splitInfo);
+
+ BOOST_REQUIRE_EQUAL(children.size(), 3);
+ BOOST_REQUIRE_EQUAL(splitInfo.CalculateDirection(0), 0);
+ BOOST_REQUIRE_EQUAL(splitInfo.CalculateDirection(1), 1);
+ BOOST_REQUIRE_EQUAL(splitInfo.CalculateDirection(2), 2);
+}
+
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-git mailing list