[mlpack-git] master: A better attempt at batch training. (26ba0e2)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:45:55 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit 26ba0e2b009bd8a455a57db03c31d1638de88115
Author: Ryan Curtin <ryan at ratml.org>
Date: Tue Nov 3 06:45:35 2015 -0800
A better attempt at batch training.
>---------------------------------------------------------------
26ba0e2b009bd8a455a57db03c31d1638de88115
src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
index f7c48ab..d8193c8 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
@@ -187,8 +187,12 @@ void HoeffdingTree<
{
// Pass all the points through the nodes, and then split only after that.
checkInterval = data.n_cols; // Only split on the last sample.
+ // Don't split if there are fewer than five points.
+ size_t oldMaxSamples = maxSamples;
+ maxSamples = std::max(size_t(data.n_cols - 1), size_t(5));
for (size_t i = 0; i < data.n_cols; ++i)
Train(data.col(i), labels[i]);
+ maxSamples = oldMaxSamples;
// Now, if we did split, find out which points go to which child, and
// perform the same batch training.
@@ -215,6 +219,11 @@ void HoeffdingTree<
// batch-mode training.
for (size_t i = 0; i < children.size(); ++i)
{
+ // If we don't have any points that go to the child in question, don't
+ // train that child.
+ if (counts[i] == 0)
+ continue;
+
// The submatrix here is non-contiguous, but I think this will be faster
// than copying the points to an ordered state. We still have to
// assemble the labels vector, though.
More information about the mlpack-git mailing list