[mlpack-git] master: Actually take multiple passes over the data when streaming. (d118921)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:46:28 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit d118921e03f500660a9350768ba00eb5f1808540
Author: Ryan Curtin <ryan at ratml.org>
Date: Thu Nov 19 06:58:26 2015 -0800
Actually take multiple passes over the data when streaming.
>---------------------------------------------------------------
d118921e03f500660a9350768ba00eb5f1808540
src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
index 549a108..0a70d34 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
@@ -119,8 +119,10 @@ void PerformActions()
const string testFile = CLI::GetParam<string>("test_file");
const string predictionsFile = CLI::GetParam<string>("predictions_file");
const string probabilitiesFile = CLI::GetParam<string>("probabilities_file");
- const bool batchTraining = CLI::HasParam("batch_mode");
+ bool batchTraining = CLI::HasParam("batch_mode");
const size_t passes = (size_t) CLI::GetParam<int>("passes");
+ if (passes > 1)
+ batchTraining = false; // We already warned about this earlier.
TreeType* tree = NULL;
DatasetInfo datasetInfo;
@@ -138,8 +140,14 @@ void PerformActions()
// Now create the decision tree.
Timer::Start("tree_training");
+ if (passes > 1)
+ Log::Info << "Taking " << passes << " passes over the dataset." << endl;
+
tree = new TreeType(trainingSet, datasetInfo, labels, max(labels) + 1,
- batchTraining, confidence, maxSamples);
+ batchTraining, confidence, maxSamples, 100, minSamples);
+
+ for (size_t i = 1; i < passes; ++i)
+ tree->Train(trainingSet, labels, false);
Timer::Stop("tree_training");
}
else
More information about the mlpack-git mailing list