[mlpack-git] master: Actually take multiple passes over the data when streaming. (d118921)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:46:28 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit d118921e03f500660a9350768ba00eb5f1808540
Author: Ryan Curtin <ryan at ratml.org>
Date:   Thu Nov 19 06:58:26 2015 -0800

    Actually take multiple passes over the data when streaming.


>---------------------------------------------------------------

d118921e03f500660a9350768ba00eb5f1808540
 src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
index 549a108..0a70d34 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
@@ -119,8 +119,10 @@ void PerformActions()
   const string testFile = CLI::GetParam<string>("test_file");
   const string predictionsFile = CLI::GetParam<string>("predictions_file");
   const string probabilitiesFile = CLI::GetParam<string>("probabilities_file");
-  const bool batchTraining = CLI::HasParam("batch_mode");
+  bool batchTraining = CLI::HasParam("batch_mode");
   const size_t passes = (size_t) CLI::GetParam<int>("passes");
+  if (passes > 1)
+    batchTraining = false; // We already warned about this earlier.
 
   TreeType* tree = NULL;
   DatasetInfo datasetInfo;
@@ -138,8 +140,14 @@ void PerformActions()
 
     // Now create the decision tree.
     Timer::Start("tree_training");
+    if (passes > 1)
+      Log::Info << "Taking " << passes << " passes over the dataset." << endl;
+
     tree = new TreeType(trainingSet, datasetInfo, labels, max(labels) + 1,
-        batchTraining, confidence, maxSamples);
+        batchTraining, confidence, maxSamples, 100, minSamples);
+
+    for (size_t i = 1; i < passes; ++i)
+      tree->Train(trainingSet, labels, false);
     Timer::Stop("tree_training");
   }
   else



More information about the mlpack-git mailing list