[mlpack-git] master: Add a command-line program for streaming decision trees. (cf6f031)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:42:57 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit cf6f031430d43b59c140264fa756fe06b15fed7d
Author: ryan <ryan at ratml.org>
Date:   Tue Sep 29 11:11:16 2015 -0400

    Add a command-line program for streaming decision trees.


>---------------------------------------------------------------

cf6f031430d43b59c140264fa756fe06b15fed7d
 src/mlpack/methods/hoeffding_trees/CMakeLists.txt  |  7 ++++
 .../hoeffding_trees/hoeffding_split_impl.hpp       |  3 +-
 .../streaming_decision_tree_main.cpp               | 38 ++++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/mlpack/methods/hoeffding_trees/CMakeLists.txt b/src/mlpack/methods/hoeffding_trees/CMakeLists.txt
index b6e6258..ff116f1 100644
--- a/src/mlpack/methods/hoeffding_trees/CMakeLists.txt
+++ b/src/mlpack/methods/hoeffding_trees/CMakeLists.txt
@@ -22,3 +22,10 @@ endforeach()
 # Append sources (with directory name) to list of all MLPACK sources (used at
 # the parent scope).
 set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
+
+add_executable(streaming_decision_tree
+  streaming_decision_tree_main.cpp
+)
+target_link_libraries(streaming_decision_tree
+  mlpack
+)
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
index 7303874..5dffd01 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
@@ -35,7 +35,8 @@ HoeffdingSplit<
     if (datasetInfo.Type(i) == data::Datatype::categorical)
       categoricalSplits.push_back(
           CategoricalSplitType(datasetInfo.NumMappings(i), numClasses));
-    // else, numeric splits (not yet!)
+    else
+      numericSplits.push_back(NumericSplitType(numClasses));
   }
 }
 
diff --git a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_main.cpp b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_main.cpp
new file mode 100644
index 0000000..073ea1d
--- /dev/null
+++ b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_main.cpp
@@ -0,0 +1,38 @@
+/**
+ * @file streaming_decision_tree_main.cpp
+ * @author Ryan Curtin
+ *
+ * A command-line executable that can build a streaming decision tree.
+ */
+#include <mlpack/core.hpp>
+#include <mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp>
+#include <mlpack/methods/hoeffding_trees/hoeffding_split.hpp>
+
+using namespace std;
+using namespace mlpack;
+using namespace mlpack::tree;
+using namespace mlpack::data;
+
+PARAM_STRING_REQ("training_file", "Training dataset file.", "t");
+PARAM_STRING("labels_file", "Labels for training dataset.", "l", "");
+
+int main(int argc, char** argv)
+{
+  CLI::ParseCommandLine(argc, argv);
+  // Fetch the two file-name options declared by the PARAM_* macros above.
+  const string trainingFile = CLI::GetParam<string>("training_file");
+  const string labelsFile = CLI::GetParam<string>("labels_file");
+  // Load the training matrix; datasetInfo is handed to Load alongside it.
+  arma::mat trainingSet;
+  DatasetInfo datasetInfo;
+  data::Load(trainingFile, trainingSet, datasetInfo, true);
+  // NOTE(review): labels_file defaults to "" yet is always loaded -- confirm.
+  arma::Row<size_t> labels;
+  data::Load(labelsFile, labels, true);
+
+  // Build the tree; the class count passed in is max(labels) + 1.
+  StreamingDecisionTree<HoeffdingSplit<>> tree(trainingSet, datasetInfo, labels,
+      max(labels) + 1);
+
+  // The tree is not used further; main ends here without an explicit return.
+}



More information about the mlpack-git mailing list