[mlpack-git] master: Remove StreamingDecisionTree; refactor main executable. (b0d8816)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:45:37 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit b0d881605d4ae7977f088be0a600c6206a5da4e5
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sun Nov 1 17:40:27 2015 +0000

    Remove StreamingDecisionTree; refactor main executable.


>---------------------------------------------------------------

b0d881605d4ae7977f088be0a600c6206a5da4e5
 ...ision_tree_main.cpp => hoeffding_tree_main.cpp} |   0
 .../hoeffding_trees/streaming_decision_tree.hpp    | 109 ---------------
 .../streaming_decision_tree_impl.hpp               | 151 ---------------------
 3 files changed, 260 deletions(-)

diff --git a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_main.cpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
similarity index 100%
rename from src/mlpack/methods/hoeffding_trees/streaming_decision_tree_main.cpp
rename to src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
diff --git a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp
deleted file mode 100644
index 0e2fb98..0000000
--- a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * @file streaming_decision_tree.hpp
- * @author Ryan Curtin
- *
- * The core class for a streaming decision tree.
- */
-#ifndef __MLPACK_METHODS_HOEFFDING_TREES_STREAMING_DECISION_TREE_HPP
-#define __MLPACK_METHODS_HOEFFDING_TREES_STREAMING_DECISION_TREE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace tree {
-
-template<
-  typename SplitType,
-  typename MatType = arma::mat
->
-class StreamingDecisionTree
-{
- public:
-  StreamingDecisionTree(const MatType& data,
-                        const data::DatasetInfo& datasetInfo,
-                        const arma::Row<size_t>& labels,
-                        const size_t numClasses,
-                        const double confidence = 0.95,
-                        const size_t numSamples = 5000,
-                        const size_t checkInterval = 100);
-
-  StreamingDecisionTree(const data::DatasetInfo& datasetInfo,
-                        const size_t numClasses,
-                        const double confidence = 0.95,
-                        const size_t numSamples = 5000,
-                        const size_t checkInterval = 100,
-                        std::unordered_map<size_t, std::pair<size_t, size_t>>*
-                            dimensionMappings = NULL);
-
-  StreamingDecisionTree(const StreamingDecisionTree& other);
-
-  size_t NumChildren() const { return children.size(); }
-  StreamingDecisionTree& Child(const size_t i) { return children[i]; }
-  const StreamingDecisionTree& Child(const size_t i) const { return children[i];
-}
-
-  const SplitType& Split() const { return split; }
-  SplitType& Split() { return split; }
-
-  template<typename VecType>
-  void Train(const VecType& data, const size_t label);
-
-  void Train(const MatType& data, const arma::Row<size_t>& labels);
-
-  template<typename VecType>
-  size_t Classify(const VecType& data);
-
-  template<typename VecType>
-  void Classify(const VecType& data, size_t& prediction, double& probability);
-
-  void Classify(const MatType& data, arma::Row<size_t>& predictions);
-
-  void Classify(const MatType& data,
-                arma::Row<size_t>& predictions,
-                arma::rowvec& probabilities);
-
-  size_t& MajorityClass() { return split.MajorityClass(); }
-
-  // How do we encode the actual split itself?
-
-  // that's just a split dimension and a rule (categorical or numeric)
-
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(split, "split");
-    ar & data::CreateNVP(checkInterval, "checkInterval");
-
-    size_t numChildren;
-    if (Archive::is_saving::value)
-      numChildren = children.size();
-    ar & data::CreateNVP(numChildren, "numChildren");
-    if (Archive::is_loading::value)
-      children.resize(numChildren, StreamingDecisionTree(data::DatasetInfo(), 0,
-          0));
-
-    for (size_t i = 0; i < numChildren; ++i)
-    {
-      std::ostringstream name;
-      name << "child" << i;
-      ar & data::CreateNVP(children[i], name.str());
-    }
-  }
-
- private:
-  std::vector<StreamingDecisionTree> children;
-  size_t checkInterval;
-
-  SplitType split;
-};
-
-} // namespace tree
-} // namespace mlpack
-
-// Include implementation.
-#include "streaming_decision_tree_impl.hpp"
-
-// Include convenience typedefs.
-#include "typedef.hpp"
-
-#endif
diff --git a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp
deleted file mode 100644
index e9284f9..0000000
--- a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * @file streaming_decision_tree_impl.hpp
- * @author Ryan Curtin
- *
- * Implementation of a streaming decision tree.
- */
-#ifndef __MLPACK_METHODS_HOEFFDING_TREES_STREAMING_DECISION_TREE_IMPL_HPP
-#define __MLPACK_METHODS_HOEFFDING_TREES_STREAMING_DECISION_TREE_IMPL_HPP
-
-// In case it hasn't been included yet.
-#include "streaming_decision_tree.hpp"
-
-namespace mlpack {
-namespace tree {
-
-template<typename SplitType, typename MatType>
-StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
-    const MatType& data,
-    const data::DatasetInfo& datasetInfo,
-    const arma::Row<size_t>& labels,
-    const size_t numClasses,
-    const double confidence,
-    const size_t numSamples,
-    const size_t checkInterval) :
-    split(datasetInfo, numClasses, confidence, numSamples, checkInterval)
-{
-  Train(data, labels);
-}
-
-template<typename SplitType, typename MatType>
-StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
-    const data::DatasetInfo& datasetInfo,
-    const size_t numClasses,
-    const double confidence,
-    const size_t numSamples,
-    const size_t checkInterval,
-    std::unordered_map<size_t, std::pair<size_t, size_t>>* dimensionMappings) :
-    split(datasetInfo, numClasses, confidence, numSamples, checkInterval,
-        dimensionMappings)
-{
-  // No training.  Anything else to do...?
-}
-
-template<typename SplitType, typename MatType>
-StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
-    const StreamingDecisionTree& other) :
-    split(other.split)
-{
-  // Copy the children of the other tree.
-}
-
-template<typename SplitType, typename MatType>
-template<typename VecType>
-void StreamingDecisionTree<SplitType, MatType>::Train(const VecType& data,
-                                                      const size_t label)
-{
-  if (children.size() == 0)
-  {
-    split.Train(data, label);
-
-    const size_t numChildren = split.SplitCheck();
-    if (numChildren > 0)
-    {
-      // We need to add a bunch of children.
-      // Delete children, if we have them.
-      if (children.size() > 0)
-        children.clear();
-
-      // The split knows how to add the children.
-      //split.CreateChildren(children);
-    }
-  }
-  else
-  {
-    // We've already split this node.  But we need to train the child nodes.
-    size_t direction = split.CalculateDirection(data);
-    children[direction].Train(data, label);
-  }
-}
-
-template<typename SplitType, typename MatType>
-void StreamingDecisionTree<SplitType, MatType>::Train(
-    const MatType& data,
-    const arma::Row<size_t>& labels)
-{
-  // Train on each point sequentially.
-  for (size_t i = 0; i < data.n_cols; ++i)
-    Train(data.col(i), labels[i]);
-}
-
-template<typename SplitType, typename MatType>
-template<typename VecType>
-size_t StreamingDecisionTree<SplitType, MatType>::Classify(const VecType& data)
-{
-  // Get the direction we need to go, and continue classification.
-  // If we're at a leaf, we don't need to go any deeper.
-  if (children.size() == 0)
-  {
-    return split.Classify(data);
-  }
-  else
-  {
-    const size_t direction = split.CalculateDirection(data);
-    return children[direction].Classify(data);
-  }
-}
-
-template<typename SplitType, typename MatType>
-template<typename VecType>
-void StreamingDecisionTree<SplitType, MatType>::Classify(
-    const VecType& data,
-    size_t& prediction,
-    double& probability)
-{
-  if (children.size() == 0)
-  {
-    split.Classify(data, prediction, probability);
-  }
-  else
-  {
-    const size_t direction = split.CalculateDirection(data);
-    children[direction].Classify(data, prediction, probability);
-  }
-}
-
-template<typename SplitType, typename MatType>
-void StreamingDecisionTree<SplitType, MatType>::Classify(
-    const MatType& data,
-    arma::Row<size_t>& predictions)
-{
-  predictions.set_size(data.n_cols);
-  for (size_t i = 0; i < data.n_cols; ++i)
-    predictions[i] = Classify(data.col(i));
-}
-
-template<typename SplitType, typename MatType>
-void StreamingDecisionTree<SplitType, MatType>::Classify(
-    const MatType& data,
-    arma::Row<size_t>& predictions,
-    arma::rowvec& probabilities)
-{
-  predictions.set_size(data.n_cols);
-  probabilities.set_size(data.n_cols);
-  for (size_t i = 0; i < data.n_cols; ++i)
-    Classify(data.col(i), predictions[i], probabilities[i]);
-}
-
-} // namespace tree
-} // namespace mlpack
-
-#endif



More information about the mlpack-git mailing list