[mlpack-git] master: Remove StreamingDecisionTree; refactor main executable. (b0d8816)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:45:37 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit b0d881605d4ae7977f088be0a600c6206a5da4e5
Author: Ryan Curtin <ryan at ratml.org>
Date: Sun Nov 1 17:40:27 2015 +0000
Remove StreamingDecisionTree; refactor main executable.
>---------------------------------------------------------------
b0d881605d4ae7977f088be0a600c6206a5da4e5
...ision_tree_main.cpp => hoeffding_tree_main.cpp} | 0
.../hoeffding_trees/streaming_decision_tree.hpp | 109 ---------------
.../streaming_decision_tree_impl.hpp | 151 ---------------------
3 files changed, 260 deletions(-)
diff --git a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_main.cpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
similarity index 100%
rename from src/mlpack/methods/hoeffding_trees/streaming_decision_tree_main.cpp
rename to src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
diff --git a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp
deleted file mode 100644
index 0e2fb98..0000000
--- a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * @file streaming_decision_tree.hpp
- * @author Ryan Curtin
- *
- * The core class for a streaming decision tree.
- */
-#ifndef __MLPACK_METHODS_HOEFFDING_TREES_STREAMING_DECISION_TREE_HPP
-#define __MLPACK_METHODS_HOEFFDING_TREES_STREAMING_DECISION_TREE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace tree {
-
-template<
- typename SplitType,
- typename MatType = arma::mat
->
-class StreamingDecisionTree
-{
- public:
- StreamingDecisionTree(const MatType& data,
- const data::DatasetInfo& datasetInfo,
- const arma::Row<size_t>& labels,
- const size_t numClasses,
- const double confidence = 0.95,
- const size_t numSamples = 5000,
- const size_t checkInterval = 100);
-
- StreamingDecisionTree(const data::DatasetInfo& datasetInfo,
- const size_t numClasses,
- const double confidence = 0.95,
- const size_t numSamples = 5000,
- const size_t checkInterval = 100,
- std::unordered_map<size_t, std::pair<size_t, size_t>>*
- dimensionMappings = NULL);
-
- StreamingDecisionTree(const StreamingDecisionTree& other);
-
- size_t NumChildren() const { return children.size(); }
- StreamingDecisionTree& Child(const size_t i) { return children[i]; }
- const StreamingDecisionTree& Child(const size_t i) const { return children[i];
-}
-
- const SplitType& Split() const { return split; }
- SplitType& Split() { return split; }
-
- template<typename VecType>
- void Train(const VecType& data, const size_t label);
-
- void Train(const MatType& data, const arma::Row<size_t>& labels);
-
- template<typename VecType>
- size_t Classify(const VecType& data);
-
- template<typename VecType>
- void Classify(const VecType& data, size_t& prediction, double& probability);
-
- void Classify(const MatType& data, arma::Row<size_t>& predictions);
-
- void Classify(const MatType& data,
- arma::Row<size_t>& predictions,
- arma::rowvec& probabilities);
-
- size_t& MajorityClass() { return split.MajorityClass(); }
-
- // How do we encode the actual split itself?
-
- // that's just a split dimension and a rule (categorical or numeric)
-
- template<typename Archive>
- void Serialize(Archive& ar, const unsigned int /* version */)
- {
- ar & data::CreateNVP(split, "split");
- ar & data::CreateNVP(checkInterval, "checkInterval");
-
- size_t numChildren;
- if (Archive::is_saving::value)
- numChildren = children.size();
- ar & data::CreateNVP(numChildren, "numChildren");
- if (Archive::is_loading::value)
- children.resize(numChildren, StreamingDecisionTree(data::DatasetInfo(), 0,
- 0));
-
- for (size_t i = 0; i < numChildren; ++i)
- {
- std::ostringstream name;
- name << "child" << i;
- ar & data::CreateNVP(children[i], name.str());
- }
- }
-
- private:
- std::vector<StreamingDecisionTree> children;
- size_t checkInterval;
-
- SplitType split;
-};
-
-} // namespace tree
-} // namespace mlpack
-
-// Include implementation.
-#include "streaming_decision_tree_impl.hpp"
-
-// Include convenience typedefs.
-#include "typedef.hpp"
-
-#endif
diff --git a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp
deleted file mode 100644
index e9284f9..0000000
--- a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * @file streaming_decision_tree_impl.hpp
- * @author Ryan Curtin
- *
- * Implementation of a streaming decision tree.
- */
-#ifndef __MLPACK_METHODS_HOEFFDING_TREES_STREAMING_DECISION_TREE_IMPL_HPP
-#define __MLPACK_METHODS_HOEFFDING_TREES_STREAMING_DECISION_TREE_IMPL_HPP
-
-// In case it hasn't been included yet.
-#include "streaming_decision_tree.hpp"
-
-namespace mlpack {
-namespace tree {
-
-template<typename SplitType, typename MatType>
-StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
- const MatType& data,
- const data::DatasetInfo& datasetInfo,
- const arma::Row<size_t>& labels,
- const size_t numClasses,
- const double confidence,
- const size_t numSamples,
- const size_t checkInterval) :
- split(datasetInfo, numClasses, confidence, numSamples, checkInterval)
-{
- Train(data, labels);
-}
-
-template<typename SplitType, typename MatType>
-StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
- const data::DatasetInfo& datasetInfo,
- const size_t numClasses,
- const double confidence,
- const size_t numSamples,
- const size_t checkInterval,
- std::unordered_map<size_t, std::pair<size_t, size_t>>* dimensionMappings) :
- split(datasetInfo, numClasses, confidence, numSamples, checkInterval,
- dimensionMappings)
-{
- // No training. Anything else to do...?
-}
-
-template<typename SplitType, typename MatType>
-StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
- const StreamingDecisionTree& other) :
- split(other.split)
-{
- // Copy the children of the other tree.
-}
-
-template<typename SplitType, typename MatType>
-template<typename VecType>
-void StreamingDecisionTree<SplitType, MatType>::Train(const VecType& data,
- const size_t label)
-{
- if (children.size() == 0)
- {
- split.Train(data, label);
-
- const size_t numChildren = split.SplitCheck();
- if (numChildren > 0)
- {
- // We need to add a bunch of children.
- // Delete children, if we have them.
- if (children.size() > 0)
- children.clear();
-
- // The split knows how to add the children.
- //split.CreateChildren(children);
- }
- }
- else
- {
- // We've already split this node. But we need to train the child nodes.
- size_t direction = split.CalculateDirection(data);
- children[direction].Train(data, label);
- }
-}
-
-template<typename SplitType, typename MatType>
-void StreamingDecisionTree<SplitType, MatType>::Train(
- const MatType& data,
- const arma::Row<size_t>& labels)
-{
- // Train on each point sequentially.
- for (size_t i = 0; i < data.n_cols; ++i)
- Train(data.col(i), labels[i]);
-}
-
-template<typename SplitType, typename MatType>
-template<typename VecType>
-size_t StreamingDecisionTree<SplitType, MatType>::Classify(const VecType& data)
-{
- // Get the direction we need to go, and continue classification.
- // If we're at a leaf, we don't need to go any deeper.
- if (children.size() == 0)
- {
- return split.Classify(data);
- }
- else
- {
- const size_t direction = split.CalculateDirection(data);
- return children[direction].Classify(data);
- }
-}
-
-template<typename SplitType, typename MatType>
-template<typename VecType>
-void StreamingDecisionTree<SplitType, MatType>::Classify(
- const VecType& data,
- size_t& prediction,
- double& probability)
-{
- if (children.size() == 0)
- {
- split.Classify(data, prediction, probability);
- }
- else
- {
- const size_t direction = split.CalculateDirection(data);
- children[direction].Classify(data, prediction, probability);
- }
-}
-
-template<typename SplitType, typename MatType>
-void StreamingDecisionTree<SplitType, MatType>::Classify(
- const MatType& data,
- arma::Row<size_t>& predictions)
-{
- predictions.set_size(data.n_cols);
- for (size_t i = 0; i < data.n_cols; ++i)
- predictions[i] = Classify(data.col(i));
-}
-
-template<typename SplitType, typename MatType>
-void StreamingDecisionTree<SplitType, MatType>::Classify(
- const MatType& data,
- arma::Row<size_t>& predictions,
- arma::rowvec& probabilities)
-{
- predictions.set_size(data.n_cols);
- probabilities.set_size(data.n_cols);
- for (size_t i = 0; i < data.n_cols; ++i)
- Classify(data.col(i), predictions[i], probabilities[i]);
-}
-
-} // namespace tree
-} // namespace mlpack
-
-#endif
More information about the mlpack-git mailing list