[mlpack-git] master: Add (unimplemented) batch training functionality. (676f406)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:45:26 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit 676f40607f9cd5f8229bae274fe316365b77386a
Author: Ryan Curtin <ryan at ratml.org>
Date: Fri Oct 30 17:29:23 2015 +0000
Add (unimplemented) batch training functionality.
>---------------------------------------------------------------
676f40607f9cd5f8229bae274fe316365b77386a
.../methods/hoeffding_trees/hoeffding_tree.hpp | 64 ++++++++++++++++++----
.../hoeffding_trees/hoeffding_tree_impl.hpp | 37 +++++++++++++
2 files changed, 90 insertions(+), 11 deletions(-)
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp
index e7394e9..d290776 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp
@@ -56,10 +56,39 @@ class HoeffdingTree
{
public:
/**
- * Construct the Hoeffding split object with the given parameters. The
- * dimensionMappings parameter is only used if it is desired that this node
- * does not create its own dimensionMappings object (for instance, if this is
- * a child of another node in the tree).
+ * Construct the Hoeffding tree with the given parameters and given training
+ * data. The tree may be trained either in batch mode (which looks at all
+ * points before splitting, and propagates these points to the created
+ * children for further training), or in streaming mode, where each point is
+ * only considered once. (In general, batch mode will give better-performing
+ * trees, but will have higher memory and runtime costs for the same dataset.)
+ *
+ * @param data Dataset to train on.
+ * @param datasetInfo Information on the dataset (types of each feature).
+ * @param numClasses Number of classes in the dataset.
+ * @param batchTraining Whether or not to train in batch.
+ * @param successProbability Probability of success required in Hoeffding
+ * bounds before a split can happen.
+ * @param maxSamples Maximum number of samples before a split is forced (0
+ * never forces a split); ignored in batch training mode.
+ * @param checkInterval Number of samples required before each split; ignored
+ * in batch training mode.
+ */
+ template<typename MatType>
+ HoeffdingTree(const MatType& data,
+ const arma::Col<size_t>& labels,
+ const data::DatasetInfo& datasetInfo,
+ const size_t numClasses,
+ const bool batchTraining = true,
+ const double successProbability = 0.95,
+ const size_t maxSamples = 0,
+ const size_t checkInterval = 100);
+
+ /**
+ * Construct the Hoeffding tree with the given parameters, but training on no
+ * data. The dimensionMappings parameter is only used if it is desired that
+ * this node does not create its own dimensionMappings object (for instance,
+ * if this is a child of another node in the tree).
*
* @param dimensionality Dimensionality of the dataset.
* @param numClasses Number of classes in the dataset.
@@ -73,13 +102,13 @@ class HoeffdingTree
* be created.
*/
HoeffdingTree(const size_t dimensionality,
- const size_t numClasses,
- const data::DatasetInfo& datasetInfo,
- const double successProbability,
- const size_t maxSamples,
- const size_t checkInterval,
- std::unordered_map<size_t, std::pair<size_t, size_t>>*
- dimensionMappings = NULL);
+ const size_t numClasses,
+ const data::DatasetInfo& datasetInfo,
+ const double successProbability,
+ const size_t maxSamples,
+ const size_t checkInterval,
+ std::unordered_map<size_t, std::pair<size_t, size_t>>*
+ dimensionMappings = NULL);
/**
* Clean up memory.
@@ -87,6 +116,19 @@ class HoeffdingTree
~HoeffdingTree();
/**
+ * Train on a set of points, either in streaming mode or in batch mode, with
+ * the given labels.
+ *
+ * @param data Data points to train on.
+ * @param labels Labels of data points.
+ * @param batchTraining If true, perform training in batch.
+ */
+ template<typename MatType>
+ void Train(const MatType& data,
+ const arma::Col<size_t>& labels,
+ const bool batchTraining = true);
+
+ /**
* Train on a single point in streaming mode, with the given label.
*
* @param point Point to train on.
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
index 5aa2093..128d14d 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
@@ -16,6 +16,26 @@ namespace tree {
template<typename FitnessFunction,
template<typename> class NumericSplitType,
template<typename> class CategoricalSplitType>
+template<typename MatType>
+HoeffdingTree<
+ FitnessFunction,
+ NumericSplitType,
+ CategoricalSplitType
+>::HoeffdingTree(const MatType& data,
+ const arma::Col<size_t>& labels,
+ const data::DatasetInfo& datasetInfo,
+ const size_t numClasses,
+ const bool batchTraining,
+ const double successProbability,
+ const size_t maxSamples,
+ const size_t checkInterval)
+{
+ // Not yet implemented.
+}
+
+template<typename FitnessFunction,
+ template<typename> class NumericSplitType,
+ template<typename> class CategoricalSplitType>
HoeffdingTree<
FitnessFunction,
NumericSplitType,
@@ -88,6 +108,23 @@ HoeffdingTree<FitnessFunction, NumericSplitType, CategoricalSplitType>::
delete dimensionMappings;
}
+//! Train on a set of points.
+template<typename FitnessFunction,
+ template<typename> class NumericSplitType,
+ template<typename> class CategoricalSplitType>
+template<typename MatType>
+void HoeffdingTree<
+ FitnessFunction,
+ NumericSplitType,
+ CategoricalSplitType
+>::Train(const MatType& data,
+ const arma::Col<size_t>& labels,
+ const bool batchTraining)
+{
+ // Not yet implemented.
+}
+
+//! Train on one point.
template<typename FitnessFunction,
template<typename> class NumericSplitType,
template<typename> class CategoricalSplitType>
More information about the mlpack-git mailing list