[mlpack-git] master: Add (unimplemented) batch training functionality. (676f406)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:45:26 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit 676f40607f9cd5f8229bae274fe316365b77386a
Author: Ryan Curtin <ryan at ratml.org>
Date:   Fri Oct 30 17:29:23 2015 +0000

    Add (unimplemented) batch training functionality.


>---------------------------------------------------------------

676f40607f9cd5f8229bae274fe316365b77386a
 .../methods/hoeffding_trees/hoeffding_tree.hpp     | 64 ++++++++++++++++++----
 .../hoeffding_trees/hoeffding_tree_impl.hpp        | 37 +++++++++++++
 2 files changed, 90 insertions(+), 11 deletions(-)

diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp
index e7394e9..d290776 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp
@@ -56,10 +56,39 @@ class HoeffdingTree
 {
  public:
   /**
-   * Construct the Hoeffding split object with the given parameters.  The
-   * dimensionMappings parameter is only used if it is desired that this node
-   * does not create its own dimensionMappings object (for instance, if this is
-   * a child of another node in the tree).
+   * Construct the Hoeffding tree with the given parameters and given training
+   * data.  The tree may be trained either in batch mode (which looks at all
+   * points before splitting, and propagates these points to the created
+   * children for further training), or in streaming mode, where each point is
+   * only considered once.  (In general, batch mode will give better-performing
+   * trees, but will have higher memory and runtime costs for the same dataset.)
+   *
+   * @param data Dataset to train on.
+   * @param datasetInfo Information on the dataset (types of each feature).
+   * @param numClasses Number of classes in the dataset.
+   * @param batchTraining Whether or not to train in batch.
+   * @param successProbability Probability of success required in Hoeffding
+   *      bounds before a split can happen.
+   * @param maxSamples Maximum number of samples before a split is forced (0
+   *      never forces a split); ignored in batch training mode.
+   * @param checkInterval Number of samples required before each split; ignored
+   *      in batch training mode.
+   */
+  template<typename MatType>
+  HoeffdingTree(const MatType& data,
+                const arma::Col<size_t>& labels,
+                const data::DatasetInfo& datasetInfo,
+                const size_t numClasses,
+                const bool batchTraining = true,
+                const double successProbability = 0.95,
+                const size_t maxSamples = 0,
+                const size_t checkInterval = 100);
+
+  /**
+   * Construct the Hoeffding tree with the given parameters, but training on no
+   * data.  The dimensionMappings parameter is only used if it is desired that
+   * this node does not create its own dimensionMappings object (for instance,
+   * if this is a child of another node in the tree).
    *
    * @param dimensionality Dimensionality of the dataset.
    * @param numClasses Number of classes in the dataset.
@@ -73,13 +102,13 @@ class HoeffdingTree
    *      be created.
    */
   HoeffdingTree(const size_t dimensionality,
-                 const size_t numClasses,
-                 const data::DatasetInfo& datasetInfo,
-                 const double successProbability,
-                 const size_t maxSamples,
-                 const size_t checkInterval,
-                 std::unordered_map<size_t, std::pair<size_t, size_t>>*
-                     dimensionMappings = NULL);
+                const size_t numClasses,
+                const data::DatasetInfo& datasetInfo,
+                const double successProbability,
+                const size_t maxSamples,
+                const size_t checkInterval,
+                std::unordered_map<size_t, std::pair<size_t, size_t>>*
+                    dimensionMappings = NULL);
 
   /**
    * Clean up memory.
@@ -87,6 +116,19 @@ class HoeffdingTree
   ~HoeffdingTree();
 
   /**
+   * Train on a set of points, either in streaming mode or in batch mode, with
+   * the given labels.
+   *
+   * @param data Data points to train on.
+   * @param labels Labels of data points.
+   * @param batchTraining If true, perform training in batch.
+   */
+  template<typename MatType>
+  void Train(const MatType& data,
+             const arma::Col<size_t>& labels,
+             const bool batchTraining = true);
+
+  /**
    * Train on a single point in streaming mode, with the given label.
    *
    * @param point Point to train on.
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
index 5aa2093..128d14d 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp
@@ -16,6 +16,26 @@ namespace tree {
 template<typename FitnessFunction,
          template<typename> class NumericSplitType,
          template<typename> class CategoricalSplitType>
+template<typename MatType>
+HoeffdingTree<
+    FitnessFunction,
+    NumericSplitType,
+    CategoricalSplitType
+>::HoeffdingTree(const MatType& data,
+                 const arma::Col<size_t>& labels,
+                 const data::DatasetInfo& datasetInfo,
+                 const size_t numClasses,
+                 const bool batchTraining,
+                 const double successProbability,
+                 const size_t maxSamples,
+                 const size_t checkInterval)
+{
+  // Not yet implemented.
+}
+
+template<typename FitnessFunction,
+         template<typename> class NumericSplitType,
+         template<typename> class CategoricalSplitType>
 HoeffdingTree<
     FitnessFunction,
     NumericSplitType,
@@ -88,6 +108,23 @@ HoeffdingTree<FitnessFunction, NumericSplitType, CategoricalSplitType>::
     delete dimensionMappings;
 }
 
+//! Train on a set of points.
+template<typename FitnessFunction,
+         template<typename> class NumericSplitType,
+         template<typename> class CategoricalSplitType>
+template<typename MatType>
+void HoeffdingTree<
+    FitnessFunction,
+    NumericSplitType,
+    CategoricalSplitType
+>::Train(const MatType& data,
+         const arma::Col<size_t>& labels,
+         const bool batchTraining)
+{
+  // Not yet implemented.
+}
+
+//! Train on one point.
 template<typename FitnessFunction,
          template<typename> class NumericSplitType,
          template<typename> class CategoricalSplitType>



More information about the mlpack-git mailing list