[mlpack-git] master: Fix compilation with newer Boost on OS X. (22ada51)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:42:13 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit 22ada51e8bf802c5b7e0d11c17e861cbccb7ebd0
Author: Ryan Curtin <ryan at ratml.org>
Date:   Tue Sep 22 06:51:41 2015 -0700

    Fix compilation with newer Boost on OS X.


>---------------------------------------------------------------

22ada51e8bf802c5b7e0d11c17e861cbccb7ebd0
 .../hoeffding_trees/categorical_split_info.hpp     |  7 +---
 .../hoeffding_categorical_split.hpp                |  2 +-
 .../hoeffding_categorical_split_impl.hpp           |  2 +-
 .../hoeffding_trees/hoeffding_numeric_split.hpp    |  2 +
 .../methods/hoeffding_trees/hoeffding_split.hpp    | 12 +++---
 .../hoeffding_trees/hoeffding_split_impl.hpp       | 43 +++++++++++++---------
 .../hoeffding_trees/streaming_decision_tree.hpp    |  7 +++-
 .../streaming_decision_tree_impl.hpp               | 13 ++++---
 src/mlpack/tests/hoeffding_tree_test.cpp           | 13 ++++---
 9 files changed, 57 insertions(+), 44 deletions(-)

diff --git a/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp b/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
index 04791c2..21a2927 100644
--- a/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
+++ b/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
@@ -15,18 +15,15 @@ namespace tree {
 class CategoricalSplitInfo
 {
  public:
-  CategoricalSplitInfo(const size_t categories) : categories(categories) { }
+  CategoricalSplitInfo(const size_t /* categories */) { }
 
   template<typename eT>
-  size_t CalculateDirection(const eT& value)
+  static size_t CalculateDirection(const eT& value)
   {
     // We have a child for each categorical value, and value should be in the
     // range [0, categories).
     return size_t(value);
   }
-
- private:
-  const size_t categories;
 };
 
 } // namespace tree
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
index 551744a..ce053cf 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
@@ -48,7 +48,7 @@ class HoeffdingCategoricalSplit
   double EvaluateFitnessFunction() const;
 
   template<typename StreamingDecisionTreeType>
-  void CreateChildren(std::vector<StreamingDecisionTreeType*>& children,
+  void CreateChildren(std::vector<StreamingDecisionTreeType>& children,
                       data::DatasetInfo& datasetInfo,
                       SplitInfo& splitInfo);
 
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
index 7d5b74e..c489799 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
@@ -42,7 +42,7 @@ double HoeffdingCategoricalSplit<FitnessFunction>::EvaluateFitnessFunction()
 template<typename FitnessFunction>
 template<typename StreamingDecisionTreeType>
 void HoeffdingCategoricalSplit<FitnessFunction>::CreateChildren(
-    std::vector<StreamingDecisionTreeType*>& children,
+    std::vector<StreamingDecisionTreeType>& children,
     data::DatasetInfo& datasetInfo,
     SplitInfo& splitInfo)
 {
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
index ce53128..172d7bc 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
@@ -14,6 +14,8 @@ template<typename FitnessFunction>
 class HoeffdingNumericSplit
 {
  public:
+  typedef size_t SplitInfo;
+
   HoeffdingNumericSplit();
 
   template<typename eT>
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
index 1db2847..a60f06c 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
@@ -17,14 +17,15 @@ namespace mlpack {
 namespace tree {
 
 template<typename FitnessFunction = GiniImpurity,
-         typename NumericSplitType = HoeffdingNumericSplit,
-         typename CategoricalSplitType = HoeffdingCategoricalSplit>
+         typename NumericSplitType = HoeffdingNumericSplit<GiniImpurity>,
+         typename CategoricalSplitType = HoeffdingCategoricalSplit<GiniImpurity>
+>
 class HoeffdingSplit
 {
  public:
   HoeffdingSplit(const size_t dimensionality,
                  const size_t numClasses,
-                 const DatasetInfo& datasetInfo,
+                 const data::DatasetInfo& datasetInfo,
                  const double successProbability);
 
   template<typename VecType>
@@ -49,9 +50,10 @@ class HoeffdingSplit
   std::vector<NumericSplitType> numericSplits;
   std::vector<CategoricalSplitType> categoricalSplits;
 
-  const DatasetInfo& datasetInfo;
-  double successProbability;
+  const data::DatasetInfo& datasetInfo;
+  size_t numClasses;
   size_t numSamples;
+  double successProbability;
 
   // And we need to keep some information for after we have split.
   size_t splitDimension;
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
index 62499c29..c0d3788 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
@@ -19,26 +19,32 @@ HoeffdingSplit<
     CategoricalSplitType
 >::HoeffdingSplit(const size_t dimensionality,
                   const size_t numClasses,
-                  const DatasetInfo& datasetInfo)
+                  const data::DatasetInfo& datasetInfo,
+                  const double successProbability) :
+    numSamples(0),
+    numClasses(numClasses),
+    datasetInfo(datasetInfo),
+    successProbability(successProbability),
+    categoricalSplit(0)
 {
   for (size_t i = 0; i < dimensionality; ++i)
   {
-    if (datasetInfo.Type(i) == Datatype.categorical)
+    if (datasetInfo.Type(i) == data::Datatype::categorical)
       categoricalSplits.push_back(
-          CategoricalSplitType(datasetInfo.NumMappings(), numClasses));
+          CategoricalSplitType(datasetInfo.NumMappings(i), numClasses));
     // else, numeric splits (not yet!)
   }
 }
 
-template<typename VecType>
 template<typename FitnessFunction,
          typename NumericSplitType,
          typename CategoricalSplitType>
+template<typename VecType>
 void HoeffdingSplit<
     FitnessFunction,
     NumericSplitType,
     CategoricalSplitType
->::Train(VecType& point, const size_t label)
+>::Train(const VecType& point, const size_t label)
 {
   if (splitDimension == size_t(-1))
   {
@@ -47,9 +53,9 @@ void HoeffdingSplit<
     size_t categoricalIndex = 0;
     for (size_t i = 0; i < point.n_rows; ++i)
     {
-      if (datasetInfo.Type(i) == Datatype.categorical)
+      if (datasetInfo.Type(i) == data::Datatype::categorical)
         categoricalSplits[categoricalIndex++].Train(point[i], label);
-      else if (datasetInfo.Type(i) == Datatype.numeric)
+      else if (datasetInfo.Type(i) == data::Datatype::numeric)
         numericSplits[numericIndex++].Train(point[i], label);
     }
   }
@@ -111,7 +117,7 @@ size_t HoeffdingSplit<
   {
     // Split!
     splitDimension = largestIndex;
-    if (datasetInfo[largestIndex].Type == Datatype.categorical)
+    if (datasetInfo[largestIndex].Type == data::Datatype::categorical)
     {
       // I don't know if this should be here.
       majorityClass = categoricalSplit[largestIndex].MajorityClass();
@@ -125,32 +131,32 @@ size_t HoeffdingSplit<
   }
 }
 
-template<typename VecType>
 template<
     typename FitnessFunction,
     typename NumericSplitType,
     typename CategoricalSplitType
 >
+template<typename VecType>
 size_t HoeffdingSplit<
     FitnessFunction,
     NumericSplitType,
     CategoricalSplitType
->::CalculateDirection(VecType& point) const
+>::CalculateDirection(const VecType& point) const
 {
   // Don't call this before the node is split...
-  if (datasetInfo.Type(splitDimension) == Datatype::numeric)
+  if (datasetInfo.Type(splitDimension) == data::Datatype::numeric)
     return numericSplit.CalculateDirection(point[splitDimension]);
-  else if (datasetInfo.Type(splitDimension) == Datatype::categorical)
+  else if (datasetInfo.Type(splitDimension) == data::Datatype::categorical)
     return categoricalSplit.CalculateDirection(point[splitDimension]);
 }
 
-template<typename VecType>
 template<
     typename FitnessFunction,
     typename NumericSplitType,
     typename CategoricalSplitType
 >
-void HoeffdingSplit<
+template<typename VecType>
+size_t HoeffdingSplit<
     FitnessFunction,
     NumericSplitType,
     CategoricalSplitType
@@ -166,6 +172,7 @@ template<
     typename NumericSplitType,
     typename CategoricalSplitType
 >
+template<typename StreamingDecisionTreeType>
 void HoeffdingSplit<
     FitnessFunction,
     NumericSplitType,
@@ -177,17 +184,17 @@ void HoeffdingSplit<
   size_t categoricalSplitIndex = 0;
   for (size_t i = 0; i < splitDimension; ++i)
   {
-    if (datasetInfo.Type(i) == Datatype::numeric)
+    if (datasetInfo.Type(i) == data::Datatype::numeric)
       ++numericSplitIndex;
-    if (datasetInfo.Type(i) == Datatype::categorical)
+    if (datasetInfo.Type(i) == data::Datatype::categorical)
       ++categoricalSplitIndex;
   }
 
-  if (datasetInfo.Type(splitDimension) == Datatype::numeric)
+  if (datasetInfo.Type(splitDimension) == data::Datatype::numeric)
   {
     numericSplits[numericSplitIndex + 1].CreateChildren(children, numericSplit);
   }
-  else if (datasetInfo.Type(splitDimension) == Datatype::categorical)
+  else if (datasetInfo.Type(splitDimension) == data::Datatype::categorical)
   {
     categoricalSplits[categoricalSplitIndex + 1].CreateChildren(children,
         categoricalSplit);
diff --git a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp
index af2a160..dc4a214 100644
--- a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp
+++ b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp
@@ -38,9 +38,9 @@ class StreamingDecisionTree
   void Train(const MatType& data, const arma::Row<size_t>& labels);
 
   template<typename VecType>
-  size_t Predict(const VecType& data);
+  size_t Classify(const VecType& data);
 
-  void Predict(const MatType& data, arma::Row<size_t>& predictions);
+  void Classify(const MatType& data, arma::Row<size_t>& predictions);
 
   // How do we encode the actual split itself?
 
@@ -55,4 +55,7 @@ class StreamingDecisionTree
 } // namespace tree
 } // namespace mlpack
 
+// Include implementation.
+#include "streaming_decision_tree_impl.hpp"
+
 #endif
diff --git a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp
index b40a502..f97a4f5 100644
--- a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp
@@ -18,7 +18,7 @@ StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
     const MatType& data,
     const data::DatasetInfo& datasetInfo,
     const arma::Row<size_t>& labels) :
-    split(datasetInfo)
+    split(0, 0, datasetInfo, 0)
 {
   Train(data, labels);
 }
@@ -26,20 +26,21 @@ StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
 template<typename SplitType, typename MatType>
 StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
     const data::DatasetInfo& datasetInfo) :
-    split(datasetInfo)
+    split(0, 0, datasetInfo, 0)
 {
   // No training.  Anything else to do...?
 }
 
 template<typename SplitType, typename MatType>
 StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
-    const StreamingDecisionTree& other)
+    const StreamingDecisionTree& other) :
+    split(other.split)
 {
   // Copy the children of the other tree.
 }
 
-template<typename VecType>
 template<typename SplitType, typename MatType>
+template<typename VecType>
 void StreamingDecisionTree<SplitType, MatType>::Train(const VecType& data,
                                                       const size_t label)
 {
@@ -54,7 +55,7 @@ void StreamingDecisionTree<SplitType, MatType>::Train(const VecType& data,
       children.clear();
 
     // The split knows how to add the children.
-    SplitType.CreateChildren(children);
+    split.CreateChildren(children);
   }
 }
 
@@ -68,8 +69,8 @@ void StreamingDecisionTree<SplitType, MatType>::Train(
     Train(data.col(i), labels[i]);
 }
 
-template<typename VecType>
 template<typename SplitType, typename MatType>
+template<typename VecType>
 size_t StreamingDecisionTree<SplitType, MatType>::Classify(const VecType& data)
 {
   // Get the direction we need to go, and continue classification.
diff --git a/src/mlpack/tests/hoeffding_tree_test.cpp b/src/mlpack/tests/hoeffding_tree_test.cpp
index 5bf877e..58d34d9 100644
--- a/src/mlpack/tests/hoeffding_tree_test.cpp
+++ b/src/mlpack/tests/hoeffding_tree_test.cpp
@@ -16,6 +16,7 @@
 using namespace std;
 using namespace arma;
 using namespace mlpack;
+using namespace mlpack::math;
 using namespace mlpack::data;
 using namespace mlpack::tree;
 
@@ -110,7 +111,7 @@ BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitMajorityClassTest)
 
   for (size_t i = 0; i < 500; ++i)
   {
-    split.Train(math::RandInt(0, 10), 1);
+    split.Train(mlpack::math::RandInt(0, 10), 1);
     BOOST_REQUIRE_EQUAL(split.MajorityClass(), 1);
   }
 }
@@ -123,11 +124,11 @@ BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitHarderMajorityClassTest)
   // Ten categories, three classes.
   HoeffdingCategoricalSplit<GiniImpurity> split(10, 3);
 
-  split.Train(math::RandInt(0, 10), 1);
+  split.Train(mlpack::math::RandInt(0, 10), 1);
   for (size_t i = 0; i < 250; ++i)
   {
-    split.Train(math::RandInt(0, 10), 1);
-    split.Train(math::RandInt(0, 10), 2);
+    split.Train(mlpack::math::RandInt(0, 10), 1);
+    split.Train(mlpack::math::RandInt(0, 10), 2);
     BOOST_REQUIRE_EQUAL(split.MajorityClass(), 1);
   }
 }
@@ -182,10 +183,10 @@ BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitSplitTest)
   HoeffdingCategoricalSplit<GiniImpurity> split(3, 3); // 3 categories.
 
   // No training is necessary because we can just call CreateChildren().
-  std::vector<StreamingDecisionTree<HoeffdingSplit>> children;
+  std::vector<StreamingDecisionTree<HoeffdingSplit<>>> children;
   data::DatasetInfo info;
   info.MapString("hello", 0); // Make dimension 0 categorical.
-  HoeffdingCategoricalSplit<GiniImpurity>::SplitInfo splitInfo;
+  HoeffdingCategoricalSplit<GiniImpurity>::SplitInfo splitInfo(3);
 
   // Create the children.
   split.CreateChildren(children, info, splitInfo);



More information about the mlpack-git mailing list