[mlpack-git] master: Fix compilation with newer Boost on OS X. (22ada51)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:42:13 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit 22ada51e8bf802c5b7e0d11c17e861cbccb7ebd0
Author: Ryan Curtin <ryan at ratml.org>
Date: Tue Sep 22 06:51:41 2015 -0700
Fix compilation with newer Boost on OS X.
>---------------------------------------------------------------
22ada51e8bf802c5b7e0d11c17e861cbccb7ebd0
.../hoeffding_trees/categorical_split_info.hpp | 7 +---
.../hoeffding_categorical_split.hpp | 2 +-
.../hoeffding_categorical_split_impl.hpp | 2 +-
.../hoeffding_trees/hoeffding_numeric_split.hpp | 2 +
.../methods/hoeffding_trees/hoeffding_split.hpp | 12 +++---
.../hoeffding_trees/hoeffding_split_impl.hpp | 43 +++++++++++++---------
.../hoeffding_trees/streaming_decision_tree.hpp | 7 +++-
.../streaming_decision_tree_impl.hpp | 13 ++++---
src/mlpack/tests/hoeffding_tree_test.cpp | 13 ++++---
9 files changed, 57 insertions(+), 44 deletions(-)
diff --git a/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp b/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
index 04791c2..21a2927 100644
--- a/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
+++ b/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
@@ -15,18 +15,15 @@ namespace tree {
class CategoricalSplitInfo
{
public:
- CategoricalSplitInfo(const size_t categories) : categories(categories) { }
+ CategoricalSplitInfo(const size_t /* categories */) { }
template<typename eT>
- size_t CalculateDirection(const eT& value)
+ static size_t CalculateDirection(const eT& value)
{
// We have a child for each categorical value, and value should be in the
// range [0, categories).
return size_t(value);
}
-
- private:
- const size_t categories;
};
} // namespace tree
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
index 551744a..ce053cf 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
@@ -48,7 +48,7 @@ class HoeffdingCategoricalSplit
double EvaluateFitnessFunction() const;
template<typename StreamingDecisionTreeType>
- void CreateChildren(std::vector<StreamingDecisionTreeType*>& children,
+ void CreateChildren(std::vector<StreamingDecisionTreeType>& children,
data::DatasetInfo& datasetInfo,
SplitInfo& splitInfo);
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
index 7d5b74e..c489799 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
@@ -42,7 +42,7 @@ double HoeffdingCategoricalSplit<FitnessFunction>::EvaluateFitnessFunction()
template<typename FitnessFunction>
template<typename StreamingDecisionTreeType>
void HoeffdingCategoricalSplit<FitnessFunction>::CreateChildren(
- std::vector<StreamingDecisionTreeType*>& children,
+ std::vector<StreamingDecisionTreeType>& children,
data::DatasetInfo& datasetInfo,
SplitInfo& splitInfo)
{
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
index ce53128..172d7bc 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
@@ -14,6 +14,8 @@ template<typename FitnessFunction>
class HoeffdingNumericSplit
{
public:
+ typedef size_t SplitInfo;
+
HoeffdingNumericSplit();
template<typename eT>
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
index 1db2847..a60f06c 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
@@ -17,14 +17,15 @@ namespace mlpack {
namespace tree {
template<typename FitnessFunction = GiniImpurity,
- typename NumericSplitType = HoeffdingNumericSplit,
- typename CategoricalSplitType = HoeffdingCategoricalSplit>
+ typename NumericSplitType = HoeffdingNumericSplit<GiniImpurity>,
+ typename CategoricalSplitType = HoeffdingCategoricalSplit<GiniImpurity>
+>
class HoeffdingSplit
{
public:
HoeffdingSplit(const size_t dimensionality,
const size_t numClasses,
- const DatasetInfo& datasetInfo,
+ const data::DatasetInfo& datasetInfo,
const double successProbability);
template<typename VecType>
@@ -49,9 +50,10 @@ class HoeffdingSplit
std::vector<NumericSplitType> numericSplits;
std::vector<CategoricalSplitType> categoricalSplits;
- const DatasetInfo& datasetInfo;
- double successProbability;
+ const data::DatasetInfo& datasetInfo;
+ size_t numClasses;
size_t numSamples;
+ double successProbability;
// And we need to keep some information for after we have split.
size_t splitDimension;
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
index 62499c29..c0d3788 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
@@ -19,26 +19,32 @@ HoeffdingSplit<
CategoricalSplitType
>::HoeffdingSplit(const size_t dimensionality,
const size_t numClasses,
- const DatasetInfo& datasetInfo)
+ const data::DatasetInfo& datasetInfo,
+ const double successProbability) :
+ numSamples(0),
+ numClasses(numClasses),
+ datasetInfo(datasetInfo),
+ successProbability(successProbability),
+ categoricalSplit(0)
{
for (size_t i = 0; i < dimensionality; ++i)
{
- if (datasetInfo.Type(i) == Datatype.categorical)
+ if (datasetInfo.Type(i) == data::Datatype::categorical)
categoricalSplits.push_back(
- CategoricalSplitType(datasetInfo.NumMappings(), numClasses));
+ CategoricalSplitType(datasetInfo.NumMappings(i), numClasses));
// else, numeric splits (not yet!)
}
}
-template<typename VecType>
template<typename FitnessFunction,
typename NumericSplitType,
typename CategoricalSplitType>
+template<typename VecType>
void HoeffdingSplit<
FitnessFunction,
NumericSplitType,
CategoricalSplitType
->::Train(VecType& point, const size_t label)
+>::Train(const VecType& point, const size_t label)
{
if (splitDimension == size_t(-1))
{
@@ -47,9 +53,9 @@ void HoeffdingSplit<
size_t categoricalIndex = 0;
for (size_t i = 0; i < point.n_rows; ++i)
{
- if (datasetInfo.Type(i) == Datatype.categorical)
+ if (datasetInfo.Type(i) == data::Datatype::categorical)
categoricalSplits[categoricalIndex++].Train(point[i], label);
- else if (datasetInfo.Type(i) == Datatype.numeric)
+ else if (datasetInfo.Type(i) == data::Datatype::numeric)
numericSplits[numericIndex++].Train(point[i], label);
}
}
@@ -111,7 +117,7 @@ size_t HoeffdingSplit<
{
// Split!
splitDimension = largestIndex;
- if (datasetInfo[largestIndex].Type == Datatype.categorical)
+ if (datasetInfo[largestIndex].Type == data::Datatype::categorical)
{
// I don't know if this should be here.
majorityClass = categoricalSplit[largestIndex].MajorityClass();
@@ -125,32 +131,32 @@ size_t HoeffdingSplit<
}
}
-template<typename VecType>
template<
typename FitnessFunction,
typename NumericSplitType,
typename CategoricalSplitType
>
+template<typename VecType>
size_t HoeffdingSplit<
FitnessFunction,
NumericSplitType,
CategoricalSplitType
->::CalculateDirection(VecType& point) const
+>::CalculateDirection(const VecType& point) const
{
// Don't call this before the node is split...
- if (datasetInfo.Type(splitDimension) == Datatype::numeric)
+ if (datasetInfo.Type(splitDimension) == data::Datatype::numeric)
return numericSplit.CalculateDirection(point[splitDimension]);
- else if (datasetInfo.Type(splitDimension) == Datatype::categorical)
+ else if (datasetInfo.Type(splitDimension) == data::Datatype::categorical)
return categoricalSplit.CalculateDirection(point[splitDimension]);
}
-template<typename VecType>
template<
typename FitnessFunction,
typename NumericSplitType,
typename CategoricalSplitType
>
-void HoeffdingSplit<
+template<typename VecType>
+size_t HoeffdingSplit<
FitnessFunction,
NumericSplitType,
CategoricalSplitType
@@ -166,6 +172,7 @@ template<
typename NumericSplitType,
typename CategoricalSplitType
>
+template<typename StreamingDecisionTreeType>
void HoeffdingSplit<
FitnessFunction,
NumericSplitType,
@@ -177,17 +184,17 @@ void HoeffdingSplit<
size_t categoricalSplitIndex = 0;
for (size_t i = 0; i < splitDimension; ++i)
{
- if (datasetInfo.Type(i) == Datatype::numeric)
+ if (datasetInfo.Type(i) == data::Datatype::numeric)
++numericSplitIndex;
- if (datasetInfo.Type(i) == Datatype::categorical)
+ if (datasetInfo.Type(i) == data::Datatype::categorical)
++categoricalSplitIndex;
}
- if (datasetInfo.Type(splitDimension) == Datatype::numeric)
+ if (datasetInfo.Type(splitDimension) == data::Datatype::numeric)
{
numericSplits[numericSplitIndex + 1].CreateChildren(children, numericSplit);
}
- else if (datasetInfo.Type(splitDimension) == Datatype::categorical)
+ else if (datasetInfo.Type(splitDimension) == data::Datatype::categorical)
{
categoricalSplits[categoricalSplitIndex + 1].CreateChildren(children,
categoricalSplit);
diff --git a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp
index af2a160..dc4a214 100644
--- a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp
+++ b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree.hpp
@@ -38,9 +38,9 @@ class StreamingDecisionTree
void Train(const MatType& data, const arma::Row<size_t>& labels);
template<typename VecType>
- size_t Predict(const VecType& data);
+ size_t Classify(const VecType& data);
- void Predict(const MatType& data, arma::Row<size_t>& predictions);
+ void Classify(const MatType& data, arma::Row<size_t>& predictions);
// How do we encode the actual split itself?
@@ -55,4 +55,7 @@ class StreamingDecisionTree
} // namespace tree
} // namespace mlpack
+// Include implementation.
+#include "streaming_decision_tree_impl.hpp"
+
#endif
diff --git a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp
index b40a502..f97a4f5 100644
--- a/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/streaming_decision_tree_impl.hpp
@@ -18,7 +18,7 @@ StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
const MatType& data,
const data::DatasetInfo& datasetInfo,
const arma::Row<size_t>& labels) :
- split(datasetInfo)
+ split(0, 0, datasetInfo, 0)
{
Train(data, labels);
}
@@ -26,20 +26,21 @@ StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
template<typename SplitType, typename MatType>
StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
const data::DatasetInfo& datasetInfo) :
- split(datasetInfo)
+ split(0, 0, datasetInfo, 0)
{
// No training. Anything else to do...?
}
template<typename SplitType, typename MatType>
StreamingDecisionTree<SplitType, MatType>::StreamingDecisionTree(
- const StreamingDecisionTree& other)
+ const StreamingDecisionTree& other) :
+ split(other.split)
{
// Copy the children of the other tree.
}
-template<typename VecType>
template<typename SplitType, typename MatType>
+template<typename VecType>
void StreamingDecisionTree<SplitType, MatType>::Train(const VecType& data,
const size_t label)
{
@@ -54,7 +55,7 @@ void StreamingDecisionTree<SplitType, MatType>::Train(const VecType& data,
children.clear();
// The split knows how to add the children.
- SplitType.CreateChildren(children);
+ split.CreateChildren(children);
}
}
@@ -68,8 +69,8 @@ void StreamingDecisionTree<SplitType, MatType>::Train(
Train(data.col(i), labels[i]);
}
-template<typename VecType>
template<typename SplitType, typename MatType>
+template<typename VecType>
size_t StreamingDecisionTree<SplitType, MatType>::Classify(const VecType& data)
{
// Get the direction we need to go, and continue classification.
diff --git a/src/mlpack/tests/hoeffding_tree_test.cpp b/src/mlpack/tests/hoeffding_tree_test.cpp
index 5bf877e..58d34d9 100644
--- a/src/mlpack/tests/hoeffding_tree_test.cpp
+++ b/src/mlpack/tests/hoeffding_tree_test.cpp
@@ -16,6 +16,7 @@
using namespace std;
using namespace arma;
using namespace mlpack;
+using namespace mlpack::math;
using namespace mlpack::data;
using namespace mlpack::tree;
@@ -110,7 +111,7 @@ BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitMajorityClassTest)
for (size_t i = 0; i < 500; ++i)
{
- split.Train(math::RandInt(0, 10), 1);
+ split.Train(mlpack::math::RandInt(0, 10), 1);
BOOST_REQUIRE_EQUAL(split.MajorityClass(), 1);
}
}
@@ -123,11 +124,11 @@ BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitHarderMajorityClassTest)
// Ten categories, three classes.
HoeffdingCategoricalSplit<GiniImpurity> split(10, 3);
- split.Train(math::RandInt(0, 10), 1);
+ split.Train(mlpack::math::RandInt(0, 10), 1);
for (size_t i = 0; i < 250; ++i)
{
- split.Train(math::RandInt(0, 10), 1);
- split.Train(math::RandInt(0, 10), 2);
+ split.Train(mlpack::math::RandInt(0, 10), 1);
+ split.Train(mlpack::math::RandInt(0, 10), 2);
BOOST_REQUIRE_EQUAL(split.MajorityClass(), 1);
}
}
@@ -182,10 +183,10 @@ BOOST_AUTO_TEST_CASE(HoeffdingCategoricalSplitSplitTest)
HoeffdingCategoricalSplit<GiniImpurity> split(3, 3); // 3 categories.
// No training is necessary because we can just call CreateChildren().
- std::vector<StreamingDecisionTree<HoeffdingSplit>> children;
+ std::vector<StreamingDecisionTree<HoeffdingSplit<>>> children;
data::DatasetInfo info;
info.MapString("hello", 0); // Make dimension 0 categorical.
- HoeffdingCategoricalSplit<GiniImpurity>::SplitInfo splitInfo;
+ HoeffdingCategoricalSplit<GiniImpurity>::SplitInfo splitInfo(3);
// Create the children.
split.CreateChildren(children, info, splitInfo);
More information about the mlpack-git mailing list