[mlpack-git] master: Eliminate redundant classCounts. (1ff6f2f)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:43:09 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit 1ff6f2f88c2478536d4470f66ed2b115bd09c887
Author: Ryan Curtin <ryan at ratml.org>
Date: Wed Sep 30 15:24:52 2015 -0400
Eliminate redundant classCounts.
>---------------------------------------------------------------
1ff6f2f88c2478536d4470f66ed2b115bd09c887
.../methods/hoeffding_trees/hoeffding_split.hpp | 5 ++-
.../hoeffding_trees/hoeffding_split_impl.hpp | 48 ++++++++++++++++++----
2 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
index 0a9bfcd..4d18b75 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split.hpp
@@ -38,8 +38,10 @@ class HoeffdingSplit
//! Get the splitting dimension (size_t(-1) if no split).
size_t SplitDimension() const { return splitDimension; }
+ //! Get the majority class.
+ size_t MajorityClass() const;
//! Modify the majority class.
- size_t& MajorityClass() { return majorityClass; }
+ size_t& MajorityClass();
// Return index that we should go towards.
template<typename VecType>
@@ -62,7 +64,6 @@ class HoeffdingSplit
size_t numSamples;
size_t numClasses;
size_t maxSamples;
- arma::Col<size_t> classCounts;
const data::DatasetInfo& datasetInfo;
double successProbability;
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
index 0c2b60f..b43a426 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_split_impl.hpp
@@ -25,7 +25,6 @@ HoeffdingSplit<
numSamples(0),
numClasses(numClasses),
maxSamples(maxSamples),
- classCounts(arma::zeros<arma::Col<size_t>>(numClasses)),
datasetInfo(datasetInfo),
successProbability(successProbability),
splitDimension(size_t(-1)),
@@ -62,12 +61,6 @@ void HoeffdingSplit<
{
if (splitDimension == size_t(-1))
{
- // Update majority counts.
- classCounts(label)++;
- arma::uword tmp;
- classCounts.max(tmp);
- majorityClass = size_t(tmp);
-
++numSamples;
size_t numericIndex = 0;
size_t categoricalIndex = 0;
@@ -164,6 +157,45 @@ template<
typename NumericSplitType,
typename CategoricalSplitType
>
+size_t HoeffdingSplit<
+ FitnessFunction,
+ NumericSplitType,
+ CategoricalSplitType
+>::MajorityClass() const
+{
+ // If the node is not split yet, we have to grab the majority class from any
+ // of the structures figuring out what to split on.
+ if (splitDimension == size_t(-1))
+ {
+ // Grab majority class from splits.
+ if (categoricalSplits.size() > 0)
+ majorityClass = categoricalSplits[0].MajorityClass();
+ else
+ majorityClass = numericSplits[0].MajorityClass();
+ }
+
+ return majorityClass;
+}
+
+template<
+ typename FitnessFunction,
+ typename NumericSplitType,
+ typename CategoricalSplitType
+>
+size_t& HoeffdingSplit<
+ FitnessFunction,
+ NumericSplitType,
+ CategoricalSplitType
+>::MajorityClass()
+{
+ return majorityClass;
+}
+
+template<
+ typename FitnessFunction,
+ typename NumericSplitType,
+ typename CategoricalSplitType
+>
template<typename VecType>
size_t HoeffdingSplit<
FitnessFunction,
@@ -227,7 +259,7 @@ void HoeffdingSplit<
for (size_t i = 0; i < childMajorities.n_elem; ++i)
{
children.push_back(StreamingDecisionTreeType(datasetInfo, dimensionality,
- classCounts.n_elem, successProbability, numSamples));
+ numClasses, successProbability, numSamples));
children[i].MajorityClass() = childMajorities[i];
}
}
More information about the mlpack-git mailing list