[mlpack-git] master: Refactor EvaluateFitnessFunction(). (81a8b6c)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:46:16 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125
>---------------------------------------------------------------
commit 81a8b6c9260ce8ec57b5bf2beb5be6881c3794f4
Author: Ryan Curtin <ryan at ratml.org>
Date: Thu Nov 12 15:18:06 2015 -0500
Refactor EvaluateFitnessFunction().
>---------------------------------------------------------------
81a8b6c9260ce8ec57b5bf2beb5be6881c3794f4
.../hoeffding_trees/binary_numeric_split.hpp | 10 +++++++++-
.../hoeffding_trees/binary_numeric_split_impl.hpp | 22 +++++++++++++++-------
.../hoeffding_categorical_split.hpp | 11 +++++++++--
.../hoeffding_categorical_split_impl.hpp | 8 +++++---
.../hoeffding_trees/hoeffding_numeric_split.hpp | 12 ++++++++++--
.../hoeffding_numeric_split_impl.hpp | 10 ++++++----
6 files changed, 54 insertions(+), 19 deletions(-)
diff --git a/src/mlpack/methods/hoeffding_trees/binary_numeric_split.hpp b/src/mlpack/methods/hoeffding_trees/binary_numeric_split.hpp
index f83c95e..faa0f77 100644
--- a/src/mlpack/methods/hoeffding_trees/binary_numeric_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/binary_numeric_split.hpp
@@ -65,8 +65,16 @@ class BinaryNumericSplit
* best possible gain of a binary split. Note that this takes O(n) time,
* where n is the number of points seen so far. So this may not exactly be
* fast...
+ *
+ * The best possible split will be stored in bestFitness, and the second best
+ * possible split will be stored in secondBestFitness.
+ *
+ * @param bestFitness Fitness function value for best possible split.
+ * @param secondBestFitness Fitness function value for second best possible
+ * split.
*/
- double EvaluateFitnessFunction();
+ void EvaluateFitnessFunction(double& bestFitness,
+ double& secondBestFitness);
// Return the number of children if this node were to split on this feature.
size_t NumChildren() const { return 2; }
diff --git a/src/mlpack/methods/hoeffding_trees/binary_numeric_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/binary_numeric_split_impl.hpp
index 468bfdd..a0a174a 100644
--- a/src/mlpack/methods/hoeffding_trees/binary_numeric_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/binary_numeric_split_impl.hpp
@@ -38,8 +38,9 @@ void BinaryNumericSplit<FitnessFunction, ObservationType>::Train(
}
template<typename FitnessFunction, typename ObservationType>
-double BinaryNumericSplit<FitnessFunction, ObservationType>::
- EvaluateFitnessFunction()
+void BinaryNumericSplit<FitnessFunction, ObservationType>::
+ EvaluateFitnessFunction(double& bestFitness,
+ double& secondBestFitness)
{
// Unfortunately, we have to iterate over the map.
bestSplit = std::numeric_limits<ObservationType>::min();
@@ -49,7 +50,8 @@ double BinaryNumericSplit<FitnessFunction, ObservationType>::
counts.col(0).zeros();
counts.col(1) = classCounts;
- double bestValue = FitnessFunction::Evaluate(counts);
+ bestFitness = FitnessFunction::Evaluate(counts);
+ secondBestFitness = 0.0;
// Initialize to the first observation, so we don't calculate gain on the
// first iteration (it will be 0).
@@ -64,11 +66,15 @@ double BinaryNumericSplit<FitnessFunction, ObservationType>::
lastObservation = (*it).first;
const double value = FitnessFunction::Evaluate(counts);
- if (value > bestValue)
+ if (value > bestFitness)
{
- bestValue = value;
+ bestFitness = value;
bestSplit = (*it).first;
}
+ else if (value > secondBestFitness)
+ {
+ secondBestFitness = value;
+ }
}
// Move the point to the right side of the split.
@@ -77,7 +83,6 @@ double BinaryNumericSplit<FitnessFunction, ObservationType>::
}
isAccurate = true;
- return bestValue;
}
template<typename FitnessFunction, typename ObservationType>
@@ -86,7 +91,10 @@ void BinaryNumericSplit<FitnessFunction, ObservationType>::Split(
SplitInfo& splitInfo)
{
if (!isAccurate)
- EvaluateFitnessFunction();
+ {
+ double bestGain, secondBestGain;
+ EvaluateFitnessFunction(bestGain, secondBestGain);
+ }
// Make one child for each side of the split.
childMajorities.set_size(2);
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
index 1f8f720..291fb75 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
@@ -63,9 +63,16 @@ class HoeffdingCategoricalSplit
/**
* Given the points seen so far, evaluate the fitness function, returning the
- * gain if a split was to be made.
+ * gain for the best possible split and the second best possible split. In
+ * this splitting technique, we only split one possible way, so
+ * secondBestFitness will always be 0.
+ *
+ * @param bestFitness The fitness function result for this split.
+ * @param secondBestFitness This is always set to 0 (this split only splits
+ * one way).
*/
- double EvaluateFitnessFunction() const;
+ void EvaluateFitnessFunction(double& bestFitness, double& secondBestFitness)
+ const;
//! Return the number of children, if the node were to split.
size_t NumChildren() const { return sufficientStatistics.n_cols; }
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
index b189c9c..c008f17 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
@@ -33,10 +33,12 @@ void HoeffdingCategoricalSplit<FitnessFunction>::Train(eT value,
}
template<typename FitnessFunction>
-double HoeffdingCategoricalSplit<FitnessFunction>::EvaluateFitnessFunction()
- const
+void HoeffdingCategoricalSplit<FitnessFunction>::EvaluateFitnessFunction(
+ double& bestFitness,
+ double& secondBestFitness) const
{
- return FitnessFunction::Evaluate(sufficientStatistics);
+ bestFitness = FitnessFunction::Evaluate(sufficientStatistics);
+ secondBestFitness = 0.0; // We only split one possible way.
}
template<typename FitnessFunction>
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
index 38bbc35..fb6c4c1 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
@@ -78,9 +78,17 @@ class HoeffdingNumericSplit
/**
* Evaluate the fitness function given what has been calculated so far. In
* this case, if binning has not yet been performed, 0 will be returned (i.e.,
- * no gain).
+ * no gain). Because this split can only split one possible way,
+ * secondBestFitness (the fitness function for the second best possible split)
+ * will be set to 0.
+ *
+ * @param bestFitness Value of the fitness function for the best possible
+ * split.
+ * @param secondBestFitness Value of the fitness function for the second best
+ * possible split (always 0 for this split).
*/
- double EvaluateFitnessFunction() const;
+ void EvaluateFitnessFunction(double& bestFitness, double& secondBestFitness)
+ const;
//! Return the number of children if this node splits on this feature.
size_t NumChildren() const { return bins; }
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split_impl.hpp
index b8bb34f..bcd9f92 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split_impl.hpp
@@ -84,13 +84,15 @@ void HoeffdingNumericSplit<FitnessFunction, ObservationType>::Train(
}
template<typename FitnessFunction, typename ObservationType>
-double HoeffdingNumericSplit<FitnessFunction, ObservationType>::
- EvaluateFitnessFunction() const
+void HoeffdingNumericSplit<FitnessFunction, ObservationType>::
+ EvaluateFitnessFunction(double& bestFitness,
+ double& secondBestFitness) const
{
+ secondBestFitness = 0.0; // We can only split one way.
if (samplesSeen < observationsBeforeBinning)
- return 0.0;
+ bestFitness = 0.0;
else
- return FitnessFunction::Evaluate(sufficientStatistics);
+ bestFitness = FitnessFunction::Evaluate(sufficientStatistics);
}
template<typename FitnessFunction, typename ObservationType>
More information about the mlpack-git
mailing list