[mlpack-git] master: Refactor EvaluateFitnessFunction(). (81a8b6c)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Wed Dec 23 11:46:16 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/de9cc4b05069e1fa4793d9355f2f595af5ff45d2...6070527af14296cd99739de6c62666cc5d2a2125

>---------------------------------------------------------------

commit 81a8b6c9260ce8ec57b5bf2beb5be6881c3794f4
Author: Ryan Curtin <ryan at ratml.org>
Date:   Thu Nov 12 15:18:06 2015 -0500

    Refactor EvaluateFitnessFunction().


>---------------------------------------------------------------

81a8b6c9260ce8ec57b5bf2beb5be6881c3794f4
 .../hoeffding_trees/binary_numeric_split.hpp       | 10 +++++++++-
 .../hoeffding_trees/binary_numeric_split_impl.hpp  | 22 +++++++++++++++-------
 .../hoeffding_categorical_split.hpp                | 11 +++++++++--
 .../hoeffding_categorical_split_impl.hpp           |  8 +++++---
 .../hoeffding_trees/hoeffding_numeric_split.hpp    | 12 ++++++++++--
 .../hoeffding_numeric_split_impl.hpp               | 10 ++++++----
 6 files changed, 54 insertions(+), 19 deletions(-)

diff --git a/src/mlpack/methods/hoeffding_trees/binary_numeric_split.hpp b/src/mlpack/methods/hoeffding_trees/binary_numeric_split.hpp
index f83c95e..faa0f77 100644
--- a/src/mlpack/methods/hoeffding_trees/binary_numeric_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/binary_numeric_split.hpp
@@ -65,8 +65,16 @@ class BinaryNumericSplit
    * best possible gain of a binary split.  Note that this takes O(n) time,
    * where n is the number of points seen so far.  So this may not exactly be
    * fast...
+   *
+   * The best possible split will be stored in bestFitness, and the second best
+   * possible split will be stored in secondBestFitness.
+   *
+   * @param bestFitness Fitness function value for best possible split.
+   * @param secondBestFitness Fitness function value for second best possible
+   *      split.
    */
-  double EvaluateFitnessFunction();
+  void EvaluateFitnessFunction(double& bestFitness,
+                               double& secondBestFitness);
 
   // Return the number of children if this node were to split on this feature.
   size_t NumChildren() const { return 2; }
diff --git a/src/mlpack/methods/hoeffding_trees/binary_numeric_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/binary_numeric_split_impl.hpp
index 468bfdd..a0a174a 100644
--- a/src/mlpack/methods/hoeffding_trees/binary_numeric_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/binary_numeric_split_impl.hpp
@@ -38,8 +38,9 @@ void BinaryNumericSplit<FitnessFunction, ObservationType>::Train(
 }
 
 template<typename FitnessFunction, typename ObservationType>
-double BinaryNumericSplit<FitnessFunction, ObservationType>::
-    EvaluateFitnessFunction()
+void BinaryNumericSplit<FitnessFunction, ObservationType>::
+    EvaluateFitnessFunction(double& bestFitness,
+                            double& secondBestFitness)
 {
   // Unfortunately, we have to iterate over the map.
   bestSplit = std::numeric_limits<ObservationType>::min();
@@ -49,7 +50,8 @@ double BinaryNumericSplit<FitnessFunction, ObservationType>::
   counts.col(0).zeros();
   counts.col(1) = classCounts;
 
-  double bestValue = FitnessFunction::Evaluate(counts);
+  bestFitness = FitnessFunction::Evaluate(counts);
+  secondBestFitness = 0.0;
 
   // Initialize to the first observation, so we don't calculate gain on the
   // first iteration (it will be 0).
@@ -64,11 +66,15 @@ double BinaryNumericSplit<FitnessFunction, ObservationType>::
       lastObservation = (*it).first;
 
       const double value = FitnessFunction::Evaluate(counts);
-      if (value > bestValue)
+      if (value > bestFitness)
       {
-        bestValue = value;
+        bestFitness = value;
         bestSplit = (*it).first;
       }
+      else if (value > secondBestFitness)
+      {
+        secondBestFitness = value;
+      }
     }
 
     // Move the point to the right side of the split.
@@ -77,7 +83,6 @@ double BinaryNumericSplit<FitnessFunction, ObservationType>::
   }
 
   isAccurate = true;
-  return bestValue;
 }
 
 template<typename FitnessFunction, typename ObservationType>
@@ -86,7 +91,10 @@ void BinaryNumericSplit<FitnessFunction, ObservationType>::Split(
     SplitInfo& splitInfo)
 {
   if (!isAccurate)
-    EvaluateFitnessFunction();
+  {
+    double bestGain, secondBestGain;
+    EvaluateFitnessFunction(bestGain, secondBestGain);
+  }
 
   // Make one child for each side of the split.
   childMajorities.set_size(2);
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
index 1f8f720..291fb75 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
@@ -63,9 +63,16 @@ class HoeffdingCategoricalSplit
 
   /**
    * Given the points seen so far, evaluate the fitness function, returning the
-   * gain if a split was to be made.
+   * gain for the best possible split and the second best possible split.  In
+   * this splitting technique, we only split one possible way, so
+   * secondBestFitness will always be 0.
+   *
+   * @param bestFitness The fitness function result for this split.
+   * @param secondBestFitness This is always set to 0 (this split only splits
+   *      one way).
    */
-  double EvaluateFitnessFunction() const;
+  void EvaluateFitnessFunction(double& bestFitness, double& secondBestFitness)
+      const;
 
   //! Return the number of children, if the node were to split.
   size_t NumChildren() const { return sufficientStatistics.n_cols; }
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
index b189c9c..c008f17 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split_impl.hpp
@@ -33,10 +33,12 @@ void HoeffdingCategoricalSplit<FitnessFunction>::Train(eT value,
 }
 
 template<typename FitnessFunction>
-double HoeffdingCategoricalSplit<FitnessFunction>::EvaluateFitnessFunction()
-    const
+void HoeffdingCategoricalSplit<FitnessFunction>::EvaluateFitnessFunction(
+    double& bestFitness,
+    double& secondBestFitness) const
 {
-  return FitnessFunction::Evaluate(sufficientStatistics);
+  bestFitness = FitnessFunction::Evaluate(sufficientStatistics);
+  secondBestFitness = 0.0; // We only split one possible way.
 }
 
 template<typename FitnessFunction>
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
index 38bbc35..fb6c4c1 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
@@ -78,9 +78,17 @@ class HoeffdingNumericSplit
   /**
    * Evaluate the fitness function given what has been calculated so far.  In
    * this case, if binning has not yet been performed, 0 will be returned (i.e.,
-   * no gain).
+   * no gain).  Because this split can only split one possible way,
+   * secondBestFitness (the fitness function for the second best possible split)
+   * will be set to 0.
+   *
+   * @param bestFitness Value of the fitness function for the best possible
+   *      split.
+   * @param secondBestFitness Value of the fitness function for the second best
+   *      possible split (always 0 for this split).
    */
-  double EvaluateFitnessFunction() const;
+  void EvaluateFitnessFunction(double& bestFitness, double& secondBestFitness)
+      const;
 
   //! Return the number of children if this node splits on this feature.
   size_t NumChildren() const { return bins; }
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split_impl.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split_impl.hpp
index b8bb34f..bcd9f92 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split_impl.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split_impl.hpp
@@ -84,13 +84,15 @@ void HoeffdingNumericSplit<FitnessFunction, ObservationType>::Train(
 }
 
 template<typename FitnessFunction, typename ObservationType>
-double HoeffdingNumericSplit<FitnessFunction, ObservationType>::
-    EvaluateFitnessFunction() const
+void HoeffdingNumericSplit<FitnessFunction, ObservationType>::
+    EvaluateFitnessFunction(double& bestFitness,
+                            double& secondBestFitness) const
 {
+  secondBestFitness = 0.0; // We can only split one way.
   if (samplesSeen < observationsBeforeBinning)
-    return 0.0;
+    bestFitness = 0.0;
   else
-    return FitnessFunction::Evaluate(sufficientStatistics);
+    bestFitness = FitnessFunction::Evaluate(sufficientStatistics);
 }
 
 template<typename FitnessFunction, typename ObservationType>



More information about the mlpack-git mailing list