[mlpack-git] master, mlpack-1.0.x: Rewinding the code review (4699840)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 21:50:11 EST 2015


Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 469984022f30eca14c57892b88d40c60098bda81
Author: Udit Saxena <saxena.udit at gmail.com>
Date:   Mon Jun 30 18:13:29 2014 +0000

    Rewinding the code review


>---------------------------------------------------------------

469984022f30eca14c57892b88d40c60098bda81
 src/mlpack/methods/decision_stump/decision_stump_impl.hpp | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
index 625e12e..051d1da 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -158,6 +158,8 @@ double DecisionStump<MatType>::SetupSplitAttribute(
     count++;
     if (i == sortedLabels.n_elem - 1)
     { 
+      // If we're at the end, then don't worry about the bucket size;
+      // just take this as the last bin.
       begin = i - count + 1;
       end = i;
 
@@ -168,16 +170,21 @@ double DecisionStump<MatType>::SetupSplitAttribute(
       zSubColAtts.fill(0.0);
 
       subColLabels = sortedLabels.cols(begin, end) + zSubColLabels; 
+              // arma::zeros<arma::rowvec>((sortedLabels.cols(begin, end)).n_elem);
 
       subColAtts = sortedAtt.cols(begin, end) + zSubColAtts;
+              // arma::zeros<arma::rowvec>((sortedAtt.cols(begin, end)).n_elem);
 
       entropy += CalculateEntropy(subColAtts, subColLabels);
       i++;
     }
     else if (sortedLabels(i) != sortedLabels(i + 1))
     {
+      // if we're not at the last element of sortedLabels, then check whether
+      // count is less than the current bucket size.
       if (count < bucketSize)
       { 
+        // if it is, then take the minimum bucket size anyway
         begin = i - count + 1;
         end = begin + bucketSize - 1;
 
@@ -186,6 +193,7 @@ double DecisionStump<MatType>::SetupSplitAttribute(
       }
       else
       {
+        // if it is not, then take the bucket size as the value of count.
         begin = i - count + 1;
         end = i;
       }
@@ -197,10 +205,12 @@ double DecisionStump<MatType>::SetupSplitAttribute(
       zSubColAtts.fill(0.0);
 
       subColLabels = sortedLabels.cols(begin, end) + zSubColLabels;
+              // arma::zeros<arma::rowvec>((sortedLabels.cols(begin, end)).n_elem);
 
       subColAtts = sortedAtt.cols(begin, end) + zSubColAtts;
+              // arma::zeros<arma::rowvec>((sortedAtt.cols(begin, end)).n_elem);
 
-      // Now use subColLabels and subColAtts to calculate entropy.
+      // now using subColLabels and subColAtts to calculate entropy
       entropy += CalculateEntropy(subColAtts, subColLabels);
 
       i = end + 1;
@@ -285,7 +295,7 @@ void DecisionStump<MatType>::TrainOnAtt(const arma::rowvec& attribute,
 
       // Find the most frequent element in subCols so as to assign a label to
       // the bucket of subCols.
-      mostFreq = CountMostFreq<double>(subCols);
+      mostFreq = CountMostFreq<double>(subCols);//sortedLabels.subvec(begin, end));
 
       split.resize(split.n_elem + 1);
       split(split.n_elem - 1) = sortedSplitAtt(begin);



More information about the mlpack-git mailing list