[mlpack-svn] r17054 - in mlpack/trunk/src/mlpack: methods/adaboost methods/decision_stump tests

Sun Aug 17 03:09:20 EDT 2014

Author: saxena.udit
Date: Sun Aug 17 03:09:20 2014
New Revision: 17054

Log:
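Changes to Decision stump and AdaBoost:
 - AdaBoost: ztAccumulator is replaced by ztProduct, read through
   GetztProduct(); the per-iteration z vector is removed; in the
   classification routine, cMatrix is now numClasses x test.n_cols.
 - DecisionStump: Train, SetupSplitAttribute and CalculateEntropy take a
   compile-time bool isWeight template parameter and receive the weight
   vector as an argument; the weightD and tempD members are removed.
 - AdaBoost tests compare hammingLoss against GetztProduct().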

Modified:
   mlpack/trunk/src/mlpack/methods/adaboost/adaboost.hpp
   mlpack/trunk/src/mlpack/methods/adaboost/adaboost_impl.hpp
   mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump.hpp
   mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
   mlpack/trunk/src/mlpack/tests/adaboost_test.cpp

Modified: mlpack/trunk/src/mlpack/methods/adaboost/adaboost.hpp
==============================================================================

--- mlpack/trunk/src/mlpack/methods/adaboost/adaboost.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/adaboost/adaboost.hpp	Sun Aug 17 03:09:20 2014
@@ -53,8 +53,8 @@
   // Stores the final classification of the Labels.
   arma::Row<size_t> finalHypothesis;
 
-  // To check for the bound for the hammingLoss.
-  double ztAccumulator;
+  // Return the value of ztProduct
+  double GetztProduct() { return ztProduct; }
 
   // The tolerance for change in rt and when to stop.
   double tolerance;
@@ -78,8 +78,9 @@
   
   std::vector<WeakLearner> wl;
   std::vector<double> alpha;
-  std::vector<double> z;
 
+  // To check for the bound for the hammingLoss.
+  double ztProduct;
   
 }; // class AdaBoost
 

Modified: mlpack/trunk/src/mlpack/methods/adaboost/adaboost_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/adaboost/adaboost_impl.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/adaboost/adaboost_impl.hpp	Sun Aug 17 03:09:20 2014
@@ -52,12 +52,10 @@
   // crt is for stopping the iterations when rt
   // stops changing by less than a tolerant value.
 
-  ztAccumulator = 1.0;
-
   // crt is cumulative rt for stopping the iterations when rt
   // stops changing by less than a tolerant value.
 
-  ztAccumulator = 1.0;
+  ztProduct = 1.0;
   // ztAccumulator is
 
   // To be used for prediction by the Weak Learner for prediction.
@@ -183,8 +181,7 @@
     D = D / zt;
 
     // Accumulating the value of zt for the Hamming Loss bound.
-    ztAccumulator *= zt;
-    z.push_back(zt);
+    ztProduct *= zt;
   }
 
   // Iterations are over, now build a strong hypothesis
@@ -213,7 +210,7 @@
     arma::Row<size_t>& predictedLabels)
 {
   arma::Row<size_t> tempPredictedLabels(predictedLabels.n_cols);
-  arma::mat cMatrix(test.n_cols, numClasses);
+  arma::mat cMatrix(numClasses, test.n_cols);
 
   cMatrix.zeros();
   predictedLabels.zeros();
@@ -223,7 +220,7 @@
     wl[i].Classify(test, tempPredictedLabels);
 
     for (int j = 0; j < tempPredictedLabels.n_cols; j++)
-      cMatrix(j, tempPredictedLabels(j)) += (alpha[i] * tempPredictedLabels(j));
+      cMatrix(tempPredictedLabels(j), j) += (alpha[i] * tempPredictedLabels(j));
   }
 
   arma::rowvec cMRow;
@@ -261,6 +258,15 @@
   }
 }
 
+/*/**
+ * Return the value of ztProduct
+ */
+ /*
+template <typename MatType, typename WeakLearner>
+double GetztProduct()
+{
+  return ztProduct;
+}*/
 } // namespace adaboost
 } // namespace mlpack
 

Modified: mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump.hpp	Sun Aug 17 03:09:20 2014
@@ -110,10 +110,10 @@
    *     candidate for the splitting attribute.
    * @param isWeight Whether we need to run a weighted Decision Stump.
    */
-  template <typename W>
+  template <bool isWeight>
   double SetupSplitAttribute(const arma::rowvec& attribute,
                              const arma::Row<size_t>& labels,
-                             W isWeight);
+                             const arma::rowvec& weightD);
 
   /**
    * After having decided the attribute on which to split, train on that
@@ -154,9 +154,9 @@
    * @param labels Corresponding labels of the attribute.
    * @param isWeight Whether we need to run a weighted Decision Stump.
    */
-  template <typename LabelType, typename W>
+  template <typename LabelType, bool isWeight>
   double CalculateEntropy(arma::subview_row<LabelType> labels, int begin,
-                          W isWeight);
+                          const arma::rowvec& tempD);
 
   /**
    * Train the decision stump on the given data and labels.
@@ -165,14 +165,10 @@
    * @param labels Labels for dataset.
    * @param isWeight Whether we need to run a weighted Decision Stump.
    */
-  template <typename W>
-  void Train(const MatType& data, const arma::Row<size_t>& labels, W isWeight);
+  template <bool isWeight>
+  void Train(const MatType& data, const arma::Row<size_t>& labels, 
+             const arma::rowvec& weightD);
 
-  //! To store the weight vectors for boosting purposes.
-  arma::rowvec weightD;
-
-  //! To store reordered weight vectors for boosting purposes.
-  arma::rowvec tempD;
 };
 
 }; // namespace decision_stump

Modified: mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
==============================================================================
--- mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump_impl.hpp	(original)
+++ mlpack/trunk/src/mlpack/methods/decision_stump/decision_stump_impl.hpp	Sun Aug 17 03:09:20 2014
@@ -30,9 +30,10 @@
 {
   numClass = classes;
   bucketSize = inpBucketSize;
-  const bool isWeight = false;
 
-  Train<bool>(data, labels, isWeight);
+  arma::rowvec weightD;
+
+  Train<false>(data, labels, weightD);
 }
 
 /**
@@ -43,14 +44,15 @@
  * @param isWeight Whether we need to run a weighted Decision Stump.
  */
 template<typename MatType>
-template <typename W>
-void DecisionStump<MatType>::Train(const MatType& data, const arma::Row<size_t>& labels, W isWeight)
+template <bool isWeight>
+void DecisionStump<MatType>::Train(const MatType& data, const arma::Row<size_t>& labels,
+                                    const arma::rowvec& weightD)
 {
   // If classLabels are not all identical, proceed with training.
   int bestAtt = 0;
   double entropy;
-  const double rootEntropy = CalculateEntropy<size_t, W>(
-      labels.subvec(0, labels.n_elem - 1), 0, isWeight);
+  const double rootEntropy = CalculateEntropy<size_t, isWeight>(
+      labels.subvec(0, labels.n_elem - 1), 0, weightD);
 
   double gain, bestGain = 0.0;
   for (int i = 0; i < data.n_rows; i++)
@@ -60,7 +62,7 @@
     {
       // For each attribute with non-identical values, treat it as a potential
       // splitting attribute and calculate entropy if split on it.
-      entropy = SetupSplitAttribute<W>(data.row(i), labels, isWeight);
+      entropy = SetupSplitAttribute<isWeight>(data.row(i), labels, weightD);
 
       gain = rootEntropy - entropy;
       // Find the attribute with the best entropy so that the gain is
@@ -137,10 +139,10 @@
   numClass = other.numClass;
   bucketSize = other.bucketSize;
 
-  weightD = weights;
-  tempD = weightD;
-  const bool isWeight = true;
-  Train<bool>(data, labels, isWeight);
+  // weightD = weights;
+  // tempD = weightD;
+
+  Train<true>(data, labels, weights);
 }
 
 /**
@@ -152,11 +154,11 @@
  * @param isWeight Whether we need to run a weighted Decision Stump.
  */
 template <typename MatType>
-template <typename W>
+template <bool isWeight>
 double DecisionStump<MatType>::SetupSplitAttribute(
     const arma::rowvec& attribute,
     const arma::Row<size_t>& labels,
-    W isWeight)
+    const arma::rowvec& weightD)
 {
   int i, count, begin, end;
   double entropy = 0.0;
@@ -171,7 +173,7 @@
   arma::Row<size_t> sortedLabels(attribute.n_elem);
   sortedLabels.fill(0);
 
-  tempD = arma::rowvec(weightD.n_cols);
+  arma::rowvec tempD = arma::rowvec(weightD.n_cols);
 
   for (i = 0; i < attribute.n_elem; i++)
   {
@@ -199,8 +201,8 @@
       // Use ratioEl to calculate the ratio of elements in this split.
       const double ratioEl = ((double) (end - begin + 1) / sortedLabels.n_elem);
 
-      entropy += ratioEl * CalculateEntropy<size_t, W>(
-          sortedLabels.subvec(begin, end), begin, isWeight);
+      entropy += ratioEl * CalculateEntropy<size_t, isWeight>(
+          sortedLabels.subvec(begin, end), begin, tempD);
       i++;
     }
     else if (sortedLabels(i) != sortedLabels(i + 1))
@@ -226,8 +228,8 @@
       }
       const double ratioEl = ((double) (end - begin + 1) / sortedLabels.n_elem);
 
-      entropy += ratioEl * CalculateEntropy<size_t, W>(
-          sortedLabels.subvec(begin, end), begin, isWeight);
+      entropy += ratioEl * CalculateEntropy<size_t, isWeight>(
+          sortedLabels.subvec(begin, end), begin, tempD);
 
       i = end + 1;
       count = 0;
@@ -418,10 +420,10 @@
  * @param isWeight Whether we need to run a weighted Decision Stump.
  */
 template<typename MatType>
-template<typename LabelType, typename W>
+template<typename LabelType, bool isWeight>
 double DecisionStump<MatType>::CalculateEntropy(
     arma::subview_row<LabelType> labels,
-    int begin, W isWeight)
+    int begin, const arma::rowvec& tempD)
 {
   double entropy = 0.0;
   size_t j;

Modified: mlpack/trunk/src/mlpack/tests/adaboost_test.cpp
==============================================================================
--- mlpack/trunk/src/mlpack/tests/adaboost_test.cpp	(original)
+++ mlpack/trunk/src/mlpack/tests/adaboost_test.cpp	Sun Aug 17 03:09:20 2014
@@ -53,7 +53,8 @@
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
-  BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+  double ztP = a.GetztProduct();
+  BOOST_REQUIRE(hammingLoss <= ztP);
 }
 
 /**
@@ -139,7 +140,8 @@
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
-  BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+  double ztP = a.GetztProduct();
+  BOOST_REQUIRE(hammingLoss <= ztP);
 }
 
 /**
@@ -226,7 +228,8 @@
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
-  BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+  double ztP = a.GetztProduct();
+  BOOST_REQUIRE(hammingLoss <= ztP);
 }
 
 /**
@@ -316,7 +319,8 @@
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
-  BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+  double ztP = a.GetztProduct();
+  BOOST_REQUIRE(hammingLoss <= ztP);
 }
 
 /**
@@ -413,7 +417,8 @@
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
-  BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+  double ztP = a.GetztProduct();
+  BOOST_REQUIRE(hammingLoss <= ztP);
 }
 
 /**
@@ -508,7 +513,8 @@
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
-  BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+  double ztP = a.GetztProduct();
+  BOOST_REQUIRE(hammingLoss <= ztP);
 }
 
 /**