[mlpack-git] master: Style and documentation consistency fixes. (74f5ac9)

gitdub at big.cc.gt.atl.ga.us
Mon Nov 30 17:24:21 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/10b9d45b806a3e879b0564d78ccb183ebc7051ba...31c557d9cc7e4da57fd8a246085c19e076d12271

>---------------------------------------------------------------

commit 74f5ac9c2f091b55ad9ba1b63d067116e625001d
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sat Nov 21 15:01:02 2015 +0000

    Style and documentation consistency fixes.


>---------------------------------------------------------------

74f5ac9c2f091b55ad9ba1b63d067116e625001d
 src/mlpack/methods/adaboost/adaboost.hpp      |  77 +++++--
 src/mlpack/methods/adaboost/adaboost_impl.hpp | 145 ++++++------
 src/mlpack/tests/adaboost_test.cpp            | 303 ++++++++++----------------
 3 files changed, 234 insertions(+), 291 deletions(-)

diff --git a/src/mlpack/methods/adaboost/adaboost.hpp b/src/mlpack/methods/adaboost/adaboost.hpp
index a964ed7..320982d 100644
--- a/src/mlpack/methods/adaboost/adaboost.hpp
+++ b/src/mlpack/methods/adaboost/adaboost.hpp
@@ -30,19 +30,63 @@
 namespace mlpack {
 namespace adaboost {
 
+/**
+ * The AdaBoost class.  AdaBoost is a boosting algorithm, meaning that it
+ * combines an ensemble of weak learners to produce a strong learner.  For more
+ * information on AdaBoost, see the following paper:
+ *
+ * @code
+ * @article{schapire1999improved,
+ *   author = {Schapire, Robert E. and Singer, Yoram},
+ *   title = {Improved Boosting Algorithms Using Confidence-rated Predictions},
+ *   journal = {Machine Learning},
+ *   volume = {37},
+ *   number = {3},
+ *   month = dec,
+ *   year = {1999},
+ *   issn = {0885-6125},
+ *   pages = {297--336},
+ * }
+ * @endcode
+ *
+ * This class is general, and can be used with any type of weak learner, so long
+ * as the learner implements the following functions:
+ *
+ * @code
+ * // A boosting constructor, which learns using the training parameters of the
+ * // given other WeakLearner, but uses the given instance weights for training.
+ * WeakLearner(WeakLearner& other,
+ *             const MatType& data,
+ *             const arma::Row<size_t>& labels,
+ *             const arma::rowvec& weights);
+ *
+ * // Given the test points, classify them and output predictions into
+ * // predictedLabels.
+ * void Classify(const MatType& data, arma::Row<size_t>& predictedLabels);
+ * @endcode
+ *
+ * For more information on and examples of weak learners, see
+ * perceptron::Perceptron<> and decision_stump::DecisionStump<>.
+ *
+ * @tparam MatType Data matrix type (i.e. arma::mat or arma::sp_mat).
+ * @tparam WeakLearner Type of weak learner to use.
+ */
 template<typename MatType = arma::mat,
          typename WeakLearner = mlpack::perceptron::Perceptron<> >
 class AdaBoost
 {
  public:
   /**
-   * Constructor. Currently runs the AdaBoost.mh algorithm.
+   * Constructor.  This runs the AdaBoost.MH algorithm to provide a trained
+   * boosting model.  This constructor takes an already-initialized weak
+   * learner; all other weak learners will learn with the same parameters as the
+   * given weak learner.
    *
    * @param data Input data.
    * @param labels Corresponding labels.
    * @param iterations Number of boosting rounds.
    * @param tol The tolerance for change in values of rt.
-   * @param other Weak Learner, which has been initialized already.
+   * @param other Weak learner that has already been initialized.
    */
   AdaBoost(const MatType& data,
            const arma::Row<size_t>& labels,
@@ -50,39 +94,42 @@ class AdaBoost
            const double tol,
            const WeakLearner& other);
 
-  // Stores the final classification of the Labels.
+  // Stores the final classification of the labels.
   arma::Row<size_t> finalHypothesis;
 
-  // Return the value of ztProduct
+  // Return the value of ztProduct.
   double GetztProduct() { return ztProduct; }
 
   // The tolerance for change in rt and when to stop.
   double tolerance;
 
   /**
-   * Classification Function.
+   * Classify the given test points.
+   *
    * @param test Testing data.
-   * @param predictedLabels Vector to store the predicted labels of the
-   *                         test set.
+   * @param predictedLabels Vector in which the predicted labels of the test
+   *      set will be stored.
    */
   void Classify(const MatType& test, arma::Row<size_t>& predictedLabels);
 
 private:
   /**
-   *  This function helps in building the Weight Distribution matrix
-   *  which is updated during every iteration. It calculates the
-   *  "difficulty" in classifying a point by adding the weights for all
-   *  instances, using D.
+   * This function helps in building the weight distribution matrix, which is
+   * updated during every iteration.  It calculates the "difficulty" in
+   * classifying a point by summing the weights for all instances, using D.
    *
-   *  @param D The 2 Dimensional weight matrix from which the weights are
-   *            to be calculated.
-   *  @param weights The output weight vector.
+   * @param D The two-dimensional weight matrix from which the weights are
+   *     to be calculated.
+   * @param weights The output weight vector.
    */
   void BuildWeightMatrix(const arma::mat& D, arma::rowvec& weights);
 
-  size_t numClasses;
+  //! The number of classes in the model.
+  size_t classes;
 
+  //! The vector of weak learners.
   std::vector<WeakLearner> wl;
+  //! The weights corresponding to each weak learner.
   std::vector<double> alpha;
 
   // To check for the bound for the hammingLoss.
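
For context, here is a minimal usage sketch of the interface documented in
this header, modeled on the patterns in adaboost_test.cpp further below
(filenames and parameter values are illustrative, not part of the commit):

    #include <mlpack/core.hpp>
    #include <mlpack/methods/adaboost/adaboost.hpp>

    using namespace mlpack;

    int main()
    {
      // Load training data and labels (illustrative filenames).
      arma::mat data;
      data::Load("train.csv", data);
      arma::Mat<size_t> labels;
      data::Load("train_labels.csv", labels);

      // An already-initialized weak learner: a perceptron run for 400
      // iterations, as in the tests.
      perceptron::Perceptron<> p(data, labels.row(0),
          arma::max(labels.row(0)) + 1, 400);

      // Boost it: 100 rounds, tolerance 1e-10 on the change in rt.
      AdaBoost<> a(data, labels.row(0), 100, 1e-10, p);

      // Classify points with the boosted model.
      arma::Row<size_t> predictions(data.n_cols);
      a.Classify(data, predictions);

      return 0;
    }
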
diff --git a/src/mlpack/methods/adaboost/adaboost_impl.hpp b/src/mlpack/methods/adaboost/adaboost_impl.hpp
index dc585f3..367c975 100644
--- a/src/mlpack/methods/adaboost/adaboost_impl.hpp
+++ b/src/mlpack/methods/adaboost/adaboost_impl.hpp
@@ -27,12 +27,13 @@ namespace mlpack {
 namespace adaboost {
 
 /**
- *  Constructor. Currently runs the AdaBoost.mh algorithm
+ * Constructor. Currently runs the AdaBoost.MH algorithm.
  *
- *  @param data Input data
- *  @param labels Corresponding labels
- *  @param iterations Number of boosting rounds
- *  @param other Weak Learner, which has been initialized already
+ * @param data Input data.
+ * @param labels Corresponding labels.
+ * @param iterations Number of boosting rounds.
+ * @param tol Tolerance for termination of AdaBoost.MH.
+ * @param other Weak learner that has already been initialized.
  */
 template<typename MatType, typename WeakLearner>
 AdaBoost<MatType, WeakLearner>::AdaBoost(
@@ -43,81 +44,65 @@ AdaBoost<MatType, WeakLearner>::AdaBoost(
     const WeakLearner& other)
 {
   // Count the number of classes.
-  numClasses = (arma::max(labels) - arma::min(labels)) + 1;
+  classes = (arma::max(labels) - arma::min(labels)) + 1;
   tolerance = tol;
 
+  // crt is the cumulative rt value for terminating the optimization when rt is
+  // changing by less than the tolerance.
   double rt, crt, alphat = 0.0, zt;
 
-  // crt is for stopping the iterations when rt
-  // stops changing by less than a tolerant value.
-
-  // crt is cumulative rt for stopping the iterations when rt
-  // stops changing by less than a tolerant value.
-
   ztProduct = 1.0;
 
-  // To be used for prediction by the Weak Learner for prediction.
+  // To be used for prediction by the weak learner.
   arma::Row<size_t> predictedLabels(labels.n_cols);
 
-  // Use tempData to modify input Data for incorporating weights.
+  // Use tempData to modify input data for incorporating weights.
   MatType tempData(data);
 
   // This matrix is a helper matrix used to calculate the final hypothesis.
-  arma::mat sumFinalH(predictedLabels.n_cols, numClasses);
+  arma::mat sumFinalH(predictedLabels.n_cols, classes);
   sumFinalH.fill(0.0);
 
-  // load the initial weights into a 2-D matrix
-  const double initWeight = 1.0 / double(data.n_cols * numClasses);
-  arma::mat D(data.n_cols, numClasses);
+  // Load the initial weights into a 2-D matrix.
+  const double initWeight = 1.0 / double(data.n_cols * classes);
+  arma::mat D(data.n_cols, classes);
   D.fill(initWeight);
 
-  // Weights are to be compressed into this rowvector
-  // for focussing on the perceptron weights.
+  // Weights are stored in this row vector.
   arma::rowvec weights(predictedLabels.n_cols);
 
   // This is the final hypothesis.
   arma::Row<size_t> finalH(predictedLabels.n_cols);
 
-  // now start the boosting rounds
+  // Now, start the boosting rounds.
   for (int i = 0; i < iterations; i++)
   {
-    // Initialized to zero in every round.
-    // rt is used for calculation of alphat, is the weighted error
-    // rt = (sum)D(i)y(i)ht(xi)
+    // Initialized to zero in every round.  rt is used for calculation of
+    // alphat; it is the weighted error.
+    // rt = (sum) D(i) y(i) ht(xi)
     rt = 0.0;
 
     // zt is used for weight normalization.
     zt = 0.0;
 
-    // Build the weight vectors
+    // Build the weight vectors.
     BuildWeightMatrix(D, weights);
 
-    // call the other weak learner and train the labels.
+    // Use the existing weak learner to train a new one with new weights.
     WeakLearner w(other, tempData, labels, weights);
     w.Classify(tempData, predictedLabels);
 
     // Now from predictedLabels, build ht, the weak hypothesis
     // buildClassificationMatrix(ht, predictedLabels);
 
-    // Now, start calculation of alpha(t) using ht
-
-    for (size_t j = 0;j < D.n_rows; j++) // instead of D, ht
+    // Now, calculate alpha(t) using ht.
+    for (size_t j = 0; j < D.n_rows; j++) // instead of D, ht
     {
       if (predictedLabels(j) == labels(j))
-      {
-        // for (int k = 0;k < numClasses; k++)
-        //   rt += D(j,k);
         rt += arma::accu(D.row(j));
-      }
-
       else
-      {
-        // for (int k = 0;k < numClasses; k++)
-        //   rt -= D(j,k);
         rt -= arma::accu(D.row(j));
-      }
     }
-    // end calculation of rt
 
     if (i > 0)
     {
@@ -126,61 +111,59 @@ AdaBoost<MatType, WeakLearner>::AdaBoost(
     }
     crt = rt;
 
-    // our goal is to find alphat which mizimizes or approximately minimizes
-    // the value of Z as a function of alpha.
+    // Our goal is to find alphat which minimizes or approximately minimizes
+    // the value of Z as a function of alpha.
     alphat = 0.5 * log((1 + rt) / (1 - rt));
-    // end calculation of alphat
 
     alpha.push_back(alphat);
     wl.push_back(w);
 
-    // now start modifying weights
-    for (size_t j = 0;j < D.n_rows; j++)
+    // Now start modifying the weights.
+    for (size_t j = 0; j < D.n_rows; j++)
     {
       double expo = exp(alphat);
       if (predictedLabels(j) == labels(j))
       {
-          for (size_t k = 0;k < D.n_cols; k++)
-          {
-            // we calculate zt, the normalization constant
-            zt += D(j,k) / expo; // * exp(-1 * alphat * yt(j,k) * ht(j,k));
-            D(j,k) = D(j,k) / expo;
-
-            // adding to the matrix of FinalHypothesis
-            // sumFinalH(j,k) += (alphat * ht(j,k));
-            if (k == labels(j))
-              sumFinalH(j,k) += (alphat);// * ht(j,k));
-            else
-              sumFinalH(j,k) -= (alphat);
-          }
+        for (size_t k = 0; k < D.n_cols; k++)
+        {
+          // We calculate zt, the normalization constant.
+          zt += D(j, k) / expo; // * exp(-1 * alphat * yt(j,k) * ht(j,k));
+          D(j, k) = D(j, k) / expo;
+
+          // Add to the final hypothesis matrix.
+          // sumFinalH(j, k) += (alphat * ht(j, k));
+          if (k == labels(j))
+            sumFinalH(j, k) += (alphat); // * ht(j, k));
+          else
+            sumFinalH(j, k) -= (alphat);
+        }
       }
       else
       {
-        for (size_t k = 0;k < D.n_cols; k++)
-          {
-            // we calculate zt, the normalization constant
-            zt += D(j,k) * expo;
-            D(j,k) = D(j,k) * expo;
-
-            // adding to the matrix of FinalHypothesis
-            if (k == labels(j))
-              sumFinalH(j,k) += (alphat);// * ht(j,k));
-            else
-              sumFinalH(j,k) -= (alphat);
-          }
+        for (size_t k = 0; k < D.n_cols; k++)
+        {
+          // We calculate zt, the normalization constant.
+          zt += D(j, k) * expo;
+          D(j, k) = D(j, k) * expo;
+
+          // Add to the final hypothesis matrix.
+          if (k == labels(j))
+            sumFinalH(j, k) += (alphat); // * ht(j,k));
+          else
+            sumFinalH(j, k) -= (alphat);
+        }
       }
     }
 
-    // normalization of D
-    D = D / zt;
+    // Normalize D.
+    D /= zt;
 
-    // Accumulating the value of zt for the Hamming Loss bound.
+    // Accumulate the value of zt for the Hamming loss bound.
     ztProduct *= zt;
   }
 
-  // Iterations are over, now build a strong hypothesis
-  // from a weighted combination of these weak hypotheses.
-
+  // Iterations are over, now build a strong hypothesis from a weighted
+  // combination of these weak hypotheses.
   arma::colvec tempSumFinalH;
   arma::uword max_index;
   arma::mat sfh = sumFinalH.t();
@@ -191,14 +174,12 @@ AdaBoost<MatType, WeakLearner>::AdaBoost(
     tempSumFinalH.max(max_index);
     finalH(i) = max_index;
   }
+
   finalHypothesis = finalH;
 }
 
 /**
- * Classification Function.
- * @param test Testing data.
- * @param predictedLabels Vector to store the predicted labels of the
- *                         test set.
+ * Classify the given test points.
  */
 template <typename MatType, typename WeakLearner>
 void AdaBoost<MatType, WeakLearner>::Classify(
@@ -206,12 +187,12 @@ void AdaBoost<MatType, WeakLearner>::Classify(
     arma::Row<size_t>& predictedLabels)
 {
   arma::Row<size_t> tempPredictedLabels(predictedLabels.n_cols);
-  arma::mat cMatrix(numClasses, test.n_cols);
+  arma::mat cMatrix(classes, test.n_cols);
 
   cMatrix.zeros();
   predictedLabels.zeros();
 
-  for (size_t i = 0;i < wl.size(); i++)
+  for (size_t i = 0; i < wl.size(); i++)
   {
     wl[i].Classify(test, tempPredictedLabels);
 
@@ -248,10 +229,8 @@ void AdaBoost<MatType, WeakLearner>::BuildWeightMatrix(
   weights.fill(0.0);
 
   for (i = 0; i < D.n_rows; i++)
-  {
     for (j = 0; j < D.n_cols; j++)
       weights(i) += D(i, j);
-  }
 }
 
 } // namespace adaboost
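
For readers cross-checking the implementation against the Schapire & Singer
paper cited in adaboost.hpp, the per-round quantities above reduce to the
following (an ASCII sketch in the notation of the code comments; the code
folds the exp(+/- alphat) factor into expo and handles the correct and
incorrect prediction cases in separate branches):

    rt      = (sum over i, k) D(i, k) y(i, k) ht(i, k)
    alphat  = 0.5 * log((1 + rt) / (1 - rt))
    D(i, k) <- D(i, k) * exp(-alphat * y(i, k) * ht(i, k)) / zt

Here zt is the per-round normalization constant; the product of the zt
values, accumulated in ztProduct, upper-bounds the training Hamming loss,
which is what several of the tests below check.  The loop also terminates
early once rt changes by less than the tolerance between rounds, with crt
carrying the value from the previous round for that comparison.
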
diff --git a/src/mlpack/tests/adaboost_test.cpp b/src/mlpack/tests/adaboost_test.cpp
index ef8269c..de6c582 100644
--- a/src/mlpack/tests/adaboost_test.cpp
+++ b/src/mlpack/tests/adaboost_test.cpp
@@ -18,39 +18,35 @@ using namespace mlpack::adaboost;
 BOOST_AUTO_TEST_SUITE(AdaBoostTest);
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset.
- *  It checks whether the hamming loss breaches the upperbound, which
- *  is provided by ztAccumulator.
+ * This test case runs the AdaBoost.MH algorithm on the UCI Iris dataset.  It
+ * checks whether the Hamming loss breaches the upper bound, which is provided
+ * by ztAccumulator.
  */
 BOOST_AUTO_TEST_CASE(HammingLossBoundIris)
 {
   arma::mat inputData;
-
   if (!data::Load("iris.txt", inputData))
     BOOST_FAIL("Cannot load test dataset iris.txt!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("iris_labels.txt",labels))
     BOOST_FAIL("Cannot load labels for iris iris_labels.txt");
 
-  // no need to map the labels here
-
   // Define your own weak learner, perceptron in this case.
-  // Run the perceptron for perceptron_iter iterations.
+  // Run the perceptron for perceptronIter iterations.
   int perceptronIter = 400;
 
   perceptron::Perceptron<> p(inputData, labels.row(0), max(labels.row(0)) + 1,
       perceptronIter);
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   int iterations = 100;
   double tolerance = 1e-10;
   AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
 
   int countError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != a.finalHypothesis(i))
+    if (labels(i) != a.finalHypothesis(i))
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
@@ -59,10 +55,9 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundIris)
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset.
- *  It checks if the error returned by running a single instance of the
- *  weak learner is worse than running the boosted weak learner using
- *  adaboost.
+ * This test case runs the AdaBoost.MH algorithm on the UCI Iris dataset.  It
+ * checks if the error returned by a single instance of the weak learner is
+ * worse than that of the boosted weak learner built by AdaBoost.
  */
 BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris)
 {
@@ -76,10 +71,8 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris)
   if (!data::Load("iris_labels.txt",labels))
     BOOST_FAIL("Cannot load labels for iris iris_labels.txt");
 
-  // no need to map the labels here
-
   // Define your own weak learner, perceptron in this case.
-  // Run the perceptron for perceptron_iter iterations.
+  // Run the perceptron for perceptronIter iterations.
   int perceptronIter = 400;
 
   arma::Row<size_t> perceptronPrediction(labels.n_cols);
@@ -89,17 +82,17 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris)
 
   int countWeakLearnerError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != perceptronPrediction(i))
+    if (labels(i) != perceptronPrediction(i))
       countWeakLearnerError++;
   double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   int iterations = 100;
   double tolerance = 1e-10;
   AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
   int countError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != a.finalHypothesis(i))
+    if (labels(i) != a.finalHypothesis(i))
       countError++;
   double error = (double) countError / labels.n_cols;
 
@@ -107,39 +100,34 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris)
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on the UCI Vertebral
- *  Column dataset.
- *  It checks whether the hamming loss breaches the upperbound, which
- *  is provided by ztAccumulator.
+ * This test case runs the AdaBoost.MH algorithm on the UCI Vertebral Column
+ * dataset.  It checks whether the Hamming loss breaches the upper bound, which
+ * is provided by ztAccumulator.
  */
 BOOST_AUTO_TEST_CASE(HammingLossBoundVertebralColumn)
 {
   arma::mat inputData;
-
   if (!data::Load("vc2.txt", inputData))
     BOOST_FAIL("Cannot load test dataset vc2.txt!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("vc2_labels.txt",labels))
     BOOST_FAIL("Cannot load labels for vc2_labels.txt");
 
-  // no need to map the labels here
-
   // Define your own weak learner, perceptron in this case.
-  // Run the perceptron for perceptron_iter iterations.
+  // Run the perceptron for perceptronIter iterations.
   int perceptronIter = 800;
 
   perceptron::Perceptron<> p(inputData, labels.row(0), max(labels.row(0)) + 1,
       perceptronIter);
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   int iterations = 50;
   double tolerance = 1e-10;
   AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
   int countError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != a.finalHypothesis(i))
+    if (labels(i) != a.finalHypothesis(i))
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
@@ -148,28 +136,22 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundVertebralColumn)
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on the UCI Vertebral
- *  Column dataset.
- *  It checks if the error returned by running a single instance of the
- *  weak learner is worse than running the boosted weak learner using
- *  adaboost.
+ * This test case runs the AdaBoost.MH algorithm on the UCI Vertebral Column
+ * dataset.  It checks if the error returned by a single instance of the weak
+ * learner is worse than that of the boosted weak learner built by AdaBoost.
  */
 BOOST_AUTO_TEST_CASE(WeakLearnerErrorVertebralColumn)
 {
   arma::mat inputData;
-
   if (!data::Load("vc2.txt", inputData))
     BOOST_FAIL("Cannot load test dataset vc2.txt!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("vc2_labels.txt",labels))
     BOOST_FAIL("Cannot load labels for vc2_labels.txt");
 
-  // no need to map the labels here
-
   // Define your own weak learner, perceptron in this case.
-  // Run the perceptron for perceptron_iter iterations.
+  // Run the perceptron for perceptronIter iterations.
   int perceptronIter = 800;
 
   arma::Row<size_t> perceptronPrediction(labels.n_cols);
@@ -179,11 +161,11 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorVertebralColumn)
 
   int countWeakLearnerError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != perceptronPrediction(i))
+    if (labels(i) != perceptronPrediction(i))
       countWeakLearnerError++;
   double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   int iterations = 50;
   double tolerance = 1e-10;
   AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
@@ -197,40 +179,34 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorVertebralColumn)
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on non-linearly
- *  separable dataset.
- *  It checks whether the hamming loss breaches the upperbound, which
- *  is provided by ztAccumulator.
+ * This test case runs the AdaBoost.MH algorithm on a non-linearly separable
+ * dataset.  It checks whether the Hamming loss breaches the upper bound, which
+ * is provided by ztAccumulator.
  */
 BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData)
 {
   arma::mat inputData;
-
   if (!data::Load("train_nonlinsep.txt", inputData))
     BOOST_FAIL("Cannot load test dataset train_nonlinsep.txt!");
 
   arma::Mat<size_t> labels;
-
-
   if (!data::Load("train_labels_nonlinsep.txt",labels))
     BOOST_FAIL("Cannot load labels for train_labels_nonlinsep.txt");
 
-  // no need to map the labels here
-
   // Define your own weak learner, perceptron in this case.
-  // Run the perceptron for perceptron_iter iterations.
+  // Run the perceptron for perceptronIter iterations.
   int perceptronIter = 800;
 
   perceptron::Perceptron<> p(inputData, labels.row(0), max(labels.row(0)) + 1,
       perceptronIter);
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   int iterations = 50;
   double tolerance = 1e-10;
   AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
   int countError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != a.finalHypothesis(i))
+    if (labels(i) != a.finalHypothesis(i))
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
@@ -239,28 +215,22 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData)
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on a non-linearly
- *  separable dataset.
- *  It checks if the error returned by running a single instance of the
- *  weak learner is worse than running the boosted weak learner using
- *  adaboost.
+ * This test case runs the AdaBoost.MH algorithm on a non-linearly separable
+ * dataset.  It checks if the error returned by a single instance of the weak
+ * learner is worse than that of the boosted weak learner built by AdaBoost.
  */
 BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData)
 {
   arma::mat inputData;
-
   if (!data::Load("train_nonlinsep.txt", inputData))
     BOOST_FAIL("Cannot load test dataset train_nonlinsep.txt!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("train_labels_nonlinsep.txt",labels))
     BOOST_FAIL("Cannot load labels for train_labels_nonlinsep.txt");
 
-  // no need to map the labels here
-
   // Define your own weak learner, perceptron in this case.
-  // Run the perceptron for perceptron_iter iterations.
+  // Run the perceptron for perceptronIter iterations.
   int perceptronIter = 800;
 
   arma::Row<size_t> perceptronPrediction(labels.n_cols);
@@ -274,13 +244,13 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData)
       countWeakLearnerError++;
   double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   int iterations = 50;
   double tolerance = 1e-10;
   AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
   int countError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != a.finalHypothesis(i))
+    if (labels(i) != a.finalHypothesis(i))
       countError++;
   double error = (double) countError / labels.n_cols;
 
@@ -288,28 +258,23 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData)
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset.
- *  It checks whether the hamming loss breaches the upperbound, which
- *  is provided by ztAccumulator.
- *  This is for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.MH algorithm on the UCI Iris dataset.  It
+ * checks whether the Hamming loss breaches the upper bound, which is provided
+ * by ztAccumulator.  This uses decision stumps as the weak learner.
  */
 BOOST_AUTO_TEST_CASE(HammingLossIris_DS)
 {
   arma::mat inputData;
-
   if (!data::Load("iris.txt", inputData))
     BOOST_FAIL("Cannot load test dataset iris.txt!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("iris_labels.txt",labels))
     BOOST_FAIL("Cannot load labels for iris_labels.txt");
 
-  // no need to map the labels here
-
-  // Define your own weak learner, Decision Stumps in this case.
+  // Define your own weak learner, decision stumps in this case.
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   const size_t numClasses = 3;
   const size_t inpBucketSize = 6;
 
@@ -318,11 +283,11 @@ BOOST_AUTO_TEST_CASE(HammingLossIris_DS)
   int iterations = 50;
   double tolerance = 1e-10;
 
-  AdaBoost<arma::mat, mlpack::decision_stump::DecisionStump<> > a(inputData,
+  AdaBoost<arma::mat, decision_stump::DecisionStump<>> a(inputData,
           labels.row(0), iterations, tolerance, ds);
   int countError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != a.finalHypothesis(i))
+    if (labels(i) != a.finalHypothesis(i))
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
@@ -331,28 +296,24 @@ BOOST_AUTO_TEST_CASE(HammingLossIris_DS)
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on a non-linearly
- *  separable dataset.
- *  It checks if the error returned by running a single instance of the
- *  weak learner is worse than running the boosted weak learner using
- *  adaboost.
- *  This is for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.MH algorithm on a non-linearly separable
+ * dataset.  It checks if the error returned by a single instance of the weak
+ * learner is worse than that of the boosted weak learner built by AdaBoost.
+ * This uses decision stumps as the weak learner.
  */
 BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris_DS)
 {
   arma::mat inputData;
-
   if (!data::Load("iris.txt", inputData))
     BOOST_FAIL("Cannot load test dataset iris.txt!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("iris_labels.txt",labels))
     BOOST_FAIL("Cannot load labels for iris_labels.txt");
 
   // no need to map the labels here
 
-  // Define your own weak learner, Decision Stump in this case.
+  // Define your own weak learner, decision stumps in this case.
 
   const size_t numClasses = 3;
   const size_t inpBucketSize = 6;
@@ -369,59 +330,52 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris_DS)
       countWeakLearnerError++;
   double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   int iterations = 50;
   double tolerance = 1e-10;
 
-  AdaBoost<arma::mat, mlpack::decision_stump::DecisionStump<> > a(inputData,
-           labels.row(0), iterations, tolerance, ds);
+  AdaBoost<arma::mat, decision_stump::DecisionStump<> > a(inputData,
+      labels.row(0), iterations, tolerance, ds);
   int countError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != a.finalHypothesis(i))
+    if (labels(i) != a.finalHypothesis(i))
       countError++;
   double error = (double) countError / labels.n_cols;
 
   BOOST_REQUIRE(error <= weakLearnerErrorRate);
 }
 /**
- *  This test case runs the AdaBoost.mh algorithm on the UCI Vertebral
- *  Column dataset.
- *  It checks if the error returned by running a single instance of the
- *  weak learner is worse than running the boosted weak learner using
- *  adaboost.
- *  This is for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.MH algorithm on the UCI Vertebral Column
+ * dataset.  It checks whether the Hamming loss breaches the upper bound, which
+ * is provided by ztAccumulator.  This uses decision stumps as the weak
+ * learner.
  */
 BOOST_AUTO_TEST_CASE(HammingLossBoundVertebralColumn_DS)
 {
   arma::mat inputData;
-
   if (!data::Load("vc2.txt", inputData))
     BOOST_FAIL("Cannot load test dataset vc2.txt!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("vc2_labels.txt",labels))
     BOOST_FAIL("Cannot load labels for vc2_labels.txt");
 
-  // no need to map the labels here
-
-  // Define your own weak learner, Decision Stump in this case.
-
-  // Define parameters for the adaboost
+  // Define your own weak learner, decision stumps in this case.
   const size_t numClasses = 3;
   const size_t inpBucketSize = 6;
 
   decision_stump::DecisionStump<> ds(inputData, labels.row(0),
                                      numClasses, inpBucketSize);
 
+  // Define parameters for AdaBoost.
   int iterations = 50;
   double tolerance = 1e-10;
 
-  AdaBoost<arma::mat, mlpack::decision_stump::DecisionStump<> > a(inputData,
-           labels.row(0), iterations, tolerance, ds);
+  AdaBoost<arma::mat, decision_stump::DecisionStump<>> a(inputData,
+      labels.row(0), iterations, tolerance, ds);
   int countError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != a.finalHypothesis(i))
+    if (labels(i) != a.finalHypothesis(i))
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
@@ -430,87 +384,74 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundVertebralColumn_DS)
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on the UCI Vertebral
- *  Column dataset.
- *  It checks if the error returned by running a single instance of the
- *  weak learner is worse than running the boosted weak learner using
- *  adaboost.
- *  This is for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.MH algorithm on the UCI Vertebral Column
+ * dataset.  It checks if the error returned by a single instance of the weak
+ * learner is worse than that of the boosted weak learner built by AdaBoost.
+ * This uses decision stumps as the weak learner.
  */
 BOOST_AUTO_TEST_CASE(WeakLearnerErrorVertebralColumn_DS)
 {
   arma::mat inputData;
-
   if (!data::Load("vc2.txt", inputData))
     BOOST_FAIL("Cannot load test dataset vc2.txt!");
 
   arma::Mat<size_t> labels;
-
-  if (!data::Load("vc2_labels.txt",labels))
+  if (!data::Load("vc2_labels.txt", labels))
     BOOST_FAIL("Cannot load labels for vc2_labels.txt");
 
-  // no need to map the labels here
-
-  // Define your own weak learner, Decision Stump in this case.
-
+  // Define your own weak learner, decision stumps in this case.
   const size_t numClasses = 3;
   const size_t inpBucketSize = 6;
-
   arma::Row<size_t> dsPrediction(labels.n_cols);
 
-  decision_stump::DecisionStump<> ds(inputData, labels.row(0),
-                                     numClasses, inpBucketSize);
+  decision_stump::DecisionStump<> ds(inputData, labels.row(0), numClasses,
+      inpBucketSize);
 
   int countWeakLearnerError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != dsPrediction(i))
+    if (labels(i) != dsPrediction(i))
       countWeakLearnerError++;
+
   double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   int iterations = 50;
   double tolerance = 1e-10;
-  AdaBoost<arma::mat, mlpack::decision_stump::DecisionStump<> > a(inputData,
-           labels.row(0), iterations, tolerance, ds);
+  AdaBoost<arma::mat, decision_stump::DecisionStump<>> a(inputData,
+      labels.row(0), iterations, tolerance, ds);
+
   int countError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != a.finalHypothesis(i))
+    if (labels(i) != a.finalHypothesis(i))
       countError++;
   double error = (double) countError / labels.n_cols;
 
   BOOST_REQUIRE(error <= weakLearnerErrorRate);
 }
+
 /**
- *  This test case runs the AdaBoost.mh algorithm on non-linearly
- *  separable dataset.
- *  It checks whether the hamming loss breaches the upperbound, which
- *  is provided by ztAccumulator.
- *  This is for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.MH algorithm on a non-linearly separable
+ * dataset.  It checks whether the Hamming loss breaches the upper bound, which
+ * is provided by ztAccumulator.  This uses decision stumps as the weak learner.
  */
 BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData_DS)
 {
   arma::mat inputData;
-
   if (!data::Load("train_nonlinsep.txt", inputData))
     BOOST_FAIL("Cannot load test dataset train_nonlinsep.txt!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("train_labels_nonlinsep.txt",labels))
     BOOST_FAIL("Cannot load labels for train_labels_nonlinsep.txt");
 
-
-  // no need to map the labels here
-
-  // Define your own weak learner, Decision Stump in this case.
-
-  // Define parameters for the adaboost
+  // Define your own weak learner, decision stumps in this case.
   const size_t numClasses = 2;
   const size_t inpBucketSize = 6;
 
   decision_stump::DecisionStump<> ds(inputData, labels.row(0),
                                      numClasses, inpBucketSize);
 
+  // Define parameters for AdaBoost.
   int iterations = 50;
   double tolerance = 1e-10;
 
@@ -518,7 +459,7 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData_DS)
            labels.row(0), iterations, tolerance, ds);
   int countError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != a.finalHypothesis(i))
+    if (labels(i) != a.finalHypothesis(i))
       countError++;
   double hammingLoss = (double) countError / labels.n_cols;
 
@@ -527,29 +468,22 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData_DS)
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on a non-linearly
- *  separable dataset.
- *  It checks if the error returned by running a single instance of the
- *  weak learner is worse than running the boosted weak learner using
- *  adaboost.
- *  This for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.MH algorithm on a non-linearly separable
+ * dataset.  It checks if the error returned by a single instance of the weak
+ * learner is worse than that of the boosted weak learner built by AdaBoost.
+ * This uses decision stumps as the weak learner.
  */
 BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData_DS)
 {
   arma::mat inputData;
-
   if (!data::Load("train_nonlinsep.txt", inputData))
     BOOST_FAIL("Cannot load test dataset train_nonlinsep.txt!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("train_labels_nonlinsep.txt",labels))
     BOOST_FAIL("Cannot load labels for train_labels_nonlinsep.txt");
 
-  // no need to map the labels here
-
-  // Define your own weak learner, Decision Stump in this case.
-
+  // Define your own weak learner, decision stumps in this case.
   const size_t numClasses = 2;
   const size_t inpBucketSize = 3;
 
@@ -564,7 +498,7 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData_DS)
       countWeakLearnerError++;
   double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   int iterations = 500;
   double tolerance = 1e-23;
 
@@ -572,7 +506,7 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData_DS)
            labels.row(0), iterations, tolerance, ds);
   int countError = 0;
   for (size_t i = 0; i < labels.n_cols; i++)
-    if(labels(i) != a.finalHypothesis(i))
+    if (labels(i) != a.finalHypothesis(i))
       countError++;
   double error = (double) countError / labels.n_cols;
 
@@ -580,28 +514,23 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData_DS)
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on the UCI Vertebral
- *  Column dataset.
- *  It tests the Classify function and checks for a satisfiable error rate.
+ * This test case runs the AdaBoost.MH algorithm on the UCI Vertebral Column
+ * dataset.  It tests the Classify() function and checks for a satisfactory
+ * error rate.
  */
 BOOST_AUTO_TEST_CASE(ClassifyTest_VERTEBRALCOL)
 {
   mlpack::math::RandomSeed(std::time(NULL));
   arma::mat inputData;
-
   if (!data::Load("vc2.txt", inputData))
     BOOST_FAIL("Cannot load test dataset vc2.txt!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("vc2_labels.txt",labels))
     BOOST_FAIL("Cannot load labels for vc2_labels.txt");
 
-  // no need to map the labels here
-
   // Define your own weak learner, perceptron in this case.
-  // Run the perceptron for perceptron_iter iterations.
-
+  // Run the perceptron for perceptronIter iterations.
   int perceptronIter = 1000;
 
   arma::mat testData;
@@ -619,8 +548,7 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_VERTEBRALCOL)
       perceptronIter);
   p.Classify(inputData, perceptronPrediction);
 
-  // Define parameters for the adaboost
-
+  // Define parameters for AdaBoost.
   int iterations = 100;
   double tolerance = 1e-10;
   AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
@@ -631,37 +559,30 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_VERTEBRALCOL)
   int localError = 0;
 
   for (size_t i = 0; i < trueTestLabels.n_cols; i++)
-    if(trueTestLabels(i) != predictedLabels(i))
+    if (trueTestLabels(i) != predictedLabels(i))
       localError++;
 
   double lError = (double) localError / trueTestLabels.n_cols;
 
   BOOST_REQUIRE(lError <= 0.30);
-
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on a non linearly
- *  separable dataset.
- *  It tests the Classify function and checks for a satisfiable error rate.
+ * This test case runs the AdaBoost.MH algorithm on a non-linearly separable
+ * dataset.  It tests the Classify() function and checks for a satisfactory
+ * error rate.
  */
 BOOST_AUTO_TEST_CASE(ClassifyTest_NONLINSEP)
 {
   arma::mat inputData;
-
   if (!data::Load("train_nonlinsep.txt", inputData))
     BOOST_FAIL("Cannot load test dataset train_nonlinsep.txt!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("train_labels_nonlinsep.txt",labels))
     BOOST_FAIL("Cannot load labels for train_labels_nonlinsep.txt");
 
-  // no need to map the labels here
-
-  // Define your own weak learner, perceptron in this case.
-  // Run the perceptron for perceptron_iter iterations.
-
+  // Define your own weak learner; in this test decision stumps are used.
   const size_t numClasses = 2;
   const size_t inpBucketSize = 3;
 
@@ -680,7 +601,7 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_NONLINSEP)
   decision_stump::DecisionStump<> ds(inputData, labels.row(0),
                                      numClasses, inpBucketSize);
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   int iterations = 50;
   double tolerance = 1e-10;
   AdaBoost<arma::mat, mlpack::decision_stump::DecisionStump<> > a(
@@ -691,7 +612,7 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_NONLINSEP)
 
   int localError = 0;
   for (size_t i = 0; i < trueTestLabels.n_cols; i++)
-    if(trueTestLabels(i) != predictedLabels(i))
+    if (trueTestLabels(i) != predictedLabels(i))
       localError++;
 
   double lError = (double) localError / trueTestLabels.n_cols;
@@ -700,33 +621,29 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_NONLINSEP)
 }
 
 /**
- *  This test case runs the AdaBoost.mh algorithm on the UCI Iris Dataset.
- *  It trains it on two thirds of the Iris dataset (iris_train.csv),
- *  and tests on the remaining third of the dataset (iris_test.csv).
- *  It tests the Classify function and checks for a satisfiable error rate.
+ * This test case runs the AdaBoost.MH algorithm on the UCI Iris dataset.  It
+ * trains it on two thirds of the Iris dataset (iris_train.csv), and tests on
+ * the remaining third of the dataset (iris_test.csv).  It tests the Classify()
+ * function and checks for a satisfactory error rate.
  */
 BOOST_AUTO_TEST_CASE(ClassifyTest_IRIS)
 {
   arma::mat inputData;
-
   if (!data::Load("iris_train.csv", inputData))
     BOOST_FAIL("Cannot load test dataset iris_train.csv!");
 
   arma::Mat<size_t> labels;
-
   if (!data::Load("iris_train_labels.csv",labels))
     BOOST_FAIL("Cannot load labels for iris_train_labels.csv");
 
-  // no need to map the labels here
-
   // Define your own weak learner, perceptron in this case.
-  // Run the perceptron for perceptron_iter iterations.
+  // Run the perceptron for perceptronIter iterations.
   int perceptronIter = 800;
 
   perceptron::Perceptron<> p(inputData, labels.row(0), max(labels.row(0)) + 1,
       perceptronIter);
 
-  // Define parameters for the adaboost
+  // Define parameters for AdaBoost.
   int iterations = 50;
   double tolerance = 1e-10;
   AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
@@ -745,7 +662,7 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_IRIS)
 
   int localError = 0;
   for (size_t i = 0; i < trueTestLabels.n_cols; i++)
-    if(trueTestLabels(i) != predictedLabels(i))
+    if (trueTestLabels(i) != predictedLabels(i))
       localError++;
   double lError = (double) localError / labels.n_cols;
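
A side note on the mismatch-counting loops repeated throughout these tests:
the same error rate can be computed with Armadillo primitives.  A sketch,
equivalent in effect but not part of the commit:

    // (labels.row(0) != a.finalHypothesis) yields a 0/1 matrix of
    // mismatches; accu() sums its entries, giving the mismatch count.
    const double errorRate =
        arma::accu(labels.row(0) != a.finalHypothesis) /
        (double) labels.n_cols;
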
 


