[mlpack-git] master: Style and documentation consistency fixes. (74f5ac9)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Mon Nov 30 17:24:21 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/10b9d45b806a3e879b0564d78ccb183ebc7051ba...31c557d9cc7e4da57fd8a246085c19e076d12271
>---------------------------------------------------------------
commit 74f5ac9c2f091b55ad9ba1b63d067116e625001d
Author: Ryan Curtin <ryan at ratml.org>
Date: Sat Nov 21 15:01:02 2015 +0000
Style and documentation consistency fixes.
>---------------------------------------------------------------
74f5ac9c2f091b55ad9ba1b63d067116e625001d
src/mlpack/methods/adaboost/adaboost.hpp | 77 +++++--
src/mlpack/methods/adaboost/adaboost_impl.hpp | 145 ++++++------
src/mlpack/tests/adaboost_test.cpp | 303 ++++++++++----------------
3 files changed, 234 insertions(+), 291 deletions(-)
diff --git a/src/mlpack/methods/adaboost/adaboost.hpp b/src/mlpack/methods/adaboost/adaboost.hpp
index a964ed7..320982d 100644
--- a/src/mlpack/methods/adaboost/adaboost.hpp
+++ b/src/mlpack/methods/adaboost/adaboost.hpp
@@ -30,19 +30,63 @@
namespace mlpack {
namespace adaboost {
+/**
+ * The AdaBoost class. AdaBoost is a boosting algorithm, meaning that it
+ * combines an ensemble of weak learners to produce a strong learner. For more
+ * information on AdaBoost, see the following paper:
+ *
+ * @code
+ * @article{schapire1999improved,
+ * author = {Schapire, Robert E. and Singer, Yoram},
+ * title = {Improved Boosting Algorithms Using Confidence-rated Predictions},
+ * journal = {Machine Learning},
+ * volume = {37},
+ * number = {3},
+ * month = dec,
+ * year = {1999},
+ * issn = {0885-6125},
+ * pages = {297--336},
+ * }
+ * @endcode
+ *
+ * This class is general, and can be used with any type of weak learner, so long
+ * as the learner implements the following functions:
+ *
+ * @code
+ * // A boosting constructor, which learns using the training parameters of the
+ * // given other WeakLearner, but uses the given instance weights for training.
+ * WeakLearner(WeakLearner& other,
+ * const MatType& data,
+ * const arma::Row<size_t>& labels,
+ * const arma::rowvec& weights);
+ *
+ * // Given the test points, classify them and output predictions into
+ * // predictedLabels.
+ * void Classify(const MatType& data, arma::Row<size_t>& predictedLabels);
+ * @endcode
+ *
+ * For more information on and examples of weak learners, see
+ * perceptron::Perceptron<> and decision_stump::DecisionStump<>.
+ *
+ * @tparam MatType Data matrix type (i.e. arma::mat or arma::sp_mat).
+ * @tparam WeakLearner Type of weak learner to use.
+ */
template<typename MatType = arma::mat,
typename WeakLearner = mlpack::perceptron::Perceptron<> >
class AdaBoost
{
public:
/**
- * Constructor. Currently runs the AdaBoost.mh algorithm.
+ * Constructor. This runs the AdaBoost.MH algorithm to provide a trained
+ * boosting model. This constructor takes an already-initialized weak
+ * learner; all other weak learners will learn with the same parameters as the
+ * given weak learner.
*
* @param data Input data.
* @param labels Corresponding labels.
* @param iterations Number of boosting rounds.
* @param tol The tolerance for change in values of rt.
- * @param other Weak Learner, which has been initialized already.
+ * @param other Weak learner that has already been initialized.
*/
AdaBoost(const MatType& data,
const arma::Row<size_t>& labels,
@@ -50,39 +94,42 @@ class AdaBoost
const double tol,
const WeakLearner& other);
- // Stores the final classification of the Labels.
+ // Stores the final classification of the labels.
arma::Row<size_t> finalHypothesis;
- // Return the value of ztProduct
+ // Return the value of ztProduct.
double GetztProduct() { return ztProduct; }
// The tolerance for change in rt and when to stop.
double tolerance;
/**
- * Classification Function.
+ * Classify the given test points.
+ *
* @param test Testing data.
- * @param predictedLabels Vector to store the predicted labels of the
- * test set.
+ * @param predictedLabels Vector in which the predicted labels of the test
+ * set will be stored.
*/
void Classify(const MatType& test, arma::Row<size_t>& predictedLabels);
private:
/**
- * This function helps in building the Weight Distribution matrix
- * which is updated during every iteration. It calculates the
- * "difficulty" in classifying a point by adding the weights for all
- * instances, using D.
+ * This function helps in building the Weight Distribution matrix which is
+ * updated during every iteration. It calculates the "difficulty" in
+ * classifying a point by adding the weights for all instances, using D.
*
- * @param D The 2 Dimensional weight matrix from which the weights are
- * to be calculated.
- * @param weights The output weight vector.
+ * @param D The 2 Dimensional weight matrix from which the weights are
+ * to be calculated.
+ * @param weights The output weight vector.
*/
void BuildWeightMatrix(const arma::mat& D, arma::rowvec& weights);
- size_t numClasses;
+ //! The number of classes in the model.
+ size_t classes;
+ //! The vector of weak learners.
std::vector<WeakLearner> wl;
+ //! The weights corresponding to each weak learner.
std::vector<double> alpha;
// To check for the bound for the hammingLoss.
diff --git a/src/mlpack/methods/adaboost/adaboost_impl.hpp b/src/mlpack/methods/adaboost/adaboost_impl.hpp
index dc585f3..367c975 100644
--- a/src/mlpack/methods/adaboost/adaboost_impl.hpp
+++ b/src/mlpack/methods/adaboost/adaboost_impl.hpp
@@ -27,12 +27,13 @@ namespace mlpack {
namespace adaboost {
/**
- * Constructor. Currently runs the AdaBoost.mh algorithm
+ * Constructor. Currently runs the AdaBoost.MH algorithm.
*
- * @param data Input data
- * @param labels Corresponding labels
- * @param iterations Number of boosting rounds
- * @param other Weak Learner, which has been initialized already
+ * @param data Input data
+ * @param labels Corresponding labels
+ * @param iterations Number of boosting rounds
+ * @param tol Tolerance for termination of AdaBoost.MH.
+ * @param other Weak Learner, which has been initialized already.
*/
template<typename MatType, typename WeakLearner>
AdaBoost<MatType, WeakLearner>::AdaBoost(
@@ -43,81 +44,65 @@ AdaBoost<MatType, WeakLearner>::AdaBoost(
const WeakLearner& other)
{
// Count the number of classes.
- numClasses = (arma::max(labels) - arma::min(labels)) + 1;
+ classes = (arma::max(labels) - arma::min(labels)) + 1;
tolerance = tol;
+ // crt is the cumulative rt value for terminating the optimization when rt is
+ // changing by less than the tolerance.
double rt, crt, alphat = 0.0, zt;
- // crt is for stopping the iterations when rt
- // stops changing by less than a tolerant value.
-
- // crt is cumulative rt for stopping the iterations when rt
- // stops changing by less than a tolerant value.
-
ztProduct = 1.0;
- // To be used for prediction by the Weak Learner for prediction.
+ // To be used for prediction by the weak learner.
arma::Row<size_t> predictedLabels(labels.n_cols);
- // Use tempData to modify input Data for incorporating weights.
+ // Use tempData to modify input data for incorporating weights.
MatType tempData(data);
// This matrix is a helper matrix used to calculate the final hypothesis.
- arma::mat sumFinalH(predictedLabels.n_cols, numClasses);
+ arma::mat sumFinalH(predictedLabels.n_cols, classes);
sumFinalH.fill(0.0);
- // load the initial weights into a 2-D matrix
- const double initWeight = 1.0 / double(data.n_cols * numClasses);
- arma::mat D(data.n_cols, numClasses);
+ // Load the initial weights into a 2-D matrix.
+ const double initWeight = 1.0 / double(data.n_cols * classes);
+ arma::mat D(data.n_cols, classes);
D.fill(initWeight);
- // Weights are to be compressed into this rowvector
- // for focussing on the perceptron weights.
+ // Weights are stored in this row vector.
arma::rowvec weights(predictedLabels.n_cols);
// This is the final hypothesis.
arma::Row<size_t> finalH(predictedLabels.n_cols);
- // now start the boosting rounds
+ // Now, start the boosting rounds.
for (int i = 0; i < iterations; i++)
{
- // Initialized to zero in every round.
- // rt is used for calculation of alphat, is the weighted error
- // rt = (sum)D(i)y(i)ht(xi)
+ // Initialized to zero in every round. rt is used for calculation of
+ // alphat; it is the weighted error.
+ // rt = (sum) D(i) y(i) ht(xi)
rt = 0.0;
// zt is used for weight normalization.
zt = 0.0;
- // Build the weight vectors
+ // Build the weight vectors.
BuildWeightMatrix(D, weights);
- // call the other weak learner and train the labels.
+ // Use the existing weak learner to train a new one with new weights.
WeakLearner w(other, tempData, labels, weights);
w.Classify(tempData, predictedLabels);
// Now from predictedLabels, build ht, the weak hypothesis
// buildClassificationMatrix(ht, predictedLabels);
- // Now, start calculation of alpha(t) using ht
-
- for (size_t j = 0;j < D.n_rows; j++) // instead of D, ht
+ // Now, calculate alpha(t) using ht.
+ for (size_t j = 0; j < D.n_rows; j++) // instead of D, ht
{
if (predictedLabels(j) == labels(j))
- {
- // for (int k = 0;k < numClasses; k++)
- // rt += D(j,k);
rt += arma::accu(D.row(j));
- }
-
else
- {
- // for (int k = 0;k < numClasses; k++)
- // rt -= D(j,k);
rt -= arma::accu(D.row(j));
- }
}
- // end calculation of rt
if (i > 0)
{
@@ -126,61 +111,59 @@ AdaBoost<MatType, WeakLearner>::AdaBoost(
}
crt = rt;
- // our goal is to find alphat which mizimizes or approximately minimizes
- // the value of Z as a function of alpha.
+ // Our goal is to find alphat which minimizes or approximately minimizes the
+ // value of Z as a function of alpha.
alphat = 0.5 * log((1 + rt) / (1 - rt));
- // end calculation of alphat
alpha.push_back(alphat);
wl.push_back(w);
- // now start modifying weights
- for (size_t j = 0;j < D.n_rows; j++)
+ // Now start modifying the weights.
+ for (size_t j = 0; j < D.n_rows; j++)
{
double expo = exp(alphat);
if (predictedLabels(j) == labels(j))
{
- for (size_t k = 0;k < D.n_cols; k++)
- {
- // we calculate zt, the normalization constant
- zt += D(j,k) / expo; // * exp(-1 * alphat * yt(j,k) * ht(j,k));
- D(j,k) = D(j,k) / expo;
-
- // adding to the matrix of FinalHypothesis
- // sumFinalH(j,k) += (alphat * ht(j,k));
- if (k == labels(j))
- sumFinalH(j,k) += (alphat);// * ht(j,k));
- else
- sumFinalH(j,k) -= (alphat);
- }
+ for (size_t k = 0; k < D.n_cols; k++)
+ {
+ // We calculate zt, the normalization constant.
+ zt += D(j, k) / expo; // * exp(-1 * alphat * yt(j,k) * ht(j,k));
+ D(j, k) = D(j, k) / expo;
+
+ // Add to the final hypothesis matrix.
+ // sumFinalH(j, k) += (alphat * ht(j, k));
+ if (k == labels(j))
+ sumFinalH(j, k) += (alphat); // * ht(j, k));
+ else
+ sumFinalH(j, k) -= (alphat);
+ }
}
else
{
- for (size_t k = 0;k < D.n_cols; k++)
- {
- // we calculate zt, the normalization constant
- zt += D(j,k) * expo;
- D(j,k) = D(j,k) * expo;
-
- // adding to the matrix of FinalHypothesis
- if (k == labels(j))
- sumFinalH(j,k) += (alphat);// * ht(j,k));
- else
- sumFinalH(j,k) -= (alphat);
- }
+ for (size_t k = 0; k < D.n_cols; k++)
+ {
+ // We calculate zt, the normalization constant
+ zt += D(j, k) * expo;
+ D(j, k) = D(j, k) * expo;
+
+ // Add to the final hypothesis matrix.
+ if (k == labels(j))
+ sumFinalH(j, k) += (alphat); // * ht(j,k));
+ else
+ sumFinalH(j, k) -= (alphat);
+ }
}
}
- // normalization of D
- D = D / zt;
+ // Normalize D.
+ D /= zt;
- // Accumulating the value of zt for the Hamming Loss bound.
+ // Accumulate the value of zt for the Hamming loss bound.
ztProduct *= zt;
}
- // Iterations are over, now build a strong hypothesis
- // from a weighted combination of these weak hypotheses.
-
+ // Iterations are over, now build a strong hypothesis from a weighted
+ // combination of these weak hypotheses.
arma::colvec tempSumFinalH;
arma::uword max_index;
arma::mat sfh = sumFinalH.t();
@@ -191,14 +174,12 @@ AdaBoost<MatType, WeakLearner>::AdaBoost(
tempSumFinalH.max(max_index);
finalH(i) = max_index;
}
+
finalHypothesis = finalH;
}
/**
- * Classification Function.
- * @param test Testing data.
- * @param predictedLabels Vector to store the predicted labels of the
- * test set.
+ * Classify the given test points.
*/
template <typename MatType, typename WeakLearner>
void AdaBoost<MatType, WeakLearner>::Classify(
@@ -206,12 +187,12 @@ void AdaBoost<MatType, WeakLearner>::Classify(
arma::Row<size_t>& predictedLabels)
{
arma::Row<size_t> tempPredictedLabels(predictedLabels.n_cols);
- arma::mat cMatrix(numClasses, test.n_cols);
+ arma::mat cMatrix(classes, test.n_cols);
cMatrix.zeros();
predictedLabels.zeros();
- for (size_t i = 0;i < wl.size(); i++)
+ for (size_t i = 0; i < wl.size(); i++)
{
wl[i].Classify(test, tempPredictedLabels);
@@ -248,10 +229,8 @@ void AdaBoost<MatType, WeakLearner>::BuildWeightMatrix(
weights.fill(0.0);
for (i = 0; i < D.n_rows; i++)
- {
for (j = 0; j < D.n_cols; j++)
weights(i) += D(i, j);
- }
}
} // namespace adaboost
diff --git a/src/mlpack/tests/adaboost_test.cpp b/src/mlpack/tests/adaboost_test.cpp
index ef8269c..de6c582 100644
--- a/src/mlpack/tests/adaboost_test.cpp
+++ b/src/mlpack/tests/adaboost_test.cpp
@@ -18,39 +18,35 @@ using namespace mlpack::adaboost;
BOOST_AUTO_TEST_SUITE(AdaBoostTest);
/**
- * This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset.
- * It checks whether the hamming loss breaches the upperbound, which
- * is provided by ztAccumulator.
+ * This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset. It
+ * checks whether the hamming loss breaches the upperbound, which is provided by
+ * ztAccumulator.
*/
BOOST_AUTO_TEST_CASE(HammingLossBoundIris)
{
arma::mat inputData;
-
if (!data::Load("iris.txt", inputData))
BOOST_FAIL("Cannot load test dataset iris.txt!");
arma::Mat<size_t> labels;
-
if (!data::Load("iris_labels.txt",labels))
BOOST_FAIL("Cannot load labels for iris iris_labels.txt");
- // no need to map the labels here
-
// Define your own weak learner, perceptron in this case.
- // Run the perceptron for perceptron_iter iterations.
+ // Run the perceptron for perceptronIter iterations.
int perceptronIter = 400;
perceptron::Perceptron<> p(inputData, labels.row(0), max(labels.row(0)) + 1,
perceptronIter);
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
int iterations = 100;
double tolerance = 1e-10;
AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != a.finalHypothesis(i))
+ if (labels(i) != a.finalHypothesis(i))
countError++;
double hammingLoss = (double) countError / labels.n_cols;
@@ -59,10 +55,9 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundIris)
}
/**
- * This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset.
- * It checks if the error returned by running a single instance of the
- * weak learner is worse than running the boosted weak learner using
- * adaboost.
+ * This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset. It
+ * checks if the error returned by running a single instance of the weak learner
+ * is worse than running the boosted weak learner using adaboost.
*/
BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris)
{
@@ -76,10 +71,8 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris)
if (!data::Load("iris_labels.txt",labels))
BOOST_FAIL("Cannot load labels for iris iris_labels.txt");
- // no need to map the labels here
-
// Define your own weak learner, perceptron in this case.
- // Run the perceptron for perceptron_iter iterations.
+ // Run the perceptron for perceptronIter iterations.
int perceptronIter = 400;
arma::Row<size_t> perceptronPrediction(labels.n_cols);
@@ -89,17 +82,17 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris)
int countWeakLearnerError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != perceptronPrediction(i))
+ if (labels(i) != perceptronPrediction(i))
countWeakLearnerError++;
double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
int iterations = 100;
double tolerance = 1e-10;
AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != a.finalHypothesis(i))
+ if (labels(i) != a.finalHypothesis(i))
countError++;
double error = (double) countError / labels.n_cols;
@@ -107,39 +100,34 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris)
}
/**
- * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral
- * Column dataset.
- * It checks whether the hamming loss breaches the upperbound, which
- * is provided by ztAccumulator.
+ * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral Column
+ * dataset. It checks whether the hamming loss breaches the upperbound, which
+ * is provided by ztAccumulator.
*/
BOOST_AUTO_TEST_CASE(HammingLossBoundVertebralColumn)
{
arma::mat inputData;
-
if (!data::Load("vc2.txt", inputData))
BOOST_FAIL("Cannot load test dataset vc2.txt!");
arma::Mat<size_t> labels;
-
if (!data::Load("vc2_labels.txt",labels))
BOOST_FAIL("Cannot load labels for vc2_labels.txt");
- // no need to map the labels here
-
// Define your own weak learner, perceptron in this case.
- // Run the perceptron for perceptron_iter iterations.
+ // Run the perceptron for perceptronIter iterations.
int perceptronIter = 800;
perceptron::Perceptron<> p(inputData, labels.row(0), max(labels.row(0)) + 1,
perceptronIter);
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
int iterations = 50;
double tolerance = 1e-10;
AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != a.finalHypothesis(i))
+ if (labels(i) != a.finalHypothesis(i))
countError++;
double hammingLoss = (double) countError / labels.n_cols;
@@ -148,28 +136,22 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundVertebralColumn)
}
/**
- * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral
- * Column dataset.
- * It checks if the error returned by running a single instance of the
- * weak learner is worse than running the boosted weak learner using
- * adaboost.
+ * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral Column
+ * dataset. It checks if the error returned by running a single instance of the
+ * weak learner is worse than running the boosted weak learner using adaboost.
*/
BOOST_AUTO_TEST_CASE(WeakLearnerErrorVertebralColumn)
{
arma::mat inputData;
-
if (!data::Load("vc2.txt", inputData))
BOOST_FAIL("Cannot load test dataset vc2.txt!");
arma::Mat<size_t> labels;
-
if (!data::Load("vc2_labels.txt",labels))
BOOST_FAIL("Cannot load labels for vc2_labels.txt");
- // no need to map the labels here
-
// Define your own weak learner, perceptron in this case.
- // Run the perceptron for perceptron_iter iterations.
+ // Run the perceptron for perceptronIter iterations.
int perceptronIter = 800;
arma::Row<size_t> perceptronPrediction(labels.n_cols);
@@ -179,11 +161,11 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorVertebralColumn)
int countWeakLearnerError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != perceptronPrediction(i))
+ if (labels(i) != perceptronPrediction(i))
countWeakLearnerError++;
double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
int iterations = 50;
double tolerance = 1e-10;
AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
@@ -197,40 +179,34 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorVertebralColumn)
}
/**
- * This test case runs the AdaBoost.mh algorithm on non-linearly
- * separable dataset.
- * It checks whether the hamming loss breaches the upperbound, which
- * is provided by ztAccumulator.
+ * This test case runs the AdaBoost.mh algorithm on a non-linearly separable
+ * dataset. It checks whether the hamming loss breaches the upperbound, which
+ * is provided by ztAccumulator.
*/
BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData)
{
arma::mat inputData;
-
if (!data::Load("train_nonlinsep.txt", inputData))
BOOST_FAIL("Cannot load test dataset train_nonlinsep.txt!");
arma::Mat<size_t> labels;
-
-
if (!data::Load("train_labels_nonlinsep.txt",labels))
BOOST_FAIL("Cannot load labels for train_labels_nonlinsep.txt");
- // no need to map the labels here
-
// Define your own weak learner, perceptron in this case.
- // Run the perceptron for perceptron_iter iterations.
+ // Run the perceptron for perceptronIter iterations.
int perceptronIter = 800;
perceptron::Perceptron<> p(inputData, labels.row(0), max(labels.row(0)) + 1,
perceptronIter);
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
int iterations = 50;
double tolerance = 1e-10;
AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != a.finalHypothesis(i))
+ if (labels(i) != a.finalHypothesis(i))
countError++;
double hammingLoss = (double) countError / labels.n_cols;
@@ -239,28 +215,22 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData)
}
/**
- * This test case runs the AdaBoost.mh algorithm on a non-linearly
- * separable dataset.
- * It checks if the error returned by running a single instance of the
- * weak learner is worse than running the boosted weak learner using
- * adaboost.
+ * This test case runs the AdaBoost.mh algorithm on a non-linearly separable
+ * dataset. It checks if the error returned by running a single instance of the
+ * weak learner is worse than running the boosted weak learner using AdaBoost.
*/
BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData)
{
arma::mat inputData;
-
if (!data::Load("train_nonlinsep.txt", inputData))
BOOST_FAIL("Cannot load test dataset train_nonlinsep.txt!");
arma::Mat<size_t> labels;
-
if (!data::Load("train_labels_nonlinsep.txt",labels))
BOOST_FAIL("Cannot load labels for train_labels_nonlinsep.txt");
- // no need to map the labels here
-
// Define your own weak learner, perceptron in this case.
- // Run the perceptron for perceptron_iter iterations.
+ // Run the perceptron for perceptronIter iterations.
int perceptronIter = 800;
arma::Row<size_t> perceptronPrediction(labels.n_cols);
@@ -274,13 +244,13 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData)
countWeakLearnerError++;
double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
int iterations = 50;
double tolerance = 1e-10;
AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != a.finalHypothesis(i))
+ if (labels(i) != a.finalHypothesis(i))
countError++;
double error = (double) countError / labels.n_cols;
@@ -288,28 +258,23 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData)
}
/**
- * This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset.
- * It checks whether the hamming loss breaches the upperbound, which
- * is provided by ztAccumulator.
- * This is for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset. It
+ * checks whether the Hamming loss breaches the upper bound, which is provided
+ * by ztAccumulator. This uses decision stumps as the weak learner.
*/
BOOST_AUTO_TEST_CASE(HammingLossIris_DS)
{
arma::mat inputData;
-
if (!data::Load("iris.txt", inputData))
BOOST_FAIL("Cannot load test dataset iris.txt!");
arma::Mat<size_t> labels;
-
if (!data::Load("iris_labels.txt",labels))
BOOST_FAIL("Cannot load labels for iris_labels.txt");
- // no need to map the labels here
-
- // Define your own weak learner, Decision Stumps in this case.
+ // Define your own weak learner, decision stumps in this case.
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
const size_t numClasses = 3;
const size_t inpBucketSize = 6;
@@ -318,11 +283,11 @@ BOOST_AUTO_TEST_CASE(HammingLossIris_DS)
int iterations = 50;
double tolerance = 1e-10;
- AdaBoost<arma::mat, mlpack::decision_stump::DecisionStump<> > a(inputData,
+ AdaBoost<arma::mat, decision_stump::DecisionStump<>> a(inputData,
labels.row(0), iterations, tolerance, ds);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != a.finalHypothesis(i))
+ if (labels(i) != a.finalHypothesis(i))
countError++;
double hammingLoss = (double) countError / labels.n_cols;
@@ -331,28 +296,24 @@ BOOST_AUTO_TEST_CASE(HammingLossIris_DS)
}
/**
- * This test case runs the AdaBoost.mh algorithm on a non-linearly
- * separable dataset.
- * It checks if the error returned by running a single instance of the
- * weak learner is worse than running the boosted weak learner using
- * adaboost.
- * This is for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.mh algorithm on the UCI Iris dataset. It
+ * checks if the error returned by running a single instance of the weak learner
+ * is worse than running the boosted weak learner using adaboost. This is for
+ * the weak learner: decision stumps.
*/
BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris_DS)
{
arma::mat inputData;
-
if (!data::Load("iris.txt", inputData))
BOOST_FAIL("Cannot load test dataset iris.txt!");
arma::Mat<size_t> labels;
-
if (!data::Load("iris_labels.txt",labels))
BOOST_FAIL("Cannot load labels for iris_labels.txt");
// no need to map the labels here
- // Define your own weak learner, Decision Stump in this case.
+ // Define your own weak learner, decision stumps in this case.
const size_t numClasses = 3;
const size_t inpBucketSize = 6;
@@ -369,59 +330,52 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris_DS)
countWeakLearnerError++;
double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
int iterations = 50;
double tolerance = 1e-10;
- AdaBoost<arma::mat, mlpack::decision_stump::DecisionStump<> > a(inputData,
- labels.row(0), iterations, tolerance, ds);
+ AdaBoost<arma::mat, decision_stump::DecisionStump<> > a(inputData,
+ labels.row(0), iterations, tolerance, ds);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != a.finalHypothesis(i))
+ if (labels(i) != a.finalHypothesis(i))
countError++;
double error = (double) countError / labels.n_cols;
BOOST_REQUIRE(error <= weakLearnerErrorRate);
}
/**
- * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral
- * Column dataset.
- * It checks if the error returned by running a single instance of the
- * weak learner is worse than running the boosted weak learner using
- * adaboost.
- * This is for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral Column
+ * dataset. It checks whether the Hamming loss breaches the upper bound, which
+ * is provided by ztAccumulator.
+ * This is for the weak learner: decision stumps.
*/
BOOST_AUTO_TEST_CASE(HammingLossBoundVertebralColumn_DS)
{
arma::mat inputData;
-
if (!data::Load("vc2.txt", inputData))
BOOST_FAIL("Cannot load test dataset vc2.txt!");
arma::Mat<size_t> labels;
-
if (!data::Load("vc2_labels.txt",labels))
BOOST_FAIL("Cannot load labels for vc2_labels.txt");
- // no need to map the labels here
-
- // Define your own weak learner, Decision Stump in this case.
-
- // Define parameters for the adaboost
+ // Define your own weak learner, decision stumps in this case.
const size_t numClasses = 3;
const size_t inpBucketSize = 6;
decision_stump::DecisionStump<> ds(inputData, labels.row(0),
numClasses, inpBucketSize);
+ // Define parameters for AdaBoost.
int iterations = 50;
double tolerance = 1e-10;
- AdaBoost<arma::mat, mlpack::decision_stump::DecisionStump<> > a(inputData,
- labels.row(0), iterations, tolerance, ds);
+ AdaBoost<arma::mat, decision_stump::DecisionStump<>> a(inputData,
+ labels.row(0), iterations, tolerance, ds);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != a.finalHypothesis(i))
+ if (labels(i) != a.finalHypothesis(i))
countError++;
double hammingLoss = (double) countError / labels.n_cols;
@@ -430,87 +384,74 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundVertebralColumn_DS)
}
/**
- * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral
- * Column dataset.
- * It checks if the error returned by running a single instance of the
- * weak learner is worse than running the boosted weak learner using
- * adaboost.
- * This is for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral Column
+ * dataset. It checks if the error returned by running a single instance of the
+ * weak learner is worse than running the boosted weak learner using adaboost.
+ * This is for the weak learner: decision stumps.
*/
BOOST_AUTO_TEST_CASE(WeakLearnerErrorVertebralColumn_DS)
{
arma::mat inputData;
-
if (!data::Load("vc2.txt", inputData))
BOOST_FAIL("Cannot load test dataset vc2.txt!");
arma::Mat<size_t> labels;
-
- if (!data::Load("vc2_labels.txt",labels))
+ if (!data::Load("vc2_labels.txt", labels))
BOOST_FAIL("Cannot load labels for vc2_labels.txt");
- // no need to map the labels here
-
- // Define your own weak learner, Decision Stump in this case.
-
+ // Define your own weak learner, decision stumps in this case.
const size_t numClasses = 3;
const size_t inpBucketSize = 6;
-
arma::Row<size_t> dsPrediction(labels.n_cols);
- decision_stump::DecisionStump<> ds(inputData, labels.row(0),
- numClasses, inpBucketSize);
+ decision_stump::DecisionStump<> ds(inputData, labels.row(0), numClasses,
+ inpBucketSize);
int countWeakLearnerError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != dsPrediction(i))
+ if (labels(i) != dsPrediction(i))
countWeakLearnerError++;
+
double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
int iterations = 50;
double tolerance = 1e-10;
- AdaBoost<arma::mat, mlpack::decision_stump::DecisionStump<> > a(inputData,
- labels.row(0), iterations, tolerance, ds);
+ AdaBoost<arma::mat, decision_stump::DecisionStump<>> a(inputData,
+ labels.row(0), iterations, tolerance, ds);
+
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != a.finalHypothesis(i))
+ if (labels(i) != a.finalHypothesis(i))
countError++;
double error = (double) countError / labels.n_cols;
BOOST_REQUIRE(error <= weakLearnerErrorRate);
}
+
/**
- * This test case runs the AdaBoost.mh algorithm on non-linearly
- * separable dataset.
- * It checks whether the hamming loss breaches the upperbound, which
- * is provided by ztAccumulator.
- * This is for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.mh algorithm on a non-linearly separable
+ * dataset. It checks whether the hamming loss breaches the upperbound, which
+ * is provided by ztAccumulator. This is for the weak learner: decision stumps.
*/
BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData_DS)
{
arma::mat inputData;
-
if (!data::Load("train_nonlinsep.txt", inputData))
BOOST_FAIL("Cannot load test dataset train_nonlinsep.txt!");
arma::Mat<size_t> labels;
-
if (!data::Load("train_labels_nonlinsep.txt",labels))
BOOST_FAIL("Cannot load labels for train_labels_nonlinsep.txt");
-
- // no need to map the labels here
-
- // Define your own weak learner, Decision Stump in this case.
-
- // Define parameters for the adaboost
+ // Define your own weak learner, decision stumps in this case.
const size_t numClasses = 2;
const size_t inpBucketSize = 6;
decision_stump::DecisionStump<> ds(inputData, labels.row(0),
numClasses, inpBucketSize);
+ // Define parameters for AdaBoost.
int iterations = 50;
double tolerance = 1e-10;
@@ -518,7 +459,7 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData_DS)
labels.row(0), iterations, tolerance, ds);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != a.finalHypothesis(i))
+ if (labels(i) != a.finalHypothesis(i))
countError++;
double hammingLoss = (double) countError / labels.n_cols;
@@ -527,29 +468,22 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData_DS)
}
/**
- * This test case runs the AdaBoost.mh algorithm on a non-linearly
- * separable dataset.
- * It checks if the error returned by running a single instance of the
- * weak learner is worse than running the boosted weak learner using
- * adaboost.
- * This for the weak learner: Decision Stumps.
+ * This test case runs the AdaBoost.mh algorithm on a non-linearly separable
+ * dataset. It checks if the error returned by running a single instance of the
+ * weak learner is worse than running the boosted weak learner using AdaBoost.
+ * This is for the weak learner: decision stumps.
*/
BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData_DS)
{
arma::mat inputData;
-
if (!data::Load("train_nonlinsep.txt", inputData))
BOOST_FAIL("Cannot load test dataset train_nonlinsep.txt!");
arma::Mat<size_t> labels;
-
if (!data::Load("train_labels_nonlinsep.txt",labels))
BOOST_FAIL("Cannot load labels for train_labels_nonlinsep.txt");
- // no need to map the labels here
-
- // Define your own weak learner, Decision Stump in this case.
-
+ // Define your own weak learner, decision stumps in this case.
const size_t numClasses = 2;
const size_t inpBucketSize = 3;
@@ -564,7 +498,7 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData_DS)
countWeakLearnerError++;
double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
int iterations = 500;
double tolerance = 1e-23;
@@ -572,7 +506,7 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData_DS)
labels.row(0), iterations, tolerance, ds);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
- if(labels(i) != a.finalHypothesis(i))
+ if (labels(i) != a.finalHypothesis(i))
countError++;
double error = (double) countError / labels.n_cols;
@@ -580,28 +514,23 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData_DS)
}
/**
- * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral
- * Column dataset.
- * It tests the Classify function and checks for a satisfiable error rate.
+ * This test case runs the AdaBoost.mh algorithm on the UCI Vertebral Column
+ * dataset. It tests the Classify function and checks for a satisfactory error
+ * rate.
*/
BOOST_AUTO_TEST_CASE(ClassifyTest_VERTEBRALCOL)
{
mlpack::math::RandomSeed(std::time(NULL));
arma::mat inputData;
-
if (!data::Load("vc2.txt", inputData))
BOOST_FAIL("Cannot load test dataset vc2.txt!");
arma::Mat<size_t> labels;
-
if (!data::Load("vc2_labels.txt",labels))
BOOST_FAIL("Cannot load labels for vc2_labels.txt");
- // no need to map the labels here
-
// Define your own weak learner, perceptron in this case.
- // Run the perceptron for perceptron_iter iterations.
-
+ // Run the perceptron for perceptronIter iterations.
int perceptronIter = 1000;
arma::mat testData;
@@ -619,8 +548,7 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_VERTEBRALCOL)
perceptronIter);
p.Classify(inputData, perceptronPrediction);
- // Define parameters for the adaboost
-
+ // Define parameters for AdaBoost.
int iterations = 100;
double tolerance = 1e-10;
AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
@@ -631,37 +559,30 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_VERTEBRALCOL)
int localError = 0;
for (size_t i = 0; i < trueTestLabels.n_cols; i++)
- if(trueTestLabels(i) != predictedLabels(i))
+ if (trueTestLabels(i) != predictedLabels(i))
localError++;
double lError = (double) localError / trueTestLabels.n_cols;
BOOST_REQUIRE(lError <= 0.30);
-
}
/**
- * This test case runs the AdaBoost.mh algorithm on a non linearly
- * separable dataset.
- * It tests the Classify function and checks for a satisfiable error rate.
+ * This test case runs the AdaBoost.mh algorithm on a non-linearly separable
+ * dataset. It tests the Classify function and checks for a satisfactory error
+ * rate.
*/
BOOST_AUTO_TEST_CASE(ClassifyTest_NONLINSEP)
{
arma::mat inputData;
-
if (!data::Load("train_nonlinsep.txt", inputData))
BOOST_FAIL("Cannot load test dataset train_nonlinsep.txt!");
arma::Mat<size_t> labels;
-
if (!data::Load("train_labels_nonlinsep.txt",labels))
BOOST_FAIL("Cannot load labels for train_labels_nonlinsep.txt");
- // no need to map the labels here
-
- // Define your own weak learner, perceptron in this case.
- // Run the perceptron for perceptron_iter iterations.
-
+ // Define your own weak learner; in this test decision stumps are used.
const size_t numClasses = 2;
const size_t inpBucketSize = 3;
@@ -680,7 +601,7 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_NONLINSEP)
decision_stump::DecisionStump<> ds(inputData, labels.row(0),
numClasses, inpBucketSize);
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
int iterations = 50;
double tolerance = 1e-10;
AdaBoost<arma::mat, mlpack::decision_stump::DecisionStump<> > a(
@@ -691,7 +612,7 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_NONLINSEP)
int localError = 0;
for (size_t i = 0; i < trueTestLabels.n_cols; i++)
- if(trueTestLabels(i) != predictedLabels(i))
+ if (trueTestLabels(i) != predictedLabels(i))
localError++;
double lError = (double) localError / trueTestLabels.n_cols;
@@ -700,33 +621,29 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_NONLINSEP)
}
/**
- * This test case runs the AdaBoost.mh algorithm on the UCI Iris Dataset.
- * It trains it on two thirds of the Iris dataset (iris_train.csv),
- * and tests on the remaining third of the dataset (iris_test.csv).
- * It tests the Classify function and checks for a satisfiable error rate.
+ * This test case runs the AdaBoost.mh algorithm on the UCI Iris Dataset. It
+ * trains it on two thirds of the Iris dataset (iris_train.csv), and tests on
+ * the remaining third of the dataset (iris_test.csv). It tests the Classify()
+ * function and checks for a satisfactory error rate.
*/
BOOST_AUTO_TEST_CASE(ClassifyTest_IRIS)
{
arma::mat inputData;
-
if (!data::Load("iris_train.csv", inputData))
BOOST_FAIL("Cannot load test dataset iris_train.csv!");
arma::Mat<size_t> labels;
-
if (!data::Load("iris_train_labels.csv",labels))
BOOST_FAIL("Cannot load labels for iris_train_labels.csv");
- // no need to map the labels here
-
// Define your own weak learner, perceptron in this case.
- // Run the perceptron for perceptron_iter iterations.
+ // Run the perceptron for perceptronIter iterations.
int perceptronIter = 800;
perceptron::Perceptron<> p(inputData, labels.row(0), max(labels.row(0)) + 1,
perceptronIter);
- // Define parameters for the adaboost
+ // Define parameters for AdaBoost.
int iterations = 50;
double tolerance = 1e-10;
AdaBoost<> a(inputData, labels.row(0), iterations, tolerance, p);
@@ -745,7 +662,7 @@ BOOST_AUTO_TEST_CASE(ClassifyTest_IRIS)
int localError = 0;
for (size_t i = 0; i < trueTestLabels.n_cols; i++)
- if(trueTestLabels(i) != predictedLabels(i))
+ if (trueTestLabels(i) != predictedLabels(i))
localError++;
double lError = (double) localError / labels.n_cols;
More information about the mlpack-git
mailing list