[mlpack-git] master, mlpack-1.0.x: Adaboost design issues, to be discussed, then changed later on (4f7d98c)

Thu Mar 5 21:53:19 EST 2015

Repository : https://github.com/mlpack/mlpack

On branches: master,mlpack-1.0.x
Link       : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40

>---------------------------------------------------------------

commit 4f7d98cb5f5c5f4873d33942d016c6f332e39a99
Author: Udit Saxena <saxena.udit at gmail.com>
Date:   Tue Jul 15 11:26:08 2014 +0000

    Adaboost design issues, to be discussed, then changed later on


>---------------------------------------------------------------

4f7d98cb5f5c5f4873d33942d016c6f332e39a99
 src/mlpack/methods/CMakeLists.txt                  |  1 +
 .../{decision_stump => adaboost}/CMakeLists.txt    | 12 +--
 src/mlpack/methods/adaboost/adaboost.hpp           | 31 ++++++++
 src/mlpack/methods/adaboost/adaboost_impl.hpp      | 87 ++++++++++++++++++++++
 .../adaboost_main.cpp}                             | 18 ++---
 .../methods/decision_stump/decision_stump.hpp      | 18 +++++
 .../methods/decision_stump/decision_stump_impl.hpp | 32 ++++++++
 src/mlpack/methods/perceptron/perceptron.hpp       | 14 ++++
 src/mlpack/methods/perceptron/perceptron_impl.hpp  | 20 +++++
 src/mlpack/tests/perceptron_test.cpp               | 17 +++++
 10 files changed, 232 insertions(+), 18 deletions(-)

diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index 93289ea..925e74d 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -1,5 +1,6 @@
 # Recurse into each method mlpack provides.
 set(DIRS
+# adaboost 
   amf
   cf
   decision_stump
diff --git a/src/mlpack/methods/decision_stump/CMakeLists.txt b/src/mlpack/methods/adaboost/CMakeLists.txt
similarity index 70%
copy from src/mlpack/methods/decision_stump/CMakeLists.txt
copy to src/mlpack/methods/adaboost/CMakeLists.txt
index 3c811b8..7e29581 100644
--- a/src/mlpack/methods/decision_stump/CMakeLists.txt
+++ b/src/mlpack/methods/adaboost/CMakeLists.txt
@@ -3,8 +3,8 @@ cmake_minimum_required(VERSION 2.8)
 # Define the files we need to compile.
 # Anything not in this list will not be compiled into MLPACK.
 set(SOURCES
-  decision_stump.hpp
-  decision_stump_impl.hpp
+  adaboost.hpp
+  adaboost_impl.hpp
 )
 
 # Add directory name to sources.
@@ -16,11 +16,11 @@ endforeach()
 # the parent scope).
 set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
 
-add_executable(decision_stump
-  decision_stump_main.cpp
+add_executable(adaboost
+  adaboost_main.cpp
 )
-target_link_libraries(decision_stump
+target_link_libraries(adaboost
   mlpack
 )
 
-install(TARGETS decision_stump RUNTIME DESTINATION bin)
+install(TARGETS adaboost RUNTIME DESTINATION bin)
diff --git a/src/mlpack/methods/adaboost/adaboost.hpp b/src/mlpack/methods/adaboost/adaboost.hpp
new file mode 100644
index 0000000..8aafb07
--- /dev/null
+++ b/src/mlpack/methods/adaboost/adaboost.hpp
@@ -0,0 +1,31 @@
+/**
+ * @file adaboost.hpp
+ * @author Udit Saxena
+ *
+ * AdaBoost header file
+ */
+
+#ifndef _MLPACK_METHODS_ADABOOST_ADABOOST_HPP
+#define _MLPACK_METHODS_ADABOOST_ADABOOST_HPP
+
+#include <mlpack/core.hpp>
+#include "../perceptron/perceptron.hpp"
+
+namespace mlpack {
+namespace adaboost {
+
+//! Boosting driver templated on the data matrix type and the weak learner.
+template <typename MatType = arma::mat,
+          typename WeakLearner = mlpack::perceptron::Perceptron<> >
+class Adaboost
+{
+public:
+  //! Run `iterations` boosting rounds on data/labels using a copy of `other`.
+  Adaboost(const MatType& data, const arma::Row<size_t>& labels,
+           int iterations, size_t classes, const WeakLearner& other);
+}; // class Adaboost
+} // namespace adaboost
+} // namespace mlpack
+
+#include "adaboost_impl.hpp"
+#endif
\ No newline at end of file
diff --git a/src/mlpack/methods/adaboost/adaboost_impl.hpp b/src/mlpack/methods/adaboost/adaboost_impl.hpp
new file mode 100644
index 0000000..da99936
--- /dev/null
+++ b/src/mlpack/methods/adaboost/adaboost_impl.hpp
@@ -0,0 +1,87 @@
+/**
+ * @file adaboost_impl.hpp
+ * @author Udit Saxena
+ *
+ * Implementation of the AdaBoost class
+ *
+ */
+
+#ifndef _MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP
+#define _MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP
+
+#include "adaboost.hpp"
+
+namespace mlpack {
+namespace adaboost {
+
+template<typename MatType, typename WeakLearner>
+Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data, const arma::Row<size_t>& labels,
+         int iterations, size_t classes, const WeakLearner& other)
+{
+  // Runs `iterations` boosting rounds, keeping one weight in D per column of
+  // `data`; a copy of `other` is used as the weak learner in every round.
+
+  // Floating-point division: an integer 1 / (n_cols * classes) truncates to 0.
+  const double initWeight = 1.0 / (data.n_cols * classes);
+  arma::Row<double> D(data.n_cols);
+  D.fill(initWeight);
+
+  double rt, alphat = 0.0, zt;
+  arma::Row<size_t> predictedLabels(labels.n_cols);
+  MatType tempData(data);
+  // Now start the boosting rounds.
+  for (int i = 0; i < iterations; i++)
+  {
+    rt = 0.0;
+    zt = 0.0;
+
+    // Scale every column by its own weight (index by j, not the round i).
+    for (size_t j = 0; j < tempData.n_cols; j++)
+      tempData.col(j) = D(j) * tempData.col(j);
+
+    // The weak learner is a copy of `other`; it is not retrained here.
+    //mlpack::perceptron::Perceptron<> p(tempData, labels, 1000);
+    WeakLearner w(other);
+    w.Classify(tempData, predictedLabels);
+
+    // Now, start calculation of alpha(t).
+
+    // Building a helper row vector, mispredict, to help in calculations.
+    // This stores the value of Yi(l)*ht(xi,l).
+    // Cast before negating: predictedLabels holds unsigned size_t values.
+    arma::Row<double> mispredict(predictedLabels.n_cols);
+
+    for (size_t j = 0; j < predictedLabels.n_cols; j++)
+    {
+      if (predictedLabels(j) != labels(j))
+        mispredict(j) = -static_cast<double>(predictedLabels(j));
+      else
+        mispredict(j) = static_cast<double>(predictedLabels(j));
+    }
+
+    // Begin calculation of rt: the D-weighted sum of mispredict.
+
+    for (size_t j = 0; j < predictedLabels.n_cols; j++)
+      rt += D(j) * mispredict(j);
+
+    // End calculation of rt.
+    // NOTE(review): rt == 1 divides by zero below; rt == -1 takes log(0).
+    alphat = 0.5 * log((1 + rt) / (1 - rt));
+
+    // End calculation of alphat.
+    // Reweight every point and accumulate the normalization constant zt.
+    for (size_t j = 0; j < mispredict.n_cols; j++)
+    {
+      D(j) = D(j) * exp(-1 * alphat * mispredict(j));
+      zt += D(j);
+    }
+
+    D = D / zt;
+
+  }
+
+}
+
+} // namespace adaboost
+} // namespace mlpack
+#endif
\ No newline at end of file
diff --git a/src/mlpack/methods/perceptron/perceptron_main.cpp b/src/mlpack/methods/adaboost/adaboost_main.cpp
similarity index 80%
copy from src/mlpack/methods/perceptron/perceptron_main.cpp
copy to src/mlpack/methods/adaboost/adaboost_main.cpp
index a6082d7..1c30be0 100644
--- a/src/mlpack/methods/perceptron/perceptron_main.cpp
+++ b/src/mlpack/methods/adaboost/adaboost_main.cpp
@@ -1,15 +1,14 @@
 /*
- * @file: perceptron_main.cpp
+ * @file: adaboost_main.cpp
  * @author: Udit Saxena
  *
  *
  */
 
 #include <mlpack/core.hpp>
-#include "perceptron.hpp"
+#include "adaboost.hpp"
 
 using namespace mlpack;
-using namespace mlpack::perceptron;
 using namespace std;
 using namespace arma;
 
@@ -24,8 +23,9 @@ PARAM_STRING_REQ("test_file", "A file containing the test set.", "te");
 //optional parameters.
 PARAM_STRING("output", "The file in which the predicted labels for the test set"
     " will be written.", "o", "output.csv");
-PARAM_INT("iterations","The maximum number of iterations the perceptron is "
-  "to be run", "i", 1000)
+PARAM_INT("iterations","The maximum number of boosting iterations "
+  "to be run", "i", 1000);
+PARAM_INT_REQ("classes","The number of classes in the input label set.","c");
 
 int main(int argc, char *argv[])
 {
@@ -62,20 +62,14 @@ int main(int argc, char *argv[])
   int iterations = CLI::GetParam<int>("iterations");
   
   Timer::Start("Training");
-  Perceptron<> p(trainingData, labels, iterations);
+  Adaboost<> a(trainingData, labels, iterations, classes);
   Timer::Stop("Training");
 
-  Row<size_t> predictedLabels(testingData.n_cols);
-  Timer::Start("Testing");
-  p.Classify(testingData, predictedLabels);
-  Timer::Stop("Testing");
-
   vec results;
   data::RevertLabels(predictedLabels, mappings, results);
 
   const string outputFilename = CLI::GetParam<string>("output");
   data::Save(outputFilename, results, true, true);
-  // saving the predictedLabels in the transposed manner in output
 
   return 0;
 }
\ No newline at end of file
diff --git a/src/mlpack/methods/decision_stump/decision_stump.hpp b/src/mlpack/methods/decision_stump/decision_stump.hpp
index fb4d6c9..3c0adcb 100644
--- a/src/mlpack/methods/decision_stump/decision_stump.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump.hpp
@@ -53,6 +53,24 @@ class DecisionStump
    */
   void Classify(const MatType& test, arma::Row<size_t>& predictedLabels);
 
+  /**
+   * Constructs a decision stump from an existing DecisionStump<> object.
+   *
+   * @param ds Decision stump whose state is used to build this object.
+   *     NOTE(review): typed as DecisionStump<>, not DecisionStump<MatType>.
+   */
+  DecisionStump(const DecisionStump<>& ds);
+
+  /**
+   *
+   *
+   *
+   *
+   *
+   *
+  ModifyData(MatType& data, const arma::Row<double>& D);
+  */
+  
   //! Access the splitting attribute.
   int SplitAttribute() const { return splitAttribute; }
   //! Modify the splitting attribute (be careful!).
diff --git a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
index 7300521..80d961c 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -104,6 +104,38 @@ void DecisionStump<MatType>::Classify(const MatType& test,
 }
 
 /**
+ * Constructs a decision stump as a member-wise copy of another one.
+ *
+ * Copies numClass, splitAttribute, bucketSize, split and binLabels from ds.
+ *
+ * @param ds DecisionStump<> whose members are copied into this object.
+ */
+template <typename MatType>
+DecisionStump<MatType>::DecisionStump(const DecisionStump<>& ds)
+{
+  numClass = ds.numClass;
+
+  splitAttribute = ds.splitAttribute;
+
+  bucketSize = ds.bucketSize;
+
+  split = ds.split;
+
+  binLabels = ds.binLabels;
+}
+
+/**
+ *
+ *
+ *
+ *
+ *
+ *
+template <typename MatType>
+DecisionStump<MatType>::ModifyData(MatType& data, const arma::Row<double>& D)
+ */
+
+/**
  * Sets up attribute as if it were splitting on it and finds entropy when
  * splitting on attribute.
  *
diff --git a/src/mlpack/methods/perceptron/perceptron.hpp b/src/mlpack/methods/perceptron/perceptron.hpp
index 7842e35..2ef86d2 100644
--- a/src/mlpack/methods/perceptron/perceptron.hpp
+++ b/src/mlpack/methods/perceptron/perceptron.hpp
@@ -53,6 +53,20 @@ class Perceptron
    */
   void Classify(const MatType& test, arma::Row<size_t>& predictedLabels);
 
+  /**
+   * Constructs a perceptron from an existing Perceptron<> object, copying its
+   * class labels, weight vectors and training data.
+   * @param p Perceptron<> whose members are copied into this object.
+   */
+  Perceptron(const Perceptron<>& p);
+
+  /**
+   *
+   *
+   *
+   *
+   ModifyData(MatType& data, const arma::Row<double>& D);
+   */
 private:
   //! Stores the class labels for the input data.
   arma::Row<size_t> classLabels;
diff --git a/src/mlpack/methods/perceptron/perceptron_impl.hpp b/src/mlpack/methods/perceptron/perceptron_impl.hpp
index bcec50f..48c53d4 100644
--- a/src/mlpack/methods/perceptron/perceptron_impl.hpp
+++ b/src/mlpack/methods/perceptron/perceptron_impl.hpp
@@ -110,6 +110,26 @@ void Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::Classify(
   }
 }
 
+template <typename LearnPolicy, typename WeightInitializationPolicy, typename MatType>
+Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::Perceptron(
+  const Perceptron<>& p)
+{
+  // Member-wise copy of the labels, weights and training data held by p.
+  // NOTE(review): p is Perceptron<>, not this specialization -- confirm intent.
+  classLabels = p.classLabels;
+  weightVectors = p.weightVectors;
+  trainData = p.trainData;
+}
+
+/*
+template <typename LearnPolicy, typename WeightInitializationPolicy, typename MatType>
+Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::ModifyData(
+  MatType& data, const arma::Row<double>& D)
+{
+  for (int j = 0;j < data.n_cols;j++)
+      data.col(i) = D(i) * data.col(i);
+}
+*/
 }; // namespace perceptron
 }; // namespace mlpack
 
diff --git a/src/mlpack/tests/perceptron_test.cpp b/src/mlpack/tests/perceptron_test.cpp
index 07f99bd..aadb4f4 100644
--- a/src/mlpack/tests/perceptron_test.cpp
+++ b/src/mlpack/tests/perceptron_test.cpp
@@ -149,4 +149,21 @@ BOOST_AUTO_TEST_CASE(NonLinearlySeparableDataset)
   BOOST_CHECK_EQUAL(predictedLabels(0, 3), 1);
 }
 
+BOOST_AUTO_TEST_CASE(SecondaryConstructor)
+{
+  mat trainData;
+  trainData << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8
+            << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << endr
+            << 1 << 1 << 1 << 1 << 1 << 1 << 1 << 1
+            << 2 << 2 << 2 << 2 << 2 << 2 << 2 << 2 << endr;
+
+  Mat<size_t> labels;
+  labels << 0 << 0 << 0 << 1 << 0 << 1 << 1 << 1
+         << 0 << 0 << 0 << 1 << 0 << 1 << 1 << 1;
+  // Build p1 from the data and labels above, then construct p2 from p1.
+  Perceptron<> p1(trainData, labels.row(0), 1000);
+  // Nothing is asserted about p2: this only exercises the new constructor.
+  Perceptron<> p2(p1);
+}
+
 BOOST_AUTO_TEST_SUITE_END();