[mlpack-svn] r12577 - mlpack/trunk/src/mlpack/methods/lars

fastlab-svn at coffeetalk-1.cc.gatech.edu
Mon Apr 30 17:21:59 EDT 2012


Author: rcurtin
Date: 2012-04-30 17:21:58 -0400 (Mon, 30 Apr 2012)
New Revision: 12577

Modified:
   mlpack/trunk/src/mlpack/methods/lars/lars.cpp
   mlpack/trunk/src/mlpack/methods/lars/lars.hpp
Log:
Refactor LARS.  Fix comments, remove unnecessary methods, and consolidate
constructors.
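
For reference, the consolidated constructor can be used as in this minimal
sketch (hypothetical data; the defaults follow the new declarations in
lars.hpp below):

  #include <mlpack/methods/lars/lars.hpp>

  using namespace mlpack::regression;

  int main()
  {
    // Hypothetical data: 100 points (rows) in 10 dimensions (columns).
    arma::mat X = arma::randn<arma::mat>(100, 10);
    arma::vec y = arma::randn<arma::vec>(100);

    LARS lasso(true, 0.1);      // lambda1 > 0, lambda2 = 0: LASSO
    LARS enet(true, 0.1, 0.05); // both penalties nonzero: elastic net
    LARS plain(true);           // lambda1 = lambda2 = 0: unregularized path

    lasso.DoLARS(X, y);         // compute the solution path for X and y

    return 0;
  }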


Modified: mlpack/trunk/src/mlpack/methods/lars/lars.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/lars/lars.cpp	2012-04-30 20:21:48 UTC (rev 12576)
+++ mlpack/trunk/src/mlpack/methods/lars/lars.cpp	2012-04-30 21:21:58 UTC (rev 12577)
@@ -7,65 +7,42 @@
 
 #include "lars.hpp"
 
-// we are explicit with std:: to avoid confusing std::vector with arma::vec
-// we use arma namespace too often to explicitly use arma:: everywhere
-//using namespace std;
 using namespace arma;
 using namespace mlpack;
 using namespace mlpack::regression;
 
-LARS::LARS(const bool useCholesky) :
-    useCholesky(useCholesky),
-    lasso(false),
-    elasticNet(false)
-{ /* nothing left to do */ }
-
 LARS::LARS(const bool useCholesky,
-           const double lambda1) :
+           const double lambda1,
+           const double lambda2,
+           const double tolerance) :
+    matGram(matGramInternal),
     useCholesky(useCholesky),
-    lasso(true),
+    lasso((lambda1 != 0)),
     lambda1(lambda1),
-    elasticNet(false),
-    lambda2(0)
-{ /* nothing left to do */ }
+    elasticNet((lambda1 != 0) && (lambda2 != 0)),
+    lambda2(lambda2),
+    tolerance(tolerance)
+{ /* Nothing left to do. */ }
 
 LARS::LARS(const bool useCholesky,
+           const arma::mat& gramMatrix,
            const double lambda1,
-           const double lambda2) :
+           const double lambda2,
+           const double tolerance) :
+    matGram(gramMatrix),
     useCholesky(useCholesky),
-    lasso(true),
+    lasso((lambda1 != 0)),
     lambda1(lambda1),
-    elasticNet(true),
-    lambda2(lambda2)
-{ /* nothing left to do */ }
+    elasticNet((lambda1 != 0) && (lambda2 != 0)),
+    lambda2(lambda2),
+    tolerance(tolerance)
+{ /* Nothing left to do */ }
 
-void LARS::SetGram(const mat& matGram)
-{
-  this->matGram = matGram;
-}
-
-void LARS::SetGramMem(double* matGramMemPtr, uword nDims)
-{
-  this->matGram = mat(matGramMemPtr, nDims, nDims, false);
-}
-
-void LARS::ComputeGram(const mat& matX)
-{
-  if (elasticNet)
-  {
-    matGram = trans(matX) * matX + lambda2 * eye(matX.n_cols, matX.n_cols);
-  }
-  else
-  {
-    matGram = trans(matX) * matX;
-  }
-}
-
 void LARS::DoLARS(const mat& matX, const vec& y)
 {
   // compute Xty
   vec vecXTy = trans(matX) * y;
-  
+
   // set up active set variables
   nActive = 0;
   activeSet = std::vector<uword>(0);
@@ -79,12 +56,6 @@
 
   bool lassocond = false;
 
-  // used for elastic net
-  if (!elasticNet)
-  {
-    lambda2 = 0; // just in case it is accidentally used, the code still will be correct
-  }
-  
   vec corr = vecXTy;
   vec absCorr = abs(corr);
   uword changeInd;
@@ -92,7 +63,7 @@
 
   betaPath.push_back(beta);
   lambdaPath.push_back(maxCorr);
-  
+
   // don't even start!
   if (maxCorr < lambda1)
   {
@@ -100,22 +71,20 @@
     return;
   }
 
-  // compute Gram matrix
-  if (!useCholesky && matGram.is_empty())
+  // Compute the Gram matrix.  If this is the elastic net problem, we will add
+  // lambda2 * I_n to the matrix.
+  if (matGram.n_elem == 0)
   {
-    ComputeGram(matX);
+    // In this case, matGram should reference matGramInternal.
+    matGramInternal = trans(matX) * matX;
+
+    if (elasticNet && !useCholesky)
+      matGramInternal += lambda2 * eye(matX.n_cols, matX.n_cols);
   }
-  else if(useCholesky && matGram.is_empty()) {
-    //Log::Info << "You probably should compute the Gram matrix ahead of time when in Cholesky mode!\n";
-    matGram = trans(matX) * matX;
-  }
-  
-  //uword iterations_run = 0;
-  // MAIN LOOP
-  while ((nActive < matX.n_cols) && (maxCorr > EPS))
+
+  // Main loop.
+  while ((nActive < matX.n_cols) && (maxCorr > tolerance))
   {
-    //iterations_run++;
-    //printf("iteration %d\t\n", iterations_run);
 
     // explicit computation of max correlation, among inactive indices
     changeInd = -1;
@@ -143,8 +112,8 @@
         // {
         //   newGramCol[i] = dot(matX.col(activeSet[i]), matX.col(changeInd));
         // }
-	vec newGramCol = matGram.elem(changeInd * matX.n_cols + conv_to< uvec >::from(activeSet)); // this is equivalent to the above 5 lines - check this!
-	
+        vec newGramCol = matGram.elem(changeInd * matX.n_cols + conv_to< uvec >::from(activeSet)); // this is equivalent to the above 5 lines - check this!
+
         //CholeskyInsert(matX.col(changeInd), newGramCol);
         CholeskyInsert(matGram(changeInd, changeInd), newGramCol);
       }
@@ -282,7 +251,7 @@
         beta(activeSet[changeInd]) = 0;
       }
     }
-    
+
     betaPath.push_back(beta);
 
     if (lassocond)
@@ -330,9 +299,7 @@
 }
 
 
-
-  ////////// private functions //////////
-
+// Private functions.
 void LARS::Deactivate(uword activeVarInd)
 {
   nActive--;
@@ -419,7 +386,7 @@
     {
       sqNormNewX += lambda2;
     }
-      
+
     vec matUtriCholFactork = solve(trimatl(trans(matUtriCholFactor)),
 				   newGramCol);
 
@@ -433,7 +400,7 @@
   }
 }
 
-void LARS::GivensRotate(const vec::fixed<2>& x, vec::fixed<2>& rotatedX, mat& matG) 
+void LARS::GivensRotate(const vec::fixed<2>& x, vec::fixed<2>& rotatedX, mat& matG)
 {
   if (x(1) == 0)
   {
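
The SetGram()/SetGramMem() methods could be dropped because matGram is now a
const reference bound at construction time, either to the internal matrix or
to a caller-supplied one.  A standalone sketch of that pattern (illustrative
names only, not mlpack code):

  #include <armadillo>

  class GramHolder
  {
   public:
    // No external matrix: bind the reference to the (initially empty)
    // internal matrix, which is filled in later -- as DoLARS() does when
    // matGram.n_elem == 0.
    GramHolder() : matGram(matGramInternal) { }

    // External matrix: alias it directly, so the (possibly large) Gram
    // matrix is never copied and matGramInternal stays empty.
    GramHolder(const arma::mat& gramMatrix) : matGram(gramMatrix) { }

    const arma::mat& Gram() const { return matGram; }

   private:
    arma::mat matGramInternal;  // declared before the reference, so it is
    const arma::mat& matGram;   // constructed before the reference binds
  };

  int main()
  {
    arma::mat external = arma::randn<arma::mat>(5, 5);

    GramHolder internalCase;            // aliases the empty internal matrix
    GramHolder externalCase(external);  // aliases 'external'; no copy made

    return externalCase.Gram().n_rows == 5 ? 0 : 1;
  }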

Modified: mlpack/trunk/src/mlpack/methods/lars/lars.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/lars/lars.hpp	2012-04-30 20:21:48 UTC (rev 12576)
+++ mlpack/trunk/src/mlpack/methods/lars/lars.hpp	2012-04-30 21:21:58 UTC (rev 12577)
@@ -4,6 +4,17 @@
  *
  * Definition of the LARS class, which performs Least Angle Regression and the
  * LASSO.
+ *
+ * Only minor modifications of LARS are necessary to handle the constrained
+ * version of the problem:
+ *
+ * \f[
+ * \min_{\beta} 0.5 || X \beta - y ||_2^2 + 0.5 \lambda_2 || \beta ||_2^2
+ * \f]
+ * subject to \f$ ||\beta||_1 <= \tau \f$
+ *
+ * Although this option currently is not implemented, it will be implemented
+ * very soon.
  */
 #ifndef __MLPACK_METHODS_LARS_LARS_HPP
 #define __MLPACK_METHODS_LARS_LARS_HPP
@@ -11,8 +22,6 @@
 #include <armadillo>
 #include <mlpack/core.hpp>
 
-#define EPS 1e-16
-
 namespace mlpack {
 namespace regression {
 
@@ -21,28 +30,28 @@
 
 /**
  * An implementation of LARS, a stage-wise homotopy-based algorithm for
- * l1 regularized linear regression (LASSO) and l1+l2 regularized linear
+ * l1-regularized linear regression (LASSO) and l1+l2 regularized linear
  * regression (Elastic Net).
- * Let X be a matrix where each row is a point and each column is a dimension,
- * and let y be a vector of targets.
+ *
+ * Let \f$ X \f$ be a matrix where each row is a point and each column is a
+ * dimension and let \f$ y \f$ be a vector of targets.
+ *
  * The Elastic Net problem is to solve
- * min_beta 0.5 ||X beta - y||_2^2 + lambda_1 ||beta||_1 + 0.5 lambda_2 ||beta||_2^2
- * If lambda_1 > 0, lambda_2 = 0, the problem is the LASSO.
- * If lambda_1 > 0, lambda_2 > 0, the problem is the Elastic Net.
- * If lambda_1 = 0, lambda_2 > 0, the problem is Ridge Regression.
- * If lambda_1 = 0, lambda_2 = 0, the problem is unregularized linear
- *     regression.
  *
+ * \f[ \min_{\beta} 0.5 || X \beta - y ||_2^2 + \lambda_1 || \beta ||_1 +
+ *     0.5 \lambda_2 || \beta ||_2^2 \f]
+ *
+ * If \f$ \lambda_1 > 0 \f$ and \f$ \lambda_2 = 0 \f$, the problem is the LASSO.
+ * If \f$ \lambda_1 > 0 \f$ and \f$ \lambda_2 > 0 \f$, the problem is the
+ *   elastic net.
+ * If \f$ \lambda_1 = 0 \f$ and \f$ \lambda_2 > 0 \f$, the problem is ridge
+ *   regression.
+ * If \f$ \lambda_1 = 0 \f$ and \f$ \lambda_2 = 0 \f$, the problem is
+ *   unregularized linear regression.
+ *
  * Note: This algorithm is not recommended for use (in terms of efficiency)
- * when lambda_1 = 0.
+ * when \f$ \lambda_1 \f$ = 0.
  *
- * Only minor modifications are necessary to handle the constrained version of
- * the problem:
- *   min_beta 0.5 ||X beta - y||_2^2 + 0.5 lambda_2 ||beta||_2^2
- *   subject to ||beta||_1 <= tau
- * Although this option currently is not implemented, it will be implemented
- * very soon.
- *
  * For more details, see the following papers:
  *
  * @code
@@ -79,55 +88,35 @@
    *
    * @param useCholesky Whether or not to use Cholesky decomposition when
    *    solving linear system. If no, compute full Gram matrix at beginning.
+   * @param lambda1 Regularization parameter for l1-norm penalty.
+   * @param lambda2 Regularization parameter for l2-norm penalty.
+   * @param tolerance Run until the maximum correlation of elements in (X^T y)
+   *     is less than this.
    */
-  LARS(const bool useCholesky);
-
-  /**
-   * Set the parameters to LARS.  lambda2 defaults to 0.
-   *
-   * @param useCholesky Whether or not to use Cholesky decomposition when
-   *    solving linear system. If no, compute full Gram matrix at beginning.
-   * @param lambda1 Regularization parameter for l_1-norm penalty
-   */
   LARS(const bool useCholesky,
-       const double lambda1);
+       const double lambda1 = 0.0,
+       const double lambda2 = 0.0,
+       const double tolerance = 1e-16);
 
   /**
-   * Set the parameters to LARS.
+   * Set the parameters to LARS, and pass in a precalculated Gram matrix.  Both
+   * lambda1 and lambda2 default to 0.
    *
    * @param useCholesky Whether or not to use Cholesky decomposition when
-   *    solving linear system. If no, compute full Gram matrix at beginning.
-   * @param lambda1 Regularization parameter for l_1-norm penalty
-   * @param lambda2 Regularization parameter for l_2-norm penalty
+   *    solving linear system.
+   * @param gramMatrix Gram matrix.
+   * @param lambda1 Regularization parameter for l1-norm penalty.
+   * @param lambda2 Regularization parameter for l2-norm penalty.
+   * @param tolerance Run until the maximum correlation of elements in (X^T y)
+   *     is less than this.
    */
   LARS(const bool useCholesky,
-       const double lambda1,
-       const double lambda2);
+       const arma::mat& gramMatrix,
+       const double lambda1 = 0.0,
+       const double lambda2 = 0.0,
+       const double tolerance = 1e-16);
 
-  ~LARS() { }
-
   /**
-   * Set the Gram matrix (done before calling DoLars).
-   *
-   * @param matGram Matrix to which to set Gram matrix
-   */
-  void SetGram(const arma::mat& matGram);
-
-  /**
-   * Set the Gram matrix (done before calling DoLars) by reusing memory.
-   *
-   * @param matGram Matrix to which to set Gram matrix
-   */
-  void SetGramMem(double* matGramMemPtr, arma::uword nDims);
-
-  /**
-   * Compute Gram matrix. If elastic net, add lambda2 * identity to diagonal.
-   *
-   * @param matX Data matrix to use for computing Gram matrix
-   */
-  void ComputeGram(const arma::mat& matX);
-
-  /**
    * Run LARS.
    *
    * @param matX Input data into the algorithm - a matrix where each row is a
@@ -154,39 +143,58 @@
   const arma::mat& MatUtriCholFactor() const { return matUtriCholFactor; }
 
 private:
-  // Gram matrix
-  arma::mat matGram;
+  //! Gram matrix.
+  arma::mat matGramInternal;
 
-  // Upper triangular cholesky factor; initially 0x0 arma::matrix.
+  //! Reference to the Gram matrix we will use.
+  const arma::mat& matGram;
+
+  //! Upper triangular cholesky factor; initially 0x0 matrix.
   arma::mat matUtriCholFactor;
 
+  //! Whether or not to use Cholesky decomposition when solving linear system.
   bool useCholesky;
 
+  //! True if this is the LASSO problem.
   bool lasso;
+  //! Regularization parameter for l1 penalty.
   double lambda1;
 
+  //! True if this is the elastic net problem.
   bool elasticNet;
+  //! Regularization parameter for l2 penalty.
   double lambda2;
 
-  // solution path
+  //! Tolerance for main loop.
+  double tolerance;
+
+  //! Solution path.
   std::vector<arma::vec> betaPath;
 
-  // value of lambda1 for each solution in solution path
+  //! Value of lambda_1 for each solution in solution path.
   std::vector<double> lambdaPath;
 
-  // number of dimensions in active set
+  //! Number of dimensions in active set.
   arma::uword nActive;
 
-  // active set of dimensions
+  //! Active set of dimensions.
   std::vector<arma::uword> activeSet;
 
-  // active set membership indicator (for each dimension)
+  //! Active set membership indicator (for each dimension).
   std::vector<bool> isActive;
 
-  // remove activeVarInd'th element from active set
+  /**
+   * Remove activeVarInd'th element from active set.
+   *
+   * @param activeVarInd Index of element to remove from active set.
+   */
   void Deactivate(arma::uword activeVarInd);
 
-  // add dimension varInd to active set
+  /**
+   * Add dimension varInd to active set.
+   *
+   * @param varInd Dimension to add to active set.
+   */
   void Activate(arma::uword varInd);
 
   // compute "equiangular" direction in output space
@@ -204,7 +212,6 @@
   void GivensRotate(const arma::vec::fixed<2>& x, arma::vec::fixed<2>& rotatedX, arma::mat& G);
 
   void CholeskyDelete(arma::uword colToKill);
-
 };
 
 }; // namespace regression
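
A usage sketch for the new Gram-matrix constructor (hypothetical data; when
useCholesky is false, the caller should fold the lambda2 * I term into the
precomputed matrix, mirroring what DoLARS() does for the internally computed
Gram matrix):

  #include <mlpack/methods/lars/lars.hpp>

  using namespace mlpack::regression;

  int main()
  {
    const double lambda1 = 0.1;
    const double lambda2 = 0.05;

    // Hypothetical data: 200 points in 25 dimensions.
    arma::mat X = arma::randn<arma::mat>(200, 25);
    arma::vec y = arma::randn<arma::vec>(200);

    // Precompute the Gram matrix once (useful if several runs share it),
    // including the elastic net term since useCholesky is false here.
    arma::mat gram = arma::trans(X) * X
        + lambda2 * arma::eye(X.n_cols, X.n_cols);

    LARS enet(false, gram, lambda1, lambda2);
    enet.DoLARS(X, y);

    return 0;
  }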



