[mlpack-svn] r10818 - mlpack/trunk/src/mlpack/methods/lars

fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Dec 14 18:54:40 EST 2011


Author: rcurtin
Date: 2011-12-14 18:54:40 -0500 (Wed, 14 Dec 2011)
New Revision: 10818

Modified:
   mlpack/trunk/src/mlpack/methods/lars/lars.cpp
   mlpack/trunk/src/mlpack/methods/lars/lars.hpp
   mlpack/trunk/src/mlpack/methods/lars/lars_main.cpp
Log:
Format LARS a little better and give good program documentation.


Modified: mlpack/trunk/src/mlpack/methods/lars/lars.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/lars/lars.cpp	2011-12-14 23:27:16 UTC (rev 10817)
+++ mlpack/trunk/src/mlpack/methods/lars/lars.cpp	2011-12-14 23:54:40 UTC (rev 10818)
@@ -11,11 +11,9 @@
 // we use arma namespace too often to explicitly use arma:: everywhere
 //using namespace std;
 using namespace arma;
+using namespace mlpack;
+using namespace mlpack::regression;
 
-
-namespace mlpack {
-namespace lars {
-
 LARS::LARS(const bool useCholesky) :
     useCholesky(useCholesky),
     lasso(false),
@@ -132,9 +130,9 @@
       {
         vec newGramCol = vec(nActive);
         for (u32 i = 0; i < nActive; i++)
-	{
+        {
           newGramCol[i] = dot(matX.col(activeSet[i]), matX.col(changeInd));
-	}
+        }
 
         CholeskyInsert(matX.col(changeInd), newGramCol);
       }
@@ -214,13 +212,13 @@
         double val1 = (maxCorr - corr(ind)) / (normalization - dirCorr);
         double val2 = (maxCorr + corr(ind)) / (normalization + dirCorr);
         if ((val1 > 0) && (val1 < gamma))
-	{
-	  gamma = val1;
-	}
+        {
+          gamma = val1;
+        }
         if((val2 > 0) && (val2 < gamma))
-	{
-	  gamma = val2;
-	}
+        {
+          gamma = val2;
+        }
       }
     }
 
@@ -332,8 +330,8 @@
 }
 
  void LARS::ComputeYHatDirection(const mat& matX,
-				 const vec& betaDirection,
-				 vec& yHatDirection)
+                                 const vec& betaDirection,
+                                 vec& yHatDirection)
 {
   yHatDirection.fill(0);
   for(u32 i = 0; i < nActive; i++)
@@ -468,12 +466,9 @@
       if (k < n - 1)
       {
         matUtriCholFactor(span(k, k + 1), span(k + 1, n - 1)) = 
-	  matG * matUtriCholFactor(span(k, k + 1), span(k + 1, n - 1));
+          matG * matUtriCholFactor(span(k, k + 1), span(k + 1, n - 1));
       }
     }
     matUtriCholFactor.shed_row(n);
   }
 }
-
-}; // namespace lars
-}; // namespace mlpack

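For reference, the step-size logic touched by the whitespace fixes above is
the standard LARS step-length rule from Efron et al. (2004): take the
smallest positive step that brings a candidate dimension's correlation up to
the current maximum.  A minimal standalone sketch follows (assuming
Armadillo; the function name StepLength and the packaging of corr, dirCorr,
maxCorr, and normalization into vector/scalar arguments are illustrative
stand-ins, not the library's internals):

  #include <armadillo>

  // Sketch of the step-length rule in the hunk above: gamma is the smallest
  // positive candidate step over all inactive dimensions.
  double StepLength(const arma::vec& corr, const arma::vec& dirCorr,
                    const double maxCorr, const double normalization)
  {
    double gamma = maxCorr / normalization; // Upper bound on the step.
    for (arma::u32 ind = 0; ind < corr.n_elem; ind++)
    {
      const double val1 = (maxCorr - corr(ind)) /
          (normalization - dirCorr(ind));
      const double val2 = (maxCorr + corr(ind)) /
          (normalization + dirCorr(ind));

      if ((val1 > 0) && (val1 < gamma))
        gamma = val1;
      if ((val2 > 0) && (val2 < gamma))
        gamma = val2;
    }

    return gamma;
  }
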
Modified: mlpack/trunk/src/mlpack/methods/lars/lars.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/lars/lars.hpp	2011-12-14 23:27:16 UTC (rev 10817)
+++ mlpack/trunk/src/mlpack/methods/lars/lars.hpp	2011-12-14 23:54:40 UTC (rev 10818)
@@ -14,36 +14,38 @@
 #define EPS 1e-16
 
 namespace mlpack {
-namespace lars {
+namespace regression {
 
 // beta is the estimator
 // yHat is the prediction from the current estimator
 
 /**
- * An implementation of LARS, a stage-wise homotopy-based algorithm for 
- * l1 regularized linear regression (LASSO) and l1+l2 regularized linear 
+ * An implementation of LARS, a stage-wise homotopy-based algorithm for
+ * l1 regularized linear regression (LASSO) and l1+l2 regularized linear
  * regression (Elastic Net).
- * Let X be a matrix where each row is a point and each column is a dimension, 
- * and let y be a vector of targets. 
+ * Let X be a matrix where each row is a point and each column is a dimension,
+ * and let y be a vector of targets.
  * The Elastic Net problem is to solve
  * min_beta ||X beta - y||_2^2 + lambda_1 ||beta||_1 + 0.5 lambda_2 ||beta||_2^2
  * If lambda_1 > 0, lambda_2 = 0, the problem is the LASSO.
  * If lambda_1 > 0, lambda_2 > 0, the problem is the Elastic Net.
  * If lambda_1 = 0, lambda_2 > 0, the problem is Ridge Regression.
- * If lambda_1 = 0, lambda_2 = 0, the problem is unregularized linear regression.
+ * If lambda_1 = 0, lambda_2 = 0, the problem is unregularized linear
+ *     regression.
  *
- * Note: This algorithm is not recommended for use (in terms of efficiency) 
+ * Note: for efficiency reasons, this algorithm is not recommended for use
  * when lambda_1 = 0.
- * 
+ *
  * Only minor modifications are necessary to handle the constrained version of
- * the problem: 
+ * the problem:
  *   min_beta ||X beta - y||_2^2 + 0.5 lambda_2 ||beta||_2^2
  *   subject to ||beta||_1 <= tau
- * Although this option currently is not implemented, it will be implemented 
+ * Although this option currently is not implemented, it will be implemented
  * very soon.
  *
  * For more details, see the following papers:
  *
+ * @code
  * @article{efron2004least,
  *   title={Least angle regression},
  *   author={Efron, B. and Hastie, T. and Johnstone, I. and Tibshirani, R.},
@@ -54,7 +56,9 @@
  *   year={2004},
  *   publisher={Institute of Mathematical Statistics}
  * }
+ * @endcode
  *
+ * @code
  * @article{zou2005regularization,
  *   title={Regularization and variable selection via the elastic net},
  *   author={Zou, H. and Hastie, T.},
@@ -65,26 +69,23 @@
  *   year={2005},
  *   publisher={Royal Statistical Society}
  * }
+ * @endcode
  */
-class LARS {
-
+class LARS
+{
  public:
-  
   /**
-   * Set the parameters to LARS
-   * Both lambda1 and lambda2 default to 0
+   * Set the parameters to LARS.  Both lambda1 and lambda2 default to 0.
    *
-   * @param useCholesky Whether or not to use Cholesky decomposition when 
+   * @param useCholesky Whether or not to use Cholesky decomposition when
   *    solving linear systems; if not, the full Gram matrix is computed first.
-   * @param lambda1 Regularization parameter for l_1-norm penalty
    */
   LARS(const bool useCholesky);
 
   /**
-   * Set the parameters to LARS
-   * lambda2 defaults to 0
+   * Set the parameters to LARS.  lambda2 defaults to 0.
    *
-   * @param useCholesky Whether or not to use Cholesky decomposition when 
+   * @param useCholesky Whether or not to use Cholesky decomposition when
   *    solving linear systems; if not, the full Gram matrix is computed first.
    * @param lambda1 Regularization parameter for l_1-norm penalty
    */
@@ -92,9 +93,9 @@
        const double lambda1);
 
   /**
-   * Set the parameters to LARS
+   * Set the parameters to LARS.
    *
-   * @param useCholesky Whether or not to use Cholesky decomposition when 
+   * @param useCholesky Whether or not to use Cholesky decomposition when
   *    solving linear systems; if not, the full Gram matrix is computed first.
    * @param lambda1 Regularization parameter for l_1-norm penalty
    * @param lambda2 Regularization parameter for l_2-norm penalty
@@ -105,110 +106,90 @@
 
   ~LARS() { }
 
-  /*
-   * Set the Gram matrix (done before calling DoLars)
+  /**
+   * Set the Gram matrix (done before calling DoLars).
    *
    * @param matGram Matrix to which to set Gram matrix
    */
   void SetGram(const arma::mat& matGram);
-  
-  /*
+
+  /**
    * Compute Gram matrix. If elastic net, add lambda2 * identity to diagonal.
    *
    * @param matX Data matrix to use for computing Gram matrix
    */
   void ComputeGram(const arma::mat& matX);
-  
-  /*
-   * Accessor for activeSet
-   */
-  const std::vector<arma::u32> ActiveSet()
-  {
-    return activeSet;
-  }
-  
-  /*
-   * Accessor for betaPath
-   */
-  const std::vector<arma::vec> BetaPath()
-  {
-    return betaPath;
-  }
-  
-  /*
-   * Accessor for lambdaPath
-   */
-  const std::vector<double> LambdaPath()
-  {
-    return lambdaPath;
-  }
-  
-  /* 
-   * Accessor for matUtriCholFactor
-   */
-  const arma::mat MatUtriCholFactor()
-  {
-    return matUtriCholFactor;
-  }
-  
-  /* Run LARS
-   * 
+
+  /**
+   * Run LARS.
+   *
    * @param matX Input data into the algorithm - a matrix where each row is a
    *    point and each column is a dimension
    * @param y A vector of targets
    */
   void DoLARS(const arma::mat& matX, const arma::vec& y);
-  
-  /* 
+
+  /**
   * Load the solution vector, which is the last vector from the solution path.
    */
   void Solution(arma::vec& beta);
-  
-  
+
+  //! Accessor for activeSet.
+  const std::vector<arma::u32>& ActiveSet() const { return activeSet; }
+
+  //! Accessor for betaPath.
+  const std::vector<arma::vec>& BetaPath() const { return betaPath; }
+
+  //! Accessor for lambdaPath.
+  const std::vector<double>& LambdaPath() const { return lambdaPath; }
+
+  //! Accessor for matUtriCholFactor.
+  const arma::mat& MatUtriCholFactor() const { return matUtriCholFactor; }
+
 private:
   // Gram matrix
   arma::mat matGram;
-  
+
   // Upper triangular Cholesky factor; initially a 0x0 matrix.
   arma::mat matUtriCholFactor;
-  
+
   bool useCholesky;
-  
+
   bool lasso;
   double lambda1;
 
   bool elasticNet;
   double lambda2;
-  
+
   // solution path
   std::vector<arma::vec> betaPath;
-  
+
   // value of lambda1 for each solution in solution path
   std::vector<double> lambdaPath;
-  
+
   // number of dimensions in active set
   arma::u32 nActive;
-  
+
   // active set of dimensions
   std::vector<arma::u32> activeSet;
-  
+
   // active set membership indicator (for each dimension)
   std::vector<bool> isActive;
-  
+
   // remove activeVarInd'th element from active set
   void Deactivate(arma::u32 activeVarInd);
-  
+
   // add dimension varInd to active set
   void Activate(arma::u32 varInd);
-  
+
   // compute "equiangular" direction in output space
   void ComputeYHatDirection(const arma::mat& matX,
-			    const arma::vec& betaDirection,
+                            const arma::vec& betaDirection,
                             arma::vec& yHatDirection);
 
   // interpolate to compute last solution vector
   void InterpolateBeta();
-  
+
   void CholeskyInsert(const arma::vec& newX, const arma::mat& X);
 
   void CholeskyInsert(const arma::vec& newX, const arma::vec& newGramCol);
@@ -216,10 +197,10 @@
   void GivensRotate(const arma::vec& x, arma::vec& rotatedX, arma::mat& G);
 
   void CholeskyDelete(arma::u32 colToKill);
-  
+
 };
 
-}; // namespace lars
+}; // namespace regression
 }; // namespace mlpack
 
 #endif

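For anyone trying the newly documented class interface, a minimal usage
sketch follows.  The include path and the random data are assumptions for
illustration; the constructor, DoLARS(), and Solution() signatures are as
documented in the header above.

  #include <mlpack/core.hpp>
  #include <mlpack/methods/lars/lars.hpp>

  using namespace mlpack::regression;

  int main()
  {
    // Hypothetical data: each row is a point and each column is a
    // dimension, as DoLARS() expects.
    arma::mat matX = arma::randu<arma::mat>(100, 10);
    arma::vec y = arma::randu<arma::vec>(100);

    // LASSO problem: lambda1 > 0, lambda2 = 0, using the Cholesky
    // decomposition to solve the linear systems.
    LARS lars(true, 0.5, 0.0);
    lars.DoLARS(matX, y);

    // The solution is the last vector on the solution path.
    arma::vec beta;
    lars.Solution(beta);

    return 0;
  }
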
Modified: mlpack/trunk/src/mlpack/methods/lars/lars_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/lars/lars_main.cpp	2011-12-14 23:27:16 UTC (rev 10817)
+++ mlpack/trunk/src/mlpack/methods/lars/lars_main.cpp	2011-12-14 23:54:40 UTC (rev 10818)
@@ -4,58 +4,110 @@
  *
  * Executable for LARS
  */
+#include <mlpack/core.hpp>
 
-#include <mlpack/core.hpp>
-#include <armadillo>
 #include "lars.hpp"
 
-using namespace arma;
-using namespace std;
-using namespace mlpack;
-using namespace mlpack::lars;
+PROGRAM_INFO("LARS", "An implementation of LARS: Least Angle Regression "
+    "(Stagewise/laSso).  This is a stage-wise homotopy-based algorithm for "
+    "L1-regularized linear regression (LASSO) and L1+L2-regularized linear "
+    "regression (Elastic Net).\n"
+    "\n"
+    "Let X be a matrix where each row is a point and each column is a "
+    "dimension, and let y be a vector of targets.\n"
+    "\n"
+    "The Elastic Net problem is to solve\n\n"
+    "  min_beta || X * beta - y ||_2^2 + lambda_1 ||beta||_1 +\n"
+    "      0.5 lambda_2 ||beta||_2^2\n\n"
+    "If lambda_1 > 0 and lambda_2 = 0, the problem is the LASSO.\n"
+    "If lambda_1 > 0 and lambda_2 > 0, the problem is the Elastic Net.\n"
+    "If lambda_1 = 0 and lambda_2 > 0, the problem is Ridge Regression.\n"
+    "If lambda_1 = 0 and lambda_2 = 0, the problem is unregularized linear "
+    "regression.\n"
+    "\n"
+    "For efficiency reasons, it is not recommended to use this algorithm with "
+    "lambda_1 = 0.\n"
+    "\n"
+    "For more details, see the following papers:\n"
+    "\n"
+    "@article{\n"
+    "  title = {Least angle regression},\n"
+    "  author = {Efron, B. and Hastie, T. and Johnstone, I. and Tibshirani,"
+    "R.},\n"
+    "  journal = {The Annals of Statistics},\n"
+    "  volume = {32},\n"
+    "  number = {2},\n"
+    "  pages = {407--499},\n"
+    "  year = {2004},\n"
+    "  publisher = {Institute of Mathematical Statistics}\n"
+    "}\n"
+    "\n"
+    "@article{\n"
+    "  title = {Regularization and variable selection via the elastic net},\n"
+    "  author = {Zou, H. and Hastie, T.},\n"
+    "  journal = {Journal of the Royal Statistical Society Series B},\n"
+    "  volume = {67},\n"
+    "  number = {2},\n"
+    "  pages = {301--320},\n"
+    "  year = {2005},\n"
+    "  publisher = {Royal Statistical Society}\n"
+    "}");
 
-PROGRAM_INFO("LARS", "An implementation of LARS: Least Angle Regression (Stagewise/laSso)");
+PARAM_STRING_REQ("input_file", "File containing covariates (X)",
+    "i");
+PARAM_STRING_REQ("responses_file", "File containing y "
+    "(responses/observations).", "r");
 
-PARAM_STRING_REQ("X", "Covariates filename (observations of input random "
-		 "variables)", "");
-PARAM_STRING_REQ("y", "Targets filename (observations of output random "
-		 "variable", "");
-PARAM_STRING_REQ("beta", "Solution filename (linear estimator)", "");
+PARAM_STRING("output_file", "File to save beta (linear estimator) to", "o",
+    "output.csv");
 
 PARAM_DOUBLE("lambda1", "Regularization parameter for l1-norm penalty", "", 0);
 PARAM_DOUBLE("lambda2", "Regularization parameter for l2-norm penalty", "", 0);
 PARAM_FLAG("use_cholesky", "Use Cholesky decomposition during computation "
-	   "rather than explicitly computing full Gram matrix", "");
+    "rather than explicitly computing the full Gram matrix", "");
 
+using namespace arma;
+using namespace std;
+using namespace mlpack;
+using namespace mlpack::regression;
 
 int main(int argc, char* argv[])
 {
-  
-  // Handle parameters
+  // Handle parameters.
   CLI::ParseCommandLine(argc, argv);
-  
+
   double lambda1 = CLI::GetParam<double>("lambda1");
   double lambda2 = CLI::GetParam<double>("lambda2");
   bool useCholesky = CLI::GetParam<bool>("use_cholesky");
 
-  // load covariates
-  const std::string matXFilename = CLI::GetParam<std::string>("X");
+  // Load covariates.
+  const string matXFilename = CLI::GetParam<string>("input_file");
   mat matX;
-  matX.load(matXFilename, raw_ascii);
-  
-  // load targets
-  const std::string yFilename = CLI::GetParam<std::string>("y");
-  vec y;
-  y.load(yFilename, raw_ascii);
-  
-  // do LARS
+  data::Load(matXFilename.c_str(), matX, true);
+
+  // Load targets.
+  const string yFilename = CLI::GetParam<string>("responses_file");
+  mat matY; // Will be a vector.
+  data::Load(yFilename.c_str(), matY, true);
+
+  // Make sure y is oriented the right way.
+  if (matY.n_rows == 1)
+    matY = trans(matY);
+  if (matY.n_cols > 1)
+    Log::Fatal << "Only one column or row allowed in responses file!" << endl;
+
+  if (matY.n_elem != matX.n_cols)
+    Log::Fatal << "Number of responses must be equal to number of rows of X!"
+        << endl;
+
+  // Do LARS.
   LARS lars(useCholesky, lambda1, lambda2);
-  lars.DoLARS(matX, y);
-  
-  // get and save solution
+  lars.DoLARS(trans(matX), matY.unsafe_col(0));
+
+  // Get and save solution.
   vec beta;
   lars.Solution(beta);
-  
-  const std::string betaFilename = CLI::GetParam<std::string>("beta");
+
+  const string betaFilename = CLI::GetParam<string>("output_file");
   beta.save(betaFilename, raw_ascii);
 }

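With the renamed options, an invocation of the rebuilt executable might look
like the following (the binary name 'lars' is an assumption about the build
output):

  $ lars -i covariates.csv -r responses.csv -o beta.csv --lambda1 0.5 \
      --use_cholesky

Since the covariates are loaded with data::Load() and transposed before
DoLARS(), the input file should hold one point per row; the responses file
must contain a single row or column, with one response per point.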


