[mlpack-svn] r10818 - mlpack/trunk/src/mlpack/methods/lars
fastlab-svn at coffeetalk-1.cc.gatech.edu
fastlab-svn at coffeetalk-1.cc.gatech.edu
Wed Dec 14 18:54:40 EST 2011
Author: rcurtin
Date: 2011-12-14 18:54:40 -0500 (Wed, 14 Dec 2011)
New Revision: 10818
Modified:
mlpack/trunk/src/mlpack/methods/lars/lars.cpp
mlpack/trunk/src/mlpack/methods/lars/lars.hpp
mlpack/trunk/src/mlpack/methods/lars/lars_main.cpp
Log:
Format LARS a little better and give good program documentation.
Modified: mlpack/trunk/src/mlpack/methods/lars/lars.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/lars/lars.cpp 2011-12-14 23:27:16 UTC (rev 10817)
+++ mlpack/trunk/src/mlpack/methods/lars/lars.cpp 2011-12-14 23:54:40 UTC (rev 10818)
@@ -11,11 +11,9 @@
// we use arma namespace too often to explicitly use arma:: everywhere
//using namespace std;
using namespace arma;
+using namespace mlpack;
+using namespace mlpack::regression;
-
-namespace mlpack {
-namespace lars {
-
LARS::LARS(const bool useCholesky) :
useCholesky(useCholesky),
lasso(false),
@@ -132,9 +130,9 @@
{
vec newGramCol = vec(nActive);
for (u32 i = 0; i < nActive; i++)
- {
+ {
newGramCol[i] = dot(matX.col(activeSet[i]), matX.col(changeInd));
- }
+ }
CholeskyInsert(matX.col(changeInd), newGramCol);
}
@@ -214,13 +212,13 @@
double val1 = (maxCorr - corr(ind)) / (normalization - dirCorr);
double val2 = (maxCorr + corr(ind)) / (normalization + dirCorr);
if ((val1 > 0) && (val1 < gamma))
- {
- gamma = val1;
- }
+ {
+ gamma = val1;
+ }
if((val2 > 0) && (val2 < gamma))
- {
- gamma = val2;
- }
+ {
+ gamma = val2;
+ }
}
}
@@ -332,8 +330,8 @@
}
void LARS::ComputeYHatDirection(const mat& matX,
- const vec& betaDirection,
- vec& yHatDirection)
+ const vec& betaDirection,
+ vec& yHatDirection)
{
yHatDirection.fill(0);
for(u32 i = 0; i < nActive; i++)
@@ -468,12 +466,9 @@
if (k < n - 1)
{
matUtriCholFactor(span(k, k + 1), span(k + 1, n - 1)) =
- matG * matUtriCholFactor(span(k, k + 1), span(k + 1, n - 1));
+ matG * matUtriCholFactor(span(k, k + 1), span(k + 1, n - 1));
}
}
matUtriCholFactor.shed_row(n);
}
}
-
-}; // namespace lars
-}; // namespace mlpack
Modified: mlpack/trunk/src/mlpack/methods/lars/lars.hpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/lars/lars.hpp 2011-12-14 23:27:16 UTC (rev 10817)
+++ mlpack/trunk/src/mlpack/methods/lars/lars.hpp 2011-12-14 23:54:40 UTC (rev 10818)
@@ -14,36 +14,38 @@
#define EPS 1e-16
namespace mlpack {
-namespace lars {
+namespace regression {
// beta is the estimator
// yHat is the prediction from the current estimator
/**
- * An implementation of LARS, a stage-wise homotopy-based algorithm for
- * l1 regularized linear regression (LASSO) and l1+l2 regularized linear
+ * An implementation of LARS, a stage-wise homotopy-based algorithm for
+ * l1 regularized linear regression (LASSO) and l1+l2 regularized linear
* regression (Elastic Net).
- * Let X be a matrix where each row is a point and each column is a dimension,
- * and let y be a vector of targets.
+ * Let X be a matrix where each row is a point and each column is a dimension,
+ * and let y be a vector of targets.
* The Elastic Net problem is to solve
* min_beta ||X beta - y||_2^2 + lambda_1 ||beta||_1 + 0.5 lambda_2 ||beta||_2^2
* If lambda_1 > 0, lambda_2 = 0, the problem is the LASSO.
* If lambda_1 > 0, lambda_2 > 0, the problem is the Elastic Net.
* If lambda_1 = 0, lambda_2 > 0, the problem is Ridge Regression.
- * If lambda_1 = 0, lambda_2 = 0, the problem is unregularized linear regression.
+ * If lambda_1 = 0, lambda_2 = 0, the problem is unregularized linear
+ * regression.
*
- * Note: This algorithm is not recommended for use (in terms of efficiency)
+ * Note: This algorithm is not recommended for use (in terms of efficiency)
* when lambda_1 = 0.
- *
+ *
* Only minor modifications are necessary to handle the constrained version of
- * the problem:
+ * the problem:
* min_beta ||X beta - y||_2^2 + 0.5 lambda_2 ||beta||_2^2
* subject to ||beta||_1 <= tau
- * Although this option currently is not implemented, it will be implemented
+ * Although this option currently is not implemented, it will be implemented
* very soon.
*
* For more details, see the following papers:
*
+ * @code
* @article{efron2004least,
* title={Least angle regression},
* author={Efron, B. and Hastie, T. and Johnstone, I. and Tibshirani, R.},
@@ -54,7 +56,9 @@
* year={2004},
* publisher={Institute of Mathematical Statistics}
* }
+ * @endcode
*
+ * @code
* @article{zou2005regularization,
* title={Regularization and variable selection via the elastic net},
* author={Zou, H. and Hastie, T.},
@@ -65,26 +69,23 @@
* year={2005},
* publisher={Royal Statistical Society}
* }
+ * @endcode
*/
-class LARS {
-
+class LARS
+{
public:
-
/**
- * Set the parameters to LARS
- * Both lambda1 and lambda2 default to 0
+ * Set the parameters to LARS. Both lambda1 and lambda2 default to 0.
*
- * @param useCholesky Whether or not to use Cholesky decomposition when
+ * @param useCholesky Whether or not to use Cholesky decomposition when
* solving linear system. If no, compute full Gram matrix at beginning.
- * @param lambda1 Regularization parameter for l_1-norm penalty
*/
LARS(const bool useCholesky);
/**
- * Set the parameters to LARS
- * lambda2 defaults to 0
+ * Set the parameters to LARS. lambda2 defaults to 0.
*
- * @param useCholesky Whether or not to use Cholesky decomposition when
+ * @param useCholesky Whether or not to use Cholesky decomposition when
* solving linear system. If no, compute full Gram matrix at beginning.
* @param lambda1 Regularization parameter for l_1-norm penalty
*/
@@ -92,9 +93,9 @@
const double lambda1);
/**
- * Set the parameters to LARS
+ * Set the parameters to LARS.
*
- * @param useCholesky Whether or not to use Cholesky decomposition when
+ * @param useCholesky Whether or not to use Cholesky decomposition when
* solving linear system. If no, compute full Gram matrix at beginning.
* @param lambda1 Regularization parameter for l_1-norm penalty
* @param lambda2 Regularization parameter for l_2-norm penalty
@@ -105,110 +106,90 @@
~LARS() { }
- /*
- * Set the Gram matrix (done before calling DoLars)
+ /**
+ * Set the Gram matrix (done before calling DoLars).
*
* @param matGram Matrix to which to set Gram matrix
*/
void SetGram(const arma::mat& matGram);
-
- /*
+
+ /**
* Compute Gram matrix. If elastic net, add lambda2 * identity to diagonal.
*
* @param matX Data matrix to use for computing Gram matrix
*/
void ComputeGram(const arma::mat& matX);
-
- /*
- * Accessor for activeSet
- */
- const std::vector<arma::u32> ActiveSet()
- {
- return activeSet;
- }
-
- /*
- * Accessor for betaPath
- */
- const std::vector<arma::vec> BetaPath()
- {
- return betaPath;
- }
-
- /*
- * Accessor for lambdaPath
- */
- const std::vector<double> LambdaPath()
- {
- return lambdaPath;
- }
-
- /*
- * Accessor for matUtriCholFactor
- */
- const arma::mat MatUtriCholFactor()
- {
- return matUtriCholFactor;
- }
-
- /* Run LARS
- *
+
+ /**
+ * Run LARS.
+ *
* @param matX Input data into the algorithm - a matrix where each row is a
* point and each column is a dimension
* @param y A vector of targets
*/
void DoLARS(const arma::mat& matX, const arma::vec& y);
-
- /*
+
+ /**
* Load the solution vector, which is the last vector from the solution path
*/
void Solution(arma::vec& beta);
-
-
+
+ //! Accessor for activeSet.
+ const std::vector<arma::u32>& ActiveSet() const { return activeSet; }
+
+ //! Accessor for betaPath.
+ const std::vector<arma::vec>& BetaPath() const { return betaPath; }
+
+ //! Accessor for lambdaPath.
+ const std::vector<double>& LambdaPath() const { return lambdaPath; }
+
+ //! Accessor for matUtriCholFactor.
+ const arma::mat& MatUtriCholFactor() const { return matUtriCholFactor; }
+
private:
// Gram matrix
arma::mat matGram;
-
+
// Upper triangular cholesky factor; initially 0x0 arma::matrix.
arma::mat matUtriCholFactor;
-
+
bool useCholesky;
-
+
bool lasso;
double lambda1;
bool elasticNet;
double lambda2;
-
+
// solution path
std::vector<arma::vec> betaPath;
-
+
// value of lambda1 for each solution in solution path
std::vector<double> lambdaPath;
-
+
// number of dimensions in active set
arma::u32 nActive;
-
+
// active set of dimensions
std::vector<arma::u32> activeSet;
-
+
// active set membership indicator (for each dimension)
std::vector<bool> isActive;
-
+
// remove activeVarInd'th element from active set
void Deactivate(arma::u32 activeVarInd);
-
+
// add dimension varInd to active set
void Activate(arma::u32 varInd);
-
+
// compute "equiangular" direction in output space
void ComputeYHatDirection(const arma::mat& matX,
- const arma::vec& betaDirection,
+ const arma::vec& betaDirection,
arma::vec& yHatDirection);
// interpolate to compute last solution vector
void InterpolateBeta();
-
+
void CholeskyInsert(const arma::vec& newX, const arma::mat& X);
void CholeskyInsert(const arma::vec& newX, const arma::vec& newGramCol);
@@ -216,10 +197,10 @@
void GivensRotate(const arma::vec& x, arma::vec& rotatedX, arma::mat& G);
void CholeskyDelete(arma::u32 colToKill);
-
+
};
-}; // namespace lars
+}; // namespace regression
}; // namespace mlpack
#endif
Modified: mlpack/trunk/src/mlpack/methods/lars/lars_main.cpp
===================================================================
--- mlpack/trunk/src/mlpack/methods/lars/lars_main.cpp 2011-12-14 23:27:16 UTC (rev 10817)
+++ mlpack/trunk/src/mlpack/methods/lars/lars_main.cpp 2011-12-14 23:54:40 UTC (rev 10818)
@@ -4,58 +4,110 @@
*
* Executable for LARS
*/
+#include <mlpack/core.hpp>
-#include <mlpack/core.hpp>
-#include <armadillo>
#include "lars.hpp"
-using namespace arma;
-using namespace std;
-using namespace mlpack;
-using namespace mlpack::lars;
+PROGRAM_INFO("LARS", "An implementation of LARS: Least Angle Regression "
+ "(Stagewise/laSso). This is a stage-wise homotopy-based algorithm for "
+ "L1-regularized linear regression (LASSO) and L1+L2-regularized linear "
+ "regression (Elastic Net).\n"
+ "\n"
+ "Let X be a matrix where each row is a point and each column is a "
+ "dimension, and let y be a vector of targets.\n"
+ "\n"
+ "The Elastic Net problem is to solve\n\n"
+ " min_beta || X * beta - y ||_2^2 + lambda_1 ||beta||_1 +\n"
+ " 0.5 lambda_2 ||beta||_2^2\n\n"
+ "If lambda_1 > 0 and lambda_2 = 0, the problem is the LASSO.\n"
+ "If lambda_1 > 0 and lambda_2 > 0, the problem is the Elastic Net.\n"
+ "If lambda_1 = 0 and lambda_2 > 0, the problem is Ridge Regression.\n"
+ "If lambda_1 = 0 and lambda_2 = 0, the problem is unregularized linear "
+ "regression.\n"
+ "\n"
+ "For efficiency reasons, it is not recommended to use this algorithm with "
+ "lambda_1 = 0.\n"
+ "\n"
+ "For more details, see the following papers:\n"
+ "\n"
+ "@article{\n"
+ " title = {Least angle regression},\n"
+ " author = {Efron, B. and Hastie, T. and Johnstone, I. and Tibshirani,"
+ "R.},\n"
+ " journal = {The Annals of Statistics},\n"
+ " volume = {32},\n"
+ " number = {2},\n"
+ " pages = {407--499},\n"
+ " year = {2004},\n"
+ " publisher = {Institute of Mathematical Statistics}\n"
+ "}\n"
+ "\n"
+ "@article{\n"
+ " title = {Regularization and variable selection via the elastic net},\n"
+ " author = {Zou, H. and Hastie, T.},\n"
+ " journal = {Journal of the Royal Statistical Society Series B},\n"
+ " volume = {67},\n"
+ " number = {2},\n"
+ " pages = {301--320},\n"
+ " year = {2005},\n"
+ " publisher = {Royal Statistical Society}\n"
+ "}");
-PROGRAM_INFO("LARS", "An implementation of LARS: Least Angle Regression (Stagewise/laSso)");
+PARAM_STRING_REQ("input_file", "File containing covariates (X)",
+ "i");
+PARAM_STRING_REQ("responses_file", "File containing y "
+ "(responses/observations).", "r");
-PARAM_STRING_REQ("X", "Covariates filename (observations of input random "
- "variables)", "");
-PARAM_STRING_REQ("y", "Targets filename (observations of output random "
- "variable", "");
-PARAM_STRING_REQ("beta", "Solution filename (linear estimator)", "");
+PARAM_STRING("output_file", "File to save beta (linear estimator) to", "o",
+ "output.csv");
PARAM_DOUBLE("lambda1", "Regularization parameter for l1-norm penalty", "", 0);
PARAM_DOUBLE("lambda2", "Regularization parameter for l2-norm penalty", "", 0);
PARAM_FLAG("use_cholesky", "Use Cholesky decomposition during computation "
- "rather than explicitly computing full Gram matrix", "");
+ "rather than explicitly computing the full Gram matrix", "");
+using namespace arma;
+using namespace std;
+using namespace mlpack;
+using namespace mlpack::regression;
int main(int argc, char* argv[])
{
-
- // Handle parameters
+ // Handle parameters.
CLI::ParseCommandLine(argc, argv);
-
+
double lambda1 = CLI::GetParam<double>("lambda1");
double lambda2 = CLI::GetParam<double>("lambda2");
bool useCholesky = CLI::GetParam<bool>("use_cholesky");
- // load covariates
- const std::string matXFilename = CLI::GetParam<std::string>("X");
+ // Load covariates.
+ const string matXFilename = CLI::GetParam<string>("input_file");
mat matX;
- matX.load(matXFilename, raw_ascii);
-
- // load targets
- const std::string yFilename = CLI::GetParam<std::string>("y");
- vec y;
- y.load(yFilename, raw_ascii);
-
- // do LARS
+ data::Load(matXFilename.c_str(), matX, true);
+
+ // Load targets.
+ const string yFilename = CLI::GetParam<string>("responses_file");
+ mat matY; // Will be a vector.
+ data::Load(yFilename.c_str(), matY, true);
+
+ // Make sure y is oriented the right way.
+ if (matY.n_rows == 1)
+ matY = trans(matY);
+ if (matY.n_cols > 1)
+ Log::Fatal << "Only one column or row allowed in responses file!" << endl;
+
+ if (matY.n_elem != matX.n_cols)
+ Log::Fatal << "Number of responses must be equal to number of rows of X!"
+ << endl;
+
+ // Do LARS.
LARS lars(useCholesky, lambda1, lambda2);
- lars.DoLARS(matX, y);
-
- // get and save solution
+ lars.DoLARS(trans(matX), matY.unsafe_col(0));
+
+ // Get and save solution.
vec beta;
lars.Solution(beta);
-
- const std::string betaFilename = CLI::GetParam<std::string>("beta");
+
+ const string betaFilename = CLI::GetParam<string>("output_file");
beta.save(betaFilename, raw_ascii);
}
More information about the mlpack-svn
mailing list