[mlpack-git] master: add intercept term to softmax regression (918df71)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 22:17:46 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40
>---------------------------------------------------------------
commit 918df7156c20075839c61f9af50e0e1d189bed9e
Author: apir8181 <kazenoyumechen at gmail.com>
Date: Thu Mar 5 16:01:38 2015 +0800
add intercept term to softmax regression
>---------------------------------------------------------------
918df7156c20075839c61f9af50e0e1d189bed9e
.../softmax_regression/softmax_regression.hpp | 12 +++-
.../softmax_regression_function.cpp | 75 +++++++++++++++++-----
.../softmax_regression_function.hpp | 25 +++++++-
.../softmax_regression/softmax_regression_impl.hpp | 25 ++++++--
src/mlpack/tests/softmax_regression_test.cpp | 44 +++++++++++++
5 files changed, 160 insertions(+), 21 deletions(-)
diff --git a/src/mlpack/methods/softmax_regression/softmax_regression.hpp b/src/mlpack/methods/softmax_regression/softmax_regression.hpp
index 88db1c3..bf5e6c7 100644
--- a/src/mlpack/methods/softmax_regression/softmax_regression.hpp
+++ b/src/mlpack/methods/softmax_regression/softmax_regression.hpp
@@ -73,12 +73,14 @@ class SoftmaxRegression
* @param inputSize Size of the input feature vector.
* @param numClasses Number of classes for classification.
* @param lambda L2-regularization constant.
+ * @param fitIntercept Whether to fit an intercept term.
*/
SoftmaxRegression(const arma::mat& data,
const arma::vec& labels,
const size_t inputSize,
const size_t numClasses,
- const double lambda = 0.0001);
+ const double lambda = 0.0001,
+ const bool fitIntercept = false);
/**
* Construct the softmax regression model with the given training data. This
@@ -147,6 +149,12 @@ class SoftmaxRegression
return lambda;
}
+ //! Returns the fitIntercept flag (true if an intercept term is used).
+ bool FitIntercept() const
+ {
+ return fitIntercept;
+ }
+
private:
//! Parameters after optimization.
arma::mat parameters;
@@ -156,6 +164,8 @@ class SoftmaxRegression
size_t numClasses;
//! L2-regularization constant.
double lambda;
+ //! Intercept term flag.
+ bool fitIntercept;
};
}; // namespace regression
diff --git a/src/mlpack/methods/softmax_regression/softmax_regression_function.cpp b/src/mlpack/methods/softmax_regression/softmax_regression_function.cpp
index 97a4a2b..9fb5e64 100644
--- a/src/mlpack/methods/softmax_regression/softmax_regression_function.cpp
+++ b/src/mlpack/methods/softmax_regression/softmax_regression_function.cpp
@@ -13,11 +13,13 @@ SoftmaxRegressionFunction::SoftmaxRegressionFunction(const arma::mat& data,
const arma::vec& labels,
const size_t inputSize,
const size_t numClasses,
- const double lambda) :
+ const double lambda,
+ const bool fitIntercept) :
data(data),
inputSize(inputSize),
numClasses(numClasses),
- lambda(lambda)
+ lambda(lambda),
+ fitIntercept(fitIntercept)
{
// Intialize the parameters to suitable values.
initialPoint = InitializeWeights();
@@ -35,8 +37,12 @@ const arma::mat SoftmaxRegressionFunction::InitializeWeights()
{
// Initialize values to 0.005 * r. 'r' is a matrix of random values taken from
// a Gaussian distribution with mean zero and variance one.
+ // If the fitIntercept flag is true, parameters.col(0) is the intercept.
arma::mat parameters;
- parameters.randn(numClasses, inputSize);
+ if (fitIntercept)
+ parameters.randn(numClasses, inputSize + 1);
+ else
+ parameters.randn(numClasses, inputSize);
parameters = 0.005 * parameters;
return parameters;
@@ -77,6 +83,35 @@ void SoftmaxRegressionFunction::GetGroundTruthMatrix(const arma::vec& labels,
}
/**
+ * Evaluate the probabilities matrix. If the fitIntercept flag is true,
+ * parameters.col(0) is treated as the intercept term.
+ */
+void SoftmaxRegressionFunction::GetProbabilitiesMatrix(
+ const arma::mat& parameters, arma::mat& probabilities) const
+{
+ arma::mat hypothesis;
+
+ if (fitIntercept)
+ {
+ // Conceptually the intercept needs a row of ones prepended to the data:
+ // [1; data] = arma::join_cols(ones(1, data.n_cols), data)
+ // hypothesis = arma::exp(parameters * [1; data]).
+ //
+ // Joining would copy the whole data matrix, so instead add the intercept
+ // column (parameters.col(0)) and the weights-times-data product separately.
+ hypothesis = arma::exp(arma::repmat(parameters.col(0), 1, data.n_cols) +
+ parameters.cols(1, parameters.n_cols - 1) * data);
+ }
+ else
+ {
+ hypothesis = arma::exp(parameters * data);
+ }
+
+ probabilities = hypothesis / arma::repmat(arma::sum(hypothesis, 0),
+ numClasses, 1);
+}
+
+/**
* Evaluates the objective function given the parameters.
*/
double SoftmaxRegressionFunction::Evaluate(const arma::mat& parameters) const
@@ -97,11 +132,8 @@ double SoftmaxRegressionFunction::Evaluate(const arma::mat& parameters) const
// The sum is calculated over all the classes.
// x_i is the input vector for a particular training example.
// theta_j is the parameter vector associated with a particular class.
- arma::mat hypothesis, probabilities;
-
- hypothesis = arma::exp(parameters * data);
- probabilities = hypothesis / arma::repmat(arma::sum(hypothesis, 0),
- numClasses, 1);
+ arma::mat probabilities;
+ GetProbabilitiesMatrix(parameters, probabilities);
// Calculate the log likelihood and regularization terms.
double logLikelihood, weightDecay, cost;
@@ -129,13 +161,26 @@ void SoftmaxRegressionFunction::Gradient(const arma::mat& parameters,
// The sum is calculated over all the classes.
// x_i is the input vector for a particular training example.
// theta_j is the parameter vector associated with a particular class.
- arma::mat hypothesis, probabilities;
-
- hypothesis = arma::exp(parameters * data);
- probabilities = hypothesis / arma::repmat(arma::sum(hypothesis, 0),
- numClasses, 1);
+ arma::mat probabilities;
+ GetProbabilitiesMatrix(parameters, probabilities);
// Calculate the parameter gradients.
- gradient = (probabilities - groundTruth) * data.t() / data.n_cols +
- lambda * parameters;
+ gradient.set_size(parameters.n_rows, parameters.n_cols);
+ if (fitIntercept)
+ {
+ // Treat the intercept term parameters.col(0) separately to avoid
+ // the cost of building the matrix [1; data].
+ arma::mat inner = probabilities - groundTruth;
+ gradient.col(0) =
+ inner * arma::ones<arma::mat>(data.n_cols, 1) / data.n_cols +
+ lambda * parameters.col(0);
+ gradient.cols(1, parameters.n_cols - 1) =
+ inner * data.t() / data.n_cols +
+ lambda * parameters.cols(1, parameters.n_cols - 1);
+ }
+ else
+ {
+ gradient = (probabilities - groundTruth) * data.t() / data.n_cols +
+ lambda * parameters;
+ }
}
diff --git a/src/mlpack/methods/softmax_regression/softmax_regression_function.hpp b/src/mlpack/methods/softmax_regression/softmax_regression_function.hpp
index fc22384..bd09041 100644
--- a/src/mlpack/methods/softmax_regression/softmax_regression_function.hpp
+++ b/src/mlpack/methods/softmax_regression/softmax_regression_function.hpp
@@ -25,12 +25,14 @@ class SoftmaxRegressionFunction
* @param inputSize Size of the input feature vector.
* @param numClasses Number of classes for classification.
* @param lambda L2-regularization constant.
+ * @param fitIntercept Intercept term flag.
*/
SoftmaxRegressionFunction(const arma::mat& data,
const arma::vec& labels,
const size_t inputSize,
const size_t numClasses,
- const double lambda = 0.0001);
+ const double lambda = 0.0001,
+ const bool fitIntercept = false);
//! Initializes the parameters of the model to suitable values.
const arma::mat InitializeWeights();
@@ -44,6 +46,18 @@ class SoftmaxRegressionFunction
void GetGroundTruthMatrix(const arma::vec& labels, arma::sp_mat& groundTruth);
/**
+ * Evaluate the probabilities matrix with the passed parameters.
+ * probabilities(i, j) =
+ * exp(\theta_i * data_j) / sum_k(exp(\theta_k * data_j)).
+ * It represents the probability that data_j belongs to class i.
+ *
+ * @param parameters Current values of the model parameters.
+ * @param probabilities Output matrix in which the probabilities are stored.
+ */
+ void GetProbabilitiesMatrix(const arma::mat& parameters,
+ arma::mat& probabilities) const;
+
+ /**
* Evaluates the objective function of the softmax regression model using the
* given parameters. The cost function has terms for the log likelihood error
* and the regularization cost. The objective function takes a low value when
@@ -104,6 +118,12 @@ class SoftmaxRegressionFunction
return lambda;
}
+ //! Returns the fitIntercept flag (true if an intercept term is used).
+ bool FitIntercept() const
+ {
+ return fitIntercept;
+ }
+
private:
//! Training data matrix.
const arma::mat& data;
@@ -117,6 +137,9 @@ class SoftmaxRegressionFunction
size_t numClasses;
//! L2-regularization constant.
double lambda;
+ //! Intercept term flag.
+ bool fitIntercept;
+
};
}; // namespace regression
diff --git a/src/mlpack/methods/softmax_regression/softmax_regression_impl.hpp b/src/mlpack/methods/softmax_regression/softmax_regression_impl.hpp
index b16ea0f..9a810ac 100644
--- a/src/mlpack/methods/softmax_regression/softmax_regression_impl.hpp
+++ b/src/mlpack/methods/softmax_regression/softmax_regression_impl.hpp
@@ -18,13 +18,15 @@ SoftmaxRegression<OptimizerType>::SoftmaxRegression(const arma::mat& data,
const arma::vec& labels,
const size_t inputSize,
const size_t numClasses,
- const double lambda) :
+ const double lambda,
+ const bool fitIntercept) :
inputSize(inputSize),
numClasses(numClasses),
- lambda(lambda)
+ lambda(lambda),
+ fitIntercept(fitIntercept)
{
SoftmaxRegressionFunction regressor(data, labels, inputSize, numClasses,
- lambda);
+ lambda, fitIntercept);
OptimizerType<SoftmaxRegressionFunction> optimizer(regressor);
parameters = regressor.GetInitialPoint();
@@ -61,8 +63,23 @@ void SoftmaxRegression<OptimizerType>::Predict(const arma::mat& testData,
{
// Calculate the probabilities for each test input.
arma::mat hypothesis, probabilities;
+ if (fitIntercept)
+ {
+ // In order to add the intercept term, we should compute following matrix:
+ // [1; data] = arma::join_cols(ones(1, data.n_cols), data)
+ // hypothesis = arma::exp(parameters * [1; data]).
+ //
+ // Since the join may be costly due to copying the original data,
+ // split the hypothesis computation into two components.
+ hypothesis = arma::exp(
+ arma::repmat(parameters.col(0), 1, testData.n_cols) +
+ parameters.cols(1, parameters.n_cols - 1) * testData);
+ }
+ else
+ {
+ hypothesis = arma::exp(parameters * testData);
+ }
- hypothesis = arma::exp(parameters * testData);
probabilities = hypothesis / arma::repmat(arma::sum(hypothesis, 0),
numClasses, 1);
diff --git a/src/mlpack/tests/softmax_regression_test.cpp b/src/mlpack/tests/softmax_regression_test.cpp
index da6a522..01b0eae 100644
--- a/src/mlpack/tests/softmax_regression_test.cpp
+++ b/src/mlpack/tests/softmax_regression_test.cpp
@@ -215,6 +215,50 @@ BOOST_AUTO_TEST_CASE(SoftmaxRegressionTwoClasses)
BOOST_REQUIRE_CLOSE(testAcc, 100.0, 0.6);
}
+BOOST_AUTO_TEST_CASE(SoftmaxRegressionFitIntercept)
+{
+ // Generate a two-Gaussian dataset (500 points per class),
+ // which can't be separated without adding the intercept term.
+ GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye<arma::mat>(3, 3));
+ GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye<arma::mat>(3, 3));
+
+ arma::mat data(3, 1000);
+ arma::vec responses(1000);
+ for (size_t i = 0; i < 500; ++i)
+ {
+ data.col(i) = g1.Random();
+ responses[i] = 0;
+ }
+ for (size_t i = 500; i < 1000; ++i)
+ {
+ data.col(i) = g2.Random();
+ responses[i] = 1;
+ }
+
+ // Now train a softmax regression object on it, with the intercept enabled.
+ SoftmaxRegression<> lr(data, responses, 3, 2, 0.01, true);
+
+ // Ensure that the error is close to zero.
+ const double acc = lr.ComputeAccuracy(data, responses);
+ BOOST_REQUIRE_CLOSE(acc, 100.0, 2.0);
+
+ // Create a test set; every column (including index 500) is regenerated.
+ for (size_t i = 0; i < 500; ++i)
+ {
+ data.col(i) = g1.Random();
+ responses[i] = 0;
+ }
+ for (size_t i = 500; i < 1000; ++i)
+ {
+ data.col(i) = g2.Random();
+ responses[i] = 1;
+ }
+
+ // Ensure that the error is close to zero.
+ const double testAcc = lr.ComputeAccuracy(data, responses);
+ BOOST_REQUIRE_CLOSE(testAcc, 100.0, 2.0);
+}
+
BOOST_AUTO_TEST_CASE(SoftmaxRegressionMultipleClasses)
{
const size_t points = 5000;
More information about the mlpack-git
mailing list