[mlpack-git] master: Add Classify() functions and tests. (5546ebc)
gitdub at mlpack.org
gitdub at mlpack.org
Wed Jun 1 14:27:23 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/e6d2ca7bf64b47a36ac489335cf0dd8933e13076...5546ebcf02598c9da06e19ed447e73ddcd0d3347
>---------------------------------------------------------------
commit 5546ebcf02598c9da06e19ed447e73ddcd0d3347
Author: Ryan Curtin <ryan at ratml.org>
Date: Wed Jun 1 11:26:09 2016 -0700
Add Classify() functions and tests.
>---------------------------------------------------------------
5546ebcf02598c9da06e19ed447e73ddcd0d3347
.../logistic_regression/logistic_regression.hpp | 41 ++++++
.../logistic_regression_impl.hpp | 31 +++++
src/mlpack/tests/logistic_regression_test.cpp | 150 +++++++++++++++++++++
3 files changed, 222 insertions(+)
diff --git a/src/mlpack/methods/logistic_regression/logistic_regression.hpp b/src/mlpack/methods/logistic_regression/logistic_regression.hpp
index 0d56e0c..008193a 100644
--- a/src/mlpack/methods/logistic_regression/logistic_regression.hpp
+++ b/src/mlpack/methods/logistic_regression/logistic_regression.hpp
@@ -152,6 +152,8 @@ class LogisticRegression
* the decision boundary, the response is taken to be 1; otherwise, it is 0.
* By default the decision boundary is 0.5.
*
+ * This method is deprecated---you should use Classify() instead.
+ *
* @param predictors Input predictors.
* @param responses Vector to put output predictions of responses into.
* @param decisionBoundary Decision boundary (default 0.5).
@@ -161,6 +163,45 @@ class LogisticRegression
const double decisionBoundary = 0.5) const;
/**
+ * Classify a single point and return its predicted label. Logistic
+ * regression produces a value between 0 and 1 for the point; if that value
+ * is greater than the decision boundary, the label is 1; otherwise, it is 0.
+ * By default the decision boundary is 0.5.
+ *
+ * @tparam VecType Vector type of the point; it is passed to arma::dot().
+ * @param point Point to classify.
+ * @param decisionBoundary Decision boundary (default 0.5).
+ * @return Predicted label of point.
+ */
+ template<typename VecType>
+ size_t Classify(const VecType& point,
+ const double decisionBoundary = 0.5) const;
+
+ /**
+ * Classify the given points, storing the predicted label of each point in
+ * the labels vector. Optionally, specify the decision boundary; logistic
+ * regression returns a value between 0 and 1. If the value is greater than
+ * the decision boundary, the response is taken to be 1; otherwise, it is 0.
+ * By default the decision boundary is 0.5.
+ *
+ * @param dataset Set of points to classify.
+ * @param labels Predicted labels for each point (output).
+ * @param decisionBoundary Decision boundary (default 0.5).
+ */
+ void Classify(const MatType& dataset,
+ arma::Row<size_t>& labels,
+ const double decisionBoundary = 0.5) const;
+
+ /**
+ * Classify the given points, writing per-class probabilities for each point.
+ *
+ * @param dataset Set of points to classify; each column is one point.
+ * @param probabilities Output, 2 x (number of points): row c is P(class c).
+ */
+ void Classify(const MatType& dataset,
+ arma::mat& probabilities) const;
+
+ /**
* Compute the accuracy of the model on the given predictors and responses,
* optionally using the given decision boundary. The responses should be
* either 0 or 1. Logistic regression returns a value between 0 and 1. If
diff --git a/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp b/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp
index ee4396e..5b6a2c6 100644
--- a/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp
+++ b/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp
@@ -106,6 +106,37 @@ void LogisticRegression<MatType>::Predict(const MatType& predictors,
}
template<typename MatType>
+template<typename VecType>
+size_t LogisticRegression<MatType>::Classify(const VecType& point,
+ const double decisionBoundary)
+ const
+{ // Label = integer part of [sigmoid(intercept + weights . point) + (1 - boundary)].
+ return size_t(1.0 / (1.0 + std::exp(-parameters(0) - arma::dot(point,
+ parameters.subvec(1, parameters.n_elem - 1)))) + // parameters(0): intercept; remaining entries: weights.
+ (1.0 - decisionBoundary)); // Truncates to 1 once the sigmoid reaches the boundary (0.5 + 0.5 -> 1 for the default), else 0.
+}
+
+template<typename MatType>
+void LogisticRegression<MatType>::Classify(const MatType& dataset,
+ arma::Row<size_t>& labels,
+ const double decisionBoundary) const
+{
+ Predict(dataset, labels, decisionBoundary); // Thin wrapper: Predict() writes the predictions into labels.
+}
+
+template<typename MatType>
+void LogisticRegression<MatType>::Classify(const MatType& dataset,
+ arma::mat& probabilities) const
+{
+ // One column per point; row 0 holds P(class 0) and row 1 holds P(class 1).
+ probabilities.set_size(2, dataset.n_cols);
+
+ probabilities.row(1) = 1.0 / (1.0 + arma::exp(-parameters(0) - dataset.t() * // Sigmoid of intercept + weights . point, for every column.
+ parameters.subvec(1, parameters.n_elem - 1))).t();
+ probabilities.row(0) = 1.0 - probabilities.row(1); // Complement, so each column sums to 1.
+}
+
+template<typename MatType>
double LogisticRegression<MatType>::ComputeError(
const MatType& predictors,
const arma::Row<size_t>& responses) const
diff --git a/src/mlpack/tests/logistic_regression_test.cpp b/src/mlpack/tests/logistic_regression_test.cpp
index f567049..7881bb2 100644
--- a/src/mlpack/tests/logistic_regression_test.cpp
+++ b/src/mlpack/tests/logistic_regression_test.cpp
@@ -807,4 +807,154 @@ BOOST_AUTO_TEST_CASE(LogisticRegressionSparseSGDTest)
BOOST_REQUIRE_CLOSE(lr.Parameters()[i], lrSparse.Parameters()[i], 1e-5);
}
+/**
+ * Test multi-point Classify(): at least 90% of test points must be right.
+ */
+BOOST_AUTO_TEST_CASE(ClassifyTest)
+{
+ // Two 3-d Gaussians (presumably mean, then covariance): (1,1,1) and (9,9,9).
+ GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye<arma::mat>(3, 3));
+ GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye<arma::mat>(3, 3));
+
+ arma::mat data(3, 1000); // 3 dimensions; each of the 1000 columns is one point.
+ arma::Row<size_t> responses(1000);
+ for (size_t i = 0; i < 500; ++i) // First half: class 0, sampled from g1.
+ {
+ data.col(i) = g1.Random();
+ responses[i] = 0;
+ }
+ for (size_t i = 500; i < 1000; ++i) // Second half: class 1, sampled from g2.
+ {
+ data.col(i) = g2.Random();
+ responses[i] = 1;
+ }
+
+ // Train on it. NOTE(review): 0.5 is presumably the regularization; confirm.
+ LogisticRegression<> lr(data.n_rows, 0.5);
+ lr.Train<>(data, responses);
+
+ // Create a test set: overwrite data with fresh samples, same label layout.
+ for (size_t i = 0; i < 500; ++i)
+ {
+ data.col(i) = g1.Random();
+ responses[i] = 0;
+ }
+ for (size_t i = 500; i < 1000; ++i)
+ {
+ data.col(i) = g2.Random();
+ responses[i] = 1;
+ }
+
+ arma::Row<size_t> predictions;
+ lr.Classify(data, predictions);
+
+ BOOST_REQUIRE_GE((double) arma::accu(predictions == responses), 900); // At least 900 of the 1000 labels must match.
+}
+
+/**
+ * Test that single-point classification gives the same results as multi-point
+ * classification.
+ */
+BOOST_AUTO_TEST_CASE(SinglePointClassifyTest)
+{
+ // Two 3-d Gaussians (presumably mean, then covariance): (1,1,1) and (9,9,9).
+ GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye<arma::mat>(3, 3));
+ GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye<arma::mat>(3, 3));
+
+ arma::mat data(3, 1000); // 3 dimensions; each of the 1000 columns is one point.
+ arma::Row<size_t> responses(1000);
+ for (size_t i = 0; i < 500; ++i) // First half: class 0, sampled from g1.
+ {
+ data.col(i) = g1.Random();
+ responses[i] = 0;
+ }
+ for (size_t i = 500; i < 1000; ++i) // Second half: class 1, sampled from g2.
+ {
+ data.col(i) = g2.Random();
+ responses[i] = 1;
+ }
+
+ // Train on it. NOTE(review): 0.5 is presumably the regularization; confirm.
+ LogisticRegression<> lr(data.n_rows, 0.5);
+ lr.Train<>(data, responses);
+
+ // Create a test set: overwrite data with fresh samples, same label layout.
+ for (size_t i = 0; i < 500; ++i)
+ {
+ data.col(i) = g1.Random();
+ responses[i] = 0;
+ }
+ for (size_t i = 500; i < 1000; ++i)
+ {
+ data.col(i) = g2.Random();
+ responses[i] = 1;
+ }
+
+ arma::Row<size_t> predictions;
+ lr.Classify(data, predictions); // Batch predictions used as the reference.
+
+ for (size_t i = 0; i < data.n_cols; ++i)
+ {
+ size_t pred = lr.Classify(data.col(i)); // Single-point overload, default boundary.
+
+ BOOST_REQUIRE_EQUAL(pred, predictions[i]); // Must agree with the batch result for every point.
+ }
+}
+
+/**
+ * Test that Classify() gives sane per-class probabilities for each point.
+ */
+BOOST_AUTO_TEST_CASE(ClassifyProbabilitiesTest)
+{
+ // Two 3-d Gaussians (presumably mean, then covariance): (1,1,1) and (9,9,9).
+ GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye<arma::mat>(3, 3));
+ GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye<arma::mat>(3, 3));
+
+ arma::mat data(3, 1000); // 3 dimensions; each of the 1000 columns is one point.
+ arma::Row<size_t> responses(1000);
+ for (size_t i = 0; i < 500; ++i) // First half: class 0, sampled from g1.
+ {
+ data.col(i) = g1.Random();
+ responses[i] = 0;
+ }
+ for (size_t i = 500; i < 1000; ++i) // Second half: class 1, sampled from g2.
+ {
+ data.col(i) = g2.Random();
+ responses[i] = 1;
+ }
+
+ // Train on it. NOTE(review): 0.5 is presumably the regularization; confirm.
+ LogisticRegression<> lr(data.n_rows, 0.5);
+ lr.Train<>(data, responses);
+
+ // Create a test set: overwrite data with fresh samples, same label layout.
+ for (size_t i = 0; i < 500; ++i)
+ {
+ data.col(i) = g1.Random();
+ responses[i] = 0;
+ }
+ for (size_t i = 500; i < 1000; ++i)
+ {
+ data.col(i) = g2.Random();
+ responses[i] = 1;
+ }
+
+ arma::mat probabilities;
+ lr.Classify(data, probabilities);
+
+ BOOST_REQUIRE_EQUAL(probabilities.n_cols, data.n_cols); // One column per point.
+ BOOST_REQUIRE_EQUAL(probabilities.n_rows, 2); // One row per class.
+
+ for (size_t i = 0; i < data.n_cols; ++i)
+ {
+ BOOST_REQUIRE_CLOSE(probabilities(0, i) + probabilities(1, i), 1.0, 1e-5); // Column sums to 1; tolerance is a percentage.
+
+ // The probability of the true class must be within 10% of 1.0.
+ if (responses[i] == 0)
+ BOOST_REQUIRE_CLOSE(probabilities(0, i), 1.0, 10.0);
+ else
+ BOOST_REQUIRE_CLOSE(probabilities(1, i), 1.0, 10.0);
+ }
+}
+
BOOST_AUTO_TEST_SUITE_END();
More information about the mlpack-git
mailing list