[mlpack-git] master: some comments for the algorithm implementation (8b97a08)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Mon Feb 2 15:16:53 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/bb6e5c56aab07e6449d86021246b52a9e323f3a0...bd6cb33f8d8270b02a84e81e38727679bb6c319a
>---------------------------------------------------------------
commit 8b97a08de65da5547949fa803f25a820446c99a2
Author: Stephen Tu <tu.stephenl at gmail.com>
Date: Wed Jan 28 19:02:48 2015 -0800
some comments for the algorithm implementation
>---------------------------------------------------------------
8b97a08de65da5547949fa803f25a820446c99a2
.../core/optimizers/sdp/primal_dual_impl.hpp | 66 +++++++++++++++++++++-
1 file changed, 65 insertions(+), 1 deletion(-)
diff --git a/src/mlpack/core/optimizers/sdp/primal_dual_impl.hpp b/src/mlpack/core/optimizers/sdp/primal_dual_impl.hpp
index 7e1cbb2..df77c6a 100644
--- a/src/mlpack/core/optimizers/sdp/primal_dual_impl.hpp
+++ b/src/mlpack/core/optimizers/sdp/primal_dual_impl.hpp
@@ -16,6 +16,10 @@
*
* Note there are many optimizations that still need to be implemented. See the
* code comments for more details.
+ *
+ * Also note the current implementation assumes the SDP problem has a strictly
+ * feasible primal/dual point (and therefore the duality gap is zero), and
+ * that the constraint matrices are linearly independent.
*/
#ifndef __MLPACK_CORE_OPTIMIZERS_SDP_PRIMAL_DUAL_IMPL_HPP
#define __MLPACK_CORE_OPTIMIZERS_SDP_PRIMAL_DUAL_IMPL_HPP
@@ -60,6 +64,9 @@ PrimalDualSolver<SDPType>::PrimalDualSolver(const SDPType& sdp,
{
arma::mat tmp;
+ // Note that the algorithm we implement requires primal iterate X and
+ // dual multiplier Z to be positive definite (but not feasible).
+
if (initialX.n_rows != sdp.N() || initialX.n_cols != sdp.N())
Log::Fatal << "PrimalDualSolver::PrimalDualSolver(): "
<< "initialX needs to be square n x n matrix"
@@ -142,6 +149,23 @@ SolveLyapunov(arma::mat& X, const arma::mat& A, const arma::mat& H)
arma::syl(X, A, A, -H);
}
+/**
+ * Solve the following KKT system (2.10) of [AHO98]:
+ *
+ * [ 0 A^T I ] [ dsx ] = [ rd ]
+ * [ A 0 0 ] [ dy ] = [ rp ]
+ * [ E 0 F ] [ dsz ] = [ rc ]
+ * \---- M ----/
+ *
+ * where
+ *
+ * A = [ Asparse ]
+ * [ Adense ]
+ * dy = [ dysparse dydense ]
+ * E = Z sym I
+ * F = X sym I
+ *
+ */
static inline void
SolveKKTSystem(const arma::sp_mat& Asparse,
const arma::mat& Adense,
@@ -160,6 +184,10 @@ SolveKKTSystem(const arma::sp_mat& Asparse,
Einv_Frd_ATdy_rc_Mat, Frd_ATdy_rc_Mat;
arma::vec Einv_Frd_rc, Einv_Frd_ATdy_rc, dy;
+ // Note: Whenever a formula calls for E^(-1) v for some v, we solve Lyapunov
+ // equations instead of forming an explicit inverse.
+
+ // Compute the RHS of (2.12)
math::Smat(F * rd - rc, Frd_rc_Mat);
SolveLyapunov(Einv_Frd_rc_Mat, Z, 2. * Frd_rc_Mat);
math::Svec(Einv_Frd_rc_Mat, Einv_Frd_rc);
@@ -180,16 +208,19 @@ SolveKKTSystem(const arma::sp_mat& Asparse,
if (Adense.n_rows)
dydense = dy(arma::span(Asparse.n_rows, numConstraints - 1));
+ // Compute dx from (2.13)
math::Smat(F * (rd - Asparse.t() * dysparse - Adense.t() * dydense) - rc, Frd_ATdy_rc_Mat);
SolveLyapunov(Einv_Frd_ATdy_rc_Mat, Z, 2. * Frd_ATdy_rc_Mat);
math::Svec(Einv_Frd_ATdy_rc_Mat, Einv_Frd_ATdy_rc);
dsx = -Einv_Frd_ATdy_rc;
+
+ // Compute dz from (2.14)
dsz = rd - Asparse.t() * dysparse - Adense.t() * dydense;
}
namespace private_ {
-// TODO(stephentu): should we move this somewhere more general
+// TODO(stephentu): should we move this somewhere more general?
template <typename T> struct vectype { };
template <typename eT> struct vectype<arma::Mat<eT>> { typedef arma::Col<eT> type; };
template <typename eT> struct vectype<arma::SpMat<eT>> { typedef arma::SpCol<eT> type; };
@@ -209,6 +240,9 @@ PrimalDualSolver<SDPType>::Optimize(arma::mat& X,
const size_t n = sdp.N();
const size_t n2bar = sdp.N2bar();
+ // Form the A matrix in (2.7). Note we explicitly handle
+ // sparse and dense constraints separately.
+
arma::sp_mat Asparse(sdp.NumSparseConstraints(), n2bar);
arma::sp_vec Aisparse;
@@ -254,6 +288,13 @@ PrimalDualSolver<SDPType>::Optimize(arma::mat& X,
double primalObj = 0., alpha, beta;
for (size_t iteration = 1; iteration != maxIterations; iteration++)
{
+ // Note: The Mehrotra PC algorithm works like this at a high level.
+ // We first solve a KKT system with mu=0. Then, we use the results
+ // of this KKT system to get a better estimate of mu and solve
+ // the KKT system again. Empirically, this PC step has been shown to
+ // significantly reduce the number of required iterations (and is used
+ // by most practical solver implementations).
+
if (sdp.NumSparseConstraints())
rp(arma::span(0, sdp.NumSparseConstraints() - 1)) =
sdp.SparseB() - Asparse * sx;
@@ -261,10 +302,13 @@ PrimalDualSolver<SDPType>::Optimize(arma::mat& X,
rp(arma::span(sdp.NumSparseConstraints(), sdp.NumConstraints() - 1)) =
sdp.DenseB() - Adense * sx;
+ // Rd = C - Z - smat A^T y
rd = sc - sz - Asparse.t() * ysparse - Adense.t() * ydense;
math::SymKronId(X, F);
+ // We compute E^(-1) F A^T by solving Lyapunov equations.
+ // See (2.16).
for (size_t i = 0; i < sdp.NumSparseConstraints(); i++)
{
SolveLyapunov(Gk, Z, X * sdp.SparseA()[i] + sdp.SparseA()[i] * X);
@@ -279,6 +323,10 @@ PrimalDualSolver<SDPType>::Optimize(arma::mat& X,
Einv_F_AdenseT.col(i) = gk;
}
+ // Form the M = A E^(-1) F A^T matrix (2.15)
+ //
+ // Since we split A up into its sparse and dense components,
+ // we have to handle each block separately.
if (sdp.NumSparseConstraints())
{
M.submat(arma::span(0,
@@ -318,18 +366,23 @@ PrimalDualSolver<SDPType>::Optimize(arma::mat& X,
// the cholesky decomposition of X and Z which we should have available
// when we use more efficient methods above.
+ // This solves step (1) of Section 7, the "predictor" step.
Rc = -0.5*(X*Z + Z*X);
math::Svec(Rc, rc);
SolveKKTSystem(Asparse, Adense, Z, M, F, rp, rd, rc, dsx, dysparse, dydense, dsz);
math::Smat(dsx, dX);
math::Smat(dsz, dZ);
+
+ // Step (2), determine step size lengths (alpha, beta)
alpha = Alpha(X, dX, tau);
beta = Alpha(Z, dZ, tau);
+ // See (7.1)
const double sigma =
std::pow(arma::dot(X + alpha * dX, Z + beta * dZ) / sxdotsz, 3);
const double mu = sigma * sxdotsz / n;
+ // Step (3), the "corrector" step.
Rc = mu*arma::eye<arma::mat>(n, n) - 0.5*(X*Z + Z*X + dX*dZ + dZ*dX);
math::Svec(Rc, rc);
SolveKKTSystem(Asparse, Adense, Z, M, F, rp, rd, rc, dsx, dysparse, dydense, dsz);
@@ -338,6 +391,7 @@ PrimalDualSolver<SDPType>::Optimize(arma::mat& X,
alpha = Alpha(X, dX, tau);
beta = Alpha(Z, dZ, tau);
+ // Iterate update
X += alpha * dX;
math::Svec(X, sx);
ysparse += beta * dysparse;
@@ -345,6 +399,16 @@ PrimalDualSolver<SDPType>::Optimize(arma::mat& X,
Z += beta * dZ;
math::Svec(Z, sz);
+ // Below, we check the KKT conditions. Recall the KKT conditions are
+ //
+ // (1) Primal feasibility
+ // (2) Dual feasibility
+ // (3) XZ = 0 (slackness condition)
+ //
+ // If the KKT conditions are satisfied to a certain degree of precision,
+ // then we consider this a valid certificate of optimality and terminate.
+ // Otherwise, we proceed onwards.
+
const double normXZ = arma::norm(X * Z, "fro");
const double sparsePrimalInfeas = arma::norm(sdp.SparseB() - Asparse * sx, 2);
More information about the mlpack-git
mailing list