[mlpack-git] master: Add option to set the input parameter when calculating the gradient. (fb3994c)
gitdub at mlpack.org
Sun Apr 10 09:49:35 EDT 2016
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/592fdcda156aa58aba3209017b585e7cfd12c345...fb3994c48e59d47b51931d9e7fbfa4777f181ca3
>---------------------------------------------------------------
commit fb3994c48e59d47b51931d9e7fbfa4777f181ca3
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date: Sun Apr 10 15:49:35 2016 +0200
Add option to set the input parameter when calculating the gradient.
>---------------------------------------------------------------
fb3994c48e59d47b51931d9e7fbfa4777f181ca3
src/mlpack/methods/ann/cnn.hpp | 6 ++---
src/mlpack/methods/ann/ffn.hpp | 6 ++---
src/mlpack/methods/ann/layer/bias_layer.hpp | 29 ++++++----------------
src/mlpack/methods/ann/layer/conv_layer.hpp | 9 ++++---
src/mlpack/methods/ann/layer/dropconnect_layer.hpp | 11 +++++---
src/mlpack/methods/ann/layer/empty_layer.hpp | 6 +++--
src/mlpack/methods/ann/layer/linear_layer.hpp | 29 ++++++++++++----------
src/mlpack/methods/ann/layer/lstm_layer.hpp | 18 ++++++++++++--
src/mlpack/methods/ann/layer/recurrent_layer.hpp | 7 ++++--
src/mlpack/methods/ann/layer/sparse_bias_layer.hpp | 7 ++++--
.../methods/ann/layer/sparse_input_layer.hpp | 12 +++++----
.../methods/ann/layer/sparse_output_layer.hpp | 10 ++++----
src/mlpack/methods/ann/rnn.hpp | 16 ++++++------
13 files changed, 93 insertions(+), 73 deletions(-)
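
With this change every layer's Gradient() member takes the propagated input as an explicit first argument, and the network classes (cnn.hpp, ffn.hpp, rnn.hpp) pass layer.InputParameter() through instead of each layer reading its stored inputParameter. A minimal sketch of the layer-side interface the networks now expect (illustrative class and member names, not part of this commit):

#include <armadillo>

// Illustrative only: a toy layer exposing the three-argument Gradient()
// along with the InputParameter() and Gradient() accessors used by the
// network-side Update() shown below.
class ToyLayer
{
 public:
  // New interface: the propagated input is passed in explicitly instead of
  // being read from a stored inputParameter member.
  template<typename InputType, typename ErrorType, typename GradientType>
  void Gradient(const InputType& input,
                const ErrorType& error,
                GradientType& gradient)
  {
    gradient = error * input.t();
  }

  //! The input activation stored during the forward pass.
  arma::mat& InputParameter() { return inputParameter; }
  //! The gradient the optimizer reads after the backward pass.
  arma::mat& Gradient() { return gradient; }

 private:
  arma::mat inputParameter;
  arma::mat gradient;
};

// Network side, as in cnn.hpp and ffn.hpp after this commit:
//   layer.Gradient(layer.InputParameter(), delta, layer.Gradient());
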
diff --git a/src/mlpack/methods/ann/cnn.hpp b/src/mlpack/methods/ann/cnn.hpp
index e1872e4..56d6be2 100644
--- a/src/mlpack/methods/ann/cnn.hpp
+++ b/src/mlpack/methods/ann/cnn.hpp
@@ -378,15 +378,15 @@ class CNN
template<typename T, typename P, typename D>
typename std::enable_if<
- HasGradientCheck<T, void(T::*)(const D&, P&)>::value, void>::type
+ HasGradientCheck<T, P&(T::*)()>::value, void>::type
Update(T& layer, P& /* unused */, D& delta)
{
- layer.Gradient(delta, layer.Gradient());
+ layer.Gradient(layer.InputParameter(), delta, layer.Gradient());
}
template<typename T, typename P, typename D>
typename std::enable_if<
- !HasGradientCheck<T, void(T::*)(const P&, D&)>::value, void>::type
+ !HasGradientCheck<T, P&(T::*)()>::value, void>::type
Update(T& /* unused */, P& /* unused */, D& /* unused */)
{
/* Nothing to do here */
diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp
index 2bed68a..43972c7 100644
--- a/src/mlpack/methods/ann/ffn.hpp
+++ b/src/mlpack/methods/ann/ffn.hpp
@@ -380,15 +380,15 @@ private:
template<typename T, typename P, typename D>
typename std::enable_if<
- HasGradientCheck<T, void(T::*)(const D&, P&)>::value, void>::type
+ HasGradientCheck<T, P&(T::*)()>::value, void>::type
Update(T& layer, P& /* unused */, D& delta)
{
- layer.Gradient(delta, layer.Gradient());
+ layer.Gradient(layer.InputParameter(), delta, layer.Gradient());
}
template<typename T, typename P, typename D>
typename std::enable_if<
- !HasGradientCheck<T, void(T::*)(const P&, D&)>::value, void>::type
+ !HasGradientCheck<T, P&(T::*)()>::value, void>::type
Update(T& /* unused */, P& /* unused */, D& /* unused */)
{
/* Nothing to do here */
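
The SFINAE check used by the network classes changes accordingly: instead of detecting the old two-argument void Gradient(const D&, P&) member, HasGradientCheck now detects the P& Gradient() accessor, so the three-argument call is only made for layers that actually store a gradient. A rough standalone re-implementation of such a member detector, purely to illustrate the idea (mlpack's own HasGradientCheck is defined elsewhere and may differ in detail):

// Illustrative member-detection trait: value is true when T has a member
// named Gradient whose address is convertible to the given Signature
// (e.g. arma::mat& (T::*)() for the accessor checked in the diff above).
template<typename T, T> struct SignatureCheck;

template<typename T, typename Signature>
struct HasGradientMember
{
  template<typename U>
  static char Test(SignatureCheck<Signature, &U::Gradient>*);

  template<typename U>
  static long Test(...);

  static const bool value = sizeof(Test<T>(0)) == sizeof(char);
};

// Small check of the trait itself:
struct WithGradient { double grad; double& Gradient() { return grad; } };
struct WithoutGradient { };

static_assert(HasGradientMember<WithGradient,
    double& (WithGradient::*)()>::value, "accessor detected");
static_assert(!HasGradientMember<WithoutGradient,
    double& (WithoutGradient::*)()>::value, "no accessor, no match");
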
diff --git a/src/mlpack/methods/ann/layer/bias_layer.hpp b/src/mlpack/methods/ann/layer/bias_layer.hpp
index 343d281..83edd48 100644
--- a/src/mlpack/methods/ann/layer/bias_layer.hpp
+++ b/src/mlpack/methods/ann/layer/bias_layer.hpp
@@ -97,29 +97,16 @@ class BiasLayer
/*
* Calculate the gradient using the output delta and the bias.
*
- * @param d The calculated error.
- * @param g The calculated gradient.
+ * @param input The propagated input.
+ * @param error The calculated error.
+ * @param gradient The calculated gradient.
*/
- template<typename eT>
- void Gradient(const arma::Cube<eT>& d, InputDataType& g)
- {
- g = arma::Mat<eT>(weights.n_rows, weights.n_cols);
- for (size_t s = 0; s < d.n_slices; s++)
- {
- g(s) = arma::accu(d.slice(s)) * bias;
- }
- }
-
- /*
- * Calculate the gradient using the output delta and the bias.
- *
- * @param d The calculated error.
- * @param g The calculated gradient.
- */
- template<typename eT>
- void Gradient(const arma::Mat<eT>& d, InputDataType& g)
+ template<typename eT, typename ErrorType, typename GradientType>
+ void Gradient(const arma::Mat<eT>& /* input */,
+ const ErrorType& error,
+ GradientType& gradient)
{
- g = d * bias;
+ gradient = error * bias;
}
//! Get the weights.
diff --git a/src/mlpack/methods/ann/layer/conv_layer.hpp b/src/mlpack/methods/ann/layer/conv_layer.hpp
index aeb6c07..4a3cc79 100644
--- a/src/mlpack/methods/ann/layer/conv_layer.hpp
+++ b/src/mlpack/methods/ann/layer/conv_layer.hpp
@@ -134,11 +134,14 @@ class ConvLayer
/*
* Calculate the gradient using the output delta and the input activation.
*
+ * @param input The input parameter used for calculating the gradient.
* @param d The calculated error.
* @param g The calculated gradient.
*/
- template<typename eT>
- void Gradient(const arma::Cube<eT>& d, arma::Cube<eT>& g)
+ template<typename InputType, typename eT>
+ void Gradient(const InputType& input,
+ const arma::Cube<eT>& d,
+ arma::Cube<eT>& g)
{
g = arma::zeros<arma::Cube<eT> >(weights.n_rows, weights.n_cols,
weights.n_slices);
@@ -147,7 +150,7 @@ class ConvLayer
{
for (size_t inMap = 0, s = outMap; inMap < inMaps; inMap++, s += outMaps)
{
- arma::Cube<eT> inputSlices = inputParameter.slices(inMap, inMap);
+ arma::Cube<eT> inputSlices = input.slices(inMap, inMap);
arma::Cube<eT> deltaSlices = d.slices(outMap, outMap);
arma::Cube<eT> output;
diff --git a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
index bd8fe6e..826a7a8 100644
--- a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
+++ b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
@@ -175,22 +175,25 @@ class DropConnectLayer
/**
* Calculate the gradient using the output delta and the input activation.
*
+ * @param input The propagated input.
* @param d The calculated error.
* @param g The calculated gradient.
*/
- template<typename eT, typename GradientDataType>
- void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
+ template<typename InputType, typename eT, typename GradientDataType>
+ void Gradient(const InputType& input,
+ const arma::Mat<eT>& d,
+ GradientDataType& g)
{
if(uselayer)
{
- baseLayer.Gradient(d, g);
+ baseLayer.Gradient(input, d, g);
// Denoise the weights.
baseLayer.Weights() = denoise;
}
else
{
- g = d * inputParameter.t();
+ g = d * input.t();
// Denoise the weights.
weights = denoise;
diff --git a/src/mlpack/methods/ann/layer/empty_layer.hpp b/src/mlpack/methods/ann/layer/empty_layer.hpp
index 7eb58ec..0ff6d93 100644
--- a/src/mlpack/methods/ann/layer/empty_layer.hpp
+++ b/src/mlpack/methods/ann/layer/empty_layer.hpp
@@ -68,8 +68,10 @@ class EmptyLayer
* @param d The calculated error.
* @param g The calculated gradient.
*/
- template<typename ErrorType, typename GradientType>
- void Gradient(const ErrorType& /* d */, GradientType& /* g */)
+ template<typename InputType, typename ErrorType, typename GradientType>
+ void Gradient(const InputType& /* input */,
+ const ErrorType& /* error */,
+ GradientType& /* gradient */)
{
/* Nothing to do here. */
}
diff --git a/src/mlpack/methods/ann/layer/linear_layer.hpp b/src/mlpack/methods/ann/layer/linear_layer.hpp
index b34e978..1108e94 100644
--- a/src/mlpack/methods/ann/layer/linear_layer.hpp
+++ b/src/mlpack/methods/ann/layer/linear_layer.hpp
@@ -73,7 +73,8 @@ class LinearLayer
for (size_t i = 0; i < data.n_cols; i++, c++)
{
data.col(i).subvec(s * input.n_rows * input.n_cols, (s + 1) *
- input.n_rows * input.n_cols - 1) = arma::vectorise(input.slice(c));
+ input.n_rows * input.n_cols - 1) = arma::trans(arma::vectorise(
+ input.slice(c), 1));
}
}
@@ -97,17 +98,19 @@ class LinearLayer
g = weights.t() * gy;
}
-
/*
* Calculate the gradient using the output delta and the input activation.
*
- * @param d The calculated error.
- * @param g The calculated gradient.
+ * @param input The propagated input.
+ * @param error The calculated error.
+ * @param gradient The calculated gradient.
*/
- template<typename eT, typename GradientDataType>
- void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
+ template<typename InputType, typename ErrorType, typename GradientType>
+ void Gradient(const InputType& input,
+ const ErrorType& error,
+ GradientType& gradient)
{
- GradientDelta(inputParameter, d, g);
+ GradientDelta(input, error, gradient);
}
//! Get the weights.
@@ -145,7 +148,7 @@ class LinearLayer
}
private:
- /*
+ /*
* Calculate the gradient using the output delta (3rd order tensor) and the
* input activation (3rd order tensor).
*
@@ -170,7 +173,7 @@ class LinearLayer
data.row(i).subvec(s * input.n_rows *
input.n_cols, (s + 1) *
input.n_rows *
- input.n_cols - 1) = arma::vectorise(
+ input.n_cols - 1) = arma::vectorise(
input.slice(c), 1);
}
}
@@ -187,12 +190,12 @@ class LinearLayer
* @param g The calculated gradient.
*/
template<typename eT>
- void GradientDelta(const arma::Mat<eT>& /* input unused */,
+ void GradientDelta(const arma::Mat<eT>& input,
const arma::Mat<eT>& d,
arma::Cube<eT>& g)
{
g = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
- Gradient(d, g.slice(0));
+ Gradient(input, d, g.slice(0));
}
/*
@@ -204,12 +207,12 @@ class LinearLayer
* @param g The calculated gradient.
*/
template<typename eT>
- void GradientDelta(const arma::Cube<eT>& /* input unused */,
+ void GradientDelta(const arma::Cube<eT>& input,
const arma::Mat<eT>& d,
arma::Mat<eT>& g)
{
arma::Cube<eT> grad = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
- Gradient(d, grad);
+ Gradient(input, d, grad);
g = grad.slice(0);
}
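
The pattern above -- a single templated Gradient() forwarding the passed input to overloaded GradientDelta() helpers -- is what lets one public interface serve both matrix and 3rd-order-tensor activations. A condensed standalone sketch of that dispatch (illustrative types only, not LinearLayer's actual code):

#include <armadillo>

struct DispatchSketch
{
  // Public entry point, analogous to LinearLayer::Gradient() above.
  template<typename InputType, typename ErrorType, typename GradientType>
  void Gradient(const InputType& input,
                const ErrorType& error,
                GradientType& gradient)
  {
    GradientDelta(input, error, gradient);
  }

 private:
  // Matrix activation: plain outer product with the error.
  void GradientDelta(const arma::mat& input,
                     const arma::mat& error,
                     arma::mat& gradient)
  {
    gradient = error * input.t();
  }

  // Cube activation: flatten the slices, then reuse the matrix overload.
  void GradientDelta(const arma::cube& input,
                     const arma::mat& error,
                     arma::mat& gradient)
  {
    const arma::mat flattened = arma::vectorise(input);
    GradientDelta(flattened, error, gradient);
  }
};
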
diff --git a/src/mlpack/methods/ann/layer/lstm_layer.hpp b/src/mlpack/methods/ann/layer/lstm_layer.hpp
index 59c103c..86e4174 100644
--- a/src/mlpack/methods/ann/layer/lstm_layer.hpp
+++ b/src/mlpack/methods/ann/layer/lstm_layer.hpp
@@ -59,6 +59,10 @@ class LSTMLayer
peepholeWeights.set_size(outSize, 3);
peepholeDerivatives = arma::zeros<OutputDataType>(outSize, 3);
}
+ else
+ {
+ peepholeWeights.set_size(0, 0);
+ }
}
/**
@@ -90,6 +94,7 @@ class LSTMLayer
// Split up the input activation into the 3 parts (inGate, forgetGate,
// outGate).
inGate.col(offset) = input.submat(0, 0, outSize - 1, 0);
+
forgetGate.col(offset) = input.submat(outSize, 0, (outSize * 2) - 1, 0);
outGate.col(offset) = input.submat(outSize * 3, 0, (outSize * 4) - 1, 0);
@@ -226,8 +231,17 @@ class LSTMLayer
offset = (offset + 1) % seqLen;
}
- template<typename eT, typename GradientDataType>
- void Gradient(const arma::Mat<eT>& /* unused */, GradientDataType& /* unused */)
+ /**
+ * Calculate the gradient using the output delta and the input activation.
+ *
+ * @param input The propagated input activation.
+ * @param gy The backpropagated error.
+ * @param g The calculated gradient.
+ */
+ template<typename InputType, typename eT, typename GradientDataType>
+ void Gradient(const InputType& /* input */,
+ const arma::Mat<eT>& /* gy */,
+ GradientDataType& /* g */)
{
if (peepholes && offset == 0)
{
diff --git a/src/mlpack/methods/ann/layer/recurrent_layer.hpp b/src/mlpack/methods/ann/layer/recurrent_layer.hpp
index 639852e..616794c 100644
--- a/src/mlpack/methods/ann/layer/recurrent_layer.hpp
+++ b/src/mlpack/methods/ann/layer/recurrent_layer.hpp
@@ -90,11 +90,14 @@ class RecurrentLayer
/*
* Calculate the gradient using the output delta and the input activation.
*
+ * @param input The propagated input activation.
* @param d The calculated error.
* @param g The calculated gradient.
*/
- template<typename eT, typename GradientDataType>
- void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
+ template<typename InputType, typename eT, typename GradientDataType>
+ void Gradient(const InputType& /* input */,
+ const arma::Mat<eT>& d,
+ GradientDataType& g)
{
g = d * recurrentParameter.t();
}
diff --git a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp b/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp
index 9b79536..f25f9f0 100644
--- a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp
+++ b/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp
@@ -77,11 +77,14 @@ class SparseBiasLayer
/*
* Calculate the gradient using the output delta and the bias.
*
+ * @param input The propagated input.
* @param d The calculated error.
* @param g The calculated gradient.
*/
- template<typename eT>
- void Gradient(const arma::Mat<eT>& d, InputDataType& g)
+ template<typename InputType, typename eT>
+ void Gradient(const InputType& /* input */,
+ const arma::Mat<eT>& d,
+ InputDataType& g)
{
g = arma::sum(d, 1) / static_cast<typename InputDataType::value_type>(
batchSize);
diff --git a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp b/src/mlpack/methods/ann/layer/sparse_input_layer.hpp
index ce5ce7d..2858695 100644
--- a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp
+++ b/src/mlpack/methods/ann/layer/sparse_input_layer.hpp
@@ -82,15 +82,17 @@ class SparseInputLayer
/*
* Calculate the gradient using the output delta and the input activation.
*
+ * @param input The propagated input.
* @param d The calculated error.
* @param g The calculated gradient.
*/
- template<typename eT, typename GradientDataType>
- void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
+ template<typename InputType, typename eT, typename GradientDataType>
+ void Gradient(const InputType& input,
+ const arma::Mat<eT>& d,
+ GradientDataType& g)
{
- g = d * inputParameter.t() /
- static_cast<typename InputDataType::value_type>(inputParameter.n_cols) +
- lambda * weights;
+ g = d * input.t() / static_cast<typename InputType::value_type>(
+ input.n_cols) + lambda * weights;
}
//! Get the weights.
diff --git a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp b/src/mlpack/methods/ann/layer/sparse_output_layer.hpp
index 3022e2a..6245825 100644
--- a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp
+++ b/src/mlpack/methods/ann/layer/sparse_output_layer.hpp
@@ -91,15 +91,15 @@ class SparseOutputLayer
/*
* Calculate the gradient using the output delta and the input activation.
*
+ * @param input The propagated input.
* @param d The calculated error.
* @param g The calculated gradient.
*/
- template<typename eT>
- void Gradient(const arma::Mat<eT>& d, arma::Mat<eT>& g)
+ template<typename InputType, typename eT>
+ void Gradient(const InputType& input, const arma::Mat<eT>& d, arma::Mat<eT>& g)
{
- g = d * inputParameter.t() /
- static_cast<typename InputDataType::value_type>(inputParameter.n_cols) +
- lambda * weights;
+ g = d * input.t() / static_cast<typename InputType::value_type>(
+ input.n_cols) + lambda * weights;
}
//! Sets the KL divergence parameter.
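
For reference, the expression both sparse layers now build from the passed input is the batch-averaged outer product of error and input plus an L2 penalty on the weights; roughly (illustrative helper, not part of the commit):

#include <armadillo>

// Illustrative only: mirrors the gradient expression used by
// SparseInputLayer and SparseOutputLayer above.
arma::mat SparseLayerGradient(const arma::mat& d,       // error:   outSize x n
                              const arma::mat& input,   // input:   inSize  x n
                              const arma::mat& weights, // weights: outSize x inSize
                              const double lambda)      // L2 regularization strength
{
  // Average the outer products over the n columns (the batch), then add
  // the weight-decay term.
  return d * input.t() / static_cast<double>(input.n_cols) + lambda * weights;
}
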
diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp
index 1389a87..be2bdd5 100644
--- a/src/mlpack/methods/ann/rnn.hpp
+++ b/src/mlpack/methods/ann/rnn.hpp
@@ -356,7 +356,7 @@ class RNN
*/
template<typename T, typename P, typename D>
typename std::enable_if<
- HasGradientCheck<T, void(T::*)(const D&, P&)>::value, void>::type
+ HasGradientCheck<T, P&(T::*)()>::value, void>::type
Init(T& layer, P& /* unused */, D& /* unused */)
{
// Initialize the input size only once.
@@ -368,7 +368,7 @@ class RNN
template<typename T, typename P, typename D>
typename std::enable_if<
- !HasGradientCheck<T, void(T::*)(const P&, D&)>::value, void>::type
+ !HasGradientCheck<T, P&(T::*)()>::value, void>::type
Init(T& /* unused */, P& /* unused */, D& /* unused */)
{
/* Nothing to do here */
@@ -696,20 +696,20 @@ class RNN
template<typename T1, typename P1, typename D1, typename T2, typename P2,
typename D2>
typename std::enable_if<
- HasGradientCheck<T1, void(T1::*)(const D1&, P1&)>::value &&
+ HasGradientCheck<T1, P1&(T1::*)()>::value &&
HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value, void>::type
Update(T1& layer, P1& /* unused */, D1& /* unused */, T2& /* unused */,
P2& /* unused */, D2& delta2)
{
- layer.Gradient(delta2, layer.Gradient());
+ layer.Gradient(layer.InputParameter(), delta2, layer.Gradient());
}
template<typename T1, typename P1, typename D1, typename T2, typename P2,
typename D2>
typename std::enable_if<
- (!HasGradientCheck<T1, void(T1::*)(const D1&, P1&)>::value &&
+ (!HasGradientCheck<T1, P1&(T1::*)()>::value &&
!HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value) ||
- (!HasGradientCheck<T1, void(T1::*)(const D1&, P1&)>::value &&
+ (!HasGradientCheck<T1, P1&(T1::*)()>::value &&
HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value), void>::type
Update(T1& /* unused */, P1& /* unused */, D1& /* unused */, T2& /* unused */,
P2& /* unused */, D2& /* unused */)
@@ -720,12 +720,12 @@ class RNN
template<typename T1, typename P1, typename D1, typename T2, typename P2,
typename D2>
typename std::enable_if<
- HasGradientCheck<T1, void(T1::*)(const D1&, P1&)>::value &&
+ HasGradientCheck<T1, P1&(T1::*)()>::value &&
!HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value, void>::type
Update(T1& layer, P1& /* unused */, D1& delta1, T2& /* unused */,
P2& /* unused */, D2& /* unused */)
{
- layer.Gradient(delta1, layer.Gradient());
+ layer.Gradient(layer.InputParameter(), delta1, layer.Gradient());
}
/*