[mlpack-git] master: Inplace gradient calculation for peephole connections. (4d2caf8)
gitdub at big.cc.gt.atl.ga.us
gitdub at big.cc.gt.atl.ga.us
Sun Mar 8 18:12:24 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/4389c0c500e340cdc0f573b7900a47cbcf9f9130...4d2caf87ea6d07dfcc5993318504a4305049a4af
>---------------------------------------------------------------
commit 4d2caf87ea6d07dfcc5993318504a4305049a4af
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date: Sun Mar 8 23:12:18 2015 +0100
Inplace gradient calculation for peephole connections.
>---------------------------------------------------------------
4d2caf87ea6d07dfcc5993318504a4305049a4af
src/mlpack/methods/ann/layer/lstm_layer.hpp | 54 ++++++++++++++++++++---------
src/mlpack/methods/ann/rnn.hpp | 5 ++-
2 files changed, 42 insertions(+), 17 deletions(-)
diff --git a/src/mlpack/methods/ann/layer/lstm_layer.hpp b/src/mlpack/methods/ann/layer/lstm_layer.hpp
index bc8dfaf..8dee7a5 100644
--- a/src/mlpack/methods/ann/layer/lstm_layer.hpp
+++ b/src/mlpack/methods/ann/layer/lstm_layer.hpp
@@ -13,7 +13,7 @@
#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
#include <mlpack/methods/ann/activation_functions/tanh_function.hpp>
#include <mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp>
-#include <mlpack/methods/ann/optimizer/rpropp.hpp>
+#include <mlpack/methods/ann/optimizer/steepest_descent.hpp>
namespace mlpack {
namespace ann /** Artificial Neural Network. */ {
@@ -37,7 +37,7 @@ template <
class StateActivationFunction = TanhFunction,
class OutputActivationFunction = TanhFunction,
class WeightInitRule = NguyenWidrowInitialization<>,
- typename OptimizerType = RPROPp<>,
+ typename OptimizerType = SteepestDescent<>,
typename MatType = arma::mat,
typename VecType = arma::colvec
>
@@ -80,21 +80,18 @@ class LSTMLayer
{
weightInitRule.Initialize(inGatePeepholeWeights, layerSize, 1);
inGatePeepholeDerivatives = arma::zeros<VecType>(layerSize);
- inGatePeepholeGradient = arma::zeros<MatType>(layerSize, 1);
inGatePeepholeOptimizer = std::auto_ptr<OptimizerType>(
- new OptimizerType(1, 2));
+ new OptimizerType(1, layerSize));
weightInitRule.Initialize(forgetGatePeepholeWeights, layerSize, 1);
forgetGatePeepholeDerivatives = arma::zeros<VecType>(layerSize);
- forgetGatePeepholeGradient = arma::zeros<MatType>(layerSize, 1);
forgetGatePeepholeOptimizer = std::auto_ptr<OptimizerType>(
- new OptimizerType(1, 2));
+ new OptimizerType(1, layerSize));
weightInitRule.Initialize(outGatePeepholeWeights, layerSize, 1);
outGatePeepholeDerivatives = arma::zeros<VecType>(layerSize);
- outGatePeepholeGradient = arma::zeros<MatType>(layerSize, 1);
outGatePeepholeOptimizer = std::auto_ptr<OptimizerType>(
- new OptimizerType(1, 2));
+ new OptimizerType(1, layerSize));
}
}
@@ -147,7 +144,8 @@ class LSTMLayer
if (peepholes && offset > 0)
{
inGate.col(offset) += inGatePeepholeWeights % state.col(offset - 1);
- forgetGate.col(offset) += forgetGatePeepholeWeights % state.col(offset);
+ forgetGate.col(offset) += forgetGatePeepholeWeights %
+ state.col(offset - 1);
}
VecType inGateActivation = inGateAct.unsafe_col(offset);
@@ -276,18 +274,26 @@ class LSTMLayer
if (peepholes && offset == 0)
{
+ inGatePeepholeGradient = (inGatePeepholeWeights.t() *
+ (inGateError.col(queryOffset) % inGatePeepholeDerivatives)) *
+ inGate.col(queryOffset).t();
+
+ forgetGatePeepholeGradient = (forgetGatePeepholeWeights.t() *
+ (forgetGateError.col(queryOffset) % forgetGatePeepholeDerivatives)) *
+ forgetGate.col(queryOffset).t();
+
+ outGatePeepholeGradient = (outGatePeepholeWeights.t() *
+ (outGateError.col(queryOffset) % outGatePeepholeDerivatives)) *
+ outGate.col(queryOffset).t();
+
inGatePeepholeOptimizer->UpdateWeights(inGatePeepholeWeights,
- inGatePeepholeGradient, 0);
+ inGatePeepholeGradient.t(), 0);
forgetGatePeepholeOptimizer->UpdateWeights(forgetGatePeepholeWeights,
- forgetGatePeepholeGradient, 0);
+ forgetGatePeepholeGradient.t(), 0);
outGatePeepholeOptimizer->UpdateWeights(outGatePeepholeWeights,
- outGatePeepholeGradient, 0);
-
- inGatePeepholeGradient.zeros();
- forgetGatePeepholeGradient.zeros();
- outGatePeepholeGradient.zeros();
+ outGatePeepholeGradient.t(), 0);
inGatePeepholeDerivatives.zeros();
forgetGatePeepholeDerivatives.zeros();
@@ -318,6 +324,22 @@ class LSTMLayer
//! Modify the sequence length.
size_t& SeqLen() { return seqLen; }
+ //! Get the InGate peephole weights.
+ MatType& InGatePeepholeWeights() const { return inGatePeepholeWeights; }
+ //! Modify the InGate peephole weights.
+ MatType& InGatePeepholeWeights() { return inGatePeepholeWeights; }
+
+ //! Get the ForgetGate peephole weights.
+ MatType& ForgetGatePeepholeWeights() const {
+ return forgetGatePeepholeWeights; }
+ //! Modify the ForgetGate peephole weights.
+ MatType& ForgetGatePeepholeWeights() { return forgetGatePeepholeWeights; }
+
+ //! Get the OutGate peephole weights.
+ MatType& OutGatePeepholeWeights() const { return outGatePeepholeWeights; }
+ //! Modify the OutGate peephole weights.
+ MatType& OutGatePeepholeWeights() { return outGatePeepholeWeights; }
+
private:
//! Locally-stored input activation object.
VecType inputActivations;
diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp
index 7d4c16f..3d67063 100644
--- a/src/mlpack/methods/ann/rnn.hpp
+++ b/src/mlpack/methods/ann/rnn.hpp
@@ -494,7 +494,10 @@ class RNN
{
// Sum up the stored delta for recurrent connections.
if (recurrentLayer[layer])
- std::get<I>(t).Delta() += delta[deltaNum].subvec(0, std::get<I>(t).InputLayer().OutputSize() - 1);
+ {
+ std::get<I>(t).Delta() += delta[deltaNum].subvec(
+ 0, std::get<I>(t).InputLayer().OutputSize() - 1);
+ }
// Perform the backward pass.
std::get<I>(t).InputLayer().FeedBackward(
More information about the mlpack-git
mailing list