[mlpack-git] master: Use the new optimizer interface. (e03cc28)
gitdub at big.cc.gt.atl.ga.us
Tue Jun 16 14:50:46 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/9264f7544f7c4d93ff735f00f35b0f5287abf59d...7df836c2f5a2287cda82801ca20f4b4b410cf4e1
>---------------------------------------------------------------
commit e03cc2862d684a29086e9d60aebd3cbe3fa571aa
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date: Tue Jun 16 14:11:31 2015 +0200
Use the new optimizer interface.
>---------------------------------------------------------------
e03cc2862d684a29086e9d60aebd3cbe3fa571aa
src/mlpack/methods/ann/cnn.hpp | 118 +---------------------------------------
src/mlpack/methods/ann/ffnn.hpp | 71 +-----------------------
src/mlpack/methods/ann/rnn.hpp | 23 +-------
3 files changed, 9 insertions(+), 203 deletions(-)
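The hunks below replace the networks' manual gradient bookkeeping (a
per-connection gradient store indexed by gradientNum) with calls into each
connection's optimizer. The new call sites imply a three-method interface:
Update() accumulates the current gradient, Optimize() applies a step, and
Reset() clears the accumulated state. Here is a minimal sketch inferred from
those call sites only; the class name, constructor, and plain gradient-descent
step are illustrative assumptions, not mlpack's actual optimizer
implementation (note the accessor really is spelled Optimzer() in this
revision):

    // Hypothetical sketch; only the three method names come from the diff.
    template<typename ConnectionType, typename DataType>
    class OptimizerSketch
    {
     public:
      OptimizerSketch(ConnectionType& connection, const double stepSize = 0.01) :
          connection(connection), stepSize(stepSize)
      {
        gradient.zeros(connection.Weights().n_rows, connection.Weights().n_cols);
      }

      // Accumulate the connection's current gradient (replaces the removed
      // gradients[gradientNum++] += gradient bookkeeping).
      void Update()
      {
        DataType currentGradient;
        connection.Gradient(currentGradient);
        gradient += currentGradient;
      }

      // Take a plain gradient-descent step (replaces the removed
      // UpdateWeights(weights, gradient, trainError) calls).
      void Optimize() { connection.Weights() -= stepSize * gradient; }

      // Clear the accumulated gradient (replaces gradients[...].zeros()).
      void Reset() { gradient.zeros(); }

     private:
      ConnectionType& connection;
      const double stepSize;
      DataType gradient;
    };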
diff --git a/src/mlpack/methods/ann/cnn.hpp b/src/mlpack/methods/ann/cnn.hpp
index e80ae2f..03dea65 100644
--- a/src/mlpack/methods/ann/cnn.hpp
+++ b/src/mlpack/methods/ann/cnn.hpp
@@ -78,11 +78,6 @@ class CNN
template <typename ErrorType>
void FeedBackward(const ErrorType& error)
{
- // Initialize the gradient storage only once.
- if (!gradients.size())
- InitLayer(network);
-
- gradientNum = 0;
LayerBackward(network, error);
UpdateGradients(network);
}
@@ -93,7 +88,6 @@ class CNN
*/
void ApplyGradients()
{
- gradientNum = 0;
ApplyGradients(network);
// Reset the overall error.
@@ -373,10 +367,7 @@ class CNN
if (!ConnectionTraits<typename std::remove_reference<decltype(
std::get<I>(t))>::type>::IsPoolingConnection)
{
- DataType gradient;
- std::get<I>(t).Gradient(gradient);
-
- gradients[gradientNum++] += gradient;
+ std::get<I>(t).Optimzer().Update();
}
Gradients<I + 1, Tp...>(t);
@@ -404,48 +395,6 @@ class CNN
}
/**
- * Helper function to update the weights using the gradients from the
- * gradient store.
- *
- * enable_if (SFINAE) is used to select between two template overloads of
- * the get function - one for when I is equal to the size of the tuple of
- * connections, and one for the general case which peels off the first type
- * and recurses, as usual with variadic function templates.
- */
- template<size_t I = 0, typename eT, typename... Tp>
- void UpdateWeights(arma::Mat<eT>& weights, std::tuple<Tp...>& t)
- {
- std::get<I>(t).Optimzer().UpdateWeights(weights,
- gradients[gradientNum].slice(0), trainError);
- }
-
- template<size_t I = 0, typename eT, typename... Tp>
- void UpdateWeights(arma::Cube<eT>& weights, std::tuple<Tp...>& t)
- {
- if (gradientNum == std::get<I>(t).InputLayer().OutputMaps() != 1)
- {
- for (size_t i = 0, g = 0;
- i < std::get<I>(t).OutputLayer().OutputMaps(); i++)
- {
- for (size_t j = i; j < weights.n_slices;
- j+= std::get<I>(t).OutputLayer().OutputMaps(), g++)
- {
- std::get<I>(t).Optimzer().UpdateWeights(weights.slice(j),
- gradients[gradientNum].slice(g), trainError);
- }
- }
- }
- else
- {
- for (size_t i = 0; i < weights.n_slices; i++)
- {
- std::get<I>(t).Optimzer().UpdateWeights(weights.slice(i),
- gradients[gradientNum].slice(i), trainError);
- }
- }
- }
-
- /**
* Update the weights using the gradients from the gradient store.
*
* enable_if (SFINAE) is used to iterate through the network connections.
@@ -460,17 +409,11 @@ class CNN
typename std::enable_if<I < sizeof...(Tp), void>::type
Apply(std::tuple<Tp...>& t)
{
- // Take a mean gradient step over the number of inputs.
- if (seqNum > 1)
- gradients[gradientNum] /= seqNum;
-
if (!ConnectionTraits<typename std::remove_reference<decltype(
std::get<I>(t))>::type>::IsPoolingConnection)
{
- UpdateWeights<I>(std::get<I>(t).Weights(), t);
-
- // Reset the gradient storage.
- gradients[gradientNum++].zeros();
+ std::get<I>(t).Optimzer().Optimize();
+ std::get<I>(t).Optimzer().Reset();
}
Apply<I + 1, Tp...>(t);
@@ -497,55 +440,6 @@ class CNN
InitLayer<I + 1, Tp...>(t);
}
- /**
- * Iterate through all connections and build the gradient storage.
- *
- * enable_if (SFINAE) is used to select between two template overloads of
- * the get function - one for when I is equal to the size of the tuple of
- * connections, and one for the general case which peels off the first type
- * and recurses, as usual with variadic function templates.
- */
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I == sizeof...(Tp), void>::type
- Layer(std::tuple<Tp...>& /* unused */) { }
-
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I < sizeof...(Tp), void>::type
- Layer(std::tuple<Tp...>& t)
- {
- if (!ConnectionTraits<typename std::remove_reference<decltype(
- std::get<I>(t))>::type>::IsPoolingConnection)
- {
- gradients.push_back(new DataType(std::get<I>(t).Weights().n_rows,
- std::get<I>(t).Weights().n_cols,
- ElementCount(std::get<I>(t).Weights()), arma::fill::zeros));
- }
-
- Layer<I + 1, Tp...>(t);
- }
-
- /*
- * Get the number of elements.
- *
- * @param data The reference data.
- */
- template<typename eT>
- size_t ElementCount(const arma::Mat<eT>& /* unused */) const
- {
- return 1;
- }
-
- /*
- * Get the number of elements.
- *
- * @param data The reference data.
- */
- template<typename eT>
- size_t ElementCount(const arma::Cube<eT>& data) const
- {
- return data.n_slices;
- }
-
//! The connection modules used to build the network.
ConnectionTypes network;
@@ -555,12 +449,6 @@ class CNN
//! The current training error of the network.
double trainError;
- //! The gradient storage we are using to perform the feed backward pass.
- boost::ptr_vector<DataType> gradients;
-
- //! The index of the currently active gradient.
- size_t gradientNum;
-
//! The number of the current input sequence.
size_t seqNum;
}; // class CNN
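The removed comments above repeatedly describe the compile-time iteration
idiom these classes are built on. As a self-contained illustration of that
enable_if (SFINAE) pattern, with a trivial payload standing in for the
network connections:

    #include <cstddef>
    #include <iostream>
    #include <tuple>
    #include <type_traits>

    // Base case: I has reached the tuple size, so the recursion stops.
    template<std::size_t I = 0, typename... Tp>
    typename std::enable_if<I == sizeof...(Tp), void>::type
    PrintAll(std::tuple<Tp...>& /* unused */) { }

    // General case: handle element I, then peel it off and recurse on I + 1.
    template<std::size_t I = 0, typename... Tp>
    typename std::enable_if<I < sizeof...(Tp), void>::type
    PrintAll(std::tuple<Tp...>& t)
    {
      std::cout << std::get<I>(t) << '\n';
      PrintAll<I + 1, Tp...>(t);
    }

    int main()
    {
      std::tuple<int, double, const char*> t(1, 2.5, "three");
      PrintAll(t);  // prints 1, 2.5, three; one element per overload call
    }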
diff --git a/src/mlpack/methods/ann/ffnn.hpp b/src/mlpack/methods/ann/ffnn.hpp
index 9711078..20a7dea 100644
--- a/src/mlpack/methods/ann/ffnn.hpp
+++ b/src/mlpack/methods/ann/ffnn.hpp
@@ -78,11 +78,6 @@ class FFNN
template <typename VecType>
void FeedBackward(const VecType& error)
{
- // Initialize the gradient storage only once.
- if (!gradients.size())
- InitLayer(network);
-
- gradientNum = 0;
LayerBackward(network, error);
UpdateGradients(network);
}
@@ -93,7 +88,6 @@ class FFNN
*/
void ApplyGradients()
{
- gradientNum = 0;
ApplyGradients(network);
// Reset the overall error.
@@ -366,10 +360,7 @@ class FFNN
typename std::enable_if<I < sizeof...(Tp), void>::type
Gradients(std::tuple<Tp...>& t)
{
- MatType gradient;
- std::get<I>(t).Gradient(gradient);
- gradients[gradientNum++] += gradient;
-
+ std::get<I>(t).Optimzer().Update();
Gradients<I + 1, Tp...>(t);
}
@@ -409,62 +400,12 @@ class FFNN
typename std::enable_if<I < sizeof...(Tp), void>::type
Apply(std::tuple<Tp...>& t)
{
- // Take a mean gradient step over the number of inputs.
- if (seqNum > 1)
- gradients[gradientNum] /= seqNum;
-
- std::get<I>(t).Optimzer().UpdateWeights(std::get<I>(t).Weights(),
- gradients[gradientNum], trainError);
-
- // Reset the gradient storage.
- gradients[gradientNum++].zeros();
+ std::get<I>(t).Optimzer().Optimize();
+ std::get<I>(t).Optimzer().Reset();
Apply<I + 1, Tp...>(t);
}
- /**
- * Helper function to iterate through all connection modules and to build
- * gradient storage.
- *
- * enable_if (SFINAE) is used to select between two template overloads of
- * the get function - one for when I is equal to the size of the tuple of
- * connections, and one for the general case which peels off the first type
- * and recurses, as usual with variadic function templates.
- */
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I == sizeof...(Tp), void>::type
- InitLayer(std::tuple<Tp...>& /* unused */) { }
-
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I < sizeof...(Tp), void>::type
- InitLayer(std::tuple<Tp...>& t)
- {
- Layer(std::get<I>(t));
- InitLayer<I + 1, Tp...>(t);
- }
-
- /**
- * Iterate through all connections and build the gradient storage.
- *
- * enable_if (SFINAE) is used to select between two template overloads of
- * the get function - one for when I is equal to the size of the tuple of
- * connections, and one for the general case which peels off the first type
- * and recurses, as usual with variadic function templates.
- */
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I == sizeof...(Tp), void>::type
- Layer(std::tuple<Tp...>& /* unused */) { }
-
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I < sizeof...(Tp), void>::type
- Layer(std::tuple<Tp...>& t)
- {
- gradients.push_back(new MatType(std::get<I>(t).Weights().n_rows,
- std::get<I>(t).Weights().n_cols, arma::fill::zeros));
-
- Layer<I + 1, Tp...>(t);
- }
-
//! The connection modules used to build the network.
ConnectionTypes network;
@@ -474,12 +415,6 @@ class FFNN
//! The current training error of the network.
double trainError;
- //! The gradient storage we are using to perform the feed backward pass.
- boost::ptr_vector<MatType> gradients;
-
- //! The index of the currently active gradient.
- size_t gradientNum;
-
//! The number of the current input sequence.
size_t seqNum;
}; // class FFNN
diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp
index 102fcc5..6240f43 100644
--- a/src/mlpack/methods/ann/rnn.hpp
+++ b/src/mlpack/methods/ann/rnn.hpp
@@ -149,7 +149,6 @@ class RNN
// Iterate through the input sequence and perform the feed backward pass.
for (seqNum = seqLen - 1; seqNum >= 0; seqNum--)
{
- gradientNum = 0;
deltaNum = 0;
// Perform the backward pass and update the gradient storage.
@@ -173,7 +172,6 @@ class RNN
*/
void ApplyGradients()
{
- gradientNum = 0;
ApplyGradients(network);
// Reset the overall error.
@@ -548,10 +546,7 @@ class RNN
typename std::enable_if<I < sizeof...(Tp), void>::type
Gradients(std::tuple<Tp...>& t)
{
- MatType gradient;
- std::get<I>(t).Gradient(gradient);
- gradients[gradientNum++] += gradient;
-
+ std::get<I>(t).Optimzer().Update();
Gradients<I + 1, Tp...>(t);
}
@@ -593,11 +588,8 @@ class RNN
typename std::enable_if<I < sizeof...(Tp), void>::type
Apply(std::tuple<Tp...>& t)
{
- std::get<I>(t).Optimzer().UpdateWeights(std::get<I>(t).Weights(),
- gradients[gradientNum], trainError);
-
- // Reset the gradient storage.
- gradients[gradientNum++].zeros();
+ std::get<I>(t).Optimzer().Optimize();
+ std::get<I>(t).Optimzer().Reset();
Apply<I + 1, Tp...>(t);
}
@@ -678,9 +670,6 @@ class RNN
activations.push_back(new MatType(
std::get<I>(t).InputLayer().OutputSize(), input.n_elem));
- gradients.push_back(new MatType(std::get<I>(t).Weights().n_rows,
- std::get<I>(t).Weights().n_cols, arma::fill::zeros));
-
Layer<I + 1, VecType, Tp...>(t, input, layer);
}
@@ -788,18 +777,12 @@ class RNN
//! The activation storage we are using to perform the feed backward pass.
boost::ptr_vector<MatType> activations;
- //! The gradient storage we are using to perform the feed backward pass.
- boost::ptr_vector<MatType> gradients;
-
//! The index of the current sequence number.
size_t seqNum;
//! The index of the currently active layer.
size_t layerNum;
- //! The index of the currently active gradient.
- size_t gradientNum;
-
//! The index of the currently active delta.
size_t deltaNum;
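Taken together, the change is the same in all three headers: each
connection's optimizer now owns its gradient state, so the networks no
longer carry a gradients store. As a hypothetical end-to-end usage example
built on the OptimizerSketch above (DummyConnection is invented for
illustration and is not one of mlpack's connection classes):

    #include <armadillo>
    #include <iostream>

    // Invented connection exposing just enough surface for the sketch.
    struct DummyConnection
    {
      arma::mat& Weights() { return weights; }

      // Pretend every backward pass produced a gradient of all ones.
      void Gradient(arma::mat& gradient)
      {
        gradient.ones(weights.n_rows, weights.n_cols);
      }

      arma::mat weights = arma::ones<arma::mat>(2, 2);
    };

    int main()
    {
      DummyConnection connection;
      OptimizerSketch<DummyConnection, arma::mat> optimizer(connection);

      optimizer.Update();    // accumulate this pass's gradient
      optimizer.Optimize();  // weights -= stepSize * accumulated gradient
      optimizer.Reset();     // clear state before the next sequence

      // Every weight moved from 1.0 to 0.99 with the default step size.
      std::cout << connection.Weights() << std::endl;
    }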