[mlpack-git] master: Add test cases for the recurrent neural network. (9233324)
gitdub at big.cc.gt.atl.ga.us
Thu Mar 5 22:17:18 EST 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/904762495c039e345beba14c1142fd719b3bd50e...f94823c800ad6f7266995c700b1b630d5ffdcf40
>---------------------------------------------------------------
commit 923332420cc466c48f8e4bd8bd5f90f641dc68c8
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date: Fri Feb 27 21:13:11 2015 +0100
Add test cases for the recurrent neural network.
>---------------------------------------------------------------
923332420cc466c48f8e4bd8bd5f90f641dc68c8
src/mlpack/tests/feedforward_network_test.cpp | 86 +++-
src/mlpack/tests/recurrent_network_test.cpp | 646 ++++++++++++++++++++++++++
2 files changed, 709 insertions(+), 23 deletions(-)
diff --git a/src/mlpack/tests/feedforward_network_test.cpp b/src/mlpack/tests/feedforward_network_test.cpp
index 6f8e338..5f67fbc 100644
--- a/src/mlpack/tests/feedforward_network_test.cpp
+++ b/src/mlpack/tests/feedforward_network_test.cpp
@@ -36,30 +36,50 @@ using namespace mlpack::ann;
BOOST_AUTO_TEST_SUITE(FeedForwardNetworkTest);
-
/**
* Train and evaluate a vanilla network with the specified structure.
*/
-template<typename WeightInitRule,
- typename PerformanceFunction,
- typename OptimizerType,
- typename OutputLayerType,
- typename PerformanceFunctionType,
- typename MatType = arma::mat,
- typename VecType = arma::colvec
+template<
+ typename WeightInitRule,
+ typename PerformanceFunction,
+ typename OptimizerType,
+ typename OutputLayerType,
+ typename PerformanceFunctionType,
+ typename MatType = arma::mat,
+ typename VecType = arma::colvec
>
void BuildVanillaNetwork(MatType& trainData,
MatType& trainLabels,
MatType& testData,
MatType& testLabels,
- size_t hiddenLayerSize,
- size_t maxEpochs,
- double classificationErrorThreshold,
- double ValidationErrorThreshold,
+ const size_t hiddenLayerSize,
+ const size_t maxEpochs,
+ const double classificationErrorThreshold,
+ const double ValidationErrorThreshold,
WeightInitRule weightInitRule = WeightInitRule())
{
+ /*
+ * Construct a feed forward network with trainData.n_rows input nodes,
+ * hiddenLayerSize hidden nodes and trainLabels.n_rows output nodes. The
+ * network structure looks like:
+ *
+ * Input Hidden Output
+ * Layer Layer Layer
+ * +-----+ +-----+ +-----+
+ * | | | | | |
+ * | +------>| +------>| |
+ * | | +>| | | |
+ * +-----+ | +--+--+ +-----+
+ * |
+ * Bias |
+ * Layer |
+ * +-----+ |
+ * | | |
+ * | +-----+
+ * | |
+ * +-----+
+ */
BiasLayer<> biasLayer0(1);
- BiasLayer<> biasLayer1(1);
NeuronLayer<PerformanceFunction> inputLayer(trainData.n_rows);
NeuronLayer<PerformanceFunction> hiddenLayer0(hiddenLayerSize);
@@ -169,7 +189,7 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest)
SteepestDescent<>,
BinaryClassificationLayer<>,
MeanSquaredErrorFunction<> >
- (dataset, labels, dataset, labels, 100, 450, 0.6, 90, randInitB);
+ (dataset, labels, dataset, labels, 100, 100, 0.6, 10, randInitB);
// Vanilla neural net with tanh activation function.
BuildVanillaNetwork<RandomInitialization<>,
@@ -177,7 +197,7 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest)
SteepestDescent<>,
BinaryClassificationLayer<>,
MeanSquaredErrorFunction<> >
- (dataset, labels, dataset, labels, 10, 450, 0.6, 90, randInitB);
+ (dataset, labels, dataset, labels, 10, 200, 0.6, 20, randInitB);
}
/**
@@ -237,13 +257,13 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkConvergenceTest)
* Train a vanilla network with the specified structure step by step and
* evaluate the network.
*/
-template<typename WeightInitRule,
- typename PerformanceFunction,
- typename OptimizerType,
- typename OutputLayerType,
- typename PerformanceFunctionType,
- typename MatType = arma::mat,
- typename VecType = arma::colvec
+template<
+ typename WeightInitRule,
+ typename PerformanceFunction,
+ typename OptimizerType,
+ typename OutputLayerType,
+ typename PerformanceFunctionType,
+ typename MatType = arma::mat
>
void BuildNetworkOptimzer(MatType& trainData,
MatType& trainLabels,
@@ -253,8 +273,28 @@ void BuildNetworkOptimzer(MatType& trainData,
size_t epochs,
WeightInitRule weightInitRule = WeightInitRule())
{
+ /*
+ * Construct a feed forward network with trainData.n_rows input nodes,
+ * hiddenLayerSize hidden nodes and trainLabels.n_rows output nodes. The
+ * network structure looks like:
+ *
+ * Input Hidden Output
+ * Layer Layer Layer
+ * +-----+ +-----+ +-----+
+ * | | | | | |
+ * | +------>| +------>| |
+ * | | +>| | | |
+ * +-----+ | +--+--+ +-----+
+ * |
+ * Bias |
+ * Layer |
+ * +-----+ |
+ * | | |
+ * | +-----+
+ * | |
+ * +-----+
+ */
BiasLayer<> biasLayer0(1);
- BiasLayer<> biasLayer1(1);
NeuronLayer<PerformanceFunction> inputLayer(trainData.n_rows);
NeuronLayer<PerformanceFunction> hiddenLayer0(hiddenLayerSize);
diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp
new file mode 100644
index 0000000..24f44fa
--- /dev/null
+++ b/src/mlpack/tests/recurrent_network_test.cpp
@@ -0,0 +1,646 @@
+/**
+ * @file recurrent_network_test.cpp
+ * @author Marcus Edel
+ *
+ * Tests the recurrent network.
+ */
+#include <mlpack/core.hpp>
+
+#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
+#include <mlpack/methods/ann/activation_functions/identity_function.hpp>
+#include <mlpack/methods/ann/activation_functions/softsign_function.hpp>
+#include <mlpack/methods/ann/activation_functions/tanh_function.hpp>
+#include <mlpack/methods/ann/activation_functions/rectifier_function.hpp>
+
+#include <mlpack/methods/ann/init_rules/random_init.hpp>
+#include <mlpack/methods/ann/init_rules/orthogonal_init.hpp>
+#include <mlpack/methods/ann/init_rules/oivs_init.hpp>
+#include <mlpack/methods/ann/init_rules/kathirvalavakumar_subavathi_init.hpp>
+#include <mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp>
+
+#include <mlpack/methods/ann/layer/neuron_layer.hpp>
+#include <mlpack/methods/ann/layer/bias_layer.hpp>
+#include <mlpack/methods/ann/layer/binary_classification_layer.hpp>
+#include <mlpack/methods/ann/layer/multiclass_classification_layer.hpp>
+
+#include <mlpack/methods/ann/connections/full_connection.hpp>
+#include <mlpack/methods/ann/connections/self_connection.hpp>
+#include <mlpack/methods/ann/connections/fullself_connection.hpp>
+#include <mlpack/methods/ann/connections/connection_traits.hpp>
+
+#include <mlpack/methods/ann/trainer/trainer.hpp>
+
+#include <mlpack/methods/ann/ffnn.hpp>
+#include <mlpack/methods/ann/rnn.hpp>
+
+#include <mlpack/methods/ann/performance_functions/mse_function.hpp>
+#include <mlpack/methods/ann/performance_functions/sse_function.hpp>
+#include <mlpack/methods/ann/performance_functions/cee_function.hpp>
+
+#include <mlpack/methods/ann/optimizer/steepest_descent.hpp>
+#include <mlpack/methods/ann/optimizer/rpropp.hpp>
+
+#include <boost/test/unit_test.hpp>
+#include "old_boost_test_definitions.hpp"
+
+using namespace mlpack;
+using namespace mlpack::ann;
+
+BOOST_AUTO_TEST_SUITE(RecurrentNetworkTest);
+
+// Be careful! When writing new tests, always get the boolean value and store
+// it in a temporary, because the Boost unit test macros do weird things and
+// will cause bizarre problems.
+
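+// For example, a minimal sketch of the pattern used throughout this file:
+//
+//   const bool b = arma::all((output == labels.unsafe_col(i)) == 1);
+//   BOOST_REQUIRE_EQUAL(b, 1);
+//
+// rather than placing the expression directly inside BOOST_REQUIRE_EQUAL().
+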
+/**
+ * Construct a 2-class dataset out of noisy sines.
+ *
+ * @param data Input data used to store the noisy sines.
+ * @param labels Labels used to store the target class of the noisy sines.
+ * @param points Number of points/features in a single sequence.
+ * @param sequences Number of sequences for each class.
+ * @param noise The noise factor that influences the sines.
+ */
+void GenerateNoisySines(arma::mat& data,
+ arma::mat& labels,
+ const size_t points,
+ const size_t sequences,
+ const double noise = 0.3)
+{
+ arma::colvec x = arma::linspace<arma::Col<double> >(0,
+ points - 1, points) / points * 20.0;
+ arma::colvec y1 = arma::sin(x + arma::as_scalar(arma::randu(1)) * 3.0);
+ arma::colvec y2 = arma::sin(x / 2.0 + arma::as_scalar(arma::randu(1)) * 3.0);
+
+ data = arma::zeros(points, sequences * 2);
+ labels = arma::zeros(2, sequences * 2);
+
+ for (size_t seq = 0; seq < sequences; seq++)
+ {
+ data.col(seq) = arma::randu(points) * noise + y1 +
+ arma::as_scalar(arma::randu(1) - 0.5) * noise;
+ labels(0, seq) = 1;
+
+ data.col(sequences + seq) = arma::randu(points) * noise + y2 +
+ arma::as_scalar(arma::randu(1) - 0.5) * noise;
+ labels(1, sequences + seq) = 1;
+ }
+}
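+
+// A minimal usage sketch: generate 12 (2 * 6) noisy sine sequences with 10
+// points each.
+//
+//   arma::mat data, labels;
+//   GenerateNoisySines(data, labels, 10, 6);
+//   // data is a 10 x 12 matrix holding one sequence per column; labels is a
+//   // 2 x 12 one-hot matrix encoding the class of each sequence.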
+
+/**
+ * Train the vanilla network on a larger dataset.
+ */
+BOOST_AUTO_TEST_CASE(SequenceClassificationTest)
+{
+ // Generate 12 (2 * 6) noisy sines. A single sine contains 10 points/features.
+ arma::mat input, labels;
+ GenerateNoisySines(input, labels, 10, 6);
+
+ /*
+ * Construct a network with 1 input unit, 4 hidden units and 2 output units.
+ * The hidden layer is connected to itself. The network structure looks like:
+ *
+ * Input Hidden Output
+ * Layer(1) Layer(4) Layer(2)
+ * +-----+ +-----+ +-----+
+ * | | | | | |
+ * | +------>| +------>| |
+ * | | ..>| | | |
+ * +-----+ . +--+--+ +-----+
+ * . .
+ * . .
+ * .......
+ */
+ NeuronLayer<LogisticFunction> inputLayer(1);
+ NeuronLayer<LogisticFunction> hiddenLayer0(4);
+ NeuronLayer<LogisticFunction> recurrentLayer0(hiddenLayer0.InputSize());
+ NeuronLayer<LogisticFunction> hiddenLayer1(2);
+ BinaryClassificationLayer<> outputLayer;
+
+ SteepestDescent< > conOptimizer0(inputLayer.InputSize(),
+ hiddenLayer0.InputSize(), 1, 0);
+ SteepestDescent< > conOptimizer2(hiddenLayer0.InputSize(),
+ hiddenLayer0.InputSize(), 1, 0);
+ SteepestDescent< > conOptimizer3(hiddenLayer0.InputSize(),
+ hiddenLayer1.OutputSize(), 1, 0);
+
+ NguyenWidrowInitialization<> randInit;
+
+ FullConnection<
+ decltype(inputLayer),
+ decltype(hiddenLayer0),
+ decltype(conOptimizer0),
+ decltype(randInit)>
+ layerCon0(inputLayer, hiddenLayer0, conOptimizer0, randInit);
+
+ SelfConnection<
+ decltype(recurrentLayer0),
+ decltype(hiddenLayer0),
+ decltype(conOptimizer2),
+ decltype(randInit)>
+ layerCon2(recurrentLayer0, hiddenLayer0, conOptimizer2, randInit);
+
+ FullConnection<
+ decltype(hiddenLayer0),
+ decltype(hiddenLayer1),
+ decltype(conOptimizer3),
+ decltype(randInit)>
+ layerCon4(hiddenLayer0, hiddenLayer1, conOptimizer3, randInit);
+
+ auto module0 = std::tie(layerCon0, layerCon2);
+ auto module1 = std::tie(layerCon4);
+ auto modules = std::tie(module0, module1);
+
+ RNN<decltype(modules),
+ decltype(outputLayer),
+ MeanSquaredErrorFunction<> > net(modules, outputLayer);
+
+ // Train the network for 1000 epochs.
+ Trainer<decltype(net)> trainer(net, 1000);
+ trainer.Train(input, labels, input, labels);
+
+ // Ask the network to classify the input data it was trained on.
+ arma::colvec output;
+ for (size_t i = 0; i < input.n_cols; i++)
+ {
+ net.Predict(input.unsafe_col(i), output);
+
+ bool b = arma::all((output == labels.unsafe_col(i)) == 1);
+ BOOST_REQUIRE_EQUAL(b, 1);
+ }
+}
+
+/**
+ * Train and evaluate a vanilla feed forward network and a recurrent network
+ * with the specified structure and compare the two networks' outputs and
+ * overall errors.
+ */
+template<
+ typename WeightInitRule,
+ typename PerformanceFunction,
+ typename OptimizerType,
+ typename OutputLayerType,
+ typename PerformanceFunctionType,
+ typename MatType = arma::mat
+>
+void CompareVanillaNetworks(MatType& trainData,
+ MatType& trainLabels,
+ MatType& testData,
+ MatType& testLabels,
+ const size_t hiddenLayerSize,
+ const size_t maxEpochs,
+ WeightInitRule weightInitRule = WeightInitRule())
+{
+ BiasLayer<> biasLayer0(1);
+
+ NeuronLayer<PerformanceFunction> inputLayer(trainData.n_rows);
+ NeuronLayer<PerformanceFunction> hiddenLayer0(hiddenLayerSize);
+ NeuronLayer<PerformanceFunction> hiddenLayer1(trainLabels.n_rows);
+
+ OutputLayerType outputLayer;
+
+ OptimizerType ffnConOptimizer0(trainData.n_rows, hiddenLayerSize);
+ OptimizerType ffnConOptimizer1(1, hiddenLayerSize);
+ OptimizerType ffnConOptimizer2(hiddenLayerSize, trainLabels.n_rows);
+
+ OptimizerType rnnConOptimizer0(trainData.n_rows, hiddenLayerSize);
+ OptimizerType rnnConOptimizer1(1, hiddenLayerSize);
+ OptimizerType rnnConOptimizer2(hiddenLayerSize, trainLabels.n_rows);
+
+ FullConnection<
+ decltype(inputLayer),
+ decltype(hiddenLayer0),
+ decltype(ffnConOptimizer0),
+ decltype(weightInitRule)>
+ ffnLayerCon0(inputLayer, hiddenLayer0, ffnConOptimizer0, weightInitRule);
+
+ FullConnection<
+ decltype(inputLayer),
+ decltype(hiddenLayer0),
+ decltype(rnnConOptimizer0),
+ decltype(weightInitRule)>
+ rnnLayerCon0(inputLayer, hiddenLayer0, rnnConOptimizer0, weightInitRule);
+
+ FullConnection<
+ decltype(biasLayer0),
+ decltype(hiddenLayer0),
+ decltype(ffnConOptimizer1),
+ decltype(weightInitRule)>
+ ffnLayerCon1(biasLayer0, hiddenLayer0, ffnConOptimizer1, weightInitRule);
+
+ FullConnection<
+ decltype(biasLayer0),
+ decltype(hiddenLayer0),
+ decltype(rnnConOptimizer1),
+ decltype(weightInitRule)>
+ rnnLayerCon1(biasLayer0, hiddenLayer0, rnnConOptimizer1, weightInitRule);
+
+ FullConnection<
+ decltype(hiddenLayer0),
+ decltype(hiddenLayer1),
+ decltype(ffnConOptimizer2),
+ decltype(weightInitRule)>
+ ffnLayerCon2(hiddenLayer0, hiddenLayer1, ffnConOptimizer2, weightInitRule);
+
+ FullConnection<
+ decltype(hiddenLayer0),
+ decltype(hiddenLayer1),
+ decltype(rnnConOptimizer2),
+ decltype(weightInitRule)>
+ rnnLayerCon2(hiddenLayer0, hiddenLayer1, rnnConOptimizer2, weightInitRule);
+
+ auto ffnModule0 = std::tie(ffnLayerCon0, ffnLayerCon1);
+ auto ffnModule1 = std::tie(ffnLayerCon2);
+ auto ffnModules = std::tie(ffnModule0, ffnModule1);
+
+ auto rnnModule0 = std::tie(rnnLayerCon0, rnnLayerCon1);
+ auto rnnModule1 = std::tie(rnnLayerCon2);
+ auto rnnModules = std::tie(rnnModule0, rnnModule1);
+
+ /*
+ * Construct a feed forward network with trainData.n_rows input units,
+ * hiddenLayerSize hidden units and trainLabels.n_rows output units. The
+ * network structure looks like:
+ *
+ * Input Hidden Output
+ * Layer Layer Layer
+ * +-----+ +-----+ +-----+
+ * | | | | | |
+ * | +------>| +------>| |
+ * | | | | | |
+ * +-----+ +--+--+ +-----+
+ */
+ FFNN<decltype(ffnModules), decltype(outputLayer), PerformanceFunctionType>
+ ffn(ffnModules, outputLayer);
+
+ /*
+ * Construct a recurrent network with trainData.n_rows input units,
+ * hiddenLayerSize hidden units and trainLabels.n_rows output units. The
+ * hidden layer is connected to itself. The network structure looks like:
+ *
+ * Input Hidden Output
+ * Layer Layer Layer
+ * +-----+ +-----+ +-----+
+ * | | | | | |
+ * | +------>| +------>| |
+ * | | ..>| | | |
+ * +-----+ . +--+--+ +-----+
+ * . .
+ * . .
+ * .......
+ */
+ RNN<decltype(rnnModules), decltype(outputLayer), PerformanceFunctionType>
+ rnn(rnnModules, outputLayer);
+
+ // Train the network for maxEpochs epochs, or until the validation error
+ // drops below 0.001.
+ Trainer<decltype(ffn)> ffnTrainer(ffn, maxEpochs, 1, 0.001, false);
+ Trainer<decltype(rnn)> rnnTrainer(rnn, maxEpochs, 1, 0.001, false);
+
+ for (size_t i = 0; i < 5; i++)
+ {
+ rnnTrainer.Train(trainData, trainLabels, testData, testLabels);
+ ffnTrainer.Train(trainData, trainLabels, testData, testLabels);
+
+ if (!arma::is_finite(ffnTrainer.ValidationError()))
+ continue;
+
+ BOOST_REQUIRE_CLOSE(ffnTrainer.ValidationError(),
+ rnnTrainer.ValidationError(), 1e-3);
+ }
+}
+
+/**
+ * Train a vanilla feed forward network and a recurrent network on sequences
+ * of length one. Ideally the recurrent network should produce the same output
+ * as the feed forward network, since the self connection shouldn't affect the
+ * output for a sequence of length one.
+ */
+BOOST_AUTO_TEST_CASE(FeedForwardRecurrentNetworkTest)
+{
+ arma::mat input;
+ arma::mat labels;
+
+ RandomInitialization<> randInit(1, 1);
+
+ // Test on a non-linearly separable dataset (XOR).
+ input << 0 << 1 << 1 << 0 << arma::endr
+ << 1 << 0 << 1 << 0 << arma::endr;
+ labels << 0 << 0 << 1 << 1;
+
+ // Vanilla neural net with logistic activation function.
+ CompareVanillaNetworks<RandomInitialization<>,
+ LogisticFunction,
+ SteepestDescent<>,
+ BinaryClassificationLayer<>,
+ MeanSquaredErrorFunction<> >
+ (input, labels, input, labels, 10, 10, randInit);
+
+ // Vanilla neural net with identity activation function.
+ CompareVanillaNetworks<RandomInitialization<>,
+ IdentityFunction,
+ SteepestDescent<>,
+ BinaryClassificationLayer<>,
+ MeanSquaredErrorFunction<> >
+ (input, labels, input, labels, 1, 1, randInit);
+
+ // Vanilla neural net with rectifier activation function.
+ CompareVanillaNetworks<RandomInitialization<>,
+ RectifierFunction,
+ SteepestDescent<>,
+ BinaryClassificationLayer<>,
+ MeanSquaredErrorFunction<> >
+ (input, labels, input, labels, 10, 10, randInit);
+
+ // Vanilla neural net with softsign activation function.
+ CompareVanillaNetworks<RandomInitialization<>,
+ SoftsignFunction,
+ SteepestDescent<>,
+ BinaryClassificationLayer<>,
+ MeanSquaredErrorFunction<> >
+ (input, labels, input, labels, 10, 10, randInit);
+
+ // Vanilla neural net with tanh activation function.
+ CompareVanillaNetworks<RandomInitialization<>,
+ TanhFunction,
+ SteepestDescent<>,
+ BinaryClassificationLayer<>,
+ MeanSquaredErrorFunction<> >
+ (input, labels, input, labels, 10, 10, randInit);
+}
+
+/**
+ * Generate a random Reber grammar.
+ *
+ * For more information, see the following thesis.
+ *
+ * @code
+ * @misc{Gers2001,
+ * author = {Felix Gers},
+ * title = {Long Short-Term Memory in Recurrent Neural Networks},
+ * year = {2001}
+ * }
+ * @endcode
+ *
+ * @param transitions Reber grammar transition matrix.
+ * @param reber The generated Reber grammar string.
+ */
+void GenerateReber(const arma::Mat<char>& transitions, std::string& reber)
+{
+ size_t idx = 0;
+ reber = "B";
+
+ do
+ {
+ const int grammerIdx = rand() % 2;
+ reber += arma::as_scalar(transitions.submat(idx, grammerIdx, idx,
+ grammerIdx));
+
+ idx = arma::as_scalar(transitions.submat(idx, grammerIdx + 2, idx,
+ grammerIdx + 2)) - '0';
+ } while (idx != 0);
+
+}
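+
+// A minimal usage sketch, assuming the Reber transition matrix defined in
+// ReberGrammarTest below:
+//
+//   std::string reber;
+//   GenerateReber(transitions, reber);
+//   // reber now holds a random string such as "BTSSXXTVVE", always
+//   // beginning with 'B' and ending with 'E'.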
+
+/**
+ * Generate a random embedded Reber grammar.
+ *
+ * @param transitions Embedded Reber grammar transition matrix.
+ * @param reber The generated embedded Reber grammar string.
+ */
+void GenerateEmbeddedReber(const arma::Mat<char>& transitions,
+ std::string& reber)
+{
+ GenerateReber(transitions, reber);
+ const char c = (rand() % 2) == 1 ? 'P' : 'T';
+ reber = c + reber + c;
+ reber = "B" + reber + "E";
+}
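+
+// For example, if GenerateReber() yields "BTSSXXTVVE" and 'P' is drawn, the
+// embedded Reber string becomes "BPBTSSXXTVVEPE".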
+
+/**
+ * Convert a Reber symbol to a unit vector.
+ *
+ * @param symbol Reber symbol to be converted.
+ * @param translation The converted symbol stored as unit vector.
+ */
+void ReberTranslation(const char symbol, arma::colvec& translation)
+{
+ arma::Col<char> symbols;
+ symbols << 'B' << 'T' << 'S' << 'X' << 'P' << 'V' << 'E' << arma::endr;
+ const int idx = arma::as_scalar(arma::find(symbols == symbol, 1, "first"));
+
+ translation = arma::zeros<arma::colvec>(7);
+ translation(idx) = 1;
+}
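+
+// For example, 'B' translates to the unit vector (1, 0, 0, 0, 0, 0, 0)^T and
+// 'E' to (0, 0, 0, 0, 0, 0, 1)^T; ReberReverseTranslation() below inverts
+// this mapping.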
+
+/**
+ * Convert a unit vector to a Reber symbol.
+ *
+ * @param translation The unit vector to be converted.
+ * @param symbol The converted unit vector stored as Reber symbol.
+ */
+void ReberReverseTranslation(const arma::colvec& translation, char& symbol)
+{
+ arma::Col<char> symbols;
+ symbols << 'B' << 'T' << 'S' << 'X' << 'P' << 'V' << 'E' << arma::endr;
+ const int idx = arma::as_scalar(arma::find(translation == 1, 1, "first"));
+
+ symbol = symbols(idx);
+}
+
+/**
+ * Given a Reber string, return a string with all reachable next symbols.
+ *
+ * @param transitions Reber grammar transition matrix.
+ * @param reber The Reber string generated so far.
+ * @param nextReber String used to store all reachable next symbols.
+ */
+void GenerateNextReber(const arma::Mat<char>& transitions,
+ const std::string& reber, std::string& nextReber)
+{
+ size_t idx = 0;
+
+ for (size_t grammer = 1; grammer < reber.length(); grammer++)
+ {
+ const int grammerIdx = arma::as_scalar(arma::find(
+ transitions.row(idx) == reber[grammer], 1, "first"));
+
+ idx = arma::as_scalar(transitions.submat(idx, grammerIdx + 2, idx,
+ grammerIdx + 2)) - '0';
+ }
+
+ nextReber = arma::as_scalar(transitions.submat(idx, 0, idx, 0));
+ nextReber += arma::as_scalar(transitions.submat(idx, 1, idx, 1));
+}
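+
+// For example, given the partial string "B", the walk stays in state 0, so
+// nextReber is set to "TP" (the two symbols in row 0 of the transition
+// matrix).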
+
+BOOST_AUTO_TEST_CASE(ReberGrammarTest)
+{
+ // Reber state transition matrix. (The last two columns are the indices of
+ // the next state.)
+ arma::Mat<char> transitions;
+ transitions << 'T' << 'P' << '1' << '2' << arma::endr
+ << 'X' << 'S' << '3' << '1' << arma::endr
+ << 'V' << 'T' << '4' << '2' << arma::endr
+ << 'X' << 'S' << '2' << '5' << arma::endr
+ << 'P' << 'V' << '3' << '5' << arma::endr
+ << 'E' << 'E' << '0' << '0' << arma::endr;
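+
+ // For example, row 0 reads: from state 0, emit 'T' and move to state 1, or
+ // emit 'P' and move to state 2.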
+
+ const size_t trainReberGrammarCount = 1000;
+ const size_t testReberGrammarCount = 10;
+
+ std::string trainReber, testReber;
+ arma::field<arma::mat> trainInput(1, trainReberGrammarCount);
+ arma::field<arma::mat> trainLabels(1, trainReberGrammarCount);
+ arma::field<arma::mat> testInput(1, testReberGrammarCount);
+ arma::field<arma::mat> testLabels(1, testReberGrammarCount);
+ arma::colvec translation;
+
+ // Generate the training data.
+ for (size_t i = 0; i < trainReberGrammarCount; i++)
+ {
+ GenerateReber(transitions, trainReber);
+
+ for (size_t j = 0; j < trainReber.length() - 1; j++)
+ {
+ ReberTranslation(trainReber[j], translation);
+ trainInput(0, i) = arma::join_cols(trainInput(0, i), translation);
+
+ ReberTranslation(trainReber[j + 1], translation);
+ trainLabels(0, i) = arma::join_cols(trainLabels(0, i), translation);
+ }
+ }
+
+ // Generate the test data.
+ for (size_t i = 0; i < testReberGrammarCount; i++)
+ {
+ GenerateReber(transitions, testReber);
+
+ for (size_t j = 0; j < testReber.length() - 1; j++)
+ {
+ ReberTranslation(testReber[j], translation);
+ testInput(0, i) = arma::join_cols(testInput(0, i), translation);
+
+ ReberTranslation(testReber[j + 1], translation);
+ testLabels(0, i) = arma::join_cols(testLabels(0, i), translation);
+ }
+ }
+
+ /*
+ * Construct a network with 7 input units, 5 hidden units and 7 output units.
+ * The hidden layer is connected to itself. The network structure looks like:
+ *
+ * Input Hidden Output
+ * Layer(7) Layer(5) Layer(7)
+ * +-----+ +-----+ +-----+
+ * | | | | | |
+ * | +------>| +------>| |
+ * | | ..>| | | |
+ * +-----+ . +--+--+ +-----+
+ * . .
+ * . .
+ * .......
+ */
+ NeuronLayer<LogisticFunction> inputLayer(7);
+ NeuronLayer<LogisticFunction> hiddenLayer0(5);
+ NeuronLayer<LogisticFunction> recurrentLayer0(hiddenLayer0.InputSize());
+ NeuronLayer<LogisticFunction> hiddenLayer1(7);
+ BinaryClassificationLayer<> outputLayer;
+
+ SteepestDescent< > conOptimizer0(inputLayer.InputSize(),
+ hiddenLayer0.InputSize());
+ SteepestDescent< > conOptimizer2(hiddenLayer0.InputSize(),
+ hiddenLayer0.InputSize());
+ SteepestDescent< > conOptimizer3(hiddenLayer0.InputSize(),
+ hiddenLayer1.OutputSize());
+
+ NguyenWidrowInitialization<> randInit;
+
+ FullConnection<
+ decltype(inputLayer),
+ decltype(hiddenLayer0),
+ decltype(conOptimizer0),
+ decltype(randInit)>
+ layerCon0(inputLayer, hiddenLayer0, conOptimizer0, randInit);
+
+ SelfConnection<
+ decltype(recurrentLayer0),
+ decltype(hiddenLayer0),
+ decltype(conOptimizer2),
+ decltype(randInit)>
+ layerCon2(recurrentLayer0, hiddenLayer0, conOptimizer2, randInit);
+
+ FullConnection<
+ decltype(hiddenLayer0),
+ decltype(hiddenLayer1),
+ decltype(conOptimizer3),
+ decltype(randInit)>
+ layerCon4(hiddenLayer0, hiddenLayer1, conOptimizer3, randInit);
+
+ auto module0 = std::tie(layerCon0, layerCon2);
+ auto module1 = std::tie(layerCon4);
+ auto modules = std::tie(module0, module1);
+
+ RNN<decltype(modules),
+ decltype(outputLayer),
+ MeanSquaredErrorFunction<> > net(modules, outputLayer);
+
+ // Train the network for (500 * trainReberGrammarCount) epochs.
+ Trainer<decltype(net)> trainer(net, 1, 1, 0, false);
+
+ arma::mat inputTemp, labelsTemp;
+ for (size_t i = 0; i < 500; i++)
+ {
+ for (size_t j = 0; j < trainReberGrammarCount; j++)
+ {
+ inputTemp = trainInput.at(0, j);
+ labelsTemp = trainLabels.at(0, j);
+ trainer.Train(inputTemp, labelsTemp, inputTemp, labelsTemp);
+ }
+ }
+
+ double error = 0;
+
+ // Ask the network to predict the next Reber symbol in each given sequence.
+ for (size_t i = 0; i < testReberGrammarCount; i++)
+ {
+ arma::colvec output;
+ arma::colvec input = testInput.at(0, i);
+
+ net.Predict(input, output);
+
+ const size_t reberGrammerSize = 7;
+ std::string inputReber = "";
+
+ size_t reberError = 0;
+ for (size_t j = 0; j < (output.n_elem / reberGrammerSize); j++)
+ {
+ if (arma::sum(output.subvec(j * reberGrammerSize, (j + 1) *
+ reberGrammerSize - 1)) != 1) break;
+
+ char predictedSymbol, inputSymbol;
+ std::string reberChoices;
+
+ ReberReverseTranslation(output.subvec(j * reberGrammerSize, (j + 1) *
+ reberGrammerSize - 1), predictedSymbol);
+ ReberReverseTranslation(input.subvec(j * reberGrammerSize, (j + 1) *
+ reberGrammerSize - 1), inputSymbol);
+ inputReber += inputSymbol;
+
+ GenerateNextReber(transitions, inputReber, reberChoices);
+
+ if (reberChoices.find(predictedSymbol) != std::string::npos)
+ reberError++;
+ }
+
+ if (reberError != (output.n_elem / reberGrammerSize))
+ error += 1;
+ }
+
+ error /= testReberGrammarCount;
+
+ BOOST_REQUIRE_LE(error, 0.2);
+}
+
+BOOST_AUTO_TEST_SUITE_END();