'''
A Bidirectional Recurrent Neural Network (LSTM) implementation example using the TensorFlow library.
This example uses the MNIST database of handwritten digits (http://yann.lecun.com/exdb/mnist/)
Long Short Term Memory paper: http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''

# Import MNIST data
import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

import tensorflow as tf
from tensorflow.python.ops.constant_op import constant
from tensorflow.models.rnn import rnn, rnn_cell
import numpy as np

'''
To classify images using a bidirectional recurrent neural network, we consider every image row as a sequence of pixels.
Because the MNIST image shape is 28*28 px, we handle 28 sequences of 28 steps for every sample.
'''
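
# Shape note (illustration only; the array below is hypothetical and not used by
# the training code): a flattened batch of images with shape (batch_size, 784) is
# viewed as 28 time steps of 28 pixels each, i.e. (batch_size, n_steps, n_input):
#   _demo = np.zeros((4, 784), dtype=np.float32)  # hypothetical batch of 4 images
#   _demo = _demo.reshape((4, 28, 28))            # -> (batch_size, n_steps, n_input)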

# Parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10

# Network Parameters
n_input = 28 # MNIST data input (img shape: 28*28)
n_steps = 28 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 10 # MNIST total classes (0-9 digits)

# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
# The TensorFlow LSTM cell requires a state of length 2*n_hidden (cell state & hidden state)
istate_fw = tf.placeholder("float", [None, 2*n_hidden])
istate_bw = tf.placeholder("float", [None, 2*n_hidden])
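# (Assumption about this old API: BasicLSTMCell keeps its state as the cell state c
#  and the hidden state h concatenated along the feature axis, hence the 2*n_hidden width.)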
y = tf.placeholder("float", [None, n_classes])

# Define weights
weights = {
    # Hidden layer weights => 2*n_hidden because of forward + backward cells
    'hidden': tf.Variable(tf.random_normal([n_input, 2*n_hidden])),
    'out': tf.Variable(tf.random_normal([2*n_hidden, n_classes]))
}
biases = {
    'hidden': tf.Variable(tf.random_normal([2*n_hidden])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
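
# Shape walkthrough (derived from the definitions above):
#   x                       : (batch_size, n_steps, n_input)
#   after 'hidden' layer    : (n_steps*batch_size, 2*n_hidden)
#   split for the RNN       : n_steps tensors of (batch_size, 2*n_hidden)
#   last BiRNN output       : (batch_size, 2*n_hidden), forward & backward concatenated
#   logits from 'out' layer : (batch_size, n_classes)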

def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases, _batch_size, _seq_len):

    # BiRNN requires sequence_length to be supplied as an int64 tensor of shape [batch_size]
    # Note: TensorFlow 0.6.0 requires the BiRNN sequence_length parameter to be set
    # For a better implementation with the latest version of TensorFlow, see the commented-out code below
    _seq_len = tf.fill([_batch_size], constant(_seq_len, dtype=tf.int64))

    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Define LSTM cells with TensorFlow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Split data because the rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, n_steps, _X)  # n_steps * (batch_size, 2*n_hidden)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
                                    initial_state_fw=_istate_fw,
                                    initial_state_bw=_istate_bw,
                                    sequence_length=_seq_len)
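    # Here 'outputs' is a list with one tensor per time step; each tensor has shape
    # (batch_size, 2*n_hidden) since the forward and backward outputs are concatenated.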

    # Linear activation
    # Get inner loop last output
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']

pred = BiRNN(x, istate_fw, istate_bw, weights, biases, batch_size, n_steps)


# NOTE: The following code works with the current master version of TensorFlow,
# where the BiRNN sequence_length parameter isn't required, so we don't define it
#
# def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):
#
#     # input shape: (batch_size, n_steps, n_input)
#     _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
#     # Reshape to prepare input to hidden activation
#     _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
#     # Linear activation
#     _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']
#
#     # Define LSTM cells with TensorFlow
#     # Forward direction cell
#     lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
#     # Backward direction cell
#     lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
#     # Split data because the rnn cell needs a list of inputs for the RNN inner loop
#     _X = tf.split(0, n_steps, _X)  # n_steps * (batch_size, 2*n_hidden)
#
#     # Get lstm cell output
#     outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
#                                     initial_state_fw=_istate_fw,
#                                     initial_state_bw=_istate_bw)
#
#     # Linear activation
#     # Get inner loop last output
#     return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
#
# pred = BiRNN(x, istate_fw, istate_bw, weights, biases)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))  # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)  # Adam Optimizer

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until max iterations are reached
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Reshape data to get 28 sequences of 28 elements
        batch_xs = batch_xs.reshape((batch_size, n_steps, n_input))
        # Fit training using batch data
        sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys,
                                       istate_fw: np.zeros((batch_size, 2*n_hidden)),
                                       istate_bw: np.zeros((batch_size, 2*n_hidden))})
        if step % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys,
                                                istate_fw: np.zeros((batch_size, 2*n_hidden)),
                                                istate_bw: np.zeros((batch_size, 2*n_hidden))})
            # Calculate batch loss
            loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys,
                                             istate_fw: np.zeros((batch_size, 2*n_hidden)),
                                             istate_bw: np.zeros((batch_size, 2*n_hidden))})
            print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) + \
                  ", Training Accuracy= " + "{:.5f}".format(acc)
        step += 1
    print "Optimization Finished!"
    # Calculate accuracy for 128 MNIST test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
    test_label = mnist.test.labels[:test_len]
    print "Testing Accuracy:", sess.run(accuracy, feed_dict={x: test_data, y: test_label,
                                                             istate_fw: np.zeros((test_len, 2*n_hidden)),
                                                             istate_bw: np.zeros((test_len, 2*n_hidden))})