Skip to content

Commit d77e527

Browse files
committed
added Bidirectional RNN example
1 parent f93ff35 commit d77e527

2 files changed

Lines changed: 165 additions & 1 deletion

File tree

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ Code examples for some popular machine learning algorithms, using TensorFlow lib
1616
- Multilayer Perceptron ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3%20-%20Neural%20Networks/multilayer_perceptron.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3%20-%20Neural%20Networks/multilayer_perceptron.py))
1717
- Convolutional Neural Network ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3%20-%20Neural%20Networks/convolutional_network.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3%20-%20Neural%20Networks/convolutional_network.py))
1818
- AlexNet ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3%20-%20Neural%20Networks/alexnet.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3%20-%20Neural%20Networks/alexnet.py))
19-
- Reccurent Network (LSTM) ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3%20-%20Neural%20Networks/reccurent_network.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3%20-%20Neural%20Networks/recurrent_network.py))
19+
- Recurrent Neural Network (LSTM) ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3%20-%20Neural%20Networks/reccurent_network.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3%20-%20Neural%20Networks/recurrent_network.py))
20+
- Bidirectional Recurrent Neural Network ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3%20-%20Neural%20Networks/bidirectional_rnn.py))
2021

2122
#### 4 - Multi GPU
2223
- Basic Operations on multi-GPU ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/4%20-%20Multi%20GPU/multigpu_basics.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/4%20-%20Multi%20GPU/multigpu_basics.py))
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
'''
2+
A Bidirectional Reccurent Neural Network (LSTM) implementation example using TensorFlow library.
3+
This example is using the MNIST database of handwritten digits (http://yann.lecun.com/exdb/mnist/)
4+
Long Short Term Memory paper: http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
5+
6+
Author: Aymeric Damien
7+
Project: https://github.com/aymericdamien/TensorFlow-Examples/
8+
'''
9+
10+
# Import MINST data
11+
import input_data
12+
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
13+
14+
import tensorflow as tf
15+
from tensorflow.python.ops.constant_op import constant
16+
from tensorflow.models.rnn import rnn, rnn_cell
17+
import numpy as np
18+
19+
'''
20+
To classify images using a bidirectional reccurent neural network, we consider every image row as a sequence of pixels.
21+
Because MNIST image shape is 28*28px, we will then handle 28 sequences of 28 steps for every sample.
22+
'''
23+
24+
# Parameters
25+
learning_rate = 0.001
26+
training_iters = 100000
27+
batch_size = 128
28+
display_step = 10
29+
30+
# Network Parameters
31+
n_input = 28 # MNIST data input (img shape: 28*28)
32+
n_steps = 28 # timesteps
33+
n_hidden = 128 # hidden layer num of features
34+
n_classes = 10 # MNIST total classes (0-9 digits)
35+
36+
# tf Graph input
37+
x = tf.placeholder("float", [None, n_steps, n_input])
38+
# Tensorflow LSTM cell requires 2x n_hidden length (state & cell)
39+
istate_fw = tf.placeholder("float", [None, 2*n_hidden])
40+
istate_bw = tf.placeholder("float", [None, 2*n_hidden])
41+
y = tf.placeholder("float", [None, n_classes])
42+
43+
# Define weights
44+
weights = {
45+
# Hidden layer weights => 2*n_hidden because of foward + backward cells
46+
'hidden': tf.Variable(tf.random_normal([n_input, 2*n_hidden])),
47+
'out': tf.Variable(tf.random_normal([2*n_hidden, n_classes]))
48+
}
49+
biases = {
50+
'hidden': tf.Variable(tf.random_normal([2*n_hidden])),
51+
'out': tf.Variable(tf.random_normal([n_classes]))
52+
}
53+
54+
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases, _batch_size, _seq_len):
55+
56+
# BiRNN requires to supply sequence_length as [batch_size, int64]
57+
# Note: Tensorflow 0.6.0 requires BiRNN sequence_length parameter to be set
58+
# For a better implementation with latest version of tensorflow, check below
59+
_seq_len = tf.fill([_batch_size], constant(_seq_len, dtype=tf.int64))
60+
61+
# input shape: (batch_size, n_steps, n_input)
62+
_X = tf.transpose(_X, [1, 0, 2]) # permute n_steps and batch_size
63+
# Reshape to prepare input to hidden activation
64+
_X = tf.reshape(_X, [-1, n_input]) # (n_steps*batch_size, n_input)
65+
# Linear activation
66+
_X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']
67+
68+
# Define lstm cells with tensorflow
69+
# Forward direction cell
70+
lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
71+
# Backward direction cell
72+
lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
73+
# Split data because rnn cell needs a list of inputs for the RNN inner loop
74+
_X = tf.split(0, n_steps, _X) # n_steps * (batch_size, n_hidden)
75+
76+
# Get lstm cell output
77+
outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
78+
initial_state_fw=_istate_fw,
79+
initial_state_bw=_istate_bw,
80+
sequence_length=_seq_len)
81+
82+
# Linear activation
83+
# Get inner loop last output
84+
return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
85+
86+
pred = BiRNN(x, istate_fw, istate_bw, weights, biases, batch_size, n_steps)
87+
88+
89+
# NOTE: The following code is working with current master version of tensorflow
90+
# BiRNN sequence_length parameter isn't required, so we don't define it
91+
#
92+
# def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):
93+
#
94+
# # input shape: (batch_size, n_steps, n_input)
95+
# _X = tf.transpose(_X, [1, 0, 2]) # permute n_steps and batch_size
96+
# # Reshape to prepare input to hidden activation
97+
# _X = tf.reshape(_X, [-1, n_input]) # (n_steps*batch_size, n_input)
98+
# # Linear activation
99+
# _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']
100+
#
101+
# # Define lstm cells with tensorflow
102+
# # Forward direction cell
103+
# lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
104+
# # Backward direction cell
105+
# lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
106+
# # Split data because rnn cell needs a list of inputs for the RNN inner loop
107+
# _X = tf.split(0, n_steps, _X) # n_steps * (batch_size, n_hidden)
108+
#
109+
# # Get lstm cell output
110+
# outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
111+
# initial_state_fw=_istate_fw,
112+
# initial_state_bw=_istate_bw)
113+
#
114+
# # Linear activation
115+
# # Get inner loop last output
116+
# return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
117+
#
118+
# pred = BiRNN(x, istate_fw, istate_bw, weights, biases)
119+
120+
# Define loss and optimizer
121+
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) # Softmax loss
122+
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer
123+
124+
# Evaluate model
125+
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
126+
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
127+
128+
# Initializing the variables
129+
init = tf.initialize_all_variables()
130+
131+
# Launch the graph
132+
with tf.Session() as sess:
133+
sess.run(init)
134+
step = 1
135+
# Keep training until reach max iterations
136+
while step * batch_size < training_iters:
137+
batch_xs, batch_ys = mnist.train.next_batch(batch_size)
138+
# Reshape data to get 28 seq of 28 elements
139+
batch_xs = batch_xs.reshape((batch_size, n_steps, n_input))
140+
# Fit training using batch data
141+
sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys,
142+
istate_fw: np.zeros((batch_size, 2*n_hidden)),
143+
istate_bw: np.zeros((batch_size, 2*n_hidden))})
144+
if step % display_step == 0:
145+
# Calculate batch accuracy
146+
acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys,
147+
istate_fw: np.zeros((batch_size, 2*n_hidden)),
148+
istate_bw: np.zeros((batch_size, 2*n_hidden))})
149+
# Calculate batch loss
150+
loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys,
151+
istate_fw: np.zeros((batch_size, 2*n_hidden)),
152+
istate_bw: np.zeros((batch_size, 2*n_hidden))})
153+
print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) + \
154+
", Training Accuracy= " + "{:.5f}".format(acc)
155+
step += 1
156+
print "Optimization Finished!"
157+
# Calculate accuracy for 128 mnist test images
158+
test_len = 128
159+
test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
160+
test_label = mnist.test.labels[:test_len]
161+
print "Testing Accuracy:", sess.run(accuracy, feed_dict={x: test_data, y: test_label,
162+
istate_fw: np.zeros((test_len, 2*n_hidden)),
163+
istate_bw: np.zeros((test_len, 2*n_hidden))})

0 commit comments

Comments
 (0)