Merge pull request nlintz#46 from j-min/master

hunkim · web-flow · commit 2969ed0a2fea · 2016-07-28T23:15:32.000+09:00
Fixed TensorFlow 0.9 API compatibility
diff --git a/.travis.yml b/.travis.yml
@@ -11,9 +11,9 @@ install:
   - pip install matplotlib
   # install TensorFlow from https://storage.googleapis.com/tensorflow/
   - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
-      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.8.0-cp27-none-linux_x86_64.whl;
+      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl;
     elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then
-      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.8.0-cp34-cp34m-linux_x86_64.whl;
+      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl;
     fi
 script:
   - sed -i -- 's/range(100)/range(1)/g' ??_*.py # change range to 1 for quick testing
diff --git a/07_lstm.py b/07_lstm.py
@@ -1,6 +1,5 @@
 #Inspired by https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3%20-%20Neural%20Networks/recurrent_network.py
 import tensorflow as tf
-from tensorflow.models.rnn import rnn, rnn_cell
 
 import numpy as np
 import input_data
@@ -32,7 +31,7 @@ def init_weights(shape):
     return tf.Variable(tf.random_normal(shape, stddev=0.01))
 
 
-def model(X, W, B, init_state, lstm_size):
+def model(X, W, B, lstm_size):
     # X, input shape: (batch_size, input_vec_size, time_step_size)
     XT = tf.transpose(X, [1, 0, 2])  # permute time_step_size and batch_size
     # XT shape: (input_vec_size, batch_szie, time_step_size)
@@ -42,10 +41,10 @@ def model(X, W, B, init_state, lstm_size):
     # Each array shape: (batch_size, input_vec_size)
 
     # Make lstm with lstm_size (each input vector size)
-    lstm = rnn_cell.BasicLSTMCell(lstm_size, forget_bias=1.0)
+    lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size, forget_bias=1.0, state_is_tuple=True)
 
     # Get lstm cell output, time_step_size (28) arrays with lstm_size output: (batch_size, lstm_size)
-    outputs, _states = rnn.rnn(lstm, X_split, initial_state=init_state)
+    outputs, _states = tf.nn.rnn(lstm, X_split, dtype=tf.float32)
 
     # Linear activation
     # Get the last output
@@ -56,17 +55,14 @@ def model(X, W, B, init_state, lstm_size):
 trX = trX.reshape(-1, 28, 28)
 teX = teX.reshape(-1, 28, 28)
 
-# Tensorflow LSTM cell requires 2x n_hidden length (state & cell)
-init_state = tf.placeholder("float", [None, 2*lstm_size])
-
 X = tf.placeholder("float", [None, 28, 28])
 Y = tf.placeholder("float", [None, 10])
 
 # get lstm_size and output 10 labels
 W = init_weights([lstm_size, 10])
 B = init_weights([10])
 
-py_x, state_size = model(X, W, B, init_state, lstm_size)
+py_x, state_size = model(X, W, B, lstm_size)
 
 cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, Y))
 train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
@@ -79,14 +75,12 @@ def model(X, W, B, init_state, lstm_size):
 
     for i in range(100):
         for start, end in zip(range(0, len(trX), batch_size), range(batch_size, len(trX), batch_size)):
-            sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end],
-                                          init_state: np.zeros((batch_size, state_size))})
+            sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
 
         test_indices = np.arange(len(teX))  # Get A Test Batch
         np.random.shuffle(test_indices)
         test_indices = test_indices[0:test_size]
 
         print(i, np.mean(np.argmax(teY[test_indices], axis=1) ==
                          sess.run(predict_op, feed_dict={X: teX[test_indices],
-                                                         Y: teY[test_indices],
-                                                         init_state: np.zeros((test_size, state_size))})))
+                                                         Y: teY[test_indices]})))