Skip to content

Commit 5f60e3f

Browse files
author
lichuang
committed
add some seq2seq code
1 parent e5f807a commit 5f60e3f

6 files changed

Lines changed: 832 additions & 0 deletions

File tree

chatbotv2/lstm_train.py

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
# coding:utf-8
2+
3+
import sys
4+
import numpy as np
5+
import tensorflow as tf
6+
from tensorflow.python.ops import seq2seq
7+
from tensorflow.python.ops import rnn_cell
8+
import tflearn
9+
10+
# Maximum words per utterance; also used as both encoder and decoder length.
max_seq_len = 8
# Adam learning rate passed to tflearn.regression.
learning_rate = 0.001
12+
13+
# Builds the word -> id dictionary (word_id_dict) and the max word id.
def init_word_id_dict(data_path="chat_dev.data", threshold=None, vocab_limit=1500):
    """Build a word -> integer-id mapping from the training corpus.

    Reads *data_path* line by line, counts word frequencies (skipping lines
    with more than *threshold* words — those lines are also excluded from
    training later), then assigns ids 1..*vocab_limit* to the most frequent
    words. Id 0 is implicitly reserved for padding.

    Args:
        data_path: whitespace-tokenised corpus, one utterance per line.
        threshold: max words per counted line; defaults to the module-level
            ``max_seq_len`` when None.
        vocab_limit: largest word id handed out (vocabulary cap).

    Returns:
        (word_id_dict, max_word_id). NOTE: max_word_id is always
        *vocab_limit*, even if the corpus has fewer distinct words —
        downstream embedding sizes are derived from it.
    """
    if threshold is None:
        threshold = max_seq_len

    vocab_dict = {}
    with open(data_path, "r") as file_object:
        for line in file_object:
            words = [w for w in line.strip().split(" ") if w]
            # Lines longer than the model's sequence length never become
            # training samples, so their words are not counted either.
            if len(words) > threshold:
                continue
            for word in words:
                vocab_dict[word] = vocab_dict.get(word, 0) + 1

    # Most frequent words get the smallest ids.
    ranked = sorted(vocab_dict.items(), key=lambda item: item[1], reverse=True)

    word_id_dict = {}
    max_word_id = vocab_limit
    uuid = 1
    for (word, freq) in ranked:
        word_id_dict[word] = uuid
        uuid = uuid + 1
        if uuid > max_word_id:
            break

    return (word_id_dict, max_word_id)
54+
55+
56+
def sequence_loss(y_pred, y_true):
    """Cross-entropy loss over the whole decoded sequence.

    Both tensors are batch-major; they are split along the time axis
    because the legacy ``seq2seq.sequence_loss`` expects per-step lists.
    Every time step is weighted equally.
    """
    step_logits = tf.unpack(y_pred, axis=1)
    step_targets = tf.unpack(y_true, axis=1)
    step_weights = [tf.ones_like(t, dtype=tf.float32) for t in step_targets]
    return seq2seq.sequence_loss(step_logits, step_targets, step_weights)
61+
62+
def accuracy(y_pred, y_true, x_in):
    """Fraction of time steps where the argmax of y_pred equals y_true.

    ``x_in`` is unused; it is part of the tflearn metric call signature.
    """
    predicted_ids = tf.to_int32(tf.argmax(y_pred, 2))
    hits = tf.cast(tf.equal(predicted_ids, y_true), tf.float32)
    return tf.reduce_mean(hits, name='acc')
65+
66+
def create_model(max_word_id):
67+
GO_VALUE = max_word_id + 1
68+
network = tflearn.input_data(shape=[None, max_seq_len + max_seq_len], dtype=tf.int32, name="XY")
69+
encoder_inputs = tf.slice(network, [0, 0], [-1, max_seq_len], name="enc_in")
70+
encoder_inputs = tf.unpack(encoder_inputs, axis=1)
71+
decoder_inputs = tf.slice(network, [0, max_seq_len], [-1, max_seq_len], name="dec_in")
72+
decoder_inputs = tf.unpack(decoder_inputs, axis=1)
73+
go_input = tf.mul( tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE )
74+
decoder_inputs = [go_input] + decoder_inputs[: max_seq_len-1]
75+
num_encoder_symbols = max_word_id + 1 # 从0起始
76+
num_decoder_symbols = max_word_id + 2 # 包括GO
77+
78+
cell = rnn_cell.BasicLSTMCell(max_seq_len+max_seq_len, state_is_tuple=True)
79+
80+
model_outputs, states = seq2seq.embedding_rnn_seq2seq(
81+
encoder_inputs,
82+
decoder_inputs,
83+
cell,
84+
num_encoder_symbols=num_encoder_symbols,
85+
num_decoder_symbols=num_decoder_symbols,
86+
embedding_size=max_word_id,
87+
feed_previous=False)
88+
89+
network = tf.pack(model_outputs, axis=1)
90+
91+
92+
93+
94+
targetY = tf.placeholder(shape=[None, max_seq_len], dtype=tf.int32, name="Y")
95+
96+
network = tflearn.regression(
97+
network,
98+
placeholder=targetY,
99+
optimizer='adam',
100+
learning_rate=learning_rate,
101+
loss=sequence_loss,
102+
metric=accuracy,
103+
name="Y")
104+
105+
print "begin create DNN model"
106+
model = tflearn.DNN(network, tensorboard_verbose=0, checkpoint_path=None)
107+
print "create DNN model finish"
108+
return model
109+
110+
111+
if __name__ == '__main__':
112+
(word_id_dict, max_word_id) = init_word_id_dict()
113+
print "max_word_id =", max_word_id
114+
115+
model = create_model(max_word_id)
116+
117+
threshold = max_seq_len
118+
file_object = open("chat_dev.data", "r")
119+
last_line_no = 0
120+
cur_line_no = 0
121+
last_words = []
122+
last_line = ""
123+
XY = []
124+
Y = []
125+
sample_count = 0
126+
while True:
127+
line = file_object.readline()
128+
cur_line_no = cur_line_no + 1
129+
if line:
130+
line = line.strip()
131+
words = []
132+
for word in line.split(" "):
133+
if len(word) > 0:
134+
words.append(word)
135+
if len(words) > threshold:
136+
continue
137+
138+
# 保证连续的话才参与训练
139+
if last_line_no != 0 and last_line_no == cur_line_no - 1:
140+
question_id_list = []
141+
question_array = np.zeros(max_seq_len + max_seq_len)
142+
answer_array = np.zeros(max_seq_len)
143+
idx = 0
144+
for word in last_words:
145+
if len(word)>0 and word_id_dict.has_key(word):
146+
word_id = word_id_dict[word]
147+
question_id_list.append(word_id)
148+
question_array[idx] = word_id
149+
idx = idx + 1
150+
for i in range(max_seq_len - len(question_id_list)):
151+
question_id_list.append(0)
152+
153+
answer_id_list = []
154+
155+
idx = 0
156+
for word in words:
157+
if len(word)>0 and word_id_dict.has_key(word):
158+
word_id = word_id_dict[word]
159+
answer_id_list.append(word_id)
160+
question_array[max_seq_len + idx] = word_id
161+
answer_array[idx] = word_id
162+
idx = idx + 1
163+
for i in range(2*max_seq_len - len(question_id_list)):
164+
answer_id_list.append(0)
165+
question_id_list.extend(answer_id_list)
166+
167+
XY.append(question_array)
168+
Y.append(answer_array)
169+
sample_count = sample_count + 1
170+
171+
#if sample_count > 0:
172+
# break
173+
174+
175+
last_words = words
176+
last_line = line
177+
last_line_no = cur_line_no
178+
179+
else:
180+
break
181+
file_object.close()
182+
183+
model.fit(
184+
XY,
185+
Y,
186+
n_epoch=100,
187+
validation_set=0.01,
188+
batch_size=1,
189+
shuffle=True,
190+
show_metric=True,
191+
snapshot_step=5000,
192+
snapshot_epoch=False,
193+
run_id="my_lstm_test")
194+
195+
model.save("./weights")
196+
#model.load("./weights")
197+
198+
199+
# predict
200+
TEST_XY = [XY[0]]
201+
res = model.predict(TEST_XY)
202+
res = np.array(res)
203+
num_decoder_symbols = max_word_id + 2
204+
y = res.reshape(max_seq_len, num_decoder_symbols)
205+
prediction = np.argmax(y, axis=1)
206+
print TEST_XY
207+
print "desire =", Y[0]
208+
print "prediction =", prediction

lstm_code/tensorflow/test.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import tensorflow as tf
2+
3+
def main(_):
    """No-op entry point invoked by ``tf.app.run()``; a placeholder."""
    return None


if __name__ == "__main__":
    tf.app.run()

seq2seq/tflearn_prj/07_lstm.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#Inspired by https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3%20-%20Neural%20Networks/recurrent_network.py
2+
import tensorflow as tf
3+
4+
import numpy as np
5+
from tensorflow.examples.tutorials.mnist import input_data
6+
7+
# configuration
8+
# O * W + b -> 10 labels for each image, O[? 28], W[28 10], B[10]
9+
# ^ (O: output 28 vec from 28 vec input)
10+
# |
11+
# +-+ +-+ +--+
12+
# |1|->|2|-> ... |28| time_step_size = 28
13+
# +-+ +-+ +--+
14+
# ^ ^ ... ^
15+
# | | |
16+
# img1:[28] [28] ... [28]
17+
# img2:[28] [28] ... [28]
18+
# img3:[28] [28] ... [28]
19+
# ...
20+
# img128 or img256 (batch_size or test_size 256)
21+
# each input size = input_vec_size=lstm_size=28
22+
23+
# configuration variables
24+
input_vec_size = lstm_size = 28
25+
time_step_size = 28
26+
27+
batch_size = 128
28+
test_size = 256
29+
30+
def init_weights(shape):
    """A trainable variable of *shape*, initialised from N(0, 0.01**2)."""
    initial = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(initial)
32+
33+
34+
def model(X, W, B, lstm_size):
35+
# X, input shape: (batch_size, time_step_size, input_vec_size)
36+
print "X=", X
37+
XT = tf.transpose(X, [1, 0, 2]) # permute time_step_size and batch_size
38+
print "XT=", XT
39+
# XT shape: (time_step_size, batch_size, input_vec_size)
40+
XR = tf.reshape(XT, [-1, lstm_size]) # each row has input for each lstm cell (lstm_size=input_vec_size)
41+
print "XR=", XR
42+
# XR shape: (time_step_size * batch_size, input_vec_size)
43+
X_split = tf.split(0, time_step_size, XR) # split them to time_step_size (28 arrays)
44+
print "X_split=", X_split
45+
# Each array shape: (batch_size, input_vec_size)
46+
47+
# Make lstm with lstm_size (each input vector size)
48+
lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size, forget_bias=1.0, state_is_tuple=True)
49+
50+
# Get lstm cell output, time_step_size (28) arrays with lstm_size output: (batch_size, lstm_size)
51+
outputs, _states = tf.nn.rnn(lstm, X_split, dtype=tf.float32)
52+
53+
# Linear activation
54+
# Get the last output
55+
return tf.matmul(outputs[-1], W) + B, lstm.state_size # State size to initialize the stat
56+
57+
# Load MNIST and reshape each flat 784-vector into a 28-step sequence of 28-dim rows.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
trX = trX.reshape(-1, 28, 28)
teX = teX.reshape(-1, 28, 28)

X = tf.placeholder("float", [None, 28, 28])
Y = tf.placeholder("float", [None, 10])

# Read-out layer: lstm_size features -> 10 class logits.
W = init_weights([lstm_size, 10])
B = init_weights([10])

py_x, state_size = model(X, W, B, lstm_size)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, Y))
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
predict_op = tf.argmax(py_x, 1)

# Launch the graph in a session.
with tf.Session() as sess:
    # All variables must be initialised before training.
    tf.initialize_all_variables().run()

    for i in range(100):
        # One pass over the training set in fixed-size mini-batches
        # (a trailing partial batch is dropped).
        for start in range(0, len(trX) - batch_size + 1, batch_size):
            end = start + batch_size
            sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})

        # Evaluate accuracy on a random test subset after each epoch.
        test_indices = np.arange(len(teX))
        np.random.shuffle(test_indices)
        test_indices = test_indices[0:test_size]

        print(i, np.mean(np.argmax(teY[test_indices], axis=1) ==
                         sess.run(predict_op, feed_dict={X: teX[test_indices]})))

0 commit comments

Comments
 (0)