Skip to content

Commit ee25720

Browse files
author
lichuang
committed
add files
1 parent 5f60e3f commit ee25720

File tree

3 files changed

+250
-35
lines changed

3 files changed

+250
-35
lines changed

chatbotv2/lstm_train.py

Lines changed: 67 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
import tflearn
99

1010
max_seq_len = 8
11-
learning_rate = 0.001
11+
learning_rate = 0.01
12+
id_word_dict = {}
1213

1314
# 得到了单词转id的词典是word_id_dict, 最大单词id是max_word_id
1415
def init_word_id_dict():
@@ -43,9 +44,13 @@ def init_word_id_dict():
4344

4445
uuid = 1
4546

46-
max_word_id=1500
47+
max_word_id=2000
4748
for (word, freq) in vocab_dict:
4849
word_id_dict[word] = uuid
50+
id_word_dict[uuid] = word
51+
#if freq > 20:
52+
# print word, uuid, freq
53+
print word, uuid, freq
4954
uuid = uuid + 1
5055
if uuid > max_word_id:
5156
break
@@ -63,7 +68,7 @@ def accuracy(y_pred, y_true, x_in):
6368
pred_idx = tf.to_int32(tf.argmax(y_pred, 2))
6469
return tf.reduce_mean(tf.cast(tf.equal(pred_idx, y_true), tf.float32), name='acc')
6570

66-
def create_model(max_word_id):
71+
def create_model(max_word_id, is_test=False):
6772
GO_VALUE = max_word_id + 1
6873
network = tflearn.input_data(shape=[None, max_seq_len + max_seq_len], dtype=tf.int32, name="XY")
6974
encoder_inputs = tf.slice(network, [0, 0], [-1, max_seq_len], name="enc_in")
@@ -75,7 +80,7 @@ def create_model(max_word_id):
7580
num_encoder_symbols = max_word_id + 1 # 从0起始
7681
num_decoder_symbols = max_word_id + 2 # 包括GO
7782

78-
cell = rnn_cell.BasicLSTMCell(max_seq_len+max_seq_len, state_is_tuple=True)
83+
cell = rnn_cell.BasicLSTMCell(16*max_seq_len, state_is_tuple=True)
7984

8085
model_outputs, states = seq2seq.embedding_rnn_seq2seq(
8186
encoder_inputs,
@@ -84,7 +89,7 @@ def create_model(max_word_id):
8489
num_encoder_symbols=num_encoder_symbols,
8590
num_decoder_symbols=num_decoder_symbols,
8691
embedding_size=max_word_id,
87-
feed_previous=False)
92+
feed_previous=is_test)
8893

8994
network = tf.pack(model_outputs, axis=1)
9095

@@ -107,12 +112,22 @@ def create_model(max_word_id):
107112
print "create DNN model finish"
108113
return model
109114

115+
def print_sentence(list, msg):
    """Print ``msg`` followed by the words that the ids in ``list`` map to.

    Ids are looked up in the module-level ``id_word_dict``.  The id 0 is
    the padding value and is skipped.  Ids that have no dictionary entry
    are skipped as well instead of raising ``KeyError``: the decoder's
    symbol range goes up to ``max_word_id + 1`` (the GO symbol), while the
    dictionary only holds ids up to ``max_word_id``.

    Args:
        list: iterable of word ids (name kept for caller compatibility even
            though it shadows the builtin).
        msg: prefix string printed before the words.
    """
    words = [id_word_dict[item] for item in list
             if item != 0 and item in id_word_dict]
    # Single-argument print(...) behaves the same with or without
    # print_function, matching the original "print sentence" output.
    print(msg + "".join(words))
110121

111122
if __name__ == '__main__':
123+
if len(sys.argv) > 1 and sys.argv[1] == 'test':
124+
is_test = True
125+
else:
126+
is_test = False
112127
(word_id_dict, max_word_id) = init_word_id_dict()
113128
print "max_word_id =", max_word_id
114129

115-
model = create_model(max_word_id)
130+
model = create_model(max_word_id, is_test)
116131

117132
threshold = max_seq_len
118133
file_object = open("chat_dev.data", "r")
@@ -138,14 +153,20 @@ def create_model(max_word_id):
138153
# 保证连续的话才参与训练
139154
if last_line_no != 0 and last_line_no == cur_line_no - 1:
140155
question_id_list = []
156+
question = ""
157+
answer = ""
141158
question_array = np.zeros(max_seq_len + max_seq_len)
142159
answer_array = np.zeros(max_seq_len)
143160
idx = 0
161+
question_has_word = False
162+
answer_has_word = False
144163
for word in last_words:
145164
if len(word)>0 and word_id_dict.has_key(word):
146165
word_id = word_id_dict[word]
147166
question_id_list.append(word_id)
167+
question = question + word
148168
question_array[idx] = word_id
169+
question_has_word = True
149170
idx = idx + 1
150171
for i in range(max_seq_len - len(question_id_list)):
151172
question_id_list.append(0)
@@ -157,20 +178,21 @@ def create_model(max_word_id):
157178
if len(word)>0 and word_id_dict.has_key(word):
158179
word_id = word_id_dict[word]
159180
answer_id_list.append(word_id)
181+
answer = answer + word
160182
question_array[max_seq_len + idx] = word_id
161183
answer_array[idx] = word_id
184+
answer_has_word = True
162185
idx = idx + 1
163186
for i in range(2*max_seq_len - len(question_id_list)):
164187
answer_id_list.append(0)
165188
question_id_list.extend(answer_id_list)
166189

167-
XY.append(question_array)
168-
Y.append(answer_array)
169-
sample_count = sample_count + 1
170-
171-
#if sample_count > 0:
172-
# break
173-
190+
if question_has_word and answer_has_word:
191+
#print "question =", question
192+
#print "answer =", answer
193+
XY.append(question_array)
194+
Y.append(answer_array)
195+
sample_count = sample_count + 1
174196

175197
last_words = words
176198
last_line = line
@@ -180,29 +202,39 @@ def create_model(max_word_id):
180202
break
181203
file_object.close()
182204

183-
model.fit(
184-
XY,
185-
Y,
186-
n_epoch=100,
187-
validation_set=0.01,
188-
batch_size=1,
189-
shuffle=True,
190-
show_metric=True,
191-
snapshot_step=5000,
192-
snapshot_epoch=False,
193-
run_id="my_lstm_test")
205+
if not is_test:
206+
model.fit(
207+
XY,
208+
Y,
209+
n_epoch=3000,
210+
validation_set=0.01,
211+
batch_size=64,
212+
shuffle=True,
213+
show_metric=True,
214+
snapshot_step=5000,
215+
snapshot_epoch=False,
216+
run_id="my_lstm_test")
194217

195-
model.save("./weights")
196-
#model.load("./weights")
218+
model.save("./weights")
219+
else:
220+
model.load("./weights")
197221

198222

199223
# predict
200-
TEST_XY = [XY[0]]
201-
res = model.predict(TEST_XY)
202-
res = np.array(res)
203-
num_decoder_symbols = max_word_id + 2
204-
y = res.reshape(max_seq_len, num_decoder_symbols)
205-
prediction = np.argmax(y, axis=1)
206-
print TEST_XY
207-
print "desire =", Y[0]
208-
print "prediction =", prediction
224+
# Decode at most the first 100 loaded samples and print every one whose
# prediction contains at least one non-zero word id.
for i in range(min(100, len(XY))):
    # Work on a copy so that blanking the answer half of the input does
    # not overwrite the stored sample XY[i].
    test_input = np.array(XY[i])
    test_input[max_seq_len:2*max_seq_len] = 0
    TEST_XY = [test_input]

    res = model.predict(TEST_XY)
    res = np.array(res)
    num_decoder_symbols = max_word_id + 2
    # One row of scores per output position.
    y = res.reshape(max_seq_len, num_decoder_symbols)
    prediction = np.argmax(y, axis=1)
    if 0 != np.sum(prediction):
        print_sentence(TEST_XY[0], "input ")
        print_sentence(Y[i], "desire ")
        print_sentence(prediction, "prediction ")

pattern_recognition.lua

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
require 'nn'
2+
require 'paths'
3+
-- Download and unpack the small CIFAR-10 archive unless the zip file is
-- already present in the working directory.
local archive_missing = not paths.filep("cifar10torchsmall.zip")
if archive_missing then
    os.execute('wget -c https://s3.amazonaws.com/torch7/data/cifar10torchsmall.zip')
    os.execute('unzip cifar10torchsmall.zip')
end

-- Load the serialized training and test sets.
trainset = torch.load('cifar10-train.t7')
testset = torch.load('cifar10-test.t7')

-- Class names, indexed by label value.
classes = {
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
}

-- Fallback lookup for keys missing from the table: trainset[i] yields the
-- pair {data[i], label[i]}.
local function sample_at(set, index)
    return {set.data[index], set.label[index]}
end
setmetatable(trainset, {__index = sample_at})
16+
-- Convert the training data from a ByteTensor to a DoubleTensor.
trainset.data = trainset.data:double()

-- Number of training samples (size of the first dimension of the data).
function trainset:size()
    return self.data:size(1)
end

mean = {}  -- per-channel mean, kept to normalize the test set later
stdv = {}  -- per-channel standard deviation, kept for the same reason
for i = 1, 3 do  -- over each image channel
    -- Sub-tensor for channel i; the in-place add/div below modify
    -- trainset.data through it.
    local channel = trainset.data[{ {}, {i}, {}, {} }]

    mean[i] = channel:mean()  -- mean estimation
    print('Channel ' .. i .. ', Mean: ' .. mean[i])
    channel:add(-mean[i])     -- mean subtraction

    stdv[i] = channel:std()   -- std estimation (after mean subtraction)
    print('Channel ' .. i .. ', Standard Deviation: ' .. stdv[i])
    channel:div(stdv[i])      -- std scaling
end
32+
-- Small convolutional classifier: two conv/ReLU/max-pool stages followed by
-- three fully connected layers and a log-softmax output.
net = nn.Sequential()
net:add(nn.SpatialConvolution(3, 6, 5, 5))    -- 3 input image channels, 6 output channels, 5x5 convolution kernel
net:add(nn.ReLU())                            -- non-linearity
net:add(nn.SpatialMaxPooling(2, 2, 2, 2))     -- max-pooling over 2x2 windows with stride 2
net:add(nn.SpatialConvolution(6, 16, 5, 5))
net:add(nn.ReLU())                            -- non-linearity
net:add(nn.SpatialMaxPooling(2, 2, 2, 2))
net:add(nn.View(16*5*5))                      -- reshapes from a 3D tensor of 16x5x5 into a 1D tensor of 16*5*5
net:add(nn.Linear(16*5*5, 120))               -- fully connected layer (matrix multiplication between input and weights)
net:add(nn.ReLU())                            -- non-linearity
net:add(nn.Linear(120, 84))
net:add(nn.ReLU())                            -- non-linearity
-- One output per entry of the classes table (10 object categories), so the
-- layer stays in sync with the label set instead of a hard-coded 10.
net:add(nn.Linear(84, #classes))
net:add(nn.LogSoftMax())                      -- converts the output to log-probabilities, as expected by ClassNLLCriterion

-- Negative log-likelihood loss on the log-probabilities.
criterion = nn.ClassNLLCriterion()

-- Plain stochastic gradient descent for 5 iterations over the training set.
trainer = nn.StochasticGradient(net, criterion)
trainer.learningRate = 0.001
trainer.maxIteration = 5
trainer:train(trainset)
51+
-- Convert the test data from a ByteTensor to a DoubleTensor.
testset.data = testset.data:double()

-- Normalize the test set with the statistics estimated on the training set.
for i = 1, 3 do  -- over each image channel
    local channel = testset.data[{ {}, {i}, {}, {} }]
    channel:add(-mean[i])  -- mean subtraction
    channel:div(stdv[i])   -- std scaling
end

-- Inspect the prediction for a single test image.
local sample_index = 100
predicted = net:forward(testset.data[sample_index])
print(classes[testset.label[sample_index]])  -- ground-truth class name
-- exp() is applied in place, so the loop below prints the exponentiated
-- network outputs.
print(predicted:exp())
for i = 1, predicted:size(1) do
    print(classes[i], predicted[i])
end
62+
-- Evaluate the trained network on the whole test set in one pass, collecting
-- both the overall number of correct predictions and the per-class counts.
-- The number of test samples and the number of samples per class are taken
-- from the data instead of being hard-coded.
local num_test = testset.data:size(1)

correct = 0
class_performance = {}   -- correct predictions per class
local class_total = {}   -- test samples per class
for c = 1, #classes do
    class_performance[c] = 0
    class_total[c] = 0
end

for i = 1, num_test do
    local groundtruth = testset.label[i]
    local prediction = net:forward(testset.data[i])
    local confidences, indices = torch.sort(prediction, true)  -- true means sort in descending order
    class_total[groundtruth] = class_total[groundtruth] + 1
    if groundtruth == indices[1] then
        correct = correct + 1
        class_performance[groundtruth] = class_performance[groundtruth] + 1
    end
end

-- Overall accuracy.
print(correct, 100*correct/num_test .. ' % ')

-- Per-class accuracy; a class without any test sample is reported as 0.
for i = 1, #classes do
    local total = class_total[i]
    local percent = 0
    if total > 0 then
        percent = 100*class_performance[i]/total
    end
    print(classes[i], percent .. ' %')
end

seq2seq/hello_sequence.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# coding:utf-8
2+
3+
from __future__ import print_function
4+
import numpy as np
5+
import tensorflow as tf
6+
import sys
7+
8+
# Number of distinct symbols; the samples below are character codes from ord().
vocab_size = 256
learning_rate = 0.1
# Only a single bucket is used in this experiment for now.
buckets = [(10, 10)]
bucket_id = 0
# Padding symbol (id 0).
PAD = [0]
sample_size = 20
# Number of units in each LSTM cell.
num_units = 100
# Number of stacked LSTM layers.
num_layers = 2

# sample_size samples; each sample has a question, an answer and weights.
# Question and answer are 10-element vectors (6 character codes + 4 PAD).
# Original author's note: the sample_size samples depend on each other in
# temporal order.
# list(map(...)) keeps the concatenation valid where map() returns an iterator.
# The same inner list object is repeated sample_size times; nothing in this
# file mutates it.
question_sample_list = [list(map(ord, "hello?")) + PAD * 4] * sample_size
answer_sample_list = [list(map(ord, "world!")) + PAD * 4] * sample_size
# NOTE(review): 7 positions carry weight 1.0 although the text is 6
# characters long - presumably the first PAD is meant to count; confirm.
init_weights_list = [[1.0] * 7 + [0.0] * 3] * sample_size  # mask padding. todo: redundant --
26+
27+
with tf.Session() as session:

    # Create the recurrent cell; with more than one layer the same LSTM cell
    # object is repeated inside a MultiRNNCell.
    cell = single_cell = tf.nn.rnn_cell.LSTMCell(num_units)
    if num_layers > 1:
        cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)

    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
        """Build the embedding seq2seq graph for the given inputs.

        ``do_decode`` is passed through as ``feed_previous``.
        """
        return tf.nn.seq2seq.embedding_rnn_seq2seq(
            encoder_inputs, decoder_inputs, cell,
            num_encoder_symbols=vocab_size,
            num_decoder_symbols=vocab_size,
            embedding_size=num_units,
            feed_previous=do_decode)

    # Placeholders used for training (original author's note: with multiple
    # layers the weights are shared).
    encoder_inputs = []
    decoder_inputs = []
    weights = []
    for i in xrange(sample_size):
        encoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in xrange(sample_size):
        decoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
        weights.append(tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
    # NOTE(review): the targets are the decoder inputs themselves, not a
    # shifted copy - confirm this is intended.
    targets = list(decoder_inputs)

    # Create the model and its per-bucket loss; training uses
    # do_decode=False.
    buckets_outputs, losses = tf.nn.seq2seq.model_with_buckets(
        encoder_inputs, decoder_inputs, targets,
        weights, buckets,
        lambda x, y: seq2seq_f(x, y, False))

    # Gradient update op, one per bucket.
    updates = []
    for b in xrange(len(buckets)):
        updates.append(tf.train.AdamOptimizer(learning_rate).minimize(losses[b]))

    # Saver for the model variables (created but not used further below).
    saver = tf.train.Saver(tf.all_variables())

    # Initialize all variables.
    session.run(tf.initialize_all_variables())

    # The fed data never changes between iterations, so build it once.
    feed_dict = {}
    for placeholder, sample in zip(encoder_inputs, question_sample_list):
        feed_dict[placeholder.name] = sample
    for i, placeholder in enumerate(decoder_inputs):
        feed_dict[placeholder.name] = answer_sample_list[i]
        feed_dict[weights[i].name] = init_weights_list[i]

    # fetches lists what to get back from the run: the update op, the loss,
    # and then every output tensor of the bucket exactly once.  The outputs
    # are only needed for printing; training itself does not require them.
    fetches = [updates[bucket_id], losses[bucket_id]]
    fetches.extend(buckets_outputs[bucket_id])

    # Train forever, printing the loss and the decoded output each step.
    while True:
        # What is passed in is the data and the computation to run; the
        # actual execution may be placed on various devices.
        fetches_outputs = session.run(fetches, feed_dict)
        # NOTE(review): this is the bucket loss as returned by
        # model_with_buckets, printed under the label "perplexity" -
        # presumably a log-perplexity; confirm.
        perplexity = fetches_outputs[1]
        outputs = fetches_outputs[2:]
        print("perplexity =", perplexity)
        words = np.argmax(outputs, axis=2)
        word = "".join(map(chr, words[0])).replace('\x00', '').replace('\n', '')
        print("output: %s" % word)

0 commit comments

Comments
 (0)