Skip to content

Commit 865ed2c

Browse files
committed
made changes suggested by Yoshua
fixed bug in convolutional layer (missing tanh !)
1 parent e6ade48 commit 865ed2c

4 files changed

Lines changed: 35 additions & 22 deletions

File tree

code/convolutional_mlp.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
7272
# add the bias term. Since the bias is a vector (1D array), we first
7373
# reshape it to a tensor of shape (1,n_filters,1,1). Each bias will thus
7474
# be broadcast across mini-batches and feature map width & height
75-
self.output = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')
75+
self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
7676

7777
# store parameters of this layer
7878
self.params = [self.W, self.b]
@@ -220,8 +220,7 @@ def load_dataset(fname):
220220
return train_batches, valid_batches, test_batches
221221

222222

223-
def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
224-
print 'learning_rate = ', learning_rate
223+
def evaluate_lenet5(learning_rate=0.01, n_iter=200, dataset='mnist.pkl.gz'):
225224
rng = numpy.random.RandomState(23455)
226225

227226
train_batches, valid_batches, test_batches = load_dataset(dataset)
@@ -245,18 +244,18 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
245244
# Construct the first convolutional pooling layer:
246245
# filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
247246
# maxpooling reduces this further to (24/2,24/2) = (12,12)
248-
# 4D output tensor is thus of shape (20,6,12,12)
247+
# 4D output tensor is thus of shape (20,20,12,12)
249248
layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
250249
image_shape=(batch_size,1,28,28),
251-
filter_shape=(6,1,5,5), poolsize=(2,2))
250+
filter_shape=(20,1,5,5), poolsize=(2,2))
252251

253252
# Construct the second convolutional pooling layer
254253
# filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
255254
# maxpooling reduces this further to (8/2,8/2) = (4,4)
256-
# 4D output tensor is thus of shape (20,32,4,4)
255+
# 4D output tensor is thus of shape (20,50,4,4)
257256
layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
258-
image_shape=(batch_size,6,12,12),
259-
filter_shape=(32,6,5,5), poolsize=(2,2))
257+
image_shape=(batch_size,20,12,12),
258+
filter_shape=(50,20,5,5), poolsize=(2,2))
260259

261260
# the SigmoidalLayer being fully-connected, it operates on 2D matrices of
262261
# shape (batch_size,num_pixels) (i.e. a matrix of rasterized images).
@@ -265,7 +264,7 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
265264

266265
# construct a fully-connected sigmoidal layer
267266
layer2 = SigmoidalLayer(rng, input=layer2_input,
268-
n_in=32*4*4, n_out=500)
267+
n_in=50*4*4, n_out=500)
269268

270269
# classify the values of the fully-connected sigmoidal layer
271270
layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
@@ -278,11 +277,18 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
278277

279278
# create a list of all model parameters to be fit by gradient descent
280279
params = layer3.params+ layer2.params+ layer1.params + layer0.params
281-
learning_rate = numpy.asarray(learning_rate, dtype=theano.config.floatX)
280+
281+
# create a list of gradients for all model parameters
282+
grads = T.grad(cost, params)
282283

283284
# train_model is a function that updates the model parameters by SGD
284-
train_model = theano.function([x, y], cost,
285-
updates=[(p, p - learning_rate*gp) for p,gp in zip(params, T.grad(cost, params))])
285+
# Since this model has many parameters, it would be tedious to manually
286+
# create an update rule for each model parameter. We thus create the updates
287+
# dictionary by automatically looping over all (params[i],grads[i]) pairs.
288+
updates = {}
289+
for param_i, grad_i in zip(params, grads):
290+
updates[param_i] = param_i - learning_rate * grad_i
291+
train_model = theano.function([x, y], cost, updates=updates)
286292

287293

288294
###############
@@ -310,7 +316,6 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
310316

311317
# have a maximum of `n_iter` iterations through the entire dataset
312318
for iter in xrange(n_iter * n_minibatches):
313-
#for iter in xrange(2 * n_minibatches):
314319

315320
# get epoch and minibatch index
316321
epoch = iter / n_minibatches

doc/images/3wolfmoon_output.png

36.3 KB
Loading

doc/images/mylenet.png

-4.99 KB
Loading

doc/lenet.txt

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,14 @@ one of Figure 1. The input consists of 3 feature maps (an RGB color image) of s
173173
size=w_shp),
174174
dtype=input.dtype))
175175

176-
# initialize shared variable for bias (1D tensor)
176+
# initialize shared variable for bias (1D tensor) with random values
177+
# IMPORTANT: biases are usually initialized to zero. However, in this
178+
# particular application, we simply apply the convolutional layer to
179+
# an image without learning the parameters. We therefore initialize
180+
# them to random values to "simulate" learning.
177181
b_shp = (2,)
178182
b = theano.shared( numpy.asarray(
179-
rng.uniform(low=-.0, high=0., Size=(2,)),
183+
rng.uniform(low=-.5, high=.5, size=b_shp),
180184
dtype=input.dtype))
181185

182186
# build symbolic expression that computes the convolution of input with filters in w
@@ -186,7 +190,7 @@ one of Figure 1. The input consists of 3 feature maps (an RGB color image) of s
186190
output = T.nnet.sigmoid(conv_out + b.dimshuffle('x', 0, 'x', 'x'))
187191

188192
# create theano function to compute filtered images
189-
f = theano.function([input], [output])
193+
f = theano.function([input], output)
190194

191195

192196
Let's have a little bit of fun with this...
@@ -202,7 +206,7 @@ Let's have a little bit of fun with this...
202206

203207
# put image in 4D tensor of shape (1,3,height,width)
204208
img_ = img.swapaxes(0,2).swapaxes(1,2).reshape(1,3,639,516)
205-
filtered_img = f(img_)[0]
209+
filtered_img = f(img_)
206210

207211
# plot original image and first and second components of output
208212
pylab.subplot(1,3,1); pylab.axis('off'); pylab.imshow(img)
@@ -387,7 +391,7 @@ layer.
387391
# add the bias term. Since the bias is a vector (1D array), we first
388392
# reshape it to a tensor of shape (1,n_filters,1,1). Each bias will thus
389393
# be broadcast across mini-batches and feature map width & height
390-
self.output = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')
394+
self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
391395

392396
# store parameters of this layer
393397
self.params = [self.W, self.b]
@@ -495,11 +499,15 @@ instantiate the network as follows.
495499

496500
# create a list of all model parameters to be fit by gradient descent
497501
params = layer3.params+ layer2.params+ layer1.params + layer0.params
498-
learning_rate = numpy.asarray(learning_rate, dtype=theano.config.floatX)
499502

500-
# train_model is a function that updates the model parameters by SGD
501-
train_model = theano.function([x, y], cost,
502-
updates=[(p, p - learning_rate*gp) for p,gp in zip(params, T.grad(cost, params))])
503+
# train_model is a function that updates the model parameters by SGD
504+
# Since this model has many parameters, it would be tedious to manually
505+
# create an update rule for each model parameter. We thus create the updates
506+
# dictionary by automatically looping over all (params[i],grads[i]) pairs, where grads = T.grad(cost, params).
507+
updates = {}
508+
for param_i, grad_i in zip(params, grads):
509+
updates[param_i] = param_i - learning_rate * grad_i
510+
train_model = theano.function([x, y], cost, updates=updates)
503511

504512

505513
We leave out the code that performs the actual training and early-stopping,

0 commit comments

Comments
 (0)