@@ -72,7 +72,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
7272 # add the bias term. Since the bias is a vector (1D array), we first
7373 # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will thus
7474 # be broadcast across mini-batches and feature map width & height
75- self .output = pooled_out + self .b .dimshuffle ('x' , 0 , 'x' , 'x' )
75+ self .output = T . tanh ( pooled_out + self .b .dimshuffle ('x' , 0 , 'x' , 'x' ) )
7676
7777 # store parameters of this layer
7878 self .params = [self .W , self .b ]
@@ -220,8 +220,7 @@ def load_dataset(fname):
220220 return train_batches , valid_batches , test_batches
221221
222222
223- def evaluate_lenet5 (learning_rate = 0.0001 , n_iter = 1000 , dataset = 'mnist.pkl.gz' ):
224- print 'learning_rate = ' , learning_rate
223+ def evaluate_lenet5 (learning_rate = 0.01 , n_iter = 200 , dataset = 'mnist.pkl.gz' ):
225224 rng = numpy .random .RandomState (23455 )
226225
227226 train_batches , valid_batches , test_batches = load_dataset (dataset )
@@ -245,18 +244,18 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
245244 # Construct the first convolutional pooling layer:
246245 # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
247246 # maxpooling reduces this further to (24/2,24/2) = (12,12)
248- # 4D output tensor is thus of shape (20,6 ,12,12)
247+ # 4D output tensor is thus of shape (20,20 ,12,12)
249248 layer0 = LeNetConvPoolLayer (rng , input = layer0_input ,
250249 image_shape = (batch_size ,1 ,28 ,28 ),
251- filter_shape = (6 ,1 ,5 ,5 ), poolsize = (2 ,2 ))
250+ filter_shape = (20 ,1 ,5 ,5 ), poolsize = (2 ,2 ))
252251
253252 # Construct the second convolutional pooling layer
254253 # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
255254 # maxpooling reduces this further to (8/2,8/2) = (4,4)
256- # 4D output tensor is thus of shape (20,32 ,4,4)
255+ # 4D output tensor is thus of shape (20,50 ,4,4)
257256 layer1 = LeNetConvPoolLayer (rng , input = layer0 .output ,
258- image_shape = (batch_size ,6 ,12 ,12 ),
259- filter_shape = (32 , 6 ,5 ,5 ), poolsize = (2 ,2 ))
257+ image_shape = (batch_size ,20 ,12 ,12 ),
258+ filter_shape = (50 , 20 ,5 ,5 ), poolsize = (2 ,2 ))
260259
261260 # the SigmoidalLayer being fully-connected, it operates on 2D matrices of
262261 # shape (batch_size,num_pixels) (i.e. a matrix of rasterized images).
@@ -265,7 +264,7 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
265264
266265 # construct a fully-connected sigmoidal layer
267266 layer2 = SigmoidalLayer (rng , input = layer2_input ,
268- n_in = 32 * 4 * 4 , n_out = 500 )
267+ n_in = 50 * 4 * 4 , n_out = 500 )
269268
270269 # classify the values of the fully-connected sigmoidal layer
271270 layer3 = LogisticRegression (input = layer2 .output , n_in = 500 , n_out = 10 )
@@ -278,11 +277,18 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
278277
279278 # create a list of all model parameters to be fit by gradient descent
280279 params = layer3 .params + layer2 .params + layer1 .params + layer0 .params
281- learning_rate = numpy .asarray (learning_rate , dtype = theano .config .floatX )
280+
281+ # create a list of gradients for all model parameters
282+ grads = T .grad (cost , params )
282283
283284 # train_model is a function that updates the model parameters by SGD
284- train_model = theano .function ([x , y ], cost ,
285- updates = [(p , p - learning_rate * gp ) for p ,gp in zip (params , T .grad (cost , params ))])
285+ # Since this model has many parameters, it would be tedious to manually
286+ # create an update rule for each model parameter. We thus create the updates
287+ # dictionary by automatically looping over all (params[i],grads[i]) pairs.
288+ updates = {}
289+ for param_i , grad_i in zip (params , grads ):
290+ updates [param_i ] = param_i - learning_rate * grad_i
291+ train_model = theano .function ([x , y ], cost , updates = updates )
286292
287293
288294 ###############
@@ -310,7 +316,6 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
310316
311317 # have a maximum of `n_iter` iterations through the entire dataset
312318 for iter in xrange (n_iter * n_minibatches ):
313- #for iter in xrange(2 * n_minibatches):
314319
315320 # get epoch and minibatch index
316321 epoch = iter / n_minibatches
0 commit comments