Ocode
diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py
Lines changed: 18 additions & 13 deletions
diff --git a/doc/images/3wolfmoon_output.png b/doc/images/3wolfmoon_output.png
36.3 KB
diff --git a/doc/images/mylenet.png b/doc/images/mylenet.png
-4.99 KB
diff --git a/doc/lenet.txt b/doc/lenet.txt
Lines changed: 17 additions & 9 deletions
@@ -72,7 +72,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
         # add the bias term. Since the bias is a vector (1D array), we first
         # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will thus
         # be broadcasted across mini-batches and feature map width & height
-        self.output = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')
+        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
 
         # store parameters of this layer
         self.params = [self.W, self.b]
@@ -220,8 +220,7 @@ def load_dataset(fname):
     return train_batches, valid_batches, test_batches
 
 
-def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
-    print 'learning_rate = ', learning_rate
+def evaluate_lenet5(learning_rate=0.01, n_iter=200, dataset='mnist.pkl.gz'):
     rng = numpy.random.RandomState(23455)
 
     train_batches, valid_batches, test_batches = load_dataset(dataset)
@@ -245,18 +244,18 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
     # Construct the first convolutional pooling layer:
     # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
     # maxpooling reduces this further to (24/2,24/2) = (12,12)
-    # 4D output tensor is thus of shape (20,6,12,12)
+    # 4D output tensor is thus of shape (20,20,12,12)
     layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
             image_shape=(batch_size,1,28,28), 
-            filter_shape=(6,1,5,5), poolsize=(2,2))
+            filter_shape=(20,1,5,5), poolsize=(2,2))
 
     # Construct the second convolutional pooling layer
     # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
     # maxpooling reduces this further to (8/2,8/2) = (4,4)
-    # 4D output tensor is thus of shape (20,32,4,4)
+    # 4D output tensor is thus of shape (20,50,4,4)
     layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
-            image_shape=(batch_size,6,12,12), 
-            filter_shape=(32,6,5,5), poolsize=(2,2))
+            image_shape=(batch_size,20,12,12), 
+            filter_shape=(50,20,5,5), poolsize=(2,2))
 
     # the SigmoidalLayer being fully-connected, it operates on 2D matrices of
     # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
@@ -265,7 +264,7 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
 
     # construct a fully-connected sigmoidal layer
     layer2 = SigmoidalLayer(rng, input=layer2_input, 
-                            n_in=32*4*4, n_out=500)
+                            n_in=50*4*4, n_out=500)
 
     # classify the values of the fully-connected sigmoidal layer
     layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
@@ -278,11 +277,18 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
 
     # create a list of all model parameters to be fit by gradient descent
     params = layer3.params+ layer2.params+ layer1.params + layer0.params
-    learning_rate = numpy.asarray(learning_rate, dtype=theano.config.floatX)
+    
+    # create a list of gradients for all model parameters
+    grads = T.grad(cost, params)
 
     # train_model is a function that updates the model parameters by SGD
-    train_model = theano.function([x, y], cost, 
-            updates=[(p, p - learning_rate*gp) for p,gp in zip(params, T.grad(cost, params))])
+    # Since this model has many parameters, it would be tedious to manually
+    # create an update rule for each model parameter. We thus create the updates
+    # dictionary by automatically looping over all (params[i], grads[i]) pairs.
+    updates = {}
+    for param_i, grad_i in zip(params, grads):
+        updates[param_i] = param_i - learning_rate * grad_i
+    train_model = theano.function([x, y], cost, updates=updates)
 
 
     ###############
@@ -310,7 +316,6 @@ def evaluate_lenet5(learning_rate=0.0001, n_iter=1000, dataset='mnist.pkl.gz'):
 
     # have a maximum of `n_iter` iterations through the entire dataset
     for iter in xrange(n_iter * n_minibatches):
-    #for iter in xrange(2 * n_minibatches):
 
         # get epoch and minibatch index
         epoch           = iter / n_minibatches
 
@@ -173,10 +173,14 @@ one of Figure 1. The input consists of 3 features maps (an RGB color image) of s
                         size=w_shp),
                     dtype=input.dtype))
 
-        # initialize shared variable for bias (1D tensor)
+        # initialize shared variable for bias (1D tensor) with random values
+        # IMPORTANT: biases are usually initialized to zero. However, in this
+        # particular application, we simply apply the convolutional layer to
+        # an image without learning the parameters. We therefore initialize
+        # them to random values to "simulate" learning.
         b_shp = (2,)
         b = theano.shared( numpy.asarray(
-                    rng.uniform(low=-.0, high=0., Size=(2,)),
+                    rng.uniform(low=-.5, high=.5, size=b_shp),
                     dtype=input.dtype))
 
         # build symbolic expression that computes the convolution of input with filters in w
@@ -186,7 +190,7 @@ one of Figure 1. The input consists of 3 features maps (an RGB color image) of s
         output = T.nnet.sigmoid(conv_out + b.dimshuffle('x', 0, 'x', 'x'))
 
         # create theano function to compute filtered images
-        f = theano.function([input], [output])
+        f = theano.function([input], output)
 
 
 Let's have a little bit of fun with this...
@@ -202,7 +206,7 @@ Let's have a little bit of fun with this...
 
         # put image in 4D tensor of shape (1,3,height,width)
         img_ = img.swapaxes(0,2).swapaxes(1,2).reshape(1,3,639,516)
-        filtered_img = f(img_)[0]
+        filtered_img = f(img_)
 
         # plot original image and first and second components of output
         pylab.subplot(1,3,1); pylab.axis('off'); pylab.imshow(img)
@@ -387,7 +391,7 @@ layer.
             # add the bias term. Since the bias is a vector (1D array), we first
             # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will thus
             # be broadcasted across mini-batches and feature map width & height
-            self.output = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')
+            self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
 
             # store parameters of this layer
             self.params = [self.W, self.b]
@@ -495,11 +499,15 @@ instantiate the network as follows.
 
     # create a list of all model parameters to be fit by gradient descent
     params = layer3.params+ layer2.params+ layer1.params + layer0.params
-    learning_rate = numpy.asarray(learning_rate, dtype=theano.config.floatX)
 
-    # train_model is a function that updates the model parameters by SGD
-    train_model = theano.function([x, y], cost,
-            updates=[(p, p - learning_rate*gp) for p,gp in zip(params, T.grad(cost, params))])
+    # train_model is a function that updates the model parameters by SGD.
+    # Since this model has many parameters, it would be tedious to manually
+    # create an update rule for each model parameter. We thus create the updates
+    # dictionary by automatically looping over all (parameter, gradient) pairs.
+    updates = {}
+    for param_i, grad_i in zip(params, T.grad(cost, params)):
+        updates[param_i] = param_i - learning_rate * grad_i
+    train_model = theano.function([x, y], cost, updates=updates)
 
 
 We leave out the code, which performs the actual training and early-stopping,