updates hyperparameters of lenet network

gdesjardins · gdesjardins · commit 952586355f39 · 2010-01-25T10:16:54.000-05:00
diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py
@@ -220,7 +220,7 @@ def load_dataset(fname):
     return train_batches, valid_batches, test_batches
 
 
-def evaluate_lenet5(learning_rate=0.01, n_iter=200, dataset='mnist.pkl.gz'):
+def evaluate_lenet5(learning_rate=0.1, n_iter=200, dataset='mnist.pkl.gz'):
     rng = numpy.random.RandomState(23455)
 
     train_batches, valid_batches, test_batches = load_dataset(dataset)
diff --git a/doc/lenet.txt b/doc/lenet.txt
@@ -443,7 +443,7 @@ instantiate the network as follows.
 
 .. code-block:: python
 
-    learning_rate = 0.001
+    learning_rate = 0.1
     rng = numpy.random.RandomState(23455)
 
     train_batches, valid_batches, test_batches = load_dataset(dataset)
@@ -466,31 +466,32 @@ instantiate the network as follows.
     # Construct the first convolutional pooling layer:
     # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
     # maxpooling reduces this further to (24/2,24/2) = (12,12)
-    # 4D output tensor is thus of shape (20,6,12,12)
+    # 4D output tensor is thus of shape (20,20,12,12)
     layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
-            image_shape=(batch_size,1,28,28),
-            filter_shape=(6,1,5,5), poolsize=(2,2))
+            image_shape=(batch_size,1,28,28), 
+            filter_shape=(20,1,5,5), poolsize=(2,2))
 
     # Construct the second convolutional pooling layer
     # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
     # maxpooling reduces this further to (8/2,8/2) = (4,4)
-    # 4D output tensor is thus of shape (20,32,4,4)
+    # 4D output tensor is thus of shape (20,50,4,4)
     layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
-            image_shape=(batch_size,6,12,12),
-            filter_shape=(32,6,5,5), poolsize=(2,2))
+            image_shape=(batch_size,20,12,12), 
+            filter_shape=(50,20,5,5), poolsize=(2,2))
 
     # the SigmoidalLayer being fully-connected, it operates on 2D matrices of
     # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
     # This will generate a matrix of shape (20,50*4*4) = (20,800)
     layer2_input = layer1.output.flatten(2)
 
     # construct a fully-connected sigmoidal layer
-    layer2 = SigmoidalLayer(rng, input=layer2_input,
-                            n_in=32*4*4, n_out=500)
+    layer2 = SigmoidalLayer(rng, input=layer2_input, 
+                            n_in=50*4*4, n_out=500)
 
     # classify the values of the fully-connected sigmoidal layer
     layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
 
+
     # the cost we minimize during training is the NLL of the model
     cost = layer3.negative_log_likelihood(y)
 
@@ -527,20 +528,17 @@ The following output was obtained with a learning rate of 0.1:
 
 .. code-block:: bash
 
-   Best validation score of 1.080000 % obtained at iteration 27499,with test
-   performance 1.090000 %
-   The code ran for 62.096667 minutes
-
+    Optimization complete.
+    Best validation score of 0.900000 % obtained at iteration 12499,with test
+    performance 0.990000 %
+    The code ran for 85.694333 minutes
 
 Tips and Tricks
 +++++++++++++++
 
 Choosing Hyperparameters
 ------------------------
 
-Running on the GPU
-------------------
-
 
 References
 ++++++++++