Updated weight initialization to the new formula: uniform in +/- sqrt(6 / (fan_in + fan_out)), replacing +/- sqrt(3 / fan_in)

gdesjardins · gdesjardins · commit c28306507bc2 · 2010-02-03T09:24:32.000-05:00
diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py
@@ -49,14 +49,10 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
         """
         assert image_shape[1]==filter_shape[1]
         self.input = input
-   
-        # initialize weight values: the fan-in of each hidden neuron is
-        # restricted by the size of the receptive fields.
-        fan_in =  numpy.prod(filter_shape[1:])
-        W_values = numpy.asarray( rng.uniform( \
-              low = -numpy.sqrt(3./fan_in), \
-              high = numpy.sqrt(3./fan_in), \
-              size = filter_shape), dtype = theano.config.floatX)
+  
+        # initialize weights to temporary values until we know the shape of the output feature
+        # maps
+        W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
         self.W = theano.shared(value = W_values)
 
         # the bias is a 1D tensor -- one bias per output feature map
@@ -67,6 +63,18 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
         conv_out = conv.conv2d(input, self.W, 
                 filter_shape=filter_shape, image_shape=image_shape)
 
+        # there are "num input feature maps * filter height * filter width" inputs
+        # to each hidden unit
+        fan_in = numpy.prod(filter_shape[1:])
+        # each unit in the lower layer receives a gradient from:
+        # "num output feature maps * filter height * filter width" / pooling size
+        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
+        # replace weight values with random weights
+        W_bound = numpy.sqrt(6./(fan_in + fan_out))
+        self.W.value = numpy.asarray( 
+                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
+                dtype = theano.config.floatX)
+  
         # downsample each feature map individually, using maxpooling
         pooled_out = downsample.max_pool2D(conv_out, poolsize, ignore_border=True)