@@ -75,7 +75,7 @@ def __init__(self, input, n_in, n_out):
7575 # initialize theta = (W,b) with 0s; W gets the shape (n_in, n_out),
7676 # while b is a vector of n_out elements, making theta a vector of
7777 # n_in*n_out + n_out elements
78- self .theta = theano .shared ( value = numpy .zeros (n_in * n_out + n_out ) )
78+ self .theta = theano .shared ( value = numpy .zeros (n_in * n_out + n_out , dtype = theano . config . floatX ) )
7979 # W is represented by the first n_in*n_out elements of theta
8080 self .W = self .theta [0 :n_in * n_out ].reshape ((n_in ,n_out ))
8181 # b is the rest (last n_out elements)
@@ -136,97 +136,105 @@ def errors(self, y):
136136
137137
138138
139- def cg_optimization_mnist ( n_iter = 50 ):
139+ def cg_optimization_mnist ( n_iter = 50 , mnist_pkl_gz = 'mnist.pkl.gz' ):
140140 """Demonstrate conjugate gradient optimization of a log-linear model
141141
142142 This is demonstrated on MNIST.
143143
144144 :param n_iter: number of iterations to run the optimizer
145145
146+ :param mnist_pkl_gz: the path of the mnist training file from
147+ http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
148+
146149 """
147150
148151 # Load the dataset
149- f = gzip .open ('mnist.pkl.gz' ,'rb' )
152+ f = gzip .open (mnist_pkl_gz ,'rb' )
150153 train_set , valid_set , test_set = cPickle .load (f )
151154 f .close ()
152155
153156 # choose the minibatch size
154- batch_size = 20 # sized of the minibatch
155-
156- # Dealing with the training set
157- # get the list of training images (x) and their labels (y)
158- (train_set_x , train_set_y ) = train_set
159- # initialize the list of training minibatches with empty list
160- train_batches = []
161- for i in xrange (0 , len (train_set_x ), batch_size ):
162- # add to the list of minibatches the minibatch starting at
163- # position i, ending at position i+batch_size
164- # a minibatch is a pair ; the first element of the pair is a list
165- # of datapoints, the second element is the list of corresponding
166- # labels
167- train_batches = train_batches + \
168- [(train_set_x [i :i + batch_size ], train_set_y [i :i + batch_size ])]
169-
170- # Dealing with the validation set
171- (valid_set_x , valid_set_y ) = valid_set
172- # initialize the list of validation minibatches
173- valid_batches = []
174- for i in xrange (0 , len (valid_set_x ), batch_size ):
175- valid_batches = valid_batches + \
176- [(valid_set_x [i :i + batch_size ], valid_set_y [i :i + batch_size ])]
177-
178- # Dealing with the testing set
179- (test_set_x , test_set_y ) = test_set
180- # initialize the list of testing minibatches
181- test_batches = []
182- for i in xrange (0 , len (test_set_x ), batch_size ):
183- test_batches = test_batches + \
184- [(test_set_x [i :i + batch_size ], test_set_y [i :i + batch_size ])]
157+ batch_size = 500 # size of the minibatch
158+
def shared_dataset(data_xy):
    """Wrap an (inputs, labels) pair in Theano shared variables.

    Both arrays are converted to ``theano.config.floatX`` and stored in
    shared variables.  The labels are then cast symbolically to ``int32``,
    so the second returned value is a symbolic expression rather than the
    shared variable itself.

    :param data_xy: pair ``(data_x, data_y)`` of inputs and labels

    :return: pair ``(shared inputs, labels cast to int32)``
    """
    float_type = theano.config.floatX
    inputs, labels = data_xy
    inputs_shared = theano.shared(numpy.asarray(inputs, dtype=float_type))
    labels_shared = theano.shared(numpy.asarray(labels, dtype=float_type))
    return inputs_shared, T.cast(labels_shared, 'int32')
164+
165+ test_set_x , test_set_y = shared_dataset (test_set )
166+ valid_set_x , valid_set_y = shared_dataset (valid_set )
167+ train_set_x , train_set_y = shared_dataset (train_set )
168+
169+ batch_size = 500 # size of the minibatch (NOTE: duplicates the assignment above)
170+
171+ n_train_batches = train_set_x .value .shape [0 ] / batch_size
172+ n_valid_batches = valid_set_x .value .shape [0 ] / batch_size
173+ n_test_batches = test_set_x .value .shape [0 ] / batch_size
185174
186175
187176 ishape = (28 ,28 ) # this is the size of MNIST images
188177 n_in = 28 * 28 # number of input units
189178 n_out = 10 # number of output units
190179 # allocate symbolic variables for the data
191- x = T .fmatrix () # the data is presented as rasterized images
192- y = T .lvector () # the labels are presented as 1D vector of
193- # [long int] labels
180+ minibatch_offset = T .lscalar () # offset to the start of a [mini]batch
181+ x = T .matrix () # the data is presented as rasterized images
182+ y = T .ivector () # the labels are presented as 1D vector of
183+ # [int] labels
194184
195185
196186 # construct the logistic regression class
197187 classifier = LogisticRegression ( \
198- input = x . reshape (( batch_size , 28 * 28 )) , n_in = 28 * 28 , n_out = 10 )
188+ input = x , n_in = 28 * 28 , n_out = 10 )
199189
200190 # the cost we minimize during training is the negative log likelihood of
201191 # the model in symbolic format
202192 cost = classifier .negative_log_likelihood (y ).mean ()
203193
204194 # compile a theano function that computes the mistakes that are made by
205195 # the model on a minibatch
206- test_model = theano .function ([x ,y ], classifier .errors (y ))
196+ test_model = theano .function ([minibatch_offset ], classifier .errors (y ),
197+ givens = {
198+ x :test_set_x [minibatch_offset :minibatch_offset + batch_size ],
199+ y :test_set_y [minibatch_offset :minibatch_offset + batch_size ]})
200+
201+ validate_model = theano .function ([minibatch_offset ],classifier .errors (y ),
202+ givens = {
203+ x :valid_set_x [minibatch_offset :minibatch_offset + batch_size ],
204+ y :valid_set_y [minibatch_offset :minibatch_offset + batch_size ]})
205+
206+ # compile a theano function that returns the cost of a minibatch
207+ batch_cost = theano .function (\
208+ [minibatch_offset ], cost , \
209+ givens = {
210+ x : train_set_x [minibatch_offset :minibatch_offset + batch_size ],
211+ y : train_set_y [minibatch_offset :minibatch_offset + batch_size ]})
212+
213+
214+
207215 # compile a theano function that returns the gradient of the minibatch
208216 # with respect to theta
209- batch_grad = theano .function ([x , y ], T .grad (cost , classifier .theta ))
210- # compile a thenao function that returns the cost of a minibatch
211- batch_cost = theano .function ([x , y ], cost )
217+ batch_grad = theano .function (\
218+ [minibatch_offset ], T .grad (cost ,classifier .theta ), \
219+ givens = {
220+ x : train_set_x [minibatch_offset :minibatch_offset + batch_size ],
221+ y : train_set_y [minibatch_offset :minibatch_offset + batch_size ]})
222+
212223
213224 # creates a function that computes the average cost on the training set
def train_fn(theta_value):
    """Return the mean minibatch cost on the training set for ``theta_value``.

    :param theta_value: flat parameter vector to load into
        ``classifier.theta`` before evaluating the cost

    :return: ``numpy.mean`` of ``batch_cost`` over all ``n_train_batches``
        training minibatches
    """
    # load the candidate parameters into the model before evaluating it
    classifier.theta.value = theta_value
    per_batch_costs = []
    for batch_index in xrange(n_train_batches):
        # batch_cost takes the offset of the first example of the minibatch
        per_batch_costs.append(batch_cost(batch_index * batch_size))
    return numpy.mean(per_batch_costs)
220229
221230 # creates a function that computes the average gradient of cost with
222231 # respect to theta
def train_fn_grad(theta_value):
    """Return the average gradient of the cost with respect to theta.

    :param theta_value: flat parameter vector to load into
        ``classifier.theta`` before evaluating the gradient

    :return: sum of ``batch_grad`` over all training minibatches, divided
        by ``n_train_batches``
    """
    # load the candidate parameters into the model before evaluating it
    classifier.theta.value = theta_value
    # offsets of every minibatch after the first one
    remaining_offsets = (k * batch_size for k in xrange(1, n_train_batches))
    # the first minibatch's gradient seeds the accumulator, which is then
    # updated in place
    total = batch_grad(0)
    for offset in remaining_offsets:
        total += batch_grad(offset)
    return total / n_train_batches
230238
231239
232240 validation_scores = [float ('inf' ), 0 ]
@@ -235,23 +243,17 @@ def train_fn_grad(theta_value):
def callback(theta_value):
    """Report validation error for ``theta_value`` and track the best score.

    Loads ``theta_value`` into the classifier, prints the mean validation
    error, and, when that error is lower than the best one recorded so far,
    stores it in ``validation_scores[0]`` together with the mean test error
    in ``validation_scores[1]``.

    NOTE(review): presumably passed as the per-iteration callback of the
    scipy optimizer; that call is not visible in this excerpt.

    :param theta_value: flat parameter vector to evaluate
    """
    classifier.theta.value = theta_value
    # compute the mean validation error over all validation minibatches
    validation_losses = [validate_model(i * batch_size)
                         for i in xrange(n_valid_batches)]
    this_validation_loss = numpy.mean(validation_losses)
    print('validation error %f %%' % (this_validation_loss * 100.,))

    # check if it is better than the best validation score seen so far
    if this_validation_loss < validation_scores[0]:
        # if so, replace the old one, and compute the score on the
        # testing dataset
        validation_scores[0] = this_validation_loss
        # iterate over the *test* minibatches: using n_train_batches here
        # would index past the end of the test set whenever the training
        # set is larger
        test_losses = [test_model(i * batch_size)
                       for i in xrange(n_test_batches)]
        validation_scores[1] = numpy.mean(test_losses)
255257
256258 # using scipy conjugate gradient optimizer
257259 import scipy .optimize
@@ -272,11 +274,6 @@ def callback(theta_value):
272274 print ('The code ran for %f minutes' % ((end_time - start_time )/ 60. ))
273275
274276
275-
276-
277-
278-
279-
280277if __name__ == '__main__' :
281278 cg_optimization_mnist ()
282279
0 commit comments