3737__docformat__ = 'restructedtext en'
3838
3939
40- import numpy , time , cPickle , gzip , sys , os
40+ import cPickle
41+ import gzip
42+ import os
43+ import sys
44+ import time
45+
46+ import numpy
4147
4248import theano
4349import theano .tensor as T
@@ -52,9 +58,6 @@ class LogisticRegression(object):
5258 determine a class membership probability.
5359 """
5460
55-
56-
57-
5861 def __init__ (self , input , n_in , n_out ):
5962 """ Initialize the parameters of the logistic regression
6063
@@ -75,24 +78,20 @@ def __init__(self, input, n_in, n_out):
7578 # initialize theta = (W,b) with 0s; W gets the shape (n_in, n_out),
7679 # while b is a vector of n_out elements, making theta a vector of
7780 # n_in*n_out + n_out elements
78- self .theta = theano .shared (value = numpy .zeros (n_in * n_out + n_out , dtype = theano .config .floatX ),
81+ self .theta = theano .shared (value = numpy .zeros (n_in * n_out + n_out ,
82+ dtype = theano .config .floatX ),
7983 name = 'theta' )
8084 # W is represented by the first n_in*n_out elements of theta
81- self .W = self .theta [0 :n_in * n_out ].reshape ((n_in ,n_out ))
85+ self .W = self .theta [0 :n_in * n_out ].reshape ((n_in , n_out ))
8286 # b is the rest (last n_out elements)
83- self .b = self .theta [n_in * n_out :n_in * n_out + n_out ]
84-
87+ self .b = self .theta [n_in * n_out :n_in * n_out + n_out ]
8588
8689 # compute vector of class-membership probabilities in symbolic form
87- self .p_y_given_x = T .nnet .softmax (T .dot (input , self .W )+ self .b )
90+ self .p_y_given_x = T .nnet .softmax (T .dot (input , self .W ) + self .b )
8891
8992 # compute prediction as class whose probability is maximal in
9093 # symbolic form
91- self .y_pred = T .argmax (self .p_y_given_x , axis = 1 )
92-
93-
94-
95-
94+ self .y_pred = T .argmax (self .p_y_given_x , axis = 1 )
9695
9796 def negative_log_likelihood (self , y ):
9897 """Return the negative log-likelihood of the prediction of this model
@@ -108,11 +107,7 @@ def negative_log_likelihood(self, y):
108107 :param y: corresponds to a vector that gives for each example the
109108 correct label
110109 """
111- return - T .mean (T .log (self .p_y_given_x )[T .arange (y .shape [0 ]),y ])
112-
113-
114-
115-
110+ return - T .mean (T .log (self .p_y_given_x )[T .arange (y .shape [0 ]), y ])
116111
117112 def errors (self , y ):
118113 """Return a float representing the number of errors in the minibatch
@@ -136,12 +131,7 @@ def errors(self, y):
136131 raise NotImplementedError ()
137132
138133
139-
140-
141-
142-
143-
144- def cg_optimization_mnist ( n_epochs = 50 , mnist_pkl_gz = '../data/mnist.pkl.gz' ):
134+ def cg_optimization_mnist (n_epochs = 50 , mnist_pkl_gz = '../data/mnist.pkl.gz' ):
145135 """Demonstrate conjugate gradient optimization of a log-linear model
146136
147137 This is demonstrated on MNIST.
@@ -151,7 +141,7 @@ def cg_optimization_mnist( n_epochs=50, mnist_pkl_gz='../data/mnist.pkl.gz' ):
151141
152142 :type mnist_pkl_gz: string
153143 :param mnist_pkl_gz: the path of the mnist training file from
154- http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
144+ http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
155145
156146 """
157147 #############
@@ -160,7 +150,7 @@ def cg_optimization_mnist( n_epochs=50, mnist_pkl_gz='../data/mnist.pkl.gz' ):
160150 print '... loading data'
161151
162152 # Load the dataset
163- f = gzip .open (mnist_pkl_gz ,'rb' )
153+ f = gzip .open (mnist_pkl_gz , 'rb' )
164154 train_set , valid_set , test_set = cPickle .load (f )
165155 f .close ()
166156
@@ -174,8 +164,10 @@ def shared_dataset(data_xy):
174164 variable) would lead to a large decrease in performance.
175165 """
176166 data_x , data_y = data_xy
177- shared_x = theano .shared (numpy .asarray (data_x , dtype = theano .config .floatX ))
178- shared_y = theano .shared (numpy .asarray (data_y , dtype = theano .config .floatX ))
167+ shared_x = theano .shared (numpy .asarray (data_x ,
168+ dtype = theano .config .floatX ))
169+ shared_y = theano .shared (numpy .asarray (data_y ,
170+ dtype = theano .config .floatX ))
179171 # When storing data on the GPU it has to be stored as floats
180172 # therefore we will store the labels as ``floatX`` as well
181173 # (``shared_y`` does exactly that). But during our computations
@@ -185,37 +177,33 @@ def shared_dataset(data_xy):
185177 # lets us get around this issue
186178 return shared_x , T .cast (shared_y , 'int32' )
187179
188-
189- test_set_x , test_set_y = shared_dataset (test_set )
180+ test_set_x , test_set_y = shared_dataset (test_set )
190181 valid_set_x , valid_set_y = shared_dataset (valid_set )
191182 train_set_x , train_set_y = shared_dataset (train_set )
192183
193184 batch_size = 600 # size of the minibatch
194185
195186 n_train_batches = train_set_x .get_value (borrow = True ).shape [0 ] / batch_size
196187 n_valid_batches = valid_set_x .get_value (borrow = True ).shape [0 ] / batch_size
197- n_test_batches = test_set_x .get_value (borrow = True ).shape [0 ] / batch_size
198-
199-
200- ishape = (28 ,28 ) # this is the size of MNIST images
201- n_in = 28 * 28 # number of input units
202- n_out = 10 # number of output units
188+ n_test_batches = test_set_x .get_value (borrow = True ).shape [0 ] / batch_size
203189
190+ ishape = (28 , 28 ) # this is the size of MNIST images
191+ n_in = 28 * 28 # number of input units
192+ n_out = 10 # number of output units
204193
205194 ######################
206195 # BUILD ACTUAL MODEL #
207196 ######################
208197 print '... building the model'
209198
210199 # allocate symbolic variables for the data
211- minibatch_offset = T .lscalar () # offset to the start of a [mini]batch
200+ minibatch_offset = T .lscalar () # offset to the start of a [mini]batch
212201 x = T .matrix () # the data is presented as rasterized images
213202 y = T .ivector () # the labels are presented as 1D vector of
214203 # [int] labels
215204
216-
217205 # construct the logistic regression class
218- classifier = LogisticRegression ( input = x , n_in = 28 * 28 , n_out = 10 )
206+ classifier = LogisticRegression (input = x , n_in = 28 * 28 , n_out = 10 )
219207
220208 # the cost we minimize during training is the negative log likelihood of
221209 # the model in symbolic format
@@ -225,65 +213,72 @@ def shared_dataset(data_xy):
225213 # the model on a minibatch
226214 test_model = theano .function ([minibatch_offset ], classifier .errors (y ),
227215 givens = {
228- x :test_set_x [minibatch_offset :minibatch_offset + batch_size ],
229- y :test_set_y [minibatch_offset :minibatch_offset + batch_size ]},
216+ x : test_set_x [minibatch_offset :minibatch_offset + batch_size ],
217+ y : test_set_y [minibatch_offset :minibatch_offset + batch_size ]},
230218 name = "test" )
231219
232- validate_model = theano .function ([minibatch_offset ],classifier .errors (y ),
220+ validate_model = theano .function ([minibatch_offset ], classifier .errors (y ),
233221 givens = {
234- x :valid_set_x [minibatch_offset :minibatch_offset + batch_size ],
235- y :valid_set_y [minibatch_offset :minibatch_offset + batch_size ]},
222+ x : valid_set_x [minibatch_offset :
223+ minibatch_offset + batch_size ],
224+ y : valid_set_y [minibatch_offset :
225+ minibatch_offset + batch_size ]},
236226 name = "validate" )
237227
238228 # compile a theano function that returns the cost of a minibatch
239229 batch_cost = theano .function ([minibatch_offset ], cost ,
240- givens = {
241- x : train_set_x [minibatch_offset :minibatch_offset + batch_size ],
242- y : train_set_y [minibatch_offset :minibatch_offset + batch_size ]},
230+ givens = {
231+ x : train_set_x [minibatch_offset :
232+ minibatch_offset + batch_size ],
233+ y : train_set_y [minibatch_offset :
234+ minibatch_offset + batch_size ]},
243235 name = "batch_cost" )
244236
245-
246237 # compile a theano function that returns the gradient of the minibatch
247238 # with respect to theta
248- batch_grad = theano .function ([minibatch_offset ], T .grad (cost ,classifier .theta ),
249- givens = {
250- x : train_set_x [minibatch_offset :minibatch_offset + batch_size ],
251- y : train_set_y [minibatch_offset :minibatch_offset + batch_size ]},
239+ batch_grad = theano .function ([minibatch_offset ],
240+ T .grad (cost , classifier .theta ),
241+ givens = {
242+ x : train_set_x [minibatch_offset :
243+ minibatch_offset + batch_size ],
244+ y : train_set_y [minibatch_offset :
245+ minibatch_offset + batch_size ]},
252246 name = "batch_grad" )
253247
254-
255248 # creates a function that computes the average cost on the training set
256249 def train_fn (theta_value ):
257250 classifier .theta .set_value (theta_value , borrow = True )
258- train_losses = [batch_cost (i * batch_size ) for i in xrange (n_train_batches )]
251+ train_losses = [batch_cost (i * batch_size )
252+ for i in xrange (n_train_batches )]
259253 return numpy .mean (train_losses )
260254
261255 # creates a function that computes the average gradient of cost with
262256 # respect to theta
263257 def train_fn_grad (theta_value ):
264258 classifier .theta .set_value (theta_value , borrow = True )
265259 grad = batch_grad (0 )
266- for i in xrange (1 ,n_train_batches ):
267- grad += batch_grad (i * batch_size )
268- return grad / n_train_batches
269-
260+ for i in xrange (1 , n_train_batches ):
261+ grad += batch_grad (i * batch_size )
262+ return grad / n_train_batches
270263
271264 validation_scores = [numpy .inf , 0 ]
272265
273266 # creates the validation function
274267 def callback (theta_value ):
275268 classifier .theta .set_value (theta_value , borrow = True )
276269 #compute the validation loss
277- validation_losses = [validate_model (i * batch_size ) for i in xrange (n_valid_batches )]
270+ validation_losses = [validate_model (i * batch_size )
271+ for i in xrange (n_valid_batches )]
278272 this_validation_loss = numpy .mean (validation_losses )
279- print ('validation error %f %%' % (this_validation_loss * 100. ,))
273+ print ('validation error %f %%' % (this_validation_loss * 100. ,))
280274
281275 # check if it is better than the best validation score obtained so far
282276 if this_validation_loss < validation_scores [0 ]:
283277 # if so, replace the old one, and compute the score on the
284278 # testing dataset
285279 validation_scores [0 ] = this_validation_loss
286- test_losses = [test_model (i * batch_size ) for i in xrange (n_test_batches )]
280+ test_losses = [test_model (i * batch_size )
281+ for i in xrange (n_test_batches )]
287282 validation_scores [1 ] = numpy .mean (test_losses )
288283
289284 ###############
@@ -295,18 +290,20 @@ def callback(theta_value):
295290 print ("Optimizing using scipy.optimize.fmin_cg..." )
296291 start_time = time .clock ()
297292 best_w_b = scipy .optimize .fmin_cg (
298- f = train_fn ,
299- x0 = numpy .zeros ((n_in + 1 ) * n_out , dtype = x .dtype ),
300- fprime = train_fn_grad ,
301- callback = callback ,
302- disp = 0 ,
303- maxiter = n_epochs )
293+ f = train_fn ,
294+ x0 = numpy .zeros ((n_in + 1 ) * n_out , dtype = x .dtype ),
295+ fprime = train_fn_grad ,
296+ callback = callback ,
297+ disp = 0 ,
298+ maxiter = n_epochs )
304299 end_time = time .clock ()
305300 print (('Optimization complete with best validation score of %f %%, with '
306301 'test performance %f %%' ) %
307- (validation_scores [0 ]* 100. , validation_scores [1 ]* 100. ))
302+ (validation_scores [0 ] * 100. , validation_scores [1 ] * 100. ))
308303
309- print >> sys .stderr , ('The code for file ' + os .path .split (__file__ )[1 ]+ ' ran for %.1fs' % ((end_time - start_time )))
304+ print >> sys .stderr , ('The code for file ' +
305+ os .path .split (__file__ )[1 ] +
306+ ' ran for %.1fs' % ((end_time - start_time )))
310307
311308
312309if __name__ == '__main__' :
0 commit comments