Skip to content

Commit ffa5bf9

Browse files
committed
Changing to the new way of storing the dataset (using shared variables)
1 parent a3010db commit ffa5bf9

4 files changed

Lines changed: 212 additions & 271 deletions

File tree

code/convolutional_mlp.py

Lines changed: 22 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -177,61 +177,40 @@ def errors(self, y):
177177
raise NotImplementedError()
178178

179179

180-
def load_dataset(fname):
180+
def evaluate_lenet5(learning_rate=0.1, n_iter=200, dataset='mnist.pkl.gz'):
181+
rng = numpy.random.RandomState(23455)
181182

182183
# Load the dataset
183-
f = gzip.open(fname,'rb')
184+
f = gzip.open(dataset,'rb')
184185
train_set, valid_set, test_set = cPickle.load(f)
185186
f.close()
186187

187-
# make minibatches of size 20
188-
batch_size = 20 # sized of the minibatch
189-
190-
# Dealing with the training set
191-
# get the list of training images (x) and their labels (y)
192-
(train_set_x, train_set_y) = train_set
193-
# initialize the list of training minibatches with empty list
194-
train_batches = []
195-
for i in xrange(0, len(train_set_x), batch_size):
196-
# add to the list of minibatches the minibatch starting at
197-
# position i, ending at position i+batch_size
198-
# a minibatch is a pair ; the first element of the pair is a list
199-
# of datapoints, the second element is the list of corresponding
200-
# labels
201-
train_batches = train_batches + \
202-
[(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]
203-
204-
# Dealing with the validation set
205-
(valid_set_x, valid_set_y) = valid_set
206-
# initialize the list of validation minibatches
207-
valid_batches = []
208-
for i in xrange(0, len(valid_set_x), batch_size):
209-
valid_batches = valid_batches + \
210-
[(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]
211-
212-
# Dealing with the testing set
213-
(test_set_x, test_set_y) = test_set
214-
# initialize the list of testing minibatches
215-
test_batches = []
216-
for i in xrange(0, len(test_set_x), batch_size):
217-
test_batches = test_batches + \
218-
[(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]
219-
220-
return train_batches, valid_batches, test_batches
221188

189+
def shared_dataset(data_xy):
190+
data_x, data_y = data_xy
191+
shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
192+
shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
193+
return shared_x, T.cast(shared_y, 'int32')
222194

223-
def evaluate_lenet5(learning_rate=0.1, n_iter=200, dataset='mnist.pkl.gz'):
224-
rng = numpy.random.RandomState(23455)
195+
test_set_x, test_set_y = shared_dataset(test_set)
196+
valid_set_x, valid_set_y = shared_dataset(valid_set)
197+
train_set_x, train_set_y = shared_dataset(train_set)
225198

226-
train_batches, valid_batches, test_batches = load_dataset(dataset)
199+
batch_size = 500 # sized of the minibatch
227200

228-
ishape = (28,28) # this is the size of MNIST images
229-
batch_size = 20 # sized of the minibatch
201+
# compute number of minibatches for training, validation and testing
202+
n_train_batches = train_set_x.value.shape[0] / batch_size
203+
n_valid_batches = valid_set_x.value.shape[0] / batch_size
204+
n_test_batches = test_set_x.value.shape[0] / batch_size
230205

231206
# allocate symbolic variables for the data
232-
x = T.matrix('x') # rasterized images
233-
y = T.lvector() # the labels are presented as 1D vector of [long int] labels
207+
minibatch_offset = T.lscalar() # offset to the start of a [mini]batch
208+
x = T.matrix('x') # the data is presented as rasterized images
209+
y = T.ivector('y') # the labels are presented as 1D vector of
210+
# [int] labels
211+
234212

213+
ishape = (28,28) # this is the size of MNIST images
235214

236215
######################
237216
# BUILD ACTUAL MODEL #

code/logistic_cg.py

Lines changed: 63 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def __init__(self, input, n_in, n_out):
7575
# initialize theta = (W,b) with 0s; W gets the shape (n_in, n_out),
7676
# while b is a vector of n_out elements, making theta a vector of
7777
# n_in*n_out + n_out elements
78-
self.theta = theano.shared( value = numpy.zeros(n_in*n_out+n_out) )
78+
self.theta = theano.shared( value = numpy.zeros(n_in*n_out+n_out, dtype = theano.config.floatX) )
7979
# W is represented by the first n_in*n_out elements of theta
8080
self.W = self.theta[0:n_in*n_out].reshape((n_in,n_out))
8181
# b is the rest (last n_out elements)
@@ -136,97 +136,105 @@ def errors(self, y):
136136

137137

138138

139-
def cg_optimization_mnist( n_iter=50 ):
139+
def cg_optimization_mnist( n_iter=50, mnist_pkl_gz='mnist.pkl.gz' ):
140140
"""Demonstrate conjugate gradient optimization of a log-linear model
141141
142142
This is demonstrated on MNIST.
143143
144144
:param n_iter: number of iterations to run the optimizer
145145
146+
:param mnist_pkl_gz: the path of the mnist training file from
147+
http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
148+
146149
"""
147150

148151
# Load the dataset
149-
f = gzip.open('mnist.pkl.gz','rb')
152+
f = gzip.open(mnist_pkl_gz,'rb')
150153
train_set, valid_set, test_set = cPickle.load(f)
151154
f.close()
152155

153156
# make minibatches of size 500
154-
batch_size = 20 # sized of the minibatch
155-
156-
# Dealing with the training set
157-
# get the list of training images (x) and their labels (y)
158-
(train_set_x, train_set_y) = train_set
159-
# initialize the list of training minibatches with empty list
160-
train_batches = []
161-
for i in xrange(0, len(train_set_x), batch_size):
162-
# add to the list of minibatches the minibatch starting at
163-
# position i, ending at position i+batch_size
164-
# a minibatch is a pair ; the first element of the pair is a list
165-
# of datapoints, the second element is the list of corresponding
166-
# labels
167-
train_batches = train_batches + \
168-
[(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]
169-
170-
# Dealing with the validation set
171-
(valid_set_x, valid_set_y) = valid_set
172-
# initialize the list of validation minibatches
173-
valid_batches = []
174-
for i in xrange(0, len(valid_set_x), batch_size):
175-
valid_batches = valid_batches + \
176-
[(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]
177-
178-
# Dealing with the testing set
179-
(test_set_x, test_set_y) = test_set
180-
# initialize the list of testing minibatches
181-
test_batches = []
182-
for i in xrange(0, len(test_set_x), batch_size):
183-
test_batches = test_batches + \
184-
[(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]
157+
batch_size = 500 # sized of the minibatch
158+
159+
def shared_dataset(data_xy):
160+
data_x, data_y = data_xy
161+
shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
162+
shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
163+
return shared_x, T.cast(shared_y, 'int32')
164+
165+
test_set_x, test_set_y = shared_dataset(test_set)
166+
valid_set_x, valid_set_y = shared_dataset(valid_set)
167+
train_set_x, train_set_y = shared_dataset(train_set)
168+
169+
batch_size = 500 # sized of the minibatch
170+
171+
n_train_batches = train_set_x.value.shape[0] / batch_size
172+
n_valid_batches = valid_set_x.value.shape[0] / batch_size
173+
n_test_batches = test_set_x.value.shape[0] / batch_size
185174

186175

187176
ishape = (28,28) # this is the size of MNIST images
188177
n_in = 28*28 # number of input units
189178
n_out = 10 # number of output units
190179
# allocate symbolic variables for the data
191-
x = T.fmatrix() # the data is presented as rasterized images
192-
y = T.lvector() # the labels are presented as 1D vector of
193-
# [long int] labels
180+
minibatch_offset = T.lscalar() # offset to the start of a [mini]batch
181+
x = T.matrix() # the data is presented as rasterized images
182+
y = T.ivector() # the labels are presented as 1D vector of
183+
# [int] labels
194184

195185

196186
# construct the logistic regression class
197187
classifier = LogisticRegression( \
198-
input=x.reshape((batch_size,28*28)), n_in=28*28, n_out=10)
188+
input=x, n_in=28*28, n_out=10)
199189

200190
# the cost we minimize during training is the negative log likelihood of
201191
# the model in symbolic format
202192
cost = classifier.negative_log_likelihood(y).mean()
203193

204194
# compile a theano function that computes the mistakes that are made by
205195
# the model on a minibatch
206-
test_model = theano.function([x,y], classifier.errors(y))
196+
test_model = theano.function([minibatch_offset], classifier.errors(y),
197+
givens={
198+
x:test_set_x[minibatch_offset:minibatch_offset+batch_size],
199+
y:test_set_y[minibatch_offset:minibatch_offset+batch_size]})
200+
201+
validate_model = theano.function([minibatch_offset],classifier.errors(y),
202+
givens={
203+
x:valid_set_x[minibatch_offset:minibatch_offset+batch_size],
204+
y:valid_set_y[minibatch_offset:minibatch_offset+batch_size]})
205+
206+
# compile a theano function that returns the cost of a minibatch
207+
batch_cost = theano.function(\
208+
[minibatch_offset], cost, \
209+
givens= {
210+
x : train_set_x[minibatch_offset:minibatch_offset+batch_size],
211+
y : train_set_y[minibatch_offset:minibatch_offset+batch_size]})
212+
213+
214+
207215
# compile a theano function that returns the gradient of the minibatch
208216
# with respect to theta
209-
batch_grad = theano.function([x, y], T.grad(cost, classifier.theta))
210-
# compile a thenao function that returns the cost of a minibatch
211-
batch_cost = theano.function([x, y], cost)
217+
batch_grad = theano.function(\
218+
[minibatch_offset], T.grad(cost,classifier.theta), \
219+
givens= {
220+
x : train_set_x[minibatch_offset:minibatch_offset+batch_size],
221+
y : train_set_y[minibatch_offset:minibatch_offset+batch_size]})
222+
212223

213224
# creates a function that computes the average cost on the training set
214225
def train_fn(theta_value):
215226
classifier.theta.value = theta_value
216-
cost = 0.
217-
for x,y in train_batches :
218-
cost += batch_cost(x,y)
219-
return cost / len(train_batches)
227+
train_losses = [batch_cost(i*batch_size) for i in xrange(n_train_batches)]
228+
return numpy.mean(train_losses)
220229

221230
# creates a function that computes the average gradient of cost with
222231
# respect to theta
223232
def train_fn_grad(theta_value):
224233
classifier.theta.value = theta_value
225-
grad = numpy.zeros(n_in * n_out + n_out)
226-
for x,y in train_batches:
227-
grad += batch_grad(x,y)
228-
return grad/ len(train_batches)
229-
234+
grad = batch_grad(0)
235+
for i in xrange(1,n_train_batches):
236+
grad += batch_grad(i*batch_size)
237+
return grad/n_train_batches
230238

231239

232240
validation_scores = [float('inf'), 0]
@@ -235,23 +243,17 @@ def train_fn_grad(theta_value):
235243
def callback(theta_value):
236244
classifier.theta.value = theta_value
237245
#compute the validation loss
238-
this_validation_loss = 0.
239-
for x,y in valid_batches:
240-
this_validation_loss += test_model(x,y)
241-
242-
this_validation_loss /= len(valid_batches)
243-
246+
validation_losses = [validate_model(i*batch_size) for i in xrange(n_valid_batches)]
247+
this_validation_loss = numpy.mean(validation_losses)
244248
print('validation error %f %%' % (this_validation_loss*100.,))
245249

246250
# check if it is better than the best validation score obtained so far
247251
if this_validation_loss < validation_scores[0]:
248252
# if so, replace the old one, and compute the score on the
249253
# testing dataset
250254
validation_scores[0] = this_validation_loss
251-
test_score = 0.
252-
for x,y in test_batches:
253-
test_score += test_model(x,y)
254-
validation_scores[1] = test_score / len(test_batches)
255+
test_loses = [test_model(i*batch_size) for i in xrange(n_train_batches)]
256+
validation_scores[1] = numpy.mean(test_loses)
255257

256258
# using scipy conjugate gradient optimizer
257259
import scipy.optimize
@@ -272,11 +274,6 @@ def callback(theta_value):
272274
print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
273275

274276

275-
276-
277-
278-
279-
280277
if __name__ == '__main__':
281278
cg_optimization_mnist()
282279

0 commit comments

Comments
 (0)