Skip to content

Commit e4fce9f

Browse files
author
James Bergstra
committed
pretraining is working in deep.py
1 parent b9dc731 commit e4fce9f

1 file changed

Lines changed: 73 additions & 72 deletions

File tree

code/deep.py

Lines changed: 73 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def gibbs_1(v0_sample, W, hbias, vbias):
208208
v1_mean = T.nnet.sigmoid(T.dot(h0_sample, W.T) + vbias)
209209
# get a sample of the visible given their activation
210210
v1_act = self.theano_rng.binomial(v1_mean.shape, 1, v1_mean)
211-
return [v1_act, v1_mean]
211+
return [v1_mean, v1_act]
212212

213213

214214
# DEBUGGING TO DO ALL WITHOUT SCAN
@@ -234,7 +234,7 @@ def gibbs_1(v0_sample, W, hbias, vbias):
234234
# no past value of the second output
235235
outputs_taps = { 0 : [-1], 1 : [] }
236236

237-
v_samples, v_means = theano.scan( fn = gibbs_1,
237+
v_means, v_samples = theano.scan( fn = gibbs_1,
238238
sequences = [],
239239
initial_states = [v_sample, v_mean],
240240
non_sequences = [self.W, self.hbias, self.vbias],
@@ -245,8 +245,8 @@ def gibbs_1(v0_sample, W, hbias, vbias):
245245
def free_energy(self, v_sample):
246246
wx_b = T.dot(v_sample, self.W) + self.hbias
247247
vbias_term = T.sum(T.dot(v_sample, self.vbias))
248-
return -T.sum(T.log(1+T.exp(wx_b))) - vbias_term
249-
return T.sum(T.log(T.nnet.sigmoid(-wx_b))) - vbias_term
248+
hidden_term = T.sum(T.log(1+T.exp(wx_b)))
249+
return -hidden_term - vbias_term
250250

251251
def cd(self, visible = None, persistent = None, steps = 1):
252252
"""
@@ -328,63 +328,7 @@ def cd_updates(self, lr, visible = None, persistent = None, steps = 1):
328328

329329
# DEEP MODELS
330330

331-
class DeepLayerwiseModel(object):
332-
333-
def finetune(self, datasets, lr, batch_size):
334-
335-
# unpack the various datasets
336-
(train_set_x, train_set_y) = datasets[0]
337-
(valid_set_x, valid_set_y) = datasets[1]
338-
(test_set_x, test_set_y) = datasets[2]
339-
340-
# compute number of minibatches for training, validation and testing
341-
assert train_set_x.value.shape[0] % batch_size == 0
342-
assert valid_set_x.value.shape[0] % batch_size == 0
343-
assert test_set_x.value.shape[0] % batch_size == 0
344-
n_train_batches = train_set_x.value.shape[0] / batch_size
345-
n_valid_batches = valid_set_x.value.shape[0] / batch_size
346-
n_test_batches = test_set_x.value.shape[0] / batch_size
347-
348-
index = T.lscalar() # index to a [mini]batch
349-
target = self.y
350-
351-
train_index = index % n_train_batches
352-
353-
classifier = self.logistic_regressor
354-
cost = classifier.negative_log_likelihood(target)
355-
# compute the gradients with respect to the model parameters
356-
gparams = T.grad(cost, self.params)
357-
358-
# compute list of fine-tuning updates
359-
updates = [(param, param - gparam*finetune_lr)
360-
for param,gparam in zip(self.params, gparams)]
361-
362-
train_fn = theano.function([index], cost,
363-
updates = updates,
364-
givens = {
365-
self.x : train_set_x[train_index*batch_size:(train_index+1)*batch_size],
366-
target : train_set_y[train_index*batch_size:(train_index+1)*batch_size]})
367-
368-
test_score_i = theano.function([index], classifier.errors(target),
369-
givens = {
370-
self.x: test_set_x[index*batch_size:(index+1)*batch_size],
371-
target: test_set_y[index*batch_size:(index+1)*batch_size]})
372-
373-
valid_score_i = theano.function([index], classifier.errors(target),
374-
givens = {
375-
self.x: valid_set_x[index*batch_size:(index+1)*batch_size],
376-
target: valid_set_y[index*batch_size:(index+1)*batch_size]})
377-
378-
def test_scores():
379-
return [test_score_i(i) for i in xrange(n_test_batches)]
380-
381-
def valid_scores():
382-
return [valid_score_i(i) for i in xrange(n_valid_batches)]
383-
384-
return train_fn, valid_scores, test_scores
385-
386-
387-
class DBN(DeepLayerwiseModel):
331+
class DBN(object):
388332
"""
389333
*** WHAT IS A DBN?
390334
"""
@@ -480,22 +424,71 @@ def pretraining_functions(self, train_set_x, batch_size, learning_rate, k=1):
480424
# N.B. these cd() samples are independent from the
481425
# samples used for learning
482426
outputs = list(rbm.cd())[0:2]
483-
outputs.append(rbm.input)
484-
outputs.append(train_set_x[batch_begin:batch_end])
485-
outputs.append(batch_begin)
486-
outputs.append(batch_end)
487427
rval.append(function([index], outputs,
488428
updates = rbm.cd_updates(lr=learning_rate),
489429
givens = {self.x: train_set_x[batch_begin:batch_end]}))
490430
if rbm is self.rbm_layers[0]:
491431
f = rval[-1]
492432
AA=len(outputs)
493-
for implicit_out in f.maker.env.outputs[len(outputs):]:
494-
print 'UPDATE ???'
433+
for i, implicit_out in enumerate(f.maker.env.outputs): #[len(outputs):]:
434+
print 'OUTPUT ', i
495435
theano.printing.debugprint(implicit_out, file=sys.stdout)
496436

497437
return rval
498438

439+
def finetune(self, datasets, lr, batch_size):
    """Build the supervised fine-tuning step and the validation/test scorers.

    :param datasets: sequence of three ``(x, y)`` pairs in the order
        train, valid, test.  Each ``x`` has its ``.value.shape[0]`` read
        and each ``x``/``y`` is sliced symbolically inside ``givens``
        (presumably Theano shared variables -- confirm against the caller).
    :param lr: learning rate used in the gradient-descent update of every
        parameter in ``self.params``.
    :param batch_size: number of examples per minibatch; it must divide
        the number of examples of each of the three sets exactly.

    :returns: a tuple ``(train_fn, valid_scores, test_scores)``.
        ``train_fn(i)`` applies one update on training minibatch
        ``i % n_train_batches`` and returns the cost.  ``valid_scores()``
        and ``test_scores()`` take no argument and return the list of
        ``classifier.errors`` values, one per minibatch of the
        corresponding set.
    """
    # unpack the various datasets
    (train_set_x, train_set_y) = datasets[0]
    (valid_set_x, valid_set_y) = datasets[1]
    (test_set_x, test_set_y) = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_examples = train_set_x.value.shape[0]
    n_valid_examples = valid_set_x.value.shape[0]
    n_test_examples = test_set_x.value.shape[0]
    assert n_train_examples % batch_size == 0
    assert n_valid_examples % batch_size == 0
    assert n_test_examples % batch_size == 0
    # `//` keeps the batch counts integral regardless of division mode
    n_train_batches = n_train_examples // batch_size
    n_valid_batches = n_valid_examples // batch_size
    n_test_batches = n_test_examples // batch_size

    index = T.lscalar()  # index to a [mini]batch
    target = self.y

    # wrap around so train_fn can be called with an ever-increasing index
    train_index = index % n_train_batches

    classifier = self.logistic_regressor
    cost = classifier.negative_log_likelihood(target)
    # compute the gradients with respect to the model parameters
    gparams = T.grad(cost, self.params)

    # compute list of fine-tuning updates; the step size is the `lr`
    # argument (no `finetune_lr` name exists in this scope)
    updates = [(param, param - gparam * lr)
               for param, gparam in zip(self.params, gparams)]

    train_begin = train_index * batch_size
    train_end = (train_index + 1) * batch_size
    train_fn = theano.function([index], cost,
            updates = updates,
            givens = {
                self.x : train_set_x[train_begin:train_end],
                target : train_set_y[train_begin:train_end]})

    batch_begin = index * batch_size
    batch_end = (index + 1) * batch_size

    test_score_i = theano.function([index], classifier.errors(target),
            givens = {
                self.x: test_set_x[batch_begin:batch_end],
                target: test_set_y[batch_begin:batch_end]})

    valid_score_i = theano.function([index], classifier.errors(target),
            givens = {
                self.x: valid_set_x[batch_begin:batch_end],
                target: valid_set_y[batch_begin:batch_end]})

    def test_scores():
        # error measure on every test minibatch, in order
        return [test_score_i(i) for i in xrange(n_test_batches)]

    def valid_scores():
        # error measure on every validation minibatch, in order
        return [valid_score_i(i) for i in xrange(n_valid_batches)]

    return train_fn, valid_scores, test_scores
491+
499492
def load_mnist(filename):
500493
f = gzip.open(filename,'rb')
501494
train_set, valid_set, test_set = cPickle.load(f)
@@ -512,11 +505,11 @@ def shared_dataset(data_xy):
512505

513506
return n_train_examples, datasets
514507

515-
def dbn_main(finetune_lr = 0.1,
508+
def dbn_main(finetune_lr = 0.01,
516509
pretraining_epochs = 10,
517510
pretrain_lr = 0.1,
518511
training_epochs = 1000,
519-
batch_size = 20,
512+
batch_size = 2,
520513
mnist_file='mnist.pkl.gz'):
521514
"""
522515
Demonstrate stochastic gradient descent optimization for a multilayer perceptron
@@ -562,23 +555,31 @@ def dbn_main(finetune_lr = 0.1,
562555
print 'Pre-training layer %i'% layer_idx
563556
for i in xrange(pretraining_epochs * n_train_examples / batch_size):
564557
outstuff = pretrain_fn(i)
565-
xe, negsample, input_i = outstuff[:3]
558+
xe, negsample = outstuff[:2]
566559
print (layer_idx, i,
567560
pretraining_epochs * n_train_examples / batch_size,
568561
float(xe),
569562
'Wmin', deep_model.rbm_layers[0].W.value.min(),
570563
'Wmax', deep_model.rbm_layers[0].W.value.max(),
571564
'vmin', deep_model.rbm_layers[0].vbias.value.min(),
572565
'vmax', deep_model.rbm_layers[0].vbias.value.max(),
573-
'x>0.3', (input_i>0.3).sum(),
566+
#'x>0.3', (input_i>0.3).sum(),
574567
)
568+
sys.stdout.flush()
575569
if i % 1000 == 0:
576570
PIL.Image.fromarray(
577571
pylearn.io.image_tiling.tile_raster_images(negsample, (28,28), (10,10),
578-
tile_spacing=(1,1))).save('img_%i_%i.png'%(layer_idx,i))
572+
tile_spacing=(1,1))).save('samples_%i_%i.png'%(layer_idx,i))
573+
574+
PIL.Image.fromarray(
575+
pylearn.io.image_tiling.tile_raster_images(
576+
deep_model.rbm_layers[0].W.value.T,
577+
(28,28), (10,10),
578+
tile_spacing=(1,1))).save('filters_%i_%i.png'%(layer_idx,i))
579579
end_time = time.clock()
580580
print 'Pretraining took %f minutes' %((end_time - start_time)/60.)
581581

582+
return
582583

583584
print "Fine tuning (supervised learning) ..."
584585
train_fn, valid_scores, test_scores =\

0 commit comments

Comments
 (0)