Ocode
diff --git a/‎code/convolutional_mlp.py‎
Lines changed: 22 additions & 43 deletions b/‎code/convolutional_mlp.py‎
Lines changed: 22 additions & 43 deletions
diff --git a/‎code/logistic_cg.py‎
Lines changed: 63 additions & 66 deletions b/‎code/logistic_cg.py‎
Lines changed: 63 additions & 66 deletions
@@ -177,61 +177,40 @@ def errors(self, y):
             raise NotImplementedError()
 
 
-def load_dataset(fname):
+def evaluate_lenet5(learning_rate=0.1, n_iter=200, dataset='mnist.pkl.gz'):
+    rng = numpy.random.RandomState(23455)
 
     # Load the dataset 
-    f = gzip.open(fname,'rb')
+    f = gzip.open(dataset,'rb')
     train_set, valid_set, test_set = cPickle.load(f)
     f.close()
 
-    # make minibatches of size 20 
-    batch_size = 20    # sized of the minibatch
-
-    # Dealing with the training set
-    # get the list of training images (x) and their labels (y)
-    (train_set_x, train_set_y) = train_set
-    # initialize the list of training minibatches with empty list
-    train_batches = []
-    for i in xrange(0, len(train_set_x), batch_size):
-        # add to the list of minibatches the minibatch starting at 
-        # position i, ending at position i+batch_size
-        # a minibatch is a pair ; the first element of the pair is a list 
-        # of datapoints, the second element is the list of corresponding 
-        # labels
-        train_batches = train_batches + \
-               [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]
-
-    # Dealing with the validation set
-    (valid_set_x, valid_set_y) = valid_set
-    # initialize the list of validation minibatches 
-    valid_batches = []
-    for i in xrange(0, len(valid_set_x), batch_size):
-        valid_batches = valid_batches + \
-               [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]
-
-    # Dealing with the testing set
-    (test_set_x, test_set_y) = test_set
-    # initialize the list of testing minibatches 
-    test_batches = []
-    for i in xrange(0, len(test_set_x), batch_size):
-        test_batches = test_batches + \
-              [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]
-
-    return train_batches, valid_batches, test_batches
 
+    def shared_dataset(data_xy):  # wrap an (inputs, labels) pair in theano shared variables
+        data_x, data_y = data_xy
+        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
+        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))  # labels are stored as floatX as well
+        return shared_x, T.cast(shared_y, 'int32')  # second item is an int32 cast of shared_y, not the shared variable itself
 
-def evaluate_lenet5(learning_rate=0.1, n_iter=200, dataset='mnist.pkl.gz'):
-    rng = numpy.random.RandomState(23455)
+    test_set_x, test_set_y = shared_dataset(test_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    train_set_x, train_set_y = shared_dataset(train_set)
 
-    train_batches, valid_batches, test_batches = load_dataset(dataset)
+    batch_size = 500    # sized of the minibatch
 
-    ishape = (28,28)     # this is the size of MNIST images
-    batch_size = 20    # sized of the minibatch
+    # compute number of minibatches for training, validation and testing
+    n_train_batches = train_set_x.value.shape[0] / batch_size
+    n_valid_batches = valid_set_x.value.shape[0] / batch_size
+    n_test_batches  = test_set_x.value.shape[0]  / batch_size
 
     # allocate symbolic variables for the data
-    x = T.matrix('x')  # rasterized images
-    y = T.lvector()  # the labels are presented as 1D vector of [long int] labels
+    minibatch_offset = T.lscalar() # offset to the start of a [mini]batch 
+    x = T.matrix('x')  # the data is presented as rasterized images
+    y = T.ivector('y') # the labels are presented as 1D vector of 
+                       # [int] labels
+
 
+    ishape = (28,28)     # this is the size of MNIST images
 
     ######################
     # BUILD ACTUAL MODEL #
 
@@ -75,7 +75,7 @@ def __init__(self, input, n_in, n_out):
         # initialize theta = (W,b) with 0s; W gets the shape (n_in, n_out), 
         # while b is a vector of n_out elements, making theta a vector of
         # n_in*n_out + n_out elements
-        self.theta = theano.shared( value = numpy.zeros(n_in*n_out+n_out) )
+        self.theta = theano.shared( value = numpy.zeros(n_in*n_out+n_out, dtype = theano.config.floatX) )
         # W is represented by the fisr n_in*n_out elements of theta
         self.W = self.theta[0:n_in*n_out].reshape((n_in,n_out))
         # b is the rest (last n_out elements)
@@ -136,97 +136,105 @@ def errors(self, y):
 
 
 
-def cg_optimization_mnist( n_iter=50 ):
+def cg_optimization_mnist( n_iter=50, mnist_pkl_gz='mnist.pkl.gz' ):
     """Demonstrate conjugate gradient optimization of a log-linear model 
 
     This is demonstrated on MNIST.
     
     :param n_iter: number of iterations ot run the optimizer 
 
+    :param mnist_pkl_gz: the path of the mnist training file from 
+    http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
+
     """
 
     # Load the dataset 
-    f = gzip.open('mnist.pkl.gz','rb')
+    f = gzip.open(mnist_pkl_gz,'rb')
     train_set, valid_set, test_set = cPickle.load(f)
     f.close()
 
     # make minibatches of size 20 
-    batch_size = 20    # sized of the minibatch
-
-    # Dealing with the training set
-    # get the list of training images (x) and their labels (y)
-    (train_set_x, train_set_y) = train_set
-    # initialize the list of training minibatches with empty list
-    train_batches = []
-    for i in xrange(0, len(train_set_x), batch_size):
-        # add to the list of minibatches the minibatch starting at 
-        # position i, ending at position i+batch_size
-        # a minibatch is a pair ; the first element of the pair is a list 
-        # of datapoints, the second element is the list of corresponding 
-        # labels
-        train_batches = train_batches + \
-               [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]
-
-    # Dealing with the validation set
-    (valid_set_x, valid_set_y) = valid_set
-    # initialize the list of validation minibatches 
-    valid_batches = []
-    for i in xrange(0, len(valid_set_x), batch_size):
-        valid_batches = valid_batches + \
-               [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]
-
-    # Dealing with the testing set
-    (test_set_x, test_set_y) = test_set
-    # initialize the list of testing minibatches 
-    test_batches = []
-    for i in xrange(0, len(test_set_x), batch_size):
-        test_batches = test_batches + \
-              [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]
+    batch_size = 500    # sized of the minibatch
+
+    def shared_dataset(data_xy):  # wrap an (inputs, labels) pair in theano shared variables
+        data_x, data_y = data_xy
+        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
+        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))  # labels are stored as floatX as well
+        return shared_x, T.cast(shared_y, 'int32')  # int32 cast so label slices match the ivector `y` they replace in `givens`
+
+    test_set_x,  test_set_y  = shared_dataset(test_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    train_set_x, train_set_y = shared_dataset(train_set)
+
+    batch_size = 500    # sized of the minibatch
+
+    n_train_batches = train_set_x.value.shape[0] / batch_size
+    n_valid_batches = valid_set_x.value.shape[0] / batch_size
+    n_test_batches  = test_set_x.value.shape[0]  / batch_size
 
 
     ishape     = (28,28) # this is the size of MNIST images
     n_in       = 28*28   # number of input units
     n_out      = 10      # number of output units
     # allocate symbolic variables for the data
-    x = T.fmatrix()  # the data is presented as rasterized images
-    y = T.lvector()  # the labels are presented as 1D vector of 
-                          # [long int] labels
+    minibatch_offset = T.lscalar() # offset to the start of a [mini]batch 
+    x = T.matrix()   # the data is presented as rasterized images
+    y = T.ivector()  # the labels are presented as 1D vector of 
+                     # [int] labels
 
 
     # construct the logistic regression class
     classifier = LogisticRegression( \
-                   input=x.reshape((batch_size,28*28)), n_in=28*28, n_out=10)
+                   input=x, n_in=28*28, n_out=10)
 
     # the cost we minimize during training is the negative log likelihood of 
     # the model in symbolic format
     cost = classifier.negative_log_likelihood(y).mean() 
 
     # compile a theano function that computes the mistakes that are made by 
     # the model on a minibatch
-    test_model = theano.function([x,y], classifier.errors(y))
+    test_model = theano.function([minibatch_offset], classifier.errors(y),
+            givens={
+                x:test_set_x[minibatch_offset:minibatch_offset+batch_size],
+                y:test_set_y[minibatch_offset:minibatch_offset+batch_size]})
+
+    validate_model = theano.function([minibatch_offset],classifier.errors(y),
+            givens={
+                x:valid_set_x[minibatch_offset:minibatch_offset+batch_size],
+                y:valid_set_y[minibatch_offset:minibatch_offset+batch_size]})
+
+    #  compile a theano function that returns the cost of a minibatch
+    batch_cost = theano.function(\
+         [minibatch_offset], cost, \
+         givens= {
+             x : train_set_x[minibatch_offset:minibatch_offset+batch_size],
+             y : train_set_y[minibatch_offset:minibatch_offset+batch_size]})
+
+
+    
     # compile a theano function that returns the gradient of the minibatch 
     # with respect to theta
-    batch_grad = theano.function([x, y], T.grad(cost, classifier.theta))
-    #  compile a thenao function that returns the cost of a minibatch
-    batch_cost = theano.function([x, y], cost)
+    batch_grad = theano.function(\
+         [minibatch_offset], T.grad(cost,classifier.theta), \
+         givens= {
+             x : train_set_x[minibatch_offset:minibatch_offset+batch_size],
+             y : train_set_y[minibatch_offset:minibatch_offset+batch_size]})
+
 
     # creates a function that computes the average cost on the training set
     def train_fn(theta_value):
         classifier.theta.value = theta_value
-        cost = 0.
-        for x,y in train_batches :
-            cost += batch_cost(x,y)
-        return cost / len(train_batches)
+        train_losses = [batch_cost(i*batch_size) for i in xrange(n_train_batches)]  # batch_cost takes the start offset of a minibatch
+        return numpy.mean(train_losses)  # mean of the per-minibatch costs
 
     # creates a function that computes the average gradient of cost with 
     # respect to theta
     def train_fn_grad(theta_value):
         classifier.theta.value = theta_value
-        grad = numpy.zeros(n_in * n_out + n_out)
-        for x,y in train_batches:
-            grad += batch_grad(x,y)
-        return grad/ len(train_batches)
-
+        grad = batch_grad(0)  # start the running sum from the minibatch at offset 0
+        for i in xrange(1,n_train_batches):  # remaining minibatches; the first one is already in `grad`
+            grad += batch_grad(i*batch_size)  # batch_grad takes the start offset of a minibatch
+        return grad/n_train_batches  # average gradient over the training minibatches
 
 
     validation_scores = [float('inf'), 0]
@@ -235,23 +243,17 @@ def train_fn_grad(theta_value):
     def callback(theta_value):
         classifier.theta.value = theta_value
         #compute the validation loss
-        this_validation_loss = 0.
-        for x,y in valid_batches:
-            this_validation_loss += test_model(x,y)
-
-        this_validation_loss /= len(valid_batches)
-
+        validation_losses = [validate_model(i*batch_size) for i in xrange(n_valid_batches)]
+        this_validation_loss = numpy.mean(validation_losses)
         print('validation error %f %%' % (this_validation_loss*100.,))
 
         # check if it is better then best validation score got until now
         if this_validation_loss < validation_scores[0]:
             # if so, replace the old one, and compute the score on the 
             # testing dataset
             validation_scores[0] = this_validation_loss
-            test_score = 0.
-            for x,y in test_batches:
-                test_score += test_model(x,y)
-            validation_scores[1] = test_score / len(test_batches)
+            test_loses = [test_model(i*batch_size) for i in xrange(n_test_batches)]  # one offset per *test* minibatch
+            validation_scores[1] = numpy.mean(test_loses)  # test error recorded at the best validation loss so far
 
     # using scipy conjugate gradient optimizer 
     import scipy.optimize
@@ -272,11 +274,6 @@ def callback(theta_value):
     print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
 
 
-
-
-
-
-
 if __name__ == '__main__':
     cg_optimization_mnist()