From 3a38cca044c7d6bbdd9871d5c51a4564bb6cd22e Mon Sep 17 00:00:00 2001 From: Caspar Meijer Date: Fri, 24 Apr 2020 02:22:32 +0200 Subject: [PATCH] Attempt at paralleling training using batches --- main2.py | 5 +-- numba_neural_network.py | 79 +++++++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 32 deletions(-) diff --git a/main2.py b/main2.py index e2d23ff..37b09ed 100644 --- a/main2.py +++ b/main2.py @@ -13,6 +13,7 @@ print("Begin compiling!") begin_time = time.time_ns() compiled_nn_values = nn.make_neural_network(layer_sizes=[data_input.shape[1], data_output.shape[1]], layer_activations=["sigmoid"]) +# nn.train_batch(data_input[:8], data_output[:8], compiled_nn_values) nn.train_auto(1, data_input[:1], data_output[:1], data_input[1: 2], data_output[1: 2], compiled_nn_values) end_time = time.time_ns() print("Compile time:", (end_time-begin_time) / 1e9) @@ -36,9 +37,9 @@ # # nn.train_single(validate_input[ii], validate_output[ii], nn_values) # end_time = time.time_ns() # print("Average 1:", ((end_time-begin_time)/1e9) / n2) - + begin_time = time.time_ns() - epochs, current_mse = nn.train_auto(1, train_input, train_output, validate_input, validate_output, nn_values) + epochs, current_mse = nn.train_auto(4, train_input, train_output, validate_input, validate_output, nn_values) end_time = time.time_ns() train_mse = nn.calculate_MSE(train_input, train_output, nn_values) diff --git a/numba_neural_network.py b/numba_neural_network.py index e5ac65e..b14c33b 100644 --- a/numba_neural_network.py +++ b/numba_neural_network.py @@ -18,6 +18,9 @@ def __init__(self, weights, biases, layer_outputs): self.biases = biases self.layer_outputs = layer_outputs + def copy(self): + return NeuralNetwork(self.weights.copy(), self.biases.copy(), self.layer_outputs.copy()) + def make_neural_network(layer_sizes, layer_activations, learning_rate=0.05, low=-2, high=2): @@ -52,7 +55,7 @@ def make_neural_network(layer_sizes, layer_activations, learning_rate=0.05, low= # print(typeof(typed_layer_outputs)) typed_learning_rate = learning_rate - return (typed_layer_sizes, typed_layer_activations, typed_weights, typed_biases, typed_layer_outputs, typed_learning_rate) + return (typed_layer_sizes, typed_layer_activations, typed_learning_rate, NeuralNetwork(typed_weights, typed_biases, typed_layer_outputs)) # typed_layer_sizes = 0 # typed_layer_activations = 1 @@ -66,17 +69,17 @@ def make_neural_network(layer_sizes, layer_activations, learning_rate=0.05, low= def calculate_output(input_data, nn): assert len(input_data) == nn[0][0] y = input_data - for i in prange(len(nn[2])): - y = h.activation(np.dot(nn[2][i].T, y) + nn[3][i], nn[1][i], False) + for i in prange(len(nn[3].weights)): + y = h.activation(np.dot(nn[3].weights[i].T, y) + nn[3].biases[i], nn[1][i], False) return y @njit def feed_forward_layers(input_data, nn): assert len(input_data) == nn[0][0] - nn[4][0] = input_data - for i in range(len(nn[2])): - nn[4][i+1] = h.activation(np.dot(nn[2][i].T, nn[4][i]) + nn[3][i], nn[1][i], False) + nn[3].layer_outputs[0] = input_data + for i in range(len(nn[3].weights)): + nn[3].layer_outputs[i+1] = h.activation(np.dot(nn[3].weights[i].T, nn[3].layer_outputs[i]) + nn[3].biases[i], nn[1][i], False) @njit @@ -85,33 +88,49 @@ def train_single(input_data, desired_output_data, nn): assert len(desired_output_data) == nn[0][-1] feed_forward_layers(input_data, nn) - error = (desired_output_data - nn[4][-1]) * h.activation(nn[4][-1], nn[1][-1], True) - nn[2][-1] += (nn[5] * nn[4][-2] * error.T) - nn[3][-1] += nn[5] * error + error = (desired_output_data - nn[3].layer_outputs[-1]) * h.activation(nn[3].layer_outputs[-1], nn[1][-1], True) + nn[3].weights[-1] += (nn[2] * nn[3].layer_outputs[-2] * error.T) + nn[3].biases[-1] += nn[2] * error - length_weights = len(nn[2]) + length_weights = len(nn[3].weights) for i in range(1, length_weights): i = length_weights - i - 1 - error = np.dot(nn[2][i+1], error) * h.activation(nn[4][i+1], nn[1][i], True) - nn[2][i] += (nn[5] * nn[4][i] * error.T) - nn[3][i] += nn[5] * error + error = np.dot(nn[3].weights[i+1], error) * h.activation(nn[3].layer_outputs[i+1], nn[1][i], True) + nn[3].weights[i] += (nn[2] * nn[3].layer_outputs[i] * error.T) + nn[3].biases[i] += nn[2] * error return nn @njit def train_batch(input_data, desired_output_data, nn): - new_nns = typed.List() + # new_nns = typed.List() length_input_data = len(input_data) + + # print(1, nn[3].weights) + for i in range(length_input_data): - new_nns.append(train_single(input_data[i], desired_output_data[i], tuple(np.copy(nn)))) - - for new_nn in new_nns: - nn[2] += new_nn[2] - nn[3] += new_nn[3] - nn[4] += new_nn[4] + train_single(input_data[i], desired_output_data[i], (nn[0], nn[1], nn[2], nn[3].copy()) ) + + # print(2, nn[3].weights) + + # length_updates = len(nn[3].weights) + # for new_nn in new_nns: + # for i in range(length_updates): + # nn[3].weights[i] += new_nn[3].weights[i] + # nn[3].biases[i] += new_nn[3].biases[i] + + # for i in range(length_updates+1): + # nn[3].layer_outputs[i] += new_nn[3].layer_outputs[i] + + # print(3, nn[3].weights) + + # for i in range(length_updates): + # nn[3].weights[i] /= length_input_data+1 + # nn[3].biases[i] /= length_input_data+1 + # for i in range(length_updates+1): + # nn[3].layer_outputs[i] /= length_input_data+1 + + # print(4, nn[3].weights) - nn[2] /= length_input_data+1 - nn[3] /= length_input_data+1 - nn[4] /= length_input_data+1 @njit(parallel=True) def calculate_MSE(input_data, desired_output_data, nn): @@ -146,11 +165,11 @@ def train_auto(batch_size, train_input_data, train_desired_output_data, validate epochs += 1 previous_mse = calculate_MSE(validate_input_data, validate_output_data, nn) # train_batch(train_input_data, train_desired_output_data, nn) - for i in range(len(train_input_data)): - train_single(train_input_data[i], train_desired_output_data[i], nn) - # for i in range(n_iterations): - # b, e = i*batch_size, i*batch_size+batch_size - # train_batch(train_input_data[b:e], train_desired_output_data[b:e], nn) + # for i in range(len(train_input_data)): + # train_single(train_input_data[i], train_desired_output_data[i], nn) + for i in range(n_iterations): + b, e = i*batch_size, i*batch_size+batch_size + train_batch(train_input_data[b:e], train_desired_output_data[b:e], nn) current_mse = calculate_MSE(validate_input_data, validate_output_data, nn) return epochs, current_mse @@ -171,5 +190,5 @@ def evaluate(input_data, desired_output_data, nn): @njit def print_weights_and_biases(nn): - print(nn[2]) - print(nn[3]) + print(nn[3].weights) + print(nn[3].biases)