
Commit 82ee2c8

Added sample code for quadratic cost for a single neuron.
1 parent 2da41b6 commit 82ee2c8

7 files changed

Lines changed: 161 additions & 12 deletions


digit_prediction.png

6.76 KB

guess_test.py

Lines changed: 35 additions & 0 deletions
import mnist_loader
import network
import numpy as np
import matplotlib.pyplot as plt

# Load data
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
# Ensure test_data is a list of (x, y) pairs
test_data = list(test_data)

# Pick one sample
x, y = test_data[0]

# Create and train a network
net = network.Network([784, 30, 10])
training_data = list(training_data)
net.SGD(training_data, 1, 10, 3.0)

# Call the guess method defined in network.py
pred_by_guess = net.guess(x)

# Reshape x from (784, 1) to (28, 28) and display as an image
x_image = x.reshape(28, 28)
plt.figure(figsize=(6, 6))
plt.imshow(x_image, cmap='gray')
plt.title(f"Actual: {y}, Predicted: {int(pred_by_guess)}")
plt.axis('off')
plt.tight_layout()
plt.savefig('digit_prediction.png')
plt.show()
print("Image saved as digit_prediction.png")


print("Actual label:", y)
print("net.guess predicted:", int(pred_by_guess))

network.py

Lines changed: 10 additions & 1 deletion
@@ -84,7 +84,7 @@ def update_mini_batch(self, mini_batch, eta):
         for x, y in mini_batch:
             delta_nabla_b, delta_nabla_w = self.backprop(x, y)
             nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
-            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
+            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
         self.weights = [w-(eta/len(mini_batch))*nw
                         for w, nw in zip(self.weights, nabla_w)]
         self.biases = [b-(eta/len(mini_batch))*nb
@@ -95,6 +95,7 @@ def backprop(self, x, y):
         gradient for the cost function C_x.  ``nabla_b`` and
         ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
         to ``self.biases`` and ``self.weights``."""
+        # print("weights first row: {}".format(self.weights[0]))
         nabla_b = [np.zeros(b.shape) for b in self.biases]
         nabla_w = [np.zeros(w.shape) for w in self.weights]
         # feedforward
@@ -139,6 +140,14 @@ def cost_derivative(self, output_activations, y):
         \partial a for the output activations."""
         return (output_activations-y)

+    def guess(self, x):
+        """Return the index of the output neuron with the highest activation."""
+        activation = x
+
+        for b, w in zip(self.biases, self.weights):
+            activation = sigmoid(np.dot(w, activation)+b)
+
+        return np.argmax(activation)
 #### Miscellaneous functions
 def sigmoid(z):
     """The sigmoid function."""
practice.py

Whitespace-only changes.

quadratic_cost_demo.py

Lines changed: 102 additions & 0 deletions
"""quadratic_cost_demo.py
~~~~~~~~~~~~~~~~~~~~~~~~

Demonstration of the quadratic cost function during gradient descent
training of a single sigmoid neuron.

"""

import numpy as np
import matplotlib.pyplot as plt


def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))


def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))


def quadraticCostDemo(epochs):
    """Demonstrate the quadratic cost function over training epochs."""
    w = 0.6
    b = 0.9
    x = 1
    y = 0  # desired output
    cost_history = {}  # dictionary to store cost values

    # Enable interactive mode for real-time plotting
    plt.ion()
    fig, ax = plt.subplots(figsize=(10, 6))

    for i in range(1, epochs + 1):
        z = w * x + b
        a = sigmoid(z)
        cost = 0.5 * (a - y) ** 2
        cost_history[i] = cost

        # Calculate gradients of the cost with respect to w and b
        nabla_cW = (a - y) * sigmoid_prime(z) * x
        nabla_cB = (a - y) * sigmoid_prime(z)

        # Update weight and bias (implicit learning rate of 1)
        w = w - nabla_cW
        b = b - nabla_cB

        # Update plot in real time
        if i % 5 == 0 or i == 1:  # update every 5 epochs for better visualization
            ax.clear()
            epoch_list = sorted(cost_history.keys())
            cost_list = [cost_history[e] for e in epoch_list]

            ax.plot(epoch_list, cost_list, 'b-', linewidth=2)
            ax.set_xlabel('Epoch', fontsize=12)
            ax.set_ylabel('Quadratic Cost', fontsize=12)
            ax.set_title('Quadratic Cost Function Over Training Epochs', fontsize=14)
            ax.grid(True, alpha=0.3)
            plt.tight_layout()
            plt.pause(0.01)  # small pause to see the update

    # Turn off interactive mode
    plt.ioff()

    return cost_history


def plot_cost_history(cost_history):
    """Plot the cost function over epochs and save the figure."""
    epochs = sorted(cost_history.keys())
    costs = [cost_history[epoch] for epoch in epochs]

    plt.figure(figsize=(10, 6))
    plt.plot(epochs, costs, 'b-', linewidth=2)
    plt.xlabel('Epoch', fontsize=12)
    plt.ylabel('Quadratic Cost', fontsize=12)
    plt.title('Quadratic Cost Function Over Training Epochs', fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('quadratic_cost_plot.png', dpi=150, bbox_inches='tight')
    print("Plot saved to quadratic_cost_plot.png")
    plt.show()


if __name__ == "__main__":
    # Run the demo with 300 epochs
    epochs = 300
    cost_history = quadraticCostDemo(epochs)

    # Save and show the final plot
    plot_cost_history(cost_history)
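The gradient expressions in quadraticCostDemo follow from the chain rule applied to the single-neuron model; written out in LaTeX:

C = \frac{1}{2}(a - y)^2, \qquad a = \sigma(z), \qquad z = wx + b

\frac{\partial C}{\partial w} = (a - y)\,\sigma'(z)\,x, \qquad \frac{\partial C}{\partial b} = (a - y)\,\sigma'(z)

With x = 1 and y = 0 as in the demo, both gradients reduce to a\,\sigma'(z), so learning is slow whenever the sigmoid is saturated (\sigma'(z) near zero) even though the cost itself is large.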

quadratic_cost_plot.png

46.4 KB

test.py

Lines changed: 14 additions & 11 deletions
@@ -17,19 +17,18 @@
 
 # ----------------------
 # - read the input data:
-'''
+
 import mnist_loader
 training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
 training_data = list(training_data)
-'''
 # ---------------------
 # - network.py example:
-#import network
+import network
+
 
-'''
 net = network.Network([784, 30, 10])
-net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
-'''
+net.SGD(training_data, 1, 10, 3.0, test_data=test_data)
+
 
 # ----------------------
 # - network2.py example:
@@ -124,6 +123,7 @@
 
 
 """
+'''
 def testTheano():
     from theano import function, config, shared, sandbox
     import theano.tensor as T
@@ -149,17 +149,18 @@ def testTheano():
         print('Used the gpu')
 # Perform check:
 #testTheano()
+'''
 
 
 # ----------------------
 # - network3.py example:
-import network3
-from network3 import Network, ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer # softmax plus log-likelihood cost is more common in modern image classification networks.
+# import network3
+# from network3 import Network, ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer # softmax plus log-likelihood cost is more common in modern image classification networks.
 
 # read data:
-training_data, validation_data, test_data = network3.load_data_shared()
-# mini-batch size:
-mini_batch_size = 10
+# training_data, validation_data, test_data = network3.load_data_shared()
+# # mini-batch size:
+# mini_batch_size = 10
 
 # chapter 6 - shallow architecture using just a single hidden layer, containing 100 hidden neurons.
 '''
@@ -195,6 +196,7 @@
 '''
 
 # chapter 6 - rectified linear units and some l2 regularization (lmbda=0.1) => even better accuracy
+'''
 from network3 import ReLU
 net = Network([
     ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
@@ -208,3 +210,4 @@
     FullyConnectedLayer(n_in=40*4*4, n_out=100, activation_fn=ReLU),
     SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
 net.SGD(training_data, 60, mini_batch_size, 0.03, validation_data, test_data, lmbda=0.1)
+'''
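For context, Network.SGD in network.py has the signature SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None), so the change in the first hunk simply cuts training from 30 epochs to 1, presumably to speed up experimentation:

net.SGD(training_data, 1, 10, 3.0, test_data=test_data)  # 1 epoch, mini-batch size 10, eta = 3.0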
