Skip to content

Commit 057d0c2

Browse files
committed
Adding MNIST, CIFAR10
1 parent 4ac4aa1 commit 057d0c2

9 files changed

Lines changed: 688 additions & 0 deletions

File tree

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/usr/bin/env sh
# This script downloads the CIFAR10 (binary version) data and unzips it.
set -e

# Run from the directory that contains this script, whatever the caller's
# working directory is.  The path is resolved in its own assignment so that
# `set -e` aborts when the directory cannot be entered, instead of silently
# carrying on in the wrong place.
script_dir="$( cd "$(dirname "$0")" && pwd -P )"
cd "$script_dir"

echo "Downloading..."

# -O pins the output file name: without it, a stale or partial tarball left
# behind by an aborted run makes wget save to "cifar-10-binary.tar.gz.1" and
# tar would then unpack the old file.
wget --no-check-certificate -O cifar-10-binary.tar.gz http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz

echo "Unzipping..."

# Unpack, then flatten the extracted directory into the script directory.
tar -xf cifar-10-binary.tar.gz && rm -f cifar-10-binary.tar.gz
mv cifar-10-batches-bin/* . && rm -rf cifar-10-batches-bin

# Creation is split out because leveldb sometimes causes segfault
# and needs to be re-created.

echo "Done."
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
set -e

# Run from the directory that contains this script.  The path is resolved in
# its own assignment so that `set -e` aborts if the directory cannot be
# entered; inside a plain "$( cd ...; pwd )" the failure would go unnoticed.
script_dir="$( cd "$(dirname "$0")" && pwd -P )"
cd "$script_dir"

# Create CIFAR10 train + test databases
make_cifar_db --db lmdb --input_folder "$(pwd)" --output_train_db_name cifar10_train_lmdb --output_test_db_name cifar10_test_lmdb
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
#!/usr/bin/env python
2+
"""Example: train a model on CIFAR10."""
3+
from __future__ import division, print_function
4+
5+
import argparse
6+
import functools
7+
import logging
8+
import os.path
9+
10+
from caffe2.python import brew, core, data_parallel_model, optimizer, workspace
11+
from caffe2.python.core import DataType
12+
from caffe2.python.model_helper import ModelHelper
13+
from caffe2.python.modeling.initializers import Initializer, pFP16Initializer
14+
15+
16+
# Configure the root logger with the logging module's defaults.
logging.basicConfig()
17+
18+
# Number of entries assumed in the training database; main() divides it by
# BATCH_SIZE to get the iterations per epoch, and AddOptimizerOps() uses it
# to derive the learning-rate step size.
TRAIN_ENTRIES = 50000
19+
# Number of entries assumed in the test database (iterations per test pass).
TEST_ENTRIES = 10000
20+
# Total batch size per iteration; split between the devices with integer
# division when the models are built.
BATCH_SIZE = 100
21+
# Number of passes over the training data.
EPOCHS = 10
22+
# The training loss is printed every DISPLAY iterations.
DISPLAY = 100
23+
# main() raises RuntimeError when the final test accuracy falls outside
# [ACCURACY_MIN, ACCURACY_MAX].
ACCURACY_MIN = 0.7
24+
ACCURACY_MAX = 0.8
25+
26+
27+
def AddInputOps(model, reader, batch_size, dtype):
    """Attach the image input pipeline to ``model``.

    Adds an ``image_input`` op that reads from ``reader`` and produces the
    'data' and 'label' blobs, then stops gradients at the data blob.

    Args:
        model: ModelHelper the ops are added to.
        reader: DB reader blob the images are read from.
        batch_size: Number of images produced per iteration.
        dtype: DataType.FLOAT16 selects 'float16' output; anything else
            selects 'float'.
    """
    if dtype == DataType.FLOAT16:
        output_type = 'float16'
    else:
        output_type = 'float'

    outputs = brew.image_input(
        model,
        [reader],
        ['data', 'label'],
        batch_size=batch_size,
        use_caffe_datum=False,
        use_gpu_transform=True,
        scale=32,
        crop=32,
        mirror=1,
        color=True,
        mean=128.0,
        output_type=output_type,
        is_test=False)
    images, _labels = outputs
    # No gradient is needed with respect to the input images.
    model.StopGradient(images, images)
36+
37+
38+
def AddForwardPassOps(model, loss_scale, dtype):
    """Build the CIFAR10 network on ``model`` and return its losses.

    The network reads the 'data' and 'label' blobs and consists of three
    convolution + pooling stages, two fully connected layers, a softmax
    loss and an accuracy op.

    Args:
        model: ModelHelper the ops are added to.
        loss_scale: Factor the loss blob is multiplied by.
        dtype: DataType.FLOAT16 builds the conv/fc layers with
            pFP16Initializer and converts the final layer's output with
            HalfToFloat before the loss; any other value uses Initializer.

    Returns:
        A one-element list containing the scaled loss blob.
    """
    half_precision = dtype == DataType.FLOAT16
    if half_precision:
        param_initializer = pFP16Initializer
    else:
        param_initializer = Initializer

    with brew.arg_scope([brew.conv, brew.fc],
                        WeightInitializer=param_initializer,
                        BiasInitializer=param_initializer):
        # Stage 1: conv -> max pool -> relu
        stage1 = brew.conv(
            model, 'data', 'conv1', 3, 32, 5, pad=2,
            weight_init=('GaussianFill', {'std': 0.0001, 'mean': 0.0}))
        stage1 = brew.max_pool(model, stage1, 'pool1', kernel=3, stride=2)
        stage1 = brew.relu(model, stage1, 'relu1')

        # Stage 2: conv -> in-place relu -> average pool
        stage2 = brew.conv(
            model, stage1, 'conv2', 32, 32, 5, pad=2,
            weight_init=('GaussianFill', {'std': 0.01}))
        stage2 = brew.relu(model, stage2, stage2)
        stage2 = brew.average_pool(model, stage2, 'pool2',
                                   kernel=3, stride=2)

        # Stage 3: conv -> in-place relu -> average pool
        stage3 = brew.conv(
            model, stage2, 'conv3', 32, 64, 5, pad=2,
            weight_init=('GaussianFill', {'std': 0.01}))
        stage3 = brew.relu(model, stage3, stage3)
        stage3 = brew.average_pool(model, stage3, 'pool3',
                                   kernel=3, stride=2)

        # Classifier: two fully connected layers ending in 10 outputs
        hidden = brew.fc(
            model, stage3, 'fc1', 64 * 3 * 3, 64,
            weight_init=('GaussianFill', {'std': 0.1}))
        logits = brew.fc(
            model, hidden, 'fc2', 64, 10,
            weight_init=('GaussianFill', {'std': 0.1}))

    if half_precision:
        # Convert the half-precision output to float before the loss.
        logits = model.net.HalfToFloat(logits, logits + '_fp32')

    softmax, loss = model.SoftmaxWithLoss(
        [logits, 'label'], ['softmax', 'loss'])
    loss = model.Scale(loss, loss, scale=loss_scale)
    brew.accuracy(model, [softmax, 'label'], 'accuracy')
    return [loss]
69+
70+
71+
def AddOptimizerOps(model):
    """Attach weight decay and a step-policy SGD optimizer to ``model``.

    The learning-rate step size equals the total number of training
    iterations (TRAIN_ENTRIES * EPOCHS // BATCH_SIZE).
    """
    optimizer.add_weight_decay(model, 0.004)

    total_iterations = TRAIN_ENTRIES * EPOCHS // BATCH_SIZE
    sgd_options = {
        'policy': 'step',
        'stepsize': total_iterations,
        'gamma': 0.1,
        'momentum': 0.9,
        'nesterov': False,
    }
    optimizer.build_sgd(model, 0.001, **sgd_options)
79+
80+
81+
def AddPostSyncOps(model):
    """Add the ops that must run after the initial parameter sync.

    For every optimized parameter that carries a ``blob_copy``, a
    HalfToFloat op is added to ``param_init_net`` that writes the parameter
    blob into its FLOAT copy.
    """
    param_infos = model.GetOptimizationParamInfo(model.GetParams())
    for info in param_infos:
        if info.blob_copy is None:
            continue
        # Ensure copies are in sync after initial broadcast
        float_copy = info.blob_copy[core.DataType.FLOAT]
        model.param_init_net.HalfToFloat(info.blob, float_copy)
90+
91+
92+
def createTrainModel(lmdb_path, devices, dtype):
    """Build, initialise and return the multi-GPU training model.

    Args:
        lmdb_path: Path of the training LMDB.
        devices: GPU ids the model is replicated on; BATCH_SIZE is split
            between them with integer division.
        dtype: DataType forwarded to the input and forward-pass builders.

    Returns:
        The ModelHelper named 'train'; its parameters have been initialised
        and its net has been created in the workspace.
    """
    model = ModelHelper(name='train', arg_scope={'order': 'NCHW'})
    reader = model.CreateDB('train_reader', db=lmdb_path, db_type='lmdb')

    per_device_batch = BATCH_SIZE // len(devices)
    build_inputs = functools.partial(
        AddInputOps, reader=reader, batch_size=per_device_batch, dtype=dtype)
    build_forward = functools.partial(AddForwardPassOps, dtype=dtype)

    data_parallel_model.Parallelize_GPU(
        model,
        input_builder_fun=build_inputs,
        forward_pass_builder_fun=build_forward,
        optimizer_builder_fun=AddOptimizerOps,
        post_sync_builder_fun=AddPostSyncOps,
        devices=devices,
        use_nccl=True)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    return model
109+
110+
111+
def createTestModel(lmdb_path, devices, dtype):
    """Build, initialise and return the multi-GPU test model.

    The model is created with ``init_params=False`` and without a parameter
    update builder, so it contains no training ops.

    Args:
        lmdb_path: Path of the test LMDB.
        devices: GPU ids the model is replicated on; BATCH_SIZE is split
            between them with integer division.
        dtype: DataType forwarded to the input and forward-pass builders.

    Returns:
        The ModelHelper named 'test', with its net created in the workspace.
    """
    model = ModelHelper(
        name='test', arg_scope={'order': 'NCHW'}, init_params=False)
    reader = model.CreateDB('test_reader', db=lmdb_path, db_type='lmdb')

    per_device_batch = BATCH_SIZE // len(devices)
    build_inputs = functools.partial(
        AddInputOps, reader=reader, batch_size=per_device_batch, dtype=dtype)
    build_forward = functools.partial(AddForwardPassOps, dtype=dtype)

    data_parallel_model.Parallelize_GPU(
        model,
        input_builder_fun=build_inputs,
        forward_pass_builder_fun=build_forward,
        param_update_builder_fun=None,
        devices=devices)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    return model
128+
129+
130+
def getArgs():
    """Parse the command line and return the resulting namespace.

    After parsing, the namespace is normalised:

    * ``dtype`` is converted from its string form to a ``DataType`` value.
    * ``gpus`` is always a non-empty list of integer device ids, taken
      (in order of precedence) from ``--all-gpus``, ``--gpus`` or
      ``--num_gpus``.
    * ``num_gpus`` always equals ``len(gpus)``.

    Invalid device selections (an empty device list, or a ``--gpus`` value
    that is not a comma separated list of integers) are reported through
    ``parser.error``, which prints a usage message and exits.
    """
    CURDIR = os.path.dirname(__file__)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--train-lmdb', help='Path to training LMDB',
                        default=os.path.join(CURDIR, 'cifar10_train_lmdb'))
    parser.add_argument('--test-lmdb', help='Path to test LMDB',
                        default=os.path.join(CURDIR, 'cifar10_test_lmdb'))
    parser.add_argument('--dtype', choices=['float', 'float16'],
                        default='float', help='Data type used for training')
    parser.add_argument('--gpus',
                        help='Comma separated list of GPU devices to use')
    parser.add_argument('--num_gpus', type=int, default=1,
                        help='Number of GPU devices (instead of --gpus)')
    parser.add_argument('--all-gpus', action='store_true',
                        help='Use all GPUs in the system')
    args = parser.parse_args()

    args.dtype = (DataType.FLOAT16 if args.dtype == 'float16'
                  else DataType.FLOAT)

    # Always materialise a real list: on Python 3 a bare range() would give
    # callers a different type than the --gpus branch does.
    if args.all_gpus:
        args.gpus = list(range(workspace.NumCudaDevices()))
    elif args.gpus is not None:
        try:
            args.gpus = [int(x) for x in args.gpus.split(',')]
        except ValueError:
            parser.error(
                '--gpus must be a comma separated list of integers')
    else:
        args.gpus = list(range(args.num_gpus))

    # The batch size is later divided by the number of devices and the
    # first device id is used to fetch the loss, so an empty list cannot
    # work; fail here with a clear message instead.
    if not args.gpus:
        parser.error('at least one GPU device is required')

    args.num_gpus = len(args.gpus)
    return args
163+
164+
165+
def main(args):
    """Train the CIFAR10 model and evaluate it after every epoch.

    Args:
        args: Namespace providing ``train_lmdb``, ``test_lmdb``, ``gpus``
            and ``dtype``.

    Raises:
        RuntimeError: If the accuracy of the last test pass lies outside
            [ACCURACY_MIN, ACCURACY_MAX].
    """
    train_model = createTrainModel(args.train_lmdb, args.gpus, args.dtype)
    test_model = createTestModel(args.test_lmdb, args.gpus, args.dtype)

    train_net_name = train_model.net.Proto().name
    test_net_name = test_model.net.Proto().name
    train_iter_per_epoch = TRAIN_ENTRIES // BATCH_SIZE
    test_iter_per_epoch = TEST_ENTRIES // BATCH_SIZE
    num_devices = len(args.gpus)
    # The training loss is only reported for the first device.
    scope_prefix = 'gpu_%d/' % args.gpus[0]

    for epoch in range(1, EPOCHS + 1):
        # Train
        for iteration in range(1, train_iter_per_epoch + 1):
            workspace.RunNet(train_net_name)
            if iteration % DISPLAY == 0:
                loss = workspace.FetchBlob(scope_prefix + 'loss')
                print("Epoch %d/%d, iteration %4d/%d, loss=%f" % (
                    epoch, EPOCHS, iteration, train_iter_per_epoch, loss))

        # Test
        losses = []
        accuracies = []
        for _ in range(test_iter_per_epoch):
            workspace.RunNet(test_net_name)
            # Take average values across all GPUs
            loss_total = sum(
                workspace.FetchBlob('gpu_%d/loss' % g) for g in args.gpus)
            losses.append(loss_total / num_devices)
            accuracy_total = sum(
                workspace.FetchBlob('gpu_%d/accuracy' % g)
                for g in args.gpus)
            accuracies.append(accuracy_total / num_devices)

        loss = sum(losses) / len(losses)
        accuracy = sum(accuracies) / len(accuracies)
        print("Test loss: %f, accuracy: %f" % (loss, accuracy))

    # Sanity check on the accuracy of the final epoch.
    out_of_range = accuracy < ACCURACY_MIN or accuracy > ACCURACY_MAX
    if out_of_range:
        raise RuntimeError(
            "Final accuracy %f is not in the expected range [%f, %f]" %
            (accuracy, ACCURACY_MIN, ACCURACY_MAX))
204+
205+
206+
# Script entry point: initialise Caffe2 with the '--caffe2_log_level=0' flag,
# then parse the command line and run training and testing.
if __name__ == '__main__':
207+
core.GlobalInit(['caffe2', '--caffe2_log_level=0'])
208+
main(getArgs())
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.mdb
2+
*-ubyte
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/usr/bin/env sh
# This script downloads the MNIST data and unzips it.

# Stop at the first failing command so that a failed download is not
# followed by gunzip on a missing or partial file.
set -e

# Run from the directory that contains this script.  The path is resolved in
# its own assignment so that `set -e` aborts when the directory cannot be
# entered.
script_dir="$( cd "$(dirname "$0")" && pwd -P )"
cd "$script_dir"

echo "Downloading..."

for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
    # Files that are already present (downloaded and unpacked) are skipped.
    if [ ! -e "$fname" ]; then
        # -O pins the output name so a partial archive left by an aborted
        # run is overwritten instead of wget saving to "<name>.gz.1".
        wget --no-check-certificate -O "${fname}.gz" "http://yann.lecun.com/exdb/mnist/${fname}.gz"
        gunzip "${fname}.gz"
    fi
done
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
# Stop at the first failing command so that the test database is not built
# (and the script does not report success) after the training database
# failed to build.
set -e

# Run from the directory that contains this script.  The path is resolved in
# its own assignment so that `set -e` aborts when the directory cannot be
# entered.
script_dir="$( cd "$(dirname "$0")" && pwd -P )"
cd "$script_dir"

# Create MNIST databases from previously downloaded data
make_mnist_db --db lmdb --image_file train-images-idx3-ubyte --label_file train-labels-idx1-ubyte --output_file mnist_train_lmdb
make_mnist_db --db lmdb --image_file t10k-images-idx3-ubyte --label_file t10k-labels-idx1-ubyte --output_file mnist_test_lmdb
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#!/usr/bin/env python
2+
"""Example: train LeNet on MNIST."""
3+
from __future__ import division, print_function
4+
5+
import argparse
6+
import os.path
7+
8+
import numpy as np
9+
10+
from caffe2.proto import caffe2_pb2
11+
from caffe2.python import brew, core, optimizer, workspace
12+
from caffe2.python.model_helper import ModelHelper
13+
14+
15+
# Number of entries assumed in the training database; main() divides it by
# BATCH_SIZE to get the iterations per epoch.
TRAIN_ENTRIES = 60000
16+
# Number of entries assumed in the test database (iterations per test pass).
TEST_ENTRIES = 10000
17+
# Number of images per iteration, for both training and testing.
BATCH_SIZE = 100
18+
# Number of passes over the training data.
EPOCHS = 4
19+
# The training loss is printed every DISPLAY iterations.
DISPLAY = 100
20+
# main() raises RuntimeError when the final test accuracy falls outside
# [ACCURACY_MIN, ACCURACY_MAX].
ACCURACY_MIN = 0.98
21+
ACCURACY_MAX = 0.999
22+
23+
24+
def AddInputOps(model, reader, batch_size):
    """Attach the image input pipeline to ``model``.

    Adds an ``image_input`` op that reads from ``reader`` and produces the
    'data' and 'label' blobs, then stops gradients at the data blob.

    Args:
        model: ModelHelper the ops are added to.
        reader: DB reader blob the images are read from.
        batch_size: Number of images produced per iteration.
    """
    outputs = brew.image_input(
        model,
        [reader],
        ['data', 'label'],
        batch_size=batch_size,
        use_caffe_datum=False,
        use_gpu_transform=True,
        scale=28,
        crop=28,
        mirror=False,
        color=False,
        mean=128.0,
        std=256.0,
        is_test=False)
    images, _labels = outputs
    # No gradient is needed with respect to the input images.
    model.StopGradient(images, images)
32+
33+
34+
def AddForwardPassOps(model):
    """Build the LeNet network on ``model`` and return its losses.

    The network reads the 'data' and 'label' blobs: two convolution +
    max-pool stages, a fully connected layer with in-place relu, and a
    10-output prediction layer feeding a softmax loss and an accuracy op.

    Returns:
        A one-element list containing the loss blob.
    """
    # Feature extractor: two conv + max-pool stages
    features = brew.conv(model, 'data', 'conv1', 1, 20, 5)
    features = brew.max_pool(model, features, 'pool1', kernel=2, stride=2)
    features = brew.conv(model, features, 'conv2', 20, 50, 5)
    features = brew.max_pool(model, features, 'pool2', kernel=2, stride=2)

    # Classifier
    hidden = brew.fc(model, features, 'fc3', 50 * 4 * 4, 500)
    hidden = brew.relu(model, hidden, hidden)
    prediction = brew.fc(model, hidden, 'pred', 500, 10)

    softmax, loss = model.SoftmaxWithLoss(
        [prediction, 'label'], ['softmax', 'loss'])
    brew.accuracy(model, [softmax, 'label'], 'accuracy')
    return [loss]
46+
47+
48+
def AddOptimizerOps(model):
    """Attach a step-policy SGD optimizer with momentum to ``model``."""
    sgd_options = {
        'policy': 'step',
        'stepsize': 1,
        'gamma': 0.999,
        'momentum': 0.9,
        'nesterov': False,
    }
    optimizer.build_sgd(model, 0.01, **sgd_options)
53+
54+
55+
def createTrainModel(lmdb_path):
    """Build, initialise and return the training model.

    Args:
        lmdb_path: Path of the training LMDB.

    Returns:
        The ModelHelper named 'train', including gradient and optimizer
        ops; its parameters have been initialised and its net has been
        created in the workspace.
    """
    model = ModelHelper(name='train', arg_scope={'order': 'NCHW'})
    train_reader = model.CreateDB(
        'train_reader', db=lmdb_path, db_type='lmdb')

    # Forward pass, backward pass, then parameter updates.
    AddInputOps(model, train_reader, BATCH_SIZE)
    model.AddGradientOperators(AddForwardPassOps(model))
    AddOptimizerOps(model)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    return model
67+
68+
69+
def createTestModel(lmdb_path):
    """Build, initialise and return the test model.

    The model is created with ``init_params=False`` and only contains the
    input and forward-pass ops, so no training ops are added.

    Args:
        lmdb_path: Path of the test LMDB.

    Returns:
        The ModelHelper named 'test', with its net created in the workspace.
    """
    model = ModelHelper(
        name='test', arg_scope={'order': 'NCHW'}, init_params=False)
    test_reader = model.CreateDB(
        'test_reader', db=lmdb_path, db_type='lmdb')

    AddInputOps(model, test_reader, BATCH_SIZE)
    AddForwardPassOps(model)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    return model
79+
80+
81+
def getArgs():
    """Parse the command line and return the resulting namespace.

    Both LMDB paths default to databases located next to this script.
    """
    script_dir = os.path.dirname(__file__)
    default_train = os.path.join(script_dir, 'mnist_train_lmdb')
    default_test = os.path.join(script_dir, 'mnist_test_lmdb')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--train-lmdb', help='Path to training LMDB', default=default_train)
    parser.add_argument(
        '--test-lmdb', help='Path to test LMDB', default=default_test)
    return parser.parse_args()
92+
93+
94+
def main(args):
    """Train LeNet on MNIST and evaluate it after every epoch.

    Both models are built inside a CUDA device scope for device 0.

    Args:
        args: Namespace providing ``train_lmdb`` and ``test_lmdb``.

    Raises:
        RuntimeError: If the accuracy of the last test pass lies outside
            [ACCURACY_MIN, ACCURACY_MAX].
    """
    device = 0
    cuda_device = core.DeviceOption(caffe2_pb2.CUDA, device)
    with core.DeviceScope(cuda_device):
        train_model = createTrainModel(args.train_lmdb)
        test_model = createTestModel(args.test_lmdb)

    train_net_name = train_model.net.Proto().name
    test_net_name = test_model.net.Proto().name
    train_iter_per_epoch = TRAIN_ENTRIES // BATCH_SIZE
    test_iter_per_epoch = TEST_ENTRIES // BATCH_SIZE

    for epoch in range(1, EPOCHS + 1):
        # Train
        for iteration in range(1, train_iter_per_epoch + 1):
            workspace.RunNet(train_net_name)
            if iteration % DISPLAY == 0:
                loss = workspace.FetchBlob('loss')
                print("Epoch %d/%d, iteration %4d/%d, loss=%f" % (
                    epoch, EPOCHS, iteration, train_iter_per_epoch, loss))

        # Test
        losses = []
        accuracies = []
        for _ in range(test_iter_per_epoch):
            workspace.RunNet(test_net_name)
            losses.append(workspace.FetchBlob('loss'))
            accuracies.append(workspace.FetchBlob('accuracy'))

        # Average the per-batch values over the whole test pass.
        loss_values = np.array(losses)
        accuracy_values = np.array(accuracies)
        loss = loss_values.mean()
        accuracy = accuracy_values.mean()
        print("Test loss: %f, accuracy: %f" % (loss, accuracy))

    # Sanity check on the accuracy of the final epoch.
    out_of_range = accuracy < ACCURACY_MIN or accuracy > ACCURACY_MAX
    if out_of_range:
        raise RuntimeError(
            "Final accuracy %f is not in the expected range [%f, %f]" %
            (accuracy, ACCURACY_MIN, ACCURACY_MAX))
129+
130+
131+
# Script entry point: initialise Caffe2 with the '--caffe2_log_level=0' flag,
# then parse the command line and run training and testing.
if __name__ == '__main__':
132+
core.GlobalInit(['caffe2', '--caffe2_log_level=0'])
133+
main(getArgs())

0 commit comments

Comments
 (0)