Skip to content

Commit a644350

Browse files
committed
Updating models and adding BERT/PyT
Tacotron2+Waveglow/PyT * AMP support * Data preprocessing for Tacotron 2 training * Fixed dropouts on LSTMCells SSD/PyT * script and notebook for inference * AMP support * README update * updates to examples/* BERT/PyT * initial release GNMT/PyT * Default container updated to NGC PyTorch 19.05-py3 * Mixed precision training implemented using APEX AMP * Added inference throughput and latency results on NVIDIA Tesla V100 16G * Added option to run inference on user-provided raw input text from command line NCF/PyT * Updated performance tables. * Default container changed to PyTorch 19.06-py3. * Caching validation negatives between runs Transformer/PyT * new README * jit support added UNet Medical/TF * inference example scripts added * inference benchmark measuring latency added * TRT/TF-TRT support added * README updated GNMT/TF * Performance improvements Small updates (mostly README) for other models.
1 parent 3b3d0f6 commit a644350

File tree

109 files changed

+6441
-2413
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

109 files changed

+6441
-2413
lines changed

PyTorch/Classification/RN50v1.5/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def add_parser_arguments(parser):
4040

4141
parser.add_argument('data', metavar='DIR',
4242
help='path to dataset')
43-
parser.add_argument('--data-backend', metavar='BACKEND', default='pytorch',
43+
parser.add_argument('--data-backend', metavar='BACKEND', default='dali-cpu',
4444
choices=DATA_BACKEND_CHOICES)
4545

4646
parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet50',

PyTorch/Detection/SSD/Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
FROM nvcr.io/nvidia/pytorch:19.05-py3
22

33
# Set working directory
4-
WORKDIR /mlperf
4+
WORKDIR /workspace
5+
6+
ENV PYTHONPATH "${PYTHONPATH}:/workspace"
57

68
RUN apt-get update && apt-get install -y python3-tk python-pip git tmux htop tree
79

PyTorch/Detection/SSD/README.md

Lines changed: 459 additions & 160 deletions
Large diffs are not rendered by default.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import numpy as np
2+
import skimage
3+
4+
def load_image(image_path):
    """Load the image at *image_path* as a float HWC array in [0, 1].

    Code adapted from Loading_Pretrained_Models.ipynb - a Caffe2 tutorial.
    Grayscale (2-D) images are replicated into three identical channels so
    the result always has shape (H, W, 3).
    """
    # Removed unused locals (mean/std were never read; normalization is
    # handled separately by normalize()).
    img = skimage.img_as_float(skimage.io.imread(image_path))
    if len(img.shape) == 2:
        # Replicate the single channel into R, G, B. The original
        # np.array([img]*3).swapaxes(0, 2) produced (W, H, 3), silently
        # transposing the spatial axes of non-square grayscale images;
        # stacking on the last axis keeps (H, W, 3).
        img = np.stack([img, img, img], axis=-1)
    return img
11+
12+
def rescale(img, input_height, input_width):
    """Resize *img* preserving its aspect ratio.

    Code from Loading_Pretrained_Models.ipynb - a Caffe2 tutorial.
    The shorter spatial side is scaled to the target size; the longer side
    grows proportionally (and is trimmed later by crop_center).
    """
    aspect = img.shape[1] / float(img.shape[0])
    if aspect > 1:
        # Landscape orientation - wide image.
        scaled_long = int(aspect * input_height)
        return skimage.transform.resize(img, (input_width, scaled_long))
    elif aspect < 1:
        # Portrait orientation - tall image.
        scaled_long = int(input_width / aspect)
        return skimage.transform.resize(img, (scaled_long, input_height))
    else:
        # Already square.
        return skimage.transform.resize(img, (input_width, input_height))
26+
27+
def crop_center(img, cropx, cropy):
    """Return the central cropx-by-cropy window of an HWC image.

    Code from Loading_Pretrained_Models.ipynb - a Caffe2 tutorial.
    """
    height, width, _ = img.shape
    left = width // 2 - cropx // 2
    top = height // 2 - cropy // 2
    return img[top:top + cropy, left:left + cropx]
33+
34+
def normalize(img, mean=128, std=128):
    """Map a [0, 1] float image into roughly [-1, 1] via (img*256 - mean)/std."""
    scaled = img * 256
    return (scaled - mean) / std
37+
38+
def prepare_input(img_uri):
    """Load *img_uri* and return a normalized 300x300 HWC image for SSD300."""
    image = load_image(img_uri)
    resized = rescale(image, 300, 300)
    cropped = crop_center(resized, 300, 300)
    return normalize(cropped)
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# This script launches SSD300 training in FP16 on 1 GPUs using 64 batch size
22
# Usage bash SSD300_FP16_1GPU.sh <path to this repository> <path to dataset> <additional flags>
33

4-
python $1/main.py --backbone resnet50 --warmup 300 --bs 64 --fp16 --data $2 ${@:3}
4+
python $1/main.py --backbone resnet50 --warmup 300 --bs 64 --amp --data $2 ${@:3}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# This script launches SSD300 training in FP16 on 4 GPUs using 256 batch size (64 per GPU)
22
# Usage ./SSD300_FP16_4GPU.sh <path to this repository> <path to dataset> <additional flags>
33

4-
python -m torch.distributed.launch --nproc_per_node=4 $1/main.py --backbone resnet50 --warmup 300 --bs 64 --fp16 --data $2 ${@:3}
4+
python -m torch.distributed.launch --nproc_per_node=4 $1/main.py --backbone resnet50 --warmup 300 --bs 64 --amp --data $2 ${@:3}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# This script launches SSD300 training in FP16 on 8 GPUs using 512 batch size (64 per GPU)
22
# Usage ./SSD300_FP16_8GPU.sh <path to this repository> <path to dataset> <additional flags>
33

4-
python -m torch.distributed.launch --nproc_per_node=8 $1/main.py --backbone resnet50 --warmup 300 --bs 64 --fp16 --data $2 ${@:3}
4+
python -m torch.distributed.launch --nproc_per_node=8 $1/main.py --backbone resnet50 --warmup 300 --bs 64 --amp --data $2 ${@:3}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# This script evaluates SSD300 model in FP16 using 32 batch size on 1 GPU
22
# Usage: ./SSD300_FP16_EVAL.sh <path to this repository> <path to dataset> <path to checkpoint> <additional flags>
33

4-
python $1/main.py --backbone resnet50 --fp16 --ebs 32 --data $2 --mode evaluation --checkpoint $3 ${@:4}
4+
python $1/main.py --backbone resnet50 --amp --ebs 32 --data $2 --mode evaluation --checkpoint $3 ${@:4}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# This script launches SSD300 inference benchmark in FP16 on 1 GPU with 64 batch size
22
# Usage bash SSD300_FP16_INFERENCE_BENCHMARK.sh <path to this repository> <path to dataset> <additional flags>
33

4-
python $1/main.py --backbone resnet50 --mode benchmark-inference --bs 64 --fp16 --data $2 ${@:3}
4+
python $1/main.py --backbone resnet50 --mode benchmark-inference --bs 64 --amp --data $2 ${@:3}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import torch
2+
import numpy as np
3+
4+
from apex.fp16_utils import network_to_half
5+
6+
from dle.inference import prepare_input
7+
from src.model import SSD300, ResNet
8+
from src.utils import dboxes300_coco, Encoder
9+
10+
11+
def load_checkpoint(model, model_file):
    """Populate *model* in place from the checkpoint at *model_file*.

    The checkpoint stores the weights under the 'model' key with each key
    prefixed by 'module.1.' (a wrapper module from training); the prefix is
    stripped so the state dict matches the bare model.
    """
    state = torch.load(model_file)['model']
    stripped = {}
    for key, value in state.items():
        stripped[key.replace('module.1.', '')] = value
    model.load_state_dict(stripped)
15+
16+
17+
def build_predictor(model_file, backbone='resnet50'):
    """Construct an SSD300 with the given backbone and load checkpoint weights."""
    model = SSD300(backbone=ResNet(backbone))
    load_checkpoint(model, model_file)
    return model
22+
23+
24+
def prepare_model(checkpoint_path):
    """Build SSD300 from *checkpoint_path*, move it to the GPU in half
    precision, and switch it to eval mode."""
    model = build_predictor(checkpoint_path)
    # network_to_half comes from apex.fp16_utils (NOTE(review): deprecated
    # in favor of torch.cuda.amp in later stacks — confirm before porting).
    model = network_to_half(model.cuda())
    return model.eval()
31+
32+
33+
def prepare_tensor(inputs):
    """Stack a list of HWC images into an NCHW half-precision CUDA tensor."""
    batch_nhwc = np.array(inputs)
    # NHWC -> NCHW in a single axis permutation (equivalent to the two
    # successive swapaxes calls).
    batch_nchw = batch_nhwc.transpose(0, 3, 1, 2)
    return torch.from_numpy(batch_nchw).cuda().half()
41+
42+
43+
def decode_results(predictions):
    """Decode raw SSD300 (loc, label) outputs into per-image numpy detections.

    Returns one [bboxes, classes, confidences] list per image, with each
    element moved to the CPU as a numpy array.
    """
    encoder = Encoder(dboxes300_coco())
    locs, labels = (tensor.float() for tensor in predictions)
    batch = encoder.decode_batch(locs, labels, criteria=0.5, max_output=20)
    decoded = []
    for detections in batch:
        decoded.append([d.detach().cpu().numpy() for d in detections])
    return decoded
54+
55+
56+
def pick_best(detections, treshold):
    """Keep only the detections whose confidence exceeds *treshold*.

    Args:
        detections: (bboxes, classes, confidences) arrays for one image.
        treshold: exclusive confidence cutoff. (Parameter name kept as-is
            for backward compatibility with existing keyword-argument
            callers, typo included.)

    Returns:
        [bboxes, classes, confidences] restricted to the selected rows.
    """
    bboxes, classes, confidences = detections
    # Bug fix: the cutoff was hard-coded to 0.3 and the treshold parameter
    # was silently ignored; use the parameter.
    best = np.argwhere(confidences > treshold).squeeze(axis=1)

    return [pred[best] for pred in detections]
61+
62+
63+
def main(checkpoint_path, imgs):
    """Run SSD300 inference on the images in *imgs*.

    Returns, for each image, the detections with confidence above 0.3 as
    [bboxes, classes, confidences] numpy arrays.
    """
    batch = prepare_tensor([prepare_input(uri) for uri in imgs])
    model = prepare_model(checkpoint_path)

    raw_predictions = model(batch)

    decoded = decode_results(raw_predictions)
    return [pick_best(detections, treshold=0.3) for detections in decoded]
73+
74+
if __name__ == '__main__':
    # Demo: detect objects in a few COCO val2017 images (fetched by URI)
    # using a pre-trained checkpoint.
    detections = main(
        checkpoint_path='/checkpoints/SSD300v1.1.pt',
        imgs=['http://images.cocodataset.org/val2017/000000397133.jpg',
              'http://images.cocodataset.org/val2017/000000037777.jpg',
              'http://images.cocodataset.org/val2017/000000252219.jpg',
              ],
    )
    print(detections)

0 commit comments

Comments
 (0)