Skip to content

Commit dcd3bba

Browse files
hXl3snv-kkudrynski
authored and committed
[Convnets/TF] TF-TRT support
1 parent a0c9442 commit dcd3bba

File tree

15 files changed

+223
-103
lines changed

15 files changed

+223
-103
lines changed

TensorFlow/Classification/ConvNets/export_frozen_graph.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
import tensorflow as tf
2424

25-
import horovod.tensorflow as hvd
25+
from utils import hvd_wrapper as hvd
2626
from model import resnet
2727

2828
tf.app.flags.DEFINE_string(
@@ -75,8 +75,6 @@
7575

7676

7777
def main(_):
78-
79-
# Initialize Horovod (TODO: Remove dependency of horovod for freezing graphs)
8078
hvd.init()
8179

8280
if not FLAGS.output_file:
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import argparse
2+
import os
3+
import pathlib
4+
import time
5+
import tempfile
6+
7+
import tensorflow as tf
8+
import numpy as np
9+
10+
from tensorflow.python.compiler.tensorrt import trt_convert as trt
11+
12+
import dllogger
13+
14+
from runtime import runner_utils
15+
from runtime import runner
16+
from model.resnet import model_architectures
17+
from utils import data_utils
18+
from utils import hvd_wrapper as hvd
19+
20+
OUTPUT_SAVED_MODEL_PATH = tempfile.mkdtemp(prefix="tftrt-converted")
21+
LOG_FREQUENCY = 100
22+
23+
def argument_parser() -> argparse.Namespace:
    """Build the command-line interface and parse the process arguments.

    ``--model`` and ``--architecture`` are registered in one mutually
    exclusive group, so at most one of them may be supplied.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    cli = argparse.ArgumentParser()

    # Model source: an existing SavedModel, or an architecture name taken
    # from ``model_architectures``.
    model_source = cli.add_mutually_exclusive_group()
    model_source.add_argument(
        "--model",
        type=str,
        help="Saved model location to use for inference",
    )
    model_source.add_argument(
        "--architecture",
        type=str,
        choices=model_architectures.keys(),
    )

    # Remaining options, registered in the same order as before so the
    # generated --help output is unchanged.
    cli.add_argument(
        "--log-path",
        type=str,
        default="./log.json",
        help="Path to log file",
    )
    cli.add_argument("--tf-trt", action="store_true", help="Use TF-TRT for inference")
    cli.add_argument("--amp", action="store_true", help="Use AMP for inference")
    cli.add_argument("--data-dir", type=str, help="Localization of validation data")
    cli.add_argument(
        "--batch-size",
        type=int,
        default=1,
        help="Batch size for inference",
    )

    parsed = cli.parse_args()
    return parsed
38+
39+
def main(args: argparse.Namespace):
    """Run SavedModel inference, optionally through TF-TRT, and log results.

    Steps, in order:

    1. Initialise the Horovod wrapper and dllogger (JSON file + stdout).
    2. Pick the SavedModel to load: ``args.model`` if given, otherwise a
       model exported by a ``runner.Runner`` train/evaluate pass into a
       fresh temporary directory.
    3. With ``--tf-trt``, convert that SavedModel with ``TrtGraphConverter``
       (FP16 when ``--amp`` is set, FP32 otherwise) and load the converted
       copy instead. Without ``--tf-trt`` but with ``--amp``, set the
       ``TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE`` environment variable.
    4. Feed either TFRecord validation data (``--data-dir``) or a single
       random batch that is generated once and reused for every step.
    5. Log throughput every ``LOG_FREQUENCY`` steps and, at the end, the
       overall throughput and accuracy.

    Args:
        args: Parsed command-line options; the attributes read here are
            ``log_path``, ``model``, ``architecture``, ``amp``, ``tf_trt``,
            ``data_dir`` and ``batch_size``.
    """
    hvd.init()

    dllogger.init(backends=[
        dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=args.log_path),
        dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)
    ])
    dllogger.log(data=vars(args), step='PARAMETER')

    if args.model is None:
        # No SavedModel supplied: let the Runner produce one in a temp dir.
        saved_model_to_load = tempfile.mkdtemp(prefix="tftrt-savedmodel")
        r = runner.Runner(n_classes=1001, architecture=args.architecture, use_tf_amp=args.amp,
                          model_dir=saved_model_to_load)
        r.train("batch", 1, 1, args.batch_size, is_benchmark=True)
        r.evaluate("batch", 1, args.batch_size, export_dir=saved_model_to_load,
                   is_benchmark=True)

        # exported_path is decoded here, so it is expected to be bytes.
        saved_model_to_load = r.exported_path.decode("utf-8")
    else:
        saved_model_to_load = args.model

    output_tensor_name = "y_preds_ref:0" if not args.tf_trt else "ArgMax:0"
    batch_size = args.batch_size

    if args.tf_trt:
        converter = trt.TrtGraphConverter(input_saved_model_dir=str(saved_model_to_load),
                                          precision_mode="FP16" if args.amp else "FP32")
        converter.convert()
        converter.save(OUTPUT_SAVED_MODEL_PATH)
        saved_model_to_load = OUTPUT_SAVED_MODEL_PATH
    elif args.amp:
        os.environ["TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE"] = "1"

    use_real_data = args.data_dir is not None

    if use_real_data:
        filenames, _, num_steps, _, _ = runner_utils.parse_tfrecords_dataset(
            data_dir=str(args.data_dir),
            mode="validation",
            iter_unit="epoch",
            num_iter=1,
            global_batch_size=batch_size,
        )

        dataset = data_utils.get_tfrecords_input_fn(filenames=filenames,
                                                    batch_size=batch_size,
                                                    height=224,
                                                    width=224,
                                                    training=False,
                                                    distort_color=False,
                                                    num_threads=1,
                                                    deterministic=True)
        iterator = dataset.make_initializable_iterator()
        next_item = iterator.get_next()
    else:
        # Synthetic run: process roughly 60000 images in total.
        num_steps = 60000 // batch_size

    with tf.Session() as sess:
        if use_real_data:
            sess.run(iterator.initializer)
        tf.saved_model.loader.load(sess,
                                   [tf.saved_model.tag_constants.SERVING],
                                   str(saved_model_to_load))

        # Counters are set up before the ``try`` so the ``finally`` clause
        # can always read them, whatever happens inside the loop.
        start_time = time.time()
        last_time = start_time
        image_processed = 0
        image_correct = 0

        try:
            for samples_processed in range(int(num_steps)):
                if use_real_data:
                    next_batch_image, next_batch_target = sess.run(next_item)
                elif samples_processed == 0:
                    # One random batch, generated once and reused afterwards.
                    next_batch_image = np.random.normal(size=(batch_size, 224, 224, 3))
                    next_batch_target = np.random.randint(0, 1000, size=(batch_size,))

                output = sess.run([output_tensor_name], feed_dict={"input_tensor:0": next_batch_image})
                image_processed += args.batch_size
                image_correct += np.sum(output == next_batch_target)

                if samples_processed % LOG_FREQUENCY == 0 and samples_processed != 0:
                    current_time = time.time()
                    current_throughput = LOG_FREQUENCY * batch_size / (current_time - last_time)
                    dllogger.log(step=(0, samples_processed), data={"throughput": current_throughput})
                    last_time = current_time

        except tf.errors.OutOfRangeError:
            # The input iterator ran out before ``num_steps`` was reached;
            # treat this as the normal end of the evaluation data.
            pass
        finally:
            # Use the real end of the run, not the time of the last periodic
            # log: ``last_time`` never advances on runs of LOG_FREQUENCY steps
            # or fewer (division by zero) and otherwise leaves out the tail
            # of the run, overstating throughput.
            elapsed = time.time() - start_time
            throughput = image_processed / elapsed if elapsed > 0 else 0.0
            # Guard against an empty run so this summary cannot raise and
            # mask an exception that is already propagating.
            accuracy = float(image_correct / image_processed) if image_processed else 0.0
            dllogger.log(step=tuple(), data={"throughput": throughput,
                                             "accuracy": accuracy})
132+
133+
if __name__ == "__main__":
134+
main(argument_parser())

TensorFlow/Classification/ConvNets/main.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,9 @@
2222

2323
import tensorflow as tf
2424

25-
import horovod.tensorflow as hvd
25+
from utils import hvd_wrapper as hvd
2626
import dllogger
2727

28-
from utils import hvd_utils
2928
from runtime import Runner
3029
from model.resnet import model_architectures
3130

@@ -36,7 +35,7 @@
3635
tf.logging.set_verbosity(tf.logging.ERROR)
3736

3837
FLAGS = parse_cmdline(model_architectures.keys())
39-
hvd.init()
38+
hvd.init(True)
4039

4140
if hvd.rank() == 0:
4241
log_path = os.path.join(FLAGS.results_dir, FLAGS.log_filename)
@@ -100,11 +99,10 @@
10099

101100
if FLAGS.mode in ["train_and_evaluate", 'evaluate', 'inference_benchmark']:
102101

103-
if FLAGS.mode == 'inference_benchmark' and hvd_utils.is_using_hvd():
102+
if FLAGS.mode == 'inference_benchmark' and hvd.size() > 1:
104103
raise NotImplementedError("Only single GPU inference is implemented.")
105104

106-
elif not hvd_utils.is_using_hvd() or hvd.rank() == 0:
107-
105+
elif hvd.rank() == 0:
108106
runner.evaluate(iter_unit=FLAGS.iter_unit if FLAGS.mode != "train_and_evaluate" else "epoch",
109107
num_iter=FLAGS.num_iter if FLAGS.mode != "train_and_evaluate" else 1,
110108
warmup_steps=FLAGS.warmup_steps,
@@ -124,10 +122,10 @@
124122
if not os.path.isfile(FLAGS.to_predict):
125123
raise ValueError("Only prediction on single images is supported!")
126124

127-
if hvd_utils.is_using_hvd():
125+
if hvd.size() > 1:
128126
raise NotImplementedError("Only single GPU inference is implemented.")
129127

130-
elif not hvd_utils.is_using_hvd() or hvd.rank() == 0:
128+
else:
131129
runner.predict(FLAGS.to_predict,
132130
quantize=FLAGS.quantize,
133131
symmetric=FLAGS.symmetric,

TensorFlow/Classification/ConvNets/model/blocks/conv2d_block.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ def conv2d_block(
6464
trainable=is_training,
6565
dtype=tf.float32)
6666
net = tf.nn.conv2d(inputs,
67-
group_filter,
68-
strides=strides,
69-
padding='SAME',
70-
data_format=data_format)
67+
group_filter,
68+
strides=strides,
69+
padding='SAME',
70+
data_format=data_format)
7171
if use_batch_norm:
7272
net = layers.batch_norm(
7373
net,

TensorFlow/Classification/ConvNets/model/resnet.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,13 @@
1919

2020
import tensorflow as tf
2121

22-
import horovod.tensorflow as hvd
22+
from utils import hvd_wrapper as hvd
2323
import dllogger
2424

2525
from model import layers
2626
from model import blocks
2727

2828
from utils import var_storage
29-
from utils import hvd_utils
30-
3129
from utils.data_utils import normalized_inputs
3230

3331
from utils.learning_rate import learning_rate_scheduler
@@ -337,8 +335,8 @@ def loss_filter_fn(name):
337335
if params["apply_loss_scaling"]:
338336
optimizer = FixedLossScalerOptimizer(optimizer, scale=params["loss_scale"])
339337

340-
if hvd_utils.is_using_hvd():
341-
optimizer = hvd.DistributedOptimizer(optimizer)
338+
if hvd.size() > 1:
339+
optimizer = hvd.hvd_global_object.DistributedOptimizer(optimizer)
342340

343341
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
344342
if mode != tf.estimator.ModeKeys.TRAIN:

TensorFlow/Classification/ConvNets/resnet50v1.5/README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,6 @@ The `utils/` directory contains the following utility modules:
276276
- `cmdline_helper.py`: helper module for command line processing
277277
- `data_utils.py`: module defining input data pipelines
278278
- `dali_utils.py`: helper module for DALI
279-
- `hvd_utils.py`: helper module for Horovod
280279
- `image_processing.py`: image processing and data augmentation functions
281280
- `learning_rate.py`: definition of used learning rate schedule
282281
- `optimizers.py`: definition of used custom optimizers
@@ -447,7 +446,11 @@ To run inference on a single example with a checkpoint and a model script, use:
447446

448447
`python main.py --mode predict --model_dir <path to model> --to_predict <path to image> --results_dir <path to results>`
449448

450-
The optional `--xla` and `--amp` flags control XLA and AMP during inference.
449+
The optional `--xla` and `--amp` flags control XLA and AMP during inference. To run inference using TF-TRT, please use the following command:
450+
451+
`python inference.py --model <path to model> --tf-trt --batch-size <inference_batch_size> --data-dir <path to data>`
452+
453+
The optional `--amp` flag controls AMP during inference.
451454

452455
## Performance
453456

TensorFlow/Classification/ConvNets/resnext101-32x4d/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,6 @@ The `utils/` directory contains the following utility modules:
283283
- `cmdline_helper.py`: helper module for command line processing
284284
- `data_utils.py`: module defining input data pipelines
285285
- `dali_utils.py`: helper module for DALI
286-
- `hvd_utils.py`: helper module for Horovod
287286
- `image_processing.py`: image processing and data augmentation functions
288287
- `learning_rate.py`: definition of used learning rate schedule
289288
- `optimizers.py`: definition of used custom optimizers

0 commit comments

Comments
 (0)