-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Expand file tree
/
Copy pathbuild.py
More file actions
78 lines (70 loc) · 3 KB
/
build.py
File metadata and controls
78 lines (70 loc) · 3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python3
#
# SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import tensorrt as trt
import numpy as np
from polygraphy.logger import G_LOGGER
from polygraphy.backend.trt import (
CreateNetwork,
CreateConfig,
engine_bytes_from_network,
get_trt_logger
)
# Set to True to print TRT verbose logs.
DEBUG_LOG = False

# The verbosity context is entered by hand rather than through a `with`
# statement so that it stays active for the remainder of the script; the
# matching __exit__ call lives in the `__main__` section at the bottom.
verbose = None
if DEBUG_LOG:
    verbose = G_LOGGER.verbosity(G_LOGGER.SUPER_VERBOSE)
    verbose.__enter__()
def build_network():
    """Create a small network made of two convolution + ReLU stages.

    The network takes one float32 input named "input" with shape
    (1, 3, 224, 224). Each stage uses a 3x3 convolution with randomly
    initialized float32 weights and bias; the stages produce 16 and 32
    output maps respectively. The output of the second ReLU is marked as
    the network output.

    Returns:
        The (builder, network) pair obtained from calling CreateNetwork()().
    """
    builder, network = CreateNetwork()()

    def add_conv_relu(tensor, in_channels, out_channels):
        # Kernel is drawn before bias so the random draws happen in the same
        # order as a straight-line weights/bias/weights/bias sequence.
        kernel = np.random.randn(out_channels, in_channels, 3, 3).astype(np.float32)
        bias = np.random.randn(out_channels).astype(np.float32)
        conv = network.add_convolution_nd(
            input=tensor,
            num_output_maps=out_channels,
            kernel_shape=(3, 3),
            kernel=kernel,
            bias=bias,
        )
        relu = network.add_activation(input=conv.get_output(0), type=trt.ActivationType.RELU)
        return relu.get_output(0)

    # A simple network with internal tensors
    features = network.add_input(name="input", dtype=trt.float32, shape=(1, 3, 224, 224))
    for in_channels, out_channels in ((3, 16), (16, 32)):
        features = add_conv_relu(features, in_channels, out_channels)

    network.mark_output(tensor=features)
    return builder, network
class StreamWriter(trt.IStreamWriter):
    """Stream writer that accumulates everything written to it in memory.

    The collected data is exposed through the ``bytes`` attribute.
    """

    def __init__(self):
        # The base class initializer is invoked explicitly by name.
        trt.IStreamWriter.__init__(self)
        self.bytes = b""

    def write(self, data):
        """Append ``data`` to the buffer and return the number of bytes taken."""
        accepted = len(data)
        # Rebinding builds a new bytes object holding the old content plus
        # the new chunk.
        self.bytes = self.bytes + data
        return accepted
def build_engine():
    """Build the sample network, serialize it to a stream, and reload it.

    The network from build_network() is built with a default CreateConfig()
    configuration and serialized into an in-memory StreamWriter. The collected
    bytes are then deserialized with a TensorRT runtime as a round-trip check.

    Raises:
        RuntimeError: If the builder reports that building/serializing failed.
        AssertionError: If deserializing the written bytes yields no engine.
    """
    print("Constructing network...")
    builder, network = build_network()
    config = CreateConfig()(builder, network)
    stream_writer = StreamWriter()

    print("Building engine and serializing to stream...")
    # The serialized engine is delivered through stream_writer; the call itself
    # only reports success or failure, so check it instead of discarding it.
    build_succeeded = builder.build_serialized_network_to_stream(network, config, stream_writer)
    if not build_succeeded:
        raise RuntimeError("Building the engine and serializing it to the stream failed")
    print("The total bytes written to stream is: ", len(stream_writer.bytes))

    runtime = trt.Runtime(get_trt_logger())
    print("Deserializing engine from stream...")
    engine = runtime.deserialize_cuda_engine(stream_writer.bytes)
    assert engine is not None, "Engine deserialization failed"
    print("Engine deserialized successfully")
if __name__ == "__main__":
    try:
        build_engine()
    finally:
        # Leave the logger verbosity context that was entered at module load,
        # even when the build raised, so enter/exit always stay balanced.
        if verbose is not None:
            verbose.__exit__(None, None, None)