Fix large payload for Python backend (triton-inference-server#18)

Tabrizian · web-flow · commit 76be86fd1021 · 2020-10-30T09:38:21.000-07:00
diff --git a/src/python.cc b/src/python.cc
@@ -87,6 +87,8 @@ namespace triton { namespace backend { namespace python {
     }                                                                   \
   } while (false)
 
+constexpr int MAX_GRPC_MESSAGE_SIZE = INT32_MAX;  // gRPC send/receive cap, bytes
+
 class ModelState;
 
 struct BackendState {
@@ -261,8 +263,11 @@ TRITONSERVER_Error*
 ModelInstanceState::ConnectPythonInterpreter()
 {
   grpc_init();
-  auto grpc_channel =
-      grpc::CreateChannel(domain_socket_, grpc::InsecureChannelCredentials());
+  grpc::ChannelArguments arguments;
+  arguments.SetMaxSendMessageSize(MAX_GRPC_MESSAGE_SIZE);
+  arguments.SetMaxReceiveMessageSize(MAX_GRPC_MESSAGE_SIZE);
+  auto grpc_channel = grpc::CreateCustomChannel(
+      domain_socket_, grpc::InsecureChannelCredentials(), arguments);
 
   stub = PythonInterpreter::NewStub(grpc_channel);
 
@@ -441,6 +446,12 @@ ModelInstanceState::GetInputTensor(
       in, &input_name, &input_dtype, &input_shape, &input_dims_count,
       &input_byte_size, &input_buffer_count));
 
+  if (input_byte_size >= MAX_GRPC_MESSAGE_SIZE)  // over the gRPC message cap
+    return TRITONSERVER_ErrorNew(
+        TRITONSERVER_ERROR_UNSUPPORTED,
+        "Python backend does not support input size larger than 2GB, consider "
+        "partitioning your input into multiple inputs.");
+
   // Update input_tensor
   input_tensor->set_name(input_name);
   input_tensor->set_dtype(static_cast<int>(input_dtype));
diff --git a/src/resources/startup.py b/src/resources/startup.py
@@ -46,6 +46,8 @@
 from python_host_pb2_grpc import PythonInterpreterServicer, add_PythonInterpreterServicer_to_server
 import grpc
 
+MAX_GRPC_MESSAGE_SIZE = 2147483647  # 2**31 - 1; gRPC max send/receive length
+
 
 def serialize_byte_tensor(input_tensor):
     """
@@ -345,7 +347,13 @@ def watch_connections(address, event):
 if __name__ == "__main__":
     signal_received = False
     FLAGS = parse_startup_arguments()
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=1),
+                         options=[
+                             ('grpc.max_send_message_length',
+                              MAX_GRPC_MESSAGE_SIZE),
+                             ('grpc.max_receive_message_length',
+                              MAX_GRPC_MESSAGE_SIZE),
+                         ])
     channelz.add_channelz_servicer(server)
     # Create an Event to keep the GRPC server running
     event = threading.Event()