Skip to content

Commit 22e1ee8

Browse files
xds: add panic recovery in xdsclient resource unmarshalling. (#8895)
This PR introduces panic recovery mechanisms to the xdsclient resource unmarshaling logic. Currently, a panic during the parsing of a malformed xDS update could crash the entire application. To improve resilience in production environments, this change wraps the resource parsing logic in a `defer recover()` block. The recovery mechanism is controlled by the `GRPC_EXPERIMENTAL_XDS_PANIC_RECOVERY` environment variable, which defaults to `true`. - When enabled, panics during unmarshaling are caught and returned as errors, preventing the application from crashing. - When disabled, panics are allowed to propagate. This is required for fuzz testing, where crashing on unexpected input is the desired behavior to identify bugs: [go/grpc-go-xdsclient-fuzzing](http://goto.google.com/grpc-go-xdsclient-fuzzing) Part of : #8749 RELEASE NOTES: N/A
1 parent 7136e99 commit 22e1ee8

3 files changed

Lines changed: 76 additions & 13 deletions

File tree

internal/envconfig/envconfig.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,14 @@ var (
9595
// feature can be disabled by setting the environment variable
9696
// GRPC_EXPERIMENTAL_PF_WEIGHTED_SHUFFLING to "false".
9797
PickFirstWeightedShuffling = boolFromEnv("GRPC_EXPERIMENTAL_PF_WEIGHTED_SHUFFLING", true)
98+
99+
// XDSRecoverPanicInResourceParsing indicates whether the xdsclient should
100+
// recover from panics while parsing xDS resources.
101+
//
102+
// This feature can be disabled (e.g. for fuzz testing) by setting the
103+
// environment variable "GRPC_GO_EXPERIMENTAL_XDS_RESOURCE_PANIC_RECOVERY"
104+
// to "false".
105+
XDSRecoverPanicInResourceParsing = boolFromEnv("GRPC_GO_EXPERIMENTAL_XDS_RESOURCE_PANIC_RECOVERY", true)
98106
)
99107

100108
func boolFromEnv(envVar string, def bool) bool {

internal/xds/clients/xdsclient/channel.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"time"
2626

2727
"google.golang.org/grpc/grpclog"
28+
"google.golang.org/grpc/internal/envconfig"
2829
igrpclog "google.golang.org/grpc/internal/grpclog"
2930
"google.golang.org/grpc/internal/xds/clients"
3031
"google.golang.org/grpc/internal/xds/clients/internal"
@@ -253,7 +254,16 @@ func decodeResponse(opts *DecodeOptions, rType *ResourceType, resp response) (ma
253254
perResourceErrors := make(map[string]error) // Tracks resource validation errors, where we have a resource name.
254255
ret := make(map[string]dataAndErrTuple) // Return result, a map from resource name to either resource data or error.
255256
for _, r := range resp.resources {
256-
result, err := rType.Decoder.Decode(NewAnyProto(r), *opts)
257+
result, err := func() (res *DecodeResult, err error) {
258+
defer func() {
259+
if envconfig.XDSRecoverPanicInResourceParsing {
260+
if p := recover(); p != nil {
261+
err = fmt.Errorf("recovered from panic during resource parsing, resource: %v, panic: %v", r, p)
262+
}
263+
}
264+
}()
265+
return rType.Decoder.Decode(NewAnyProto(r), *opts)
266+
}()
257267

258268
// Name field of the result is left unpopulated only when resource
259269
// deserialization fails.

internal/xds/clients/xdsclient/channel_test.go

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@ import (
3030
"github.com/google/go-cmp/cmp/cmpopts"
3131
"github.com/google/uuid"
3232
"google.golang.org/grpc/credentials/insecure"
33+
"google.golang.org/grpc/internal/envconfig"
34+
"google.golang.org/grpc/internal/testutils"
3335
"google.golang.org/grpc/internal/xds/clients"
3436
"google.golang.org/grpc/internal/xds/clients/grpctransport"
35-
"google.golang.org/grpc/internal/xds/clients/internal/testutils"
37+
xdstestutils "google.golang.org/grpc/internal/xds/clients/internal/testutils"
3638
"google.golang.org/grpc/internal/xds/clients/internal/testutils/e2e"
3739
"google.golang.org/grpc/internal/xds/clients/internal/testutils/fakeserver"
3840
"google.golang.org/grpc/internal/xds/clients/xdsclient/internal/xdsresource"
@@ -184,18 +186,18 @@ func (s) TestChannel_ADS_HandleResponseFromManagementServer(t *testing.T) {
184186
Value: []byte{1, 2, 3, 4},
185187
}
186188
apiListener = &v3listenerpb.ApiListener{
187-
ApiListener: testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{
189+
ApiListener: xdstestutils.MarshalAny(t, &v3httppb.HttpConnectionManager{
188190
RouteSpecifier: &v3httppb.HttpConnectionManager_RouteConfig{
189191
RouteConfig: &v3routepb.RouteConfiguration{
190192
Name: routeName},
191193
},
192194
}),
193195
}
194-
listener1 = testutils.MarshalAny(t, &v3listenerpb.Listener{
196+
listener1 = xdstestutils.MarshalAny(t, &v3listenerpb.Listener{
195197
Name: listenerName1,
196198
ApiListener: apiListener,
197199
})
198-
listener2 = testutils.MarshalAny(t, &v3listenerpb.Listener{
200+
listener2 = xdstestutils.MarshalAny(t, &v3listenerpb.Listener{
199201
Name: listenerName2,
200202
ApiListener: apiListener,
201203
})
@@ -234,10 +236,10 @@ func (s) TestChannel_ADS_HandleResponseFromManagementServer(t *testing.T) {
234236
managementServerResponse: &v3discoverypb.DiscoveryResponse{
235237
VersionInfo: "0",
236238
TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener",
237-
Resources: []*anypb.Any{testutils.MarshalAny(t, &v3listenerpb.Listener{
239+
Resources: []*anypb.Any{xdstestutils.MarshalAny(t, &v3listenerpb.Listener{
238240
Name: listenerName1,
239241
ApiListener: &v3listenerpb.ApiListener{
240-
ApiListener: testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{
242+
ApiListener: xdstestutils.MarshalAny(t, &v3httppb.HttpConnectionManager{
241243
RouteSpecifier: &v3httppb.HttpConnectionManager_ScopedRoutes{},
242244
}),
243245
},
@@ -265,10 +267,10 @@ func (s) TestChannel_ADS_HandleResponseFromManagementServer(t *testing.T) {
265267
TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener",
266268
Resources: []*anypb.Any{
267269
badlyMarshaledResource,
268-
testutils.MarshalAny(t, &v3listenerpb.Listener{
270+
xdstestutils.MarshalAny(t, &v3listenerpb.Listener{
269271
Name: listenerName2,
270272
ApiListener: &v3listenerpb.ApiListener{
271-
ApiListener: testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{
273+
ApiListener: xdstestutils.MarshalAny(t, &v3httppb.HttpConnectionManager{
272274
RouteSpecifier: &v3httppb.HttpConnectionManager_ScopedRoutes{},
273275
}),
274276
},
@@ -345,10 +347,10 @@ func (s) TestChannel_ADS_HandleResponseFromManagementServer(t *testing.T) {
345347
VersionInfo: "0",
346348
TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener",
347349
Resources: []*anypb.Any{
348-
testutils.MarshalAny(t, &v3listenerpb.Listener{
350+
xdstestutils.MarshalAny(t, &v3listenerpb.Listener{
349351
Name: listenerName1,
350352
ApiListener: &v3listenerpb.ApiListener{
351-
ApiListener: testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{
353+
ApiListener: xdstestutils.MarshalAny(t, &v3httppb.HttpConnectionManager{
352354
RouteSpecifier: &v3httppb.HttpConnectionManager_ScopedRoutes{},
353355
}),
354356
},
@@ -513,7 +515,7 @@ func (s) TestChannel_ADS_StreamFailure(t *testing.T) {
513515
if err != nil {
514516
t.Fatalf("net.Listen() failed: %v", err)
515517
}
516-
lis := testutils.NewRestartableListener(l)
518+
lis := xdstestutils.NewRestartableListener(l)
517519
mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: lis})
518520

519521
// Configure a listener resource on the management server.
@@ -539,7 +541,7 @@ func (s) TestChannel_ADS_StreamFailure(t *testing.T) {
539541

540542
// Wait for an update callback on the event handler and verify the
541543
// contents of the update and the metadata.
542-
hcm := testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{
544+
hcm := xdstestutils.MarshalAny(t, &v3httppb.HttpConnectionManager{
543545
RouteSpecifier: &v3httppb.HttpConnectionManager_Rds{Rds: &v3httppb.Rds{
544546
ConfigSource: &v3corepb.ConfigSource{
545547
ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{Ads: &v3corepb.AggregatedConfigSource{}},
@@ -772,3 +774,46 @@ func (ta *testEventHandler) waitForResourceDoesNotExist(ctx context.Context) (Re
772774
}
773775
return typ, name, nil
774776
}
777+
778+
type panicDecoder struct{}
779+
780+
func (panicDecoder) Decode(*AnyProto, DecodeOptions) (*DecodeResult, error) {
781+
panic("simulate panic")
782+
}
783+
784+
// TestDecodeResponse_PanicRecoveryEnabled tests the panic recovery mechanism
785+
// in decodeResponse. It verifies that if XDSRecoverPanicInResourceParsing
786+
// env variable is enabled, panics during unmarshaling are caught and returned
787+
// as errors.
788+
func (s) TestDecodeResponse_PanicRecoveryEnabled(t *testing.T) {
789+
rType := &ResourceType{
790+
TypeName: "resourceType",
791+
Decoder: panicDecoder{},
792+
}
793+
resp := response{resources: []*anypb.Any{{Value: []byte("test")}}}
794+
wantErr := "recovered from panic during resource parsing"
795+
796+
if _, _, err := decodeResponse(&DecodeOptions{}, rType, resp); err == nil || !strings.Contains(err.Error(), wantErr) {
797+
t.Fatalf("decodeResponse() failed with err: %v, want %q", err, wantErr)
798+
}
799+
}
800+
801+
// TestDecodeResponse_PanicRecoveryDisabled tests the panic recovery mechanism
802+
// in decodeResponse. It verifies that when XDSRecoverPanicInResourceParsing
803+
// env variable is disabled, panics during unmarshaling propagate.
804+
func (s) TestDecodeResponse_PanicRecoveryDisabled(t *testing.T) {
805+
testutils.SetEnvConfig(t, &envconfig.XDSRecoverPanicInResourceParsing, false)
806+
rType := &ResourceType{
807+
TypeName: "resourceType",
808+
Decoder: panicDecoder{},
809+
}
810+
resp := response{resources: []*anypb.Any{{Value: []byte("test")}}}
811+
wantErr := "simulate panic"
812+
813+
defer func() {
814+
if r := recover(); r == nil || !strings.Contains(fmt.Sprint(r), wantErr) {
815+
t.Fatalf("Expected panic in decodeResponse, got: %v, want: %q", r, wantErr)
816+
}
817+
}()
818+
decodeResponse(&DecodeOptions{}, rType, resp)
819+
}

0 commit comments

Comments
 (0)