Skip to content

Commit b7c0ec6

Browse files
committed
Improve handling of situation where a request body is never read in the handler
1 parent 984a63a commit b7c0ec6

2 files changed

Lines changed: 120 additions & 16 deletions

File tree

sentry_sdk/integrations/flask.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import sentry_sdk
44
from sentry_sdk.integrations import DidNotEnable, Integration, _check_minimum_version
55
from sentry_sdk.integrations._wsgi_common import (
6-
_RAW_DATA_EXCEPTIONS,
76
DEFAULT_HTTP_METHODS_TO_CAPTURE,
87
RequestExtractor,
98
_serialize_request_body_data,
@@ -46,6 +45,7 @@
4645
request_started,
4746
)
4847
from markupsafe import Markup
48+
from werkzeug.exceptions import ClientDisconnected
4949
except ImportError:
5050
raise DidNotEnable("Flask is not installed")
5151

@@ -191,13 +191,7 @@ def _set_request_body_data_on_streaming_segment(
191191
if not request_body_within_bounds(client, content_length):
192192
data = AnnotatedValue.substituted_because_over_size_limit()
193193
else:
194-
# Only use data that Werkzeug has already cached — never consume
195-
# wsgi.input ourselves, as that would break user code that reads
196-
# the stream directly.
197-
# You can find where this gets set here:
198-
# https://github.com/pallets/werkzeug/blob/1b00618e787f40dfb21eba29caf8f8be7c8e1d93/src/werkzeug/wrappers/request.py#L444
199194
raw_data = getattr(request, "_cached_data", None)
200-
201195
parsed_body = None
202196
if "form" in request.__dict__:
203197
extractor = FlaskRequestExtractor(request)
@@ -206,6 +200,19 @@ def _set_request_body_data_on_streaming_segment(
206200
extractor = FlaskRequestExtractor(request)
207201
if extractor.is_json():
208202
parsed_body = extractor.json()
203+
else:
204+
# The route never read the body via Werkzeug, but it
205+
# may have consumed wsgi.input directly. get_data()
206+
# raises ClientDisconnected if the stream is exhausted.
207+
try:
208+
raw_data = request.get_data()
209+
except ClientDisconnected:
210+
raw_data = None
211+
212+
if raw_data:
213+
extractor = FlaskRequestExtractor(request)
214+
if extractor.is_json():
215+
parsed_body = extractor.json()
209216

210217
if parsed_body is not None:
211218
data = parsed_body

tests/integrations/flask/test_flask.py

Lines changed: 106 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -406,13 +406,18 @@ def index():
406406
assert len(event["request"]["data"]["foo"]) == DEFAULT_MAX_VALUE_LENGTH
407407

408408

409-
def test_flask_formdata_request_appear_transaction_body(
410-
sentry_init, capture_events, app
409+
@pytest.mark.parametrize("span_streaming", [True, False])
410+
def test_flask_formdata_request_appear_in_segment_or_transaction_body(
411+
sentry_init, capture_events, capture_items, app, span_streaming
411412
):
412413
"""
413-
Test that ensures that transaction request data contains body, even if no exception was raised
414+
Test that ensures that transaction/segment request data contains body, even if no exception was raised
414415
"""
415-
sentry_init(integrations=[flask_sentry.FlaskIntegration()], traces_sample_rate=1.0)
416+
sentry_init(
417+
integrations=[flask_sentry.FlaskIntegration()],
418+
traces_sample_rate=1.0,
419+
_experiments={"trace_lifecycle": "stream"} if span_streaming else {},
420+
)
416421

417422
data = {"username": "sentry-user", "age": "26"}
418423

@@ -430,17 +435,29 @@ def index():
430435
capture_message("hi")
431436
return "ok"
432437

433-
events = capture_events()
438+
if span_streaming:
439+
items = capture_items("event", "span")
440+
else:
441+
events = capture_events()
434442

435443
client = app.test_client()
436444
response = client.post("/", data=data)
437445
assert response.status_code == 200
438446

439-
event, transaction_event = events
447+
if span_streaming:
448+
sentry_sdk.flush()
449+
450+
spans = [i for i in items if i.type == "span"]
451+
assert len(spans) == 1
452+
span = spans[0].payload
453+
454+
assert span["attributes"]["http.request.body.data"] == json.dumps(data)
455+
else:
456+
event, transaction_event = events
440457

441-
assert "request" in transaction_event
442-
assert "data" in transaction_event["request"]
443-
assert transaction_event["request"]["data"] == data
458+
assert "request" in transaction_event
459+
assert "data" in transaction_event["request"]
460+
assert transaction_event["request"]["data"] == data
444461

445462

446463
@pytest.mark.parametrize("input_char", ["a", b"a"])
@@ -1357,6 +1374,86 @@ def test_sensitive_header_scrubbing_span_streaming(sentry_init, capture_items, a
13571374
assert span["attributes"]["http.request.header.x-custom-header"] == "passthrough"
13581375

13591376

1377+
def test_request_body_captured_when_route_ignores_body_span_streaming(
1378+
sentry_init, capture_items, app
1379+
):
1380+
"""
1381+
When the route handler never reads the request body, the SDK should
1382+
still capture it in _request_finished via request.get_data().
1383+
"""
1384+
sentry_init(
1385+
integrations=[flask_sentry.FlaskIntegration()],
1386+
traces_sample_rate=1.0,
1387+
max_request_body_size="always",
1388+
_experiments={"trace_lifecycle": "stream"},
1389+
)
1390+
1391+
body = {"key": "value"}
1392+
1393+
@app.route("/ignore-body", methods=["POST"])
1394+
def ignore_body_endpoint():
1395+
return "ok"
1396+
1397+
items = capture_items("span")
1398+
1399+
client = app.test_client()
1400+
response = client.post("/ignore-body", json=body)
1401+
assert response.status_code == 200
1402+
1403+
sentry_sdk.flush()
1404+
1405+
assert len(items) == 1
1406+
span = items[0].payload
1407+
assert span["attributes"]["http.request.body.data"] == json.dumps(body)
1408+
1409+
1410+
def test_client_disconnected_handled_gracefully_span_streaming(
1411+
sentry_init, capture_items, app
1412+
):
1413+
from unittest.mock import patch
1414+
1415+
from werkzeug.exceptions import ClientDisconnected
1416+
1417+
sentry_init(
1418+
integrations=[flask_sentry.FlaskIntegration()],
1419+
traces_sample_rate=1.0,
1420+
max_request_body_size="always",
1421+
_experiments={"trace_lifecycle": "stream"},
1422+
)
1423+
1424+
@app.route("/disconnect", methods=["POST"])
1425+
def disconnect_endpoint():
1426+
# Simulate a client that disconnected: patch get_data so that
1427+
# when _request_finished tries to read the body it raises.
1428+
request._cached_data = None
1429+
request.__dict__.pop("form", None)
1430+
patch.object(
1431+
type(request._get_current_object()),
1432+
"get_data",
1433+
side_effect=ClientDisconnected(),
1434+
).start()
1435+
return "ok"
1436+
1437+
items = capture_items("span")
1438+
1439+
client = app.test_client()
1440+
try:
1441+
response = client.post(
1442+
"/disconnect",
1443+
data=b'{"key": "value"}',
1444+
content_type="application/json",
1445+
)
1446+
assert response.status_code == 200
1447+
finally:
1448+
patch.stopall()
1449+
1450+
sentry_sdk.flush()
1451+
1452+
assert len(items) == 1
1453+
span = items[0].payload
1454+
assert "http.request.body.data" not in span.get("attributes", {})
1455+
1456+
13601457
def test_wsgi_input_direct_read_does_not_hang_span_streaming(
13611458
sentry_init, capture_items, app
13621459
):

0 commit comments

Comments
 (0)