Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 30 additions & 15 deletions sentry_sdk/integrations/litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,32 @@
raise DidNotEnable("LiteLLM not installed")


def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
"""Get the metadata dictionary from the kwargs."""
litellm_params = kwargs.setdefault("litellm_params", {})
# litellm threads the SAME `kwargs` dict (its per-request ``model_call_details``)
# through the input, success, and failure callbacks, so the bookkeeping span is
# stashed on it directly. This ties the span's lifetime to the request -- it is
# freed when litellm releases the request -- with no module-level tracking.
#
# The span must NOT go in ``kwargs["litellm_params"]["metadata"]``: litellm
# forwards that caller ``metadata`` dict into the outbound request body for some
# providers (e.g. Anthropic's /v1/messages passthrough), which would break
# ``json.dumps(request_body)`` and leak the span (and its prompt data) to the
# provider. The Anthropic request body is built only from the recognized request
# params, not from ``model_call_details``, so a top-level key here is not
# forwarded -- it is also where litellm stores its own per-request internal state
# (e.g. ``agentic_loop_params``).
_SPAN_KEY = "_sentry_span"

# we need this weird little dance, as metadata might be set but may be None initially
metadata = litellm_params.get("metadata")
if metadata is None:
metadata = {}
litellm_params["metadata"] = metadata
return metadata

def _store_span(kwargs: "Dict[str, Any]", span: "Any") -> None:
    """Stash ``span`` on the ``kwargs`` dict under the top-level ``_SPAN_KEY`` entry."""
    kwargs.update({_SPAN_KEY: span})


def _peek_span(kwargs: "Dict[str, Any]") -> "Any":
    """Return the span stored under ``_SPAN_KEY`` without removing it.

    Yields ``None`` when no span has been stored on ``kwargs``.
    """
    stored = kwargs.get(_SPAN_KEY, None)
    return stored


def _pop_span(kwargs: "Dict[str, Any]") -> "Any":
    """Remove and return the span stored under ``_SPAN_KEY``.

    Yields ``None`` (and leaves ``kwargs`` untouched) when no span is stored.
    """
    removed = kwargs.pop(_SPAN_KEY, None)
    return removed


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
Expand Down Expand Up @@ -117,8 +133,8 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
)
span.__enter__()

# Store span for later
_get_metadata_dict(kwargs)["_sentry_span"] = span
# Store span for later on the kwargs dict itself, out-of-band from the metadata litellm may forward.
_store_span(kwargs, span)

# Set basic data
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
Expand Down Expand Up @@ -198,8 +214,7 @@ def _success_callback(
) -> None:
"""Handle successful completion."""

metadata = _get_metadata_dict(kwargs)
span = metadata.get("_sentry_span")
span = _peek_span(kwargs)
if span is None:
return

Expand Down Expand Up @@ -259,7 +274,7 @@ def _success_callback(
or "complete_streaming_response" in kwargs
or "async_complete_streaming_response" in kwargs
):
span = metadata.pop("_sentry_span", None)
span = _pop_span(kwargs)
if span is not None:
span.__exit__(None, None, None)

Expand All @@ -285,7 +300,7 @@ def _failure_callback(
end_time: "datetime",
) -> None:
"""Handle request failure."""
span = _get_metadata_dict(kwargs).get("_sentry_span")
span = _pop_span(kwargs)
if span is None:
Comment thread
jgreer013 marked this conversation as resolved.
return

Expand Down
133 changes: 133 additions & 0 deletions tests/integrations/litellm/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2532,6 +2532,139 @@ def test_integration_setup(sentry_init):
assert _failure_callback in (litellm.failure_callback or [])


def test_caller_metadata_stays_json_serializable(
    sentry_init,
    capture_events,
):
    """Regression test for GH-6596.

    The caller's ``metadata`` dict travels inside ``litellm_params`` and, for
    some providers (e.g. Anthropic's ``/v1/messages`` passthrough), is
    serialized into the outbound request body *before the response comes
    back*. If the integration wrote its live ``Span`` into that dict,
    ``json.dumps(request_body)`` would raise ``TypeError: Object of type Span
    is not JSON serializable`` before the request is even sent. This test
    checks that the dict stays free of the span while a chat span is still
    recorded.
    """
    sentry_init(
        integrations=[LiteLLMIntegration()],
        disabled_integrations=[StdlibIntegration],
        traces_sample_rate=1.0,
        send_default_pii=True,
        _experiments={"trace_lifecycle": "static"},
    )
    events = capture_events()

    # Shape the kwargs the way the callbacks receive them: the caller's
    # metadata sits under litellm_params and is the dict put on the wire.
    caller_metadata = {"user_id": "my-org"}
    litellm_params = {"metadata": caller_metadata}
    kwargs = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hello!"}],
        "litellm_call_id": "call-6596",
        "litellm_params": litellm_params,
    }

    with start_transaction(name="litellm test"):
        _input_callback(kwargs)

        # This is the point where the request body would be serialized,
        # with the span still open: the forwarded metadata must not carry it...
        assert "_sentry_span" not in caller_metadata
        # ...which keeps the metadata JSON-serializable.
        json.dumps(caller_metadata)

        # The span is tracked out-of-band, so the success path still finishes it.
        response = MockCompletionResponse()
        _success_callback(kwargs, response, datetime.now(), datetime.now())

    (event,) = events
    chat_spans = []
    for recorded in event["spans"]:
        if recorded["op"] != OP.GEN_AI_CHAT:
            continue
        if recorded["origin"] != "auto.ai.litellm":
            continue
        chat_spans.append(recorded)
    assert len(chat_spans) == 1


def test_span_stashed_on_shared_kwargs_not_forwarded_metadata(sentry_init):
    """The span lives under a top-level key of the shared kwargs dict.

    It must never appear in the caller's metadata, and two separate calls
    must each hold their own span object.
    """
    sentry_init(
        integrations=[LiteLLMIntegration()],
        disabled_integrations=[StdlibIntegration],
        traces_sample_rate=1.0,
        _experiments={"trace_lifecycle": "static"},
    )

    with start_transaction(name="litellm test"):
        caller_metadata = {"user_id": "my-org"}
        # First call carries caller metadata; the second has no litellm_params.
        first_call = {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "a"}],
            "litellm_params": {"metadata": caller_metadata},
        }
        second_call = {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "b"}],
        }
        calls = (first_call, second_call)

        for call_kwargs in calls:
            _input_callback(call_kwargs)

        # The span sits on the kwargs dict, not on the forwarded metadata...
        assert first_call["_sentry_span"] is not None
        assert "_sentry_span" not in caller_metadata
        json.dumps(caller_metadata)
        # ...and the two calls do not share a span object.
        assert first_call["_sentry_span"] is not second_call["_sentry_span"]

        for call_kwargs in calls:
            _success_callback(
                call_kwargs, MockCompletionResponse(), datetime.now(), datetime.now()
            )


def test_span_cleaned_up_after_terminal_callbacks(sentry_init):
    """Success and failure callbacks each drop the span from the kwargs dict.

    After either terminal callback runs, the ``"_sentry_span"`` key must be gone.
    """
    sentry_init(
        integrations=[LiteLLMIntegration()],
        disabled_integrations=[StdlibIntegration],
        traces_sample_rate=1.0,
        _experiments={"trace_lifecycle": "static"},
    )

    def _new_call(call_id):
        # Build a fresh, independent kwargs dict for one simulated request.
        return {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "hi"}],
            "litellm_call_id": call_id,
        }

    with start_transaction(name="litellm test"):
        # Success path: span present after input, gone after success.
        success_kwargs = _new_call("success-call")
        _input_callback(success_kwargs)
        assert "_sentry_span" in success_kwargs
        response = MockCompletionResponse()
        _success_callback(success_kwargs, response, datetime.now(), datetime.now())
        assert "_sentry_span" not in success_kwargs

        # Failure path: span present after input, gone after failure.
        failure_kwargs = _new_call("failure-call")
        _input_callback(failure_kwargs)
        assert "_sentry_span" in failure_kwargs
        error = ValueError("boom")
        _failure_callback(failure_kwargs, error, datetime.now(), datetime.now())
        assert "_sentry_span" not in failure_kwargs


def test_litellm_message_truncation(sentry_init, capture_events):
"""Test that large messages are truncated properly in LiteLLM integration."""
sentry_init(
Expand Down