feldera/python/felderize/felderize/llm.py at issue-segment-indicator · feldera/feldera

129 lines (107 loc) · 4.56 KB
from __future__ import annotations

import sys
import time
from abc import ABC, abstractmethod

import anthropic
import httpx

from felderize.config import Config
# Total number of attempts ``AnthropicClient.translate`` makes for one request.
# Despite the name, it bounds the retry loop for transient errors as well as
# for rate-limit errors.
_RATE_LIMIT_RETRIES = 5
class PromptTooLargeError(Exception):
    """The request exceeded the model's context window.

    Raised instead of the raw ``anthropic.BadRequestError`` so callers can turn a
    too-large program into a clear "shorten the input" message rather than an
    opaque API error. Retrying does not help, so this is never retried.
    """
def _is_context_length_error(error: anthropic.BadRequestError) -> bool:
    """Tell a context-window overflow apart from other 400 errors.
    Anthropic reports an oversized request with a message such as
    ``prompt is too long: N tokens > M maximum``. Match that phrasing (and a few
    near-synonyms) so genuine bad requests still surface unchanged.
    message = str(error).lower()
        "too long" in message
        or "context window" in message
        or "context length" in message
        return True
    return "maximum" in message and "token" in message
# Transient failures worth retrying: dropped or refused connections, timeouts,
# and 5xx responses. The raw httpx errors are included because a streamed
# response can drop mid-body (RemoteProtocolError) without the SDK wrapping it.
# This tuple is used directly in an ``except`` clause.
_TRANSIENT_ERRORS = (
    anthropic.APIConnectionError,
    anthropic.APITimeoutError,
    anthropic.InternalServerError,
    httpx.RemoteProtocolError,
    httpx.ReadError,
    httpx.ConnectError,
)
class LLMClient(ABC):
    """Abstract interface for LLM translation backends.

    Concrete backends subclass this and implement ``translate``; the class
    itself cannot be instantiated because ``translate`` is abstract.
    """
    @abstractmethod
    def translate(self, system_prompt: str, user_prompt: str) -> str:
        """Send a translation request and return the raw response text.

        Args:
            system_prompt: The system prompt for the request.
            user_prompt: The user prompt for the request.

        Returns:
            The backend's response text, unprocessed.
        """
class AnthropicClient(LLMClient):
    """LLM client backed by the Anthropic API."""

    def __init__(self, config: Config, verbose: bool = False):
        """Create the SDK client and remember the request settings.

        Args:
            config: Supplies ``api_key``, ``base_url``, ``model`` and
                ``max_tokens``.
            verbose: When true, ``translate`` prints token usage to stderr.
        """
        self.client = anthropic.Anthropic(
            api_key=config.api_key, base_url=config.base_url
        )
        self.model = config.model
        self.max_tokens = config.max_tokens
        self.verbose = verbose

    def translate(self, system_prompt: str, user_prompt: str) -> str:
        """Send a prompt to the Anthropic API and return the response text.

        The request is streamed and the final assembled message is used. The
        system prompt is sent as a single text block marked with ephemeral
        ``cache_control``.

        Makes at most ``_RATE_LIMIT_RETRIES`` attempts. Rate-limit errors wait
        ``60 * attempt_number`` seconds before the next attempt and transient
        errors (``_TRANSIENT_ERRORS``) wait ``5 * attempt_number`` seconds, so
        the back-off grows linearly. On the last attempt the error is
        re-raised.

        Args:
            system_prompt: Text sent as the system block.
            user_prompt: Text sent as the single user message.

        Returns:
            The ``text`` of the first content block of the response.

        Raises:
            PromptTooLargeError: The API rejected the request because it
                exceeded the context window; never retried.
            anthropic.BadRequestError: Any other 400 error, re-raised as is.
            anthropic.RateLimitError: Still rate limited on the last attempt.
            Exception: One of ``_TRANSIENT_ERRORS`` on the last attempt.
        """
        for attempt in range(_RATE_LIMIT_RETRIES):
            last_attempt = attempt == _RATE_LIMIT_RETRIES - 1
            try:
                with self.client.messages.stream(
                    model=self.model,
                    max_tokens=self.max_tokens,
                    system=[
                        {
                            "type": "text",
                            "text": system_prompt,
                            "cache_control": {"type": "ephemeral"},
                        }
                    ],
                    messages=[{"role": "user", "content": user_prompt}],
                ) as stream:
                    response = stream.get_final_message()
                if self.verbose:
                    usage = response.usage
                    # The cache counters are read with getattr and default to
                    # 0 when the usage object does not carry them.
                    print(
                        f"    llm: input={usage.input_tokens} "
                        f"cache_read={getattr(usage, 'cache_read_input_tokens', 0)} "
                        f"cache_write={getattr(usage, 'cache_creation_input_tokens', 0)} "
                        f"output={usage.output_tokens}",
                        file=sys.stderr,
                    )
                return response.content[0].text
            except anthropic.BadRequestError as e:
                # A context-window overflow cannot be fixed by retrying, so
                # surface it as a dedicated error; other 400s pass through.
                if _is_context_length_error(e):
                    raise PromptTooLargeError(str(e)) from e
                raise
            except anthropic.RateLimitError:
                if last_attempt:
                    raise
                wait = 60 * (attempt + 1)
                # NOTE(review): this notice goes to stdout while the other
                # diagnostics in this method use stderr -- confirm intended.
                print(f"Rate limited — waiting {wait}s before retry...", flush=True)
                time.sleep(wait)
            except _TRANSIENT_ERRORS as e:
                if last_attempt:
                    raise
                wait = 5 * (attempt + 1)
                print(
                    f"Transient API error ({type(e).__name__}) — retrying in {wait}s...",
                    file=sys.stderr,
                    flush=True,
                )
                time.sleep(wait)
        # Every iteration either returns or raises on the final attempt.
        raise AssertionError("unreachable")
def create_client(config: Config, verbose: bool = False) -> LLMClient:
    """Build the LLM client to use for ``config``.

    Currently this always constructs an ``AnthropicClient``, forwarding
    ``verbose`` to it as a keyword argument.
    """
    client: LLMClient = AnthropicClient(config, verbose=verbose)
    return client
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

llm.py

Latest commit

History

llm.py

File metadata and controls