-
Notifications
You must be signed in to change notification settings - Fork 131
Expand file tree
/
Copy pathllm.py
More file actions
129 lines (107 loc) · 4.56 KB
/
Copy pathllm.py
File metadata and controls
129 lines (107 loc) · 4.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from __future__ import annotations
import sys
import time
from abc import ABC, abstractmethod
import anthropic
import httpx
from felderize.config import Config
# Total number of attempts ``AnthropicClient.translate`` makes for one request
# (the first try included). Despite the name, the same budget bounds retries
# after transient connection/server errors as well as after rate limits.
_RATE_LIMIT_RETRIES = 5
class PromptTooLargeError(Exception):
    """Signal that a request did not fit in the model's context window.

    ``AnthropicClient.translate`` raises this in place of the underlying
    ``anthropic.BadRequestError``, which lets callers report an oversized
    program as an actionable "shorten the input" problem instead of an opaque
    API failure. The condition is permanent for a given input, so the request
    is never retried.
    """
def _is_context_length_error(error: anthropic.BadRequestError) -> bool:
"""Tell a context-window overflow apart from other 400 errors.
Anthropic reports an oversized request with a message such as
``prompt is too long: N tokens > M maximum``. Match that phrasing (and a few
near-synonyms) so genuine bad requests still surface unchanged.
"""
message = str(error).lower()
if (
"too long" in message
or "context window" in message
or "context length" in message
):
return True
return "maximum" in message and "token" in message
# Transient failures worth retrying: dropped or refused connections, timeouts,
# and 5xx responses. The raw httpx errors are listed alongside the SDK's own
# exception types because a streamed response can drop mid-body
# (RemoteProtocolError) without the SDK wrapping it. The tuple is used directly
# as an ``except`` target in ``AnthropicClient.translate``.
_TRANSIENT_ERRORS = (
    anthropic.APIConnectionError,
    anthropic.APITimeoutError,
    anthropic.InternalServerError,
    httpx.RemoteProtocolError,
    httpx.ReadError,
    httpx.ConnectError,
)
class LLMClient(ABC):
    """Backend-agnostic contract implemented by every LLM translation client."""

    @abstractmethod
    def translate(self, system_prompt: str, user_prompt: str) -> str:
        """Submit one translation request and return the model's raw reply.

        Concrete backends receive the system prompt and the user prompt as
        separate strings and must return the response text unprocessed.
        """
class AnthropicClient(LLMClient):
    """LLM client backed by the Anthropic API.

    Each request is streamed and retried on rate limits and transient
    connection/server failures. All diagnostics are written to stderr.
    """

    def __init__(self, config: Config, verbose: bool = False):
        """Create the SDK client and remember the request settings.

        ``config`` supplies ``api_key``, ``base_url``, ``model`` and
        ``max_tokens``. When ``verbose`` is true, token usage is printed to
        stderr after every successful request.
        """
        self.client = anthropic.Anthropic(
            api_key=config.api_key, base_url=config.base_url
        )
        self.model = config.model
        self.max_tokens = config.max_tokens
        self.verbose = verbose

    def translate(self, system_prompt: str, user_prompt: str) -> str:
        """Send a prompt to the Anthropic API and return the response text.

        Makes at most ``_RATE_LIMIT_RETRIES`` attempts in total. After a rate
        limit error it waits 60s times the attempt number before trying again;
        after a transient connection/server error it waits 5s times the attempt
        number. Retry notices go to stderr so they never mix with output the
        caller writes to stdout.

        Raises:
            PromptTooLargeError: The API rejected the request because it does
                not fit in the model's context window. Never retried.
            anthropic.BadRequestError: Any other 400 response, unchanged.
            anthropic.RateLimitError: The final attempt was still rate limited.
            Exception: One of ``_TRANSIENT_ERRORS``, re-raised unchanged when
                the final attempt fails with it.
        """
        final_attempt = _RATE_LIMIT_RETRIES - 1
        for attempt in range(_RATE_LIMIT_RETRIES):
            try:
                return self._request_once(system_prompt, user_prompt)
            except anthropic.BadRequestError as e:
                # Retrying a 400 cannot succeed; only translate the overflow
                # case into the clearer domain error.
                if _is_context_length_error(e):
                    raise PromptTooLargeError(str(e)) from e
                raise
            except anthropic.RateLimitError:
                if attempt == final_attempt:
                    raise
                wait = 60 * (attempt + 1)
                print(
                    f"Rate limited — waiting {wait}s before retry...",
                    file=sys.stderr,
                    flush=True,
                )
                time.sleep(wait)
            except _TRANSIENT_ERRORS as e:
                if attempt == final_attempt:
                    raise
                wait = 5 * (attempt + 1)
                print(
                    f"Transient API error ({type(e).__name__}) — retrying in {wait}s...",
                    file=sys.stderr,
                    flush=True,
                )
                time.sleep(wait)
        # Every iteration returns, raises, or sleeps and loops again, and the
        # last iteration always returns or raises. This line can only run if
        # _RATE_LIMIT_RETRIES is configured below 1.
        raise AssertionError("unreachable")

    def _request_once(self, system_prompt: str, user_prompt: str) -> str:
        """Perform a single streamed request and return the first block's text."""
        with self.client.messages.stream(
            model=self.model,
            max_tokens=self.max_tokens,
            system=[
                {
                    "type": "text",
                    "text": system_prompt,
                    # Mark the system prompt as cacheable for later requests.
                    "cache_control": {"type": "ephemeral"},
                }
            ],
            messages=[{"role": "user", "content": user_prompt}],
        ) as stream:
            response = stream.get_final_message()
        usage = response.usage
        if self.verbose:
            # getattr with a default tolerates usage objects that do not
            # carry the cache counters.
            print(
                f" llm: input={usage.input_tokens} "
                f"cache_read={getattr(usage, 'cache_read_input_tokens', 0)} "
                f"cache_write={getattr(usage, 'cache_creation_input_tokens', 0)} "
                f"output={usage.output_tokens}",
                file=sys.stderr,
            )
        return response.content[0].text
def create_client(config: Config, verbose: bool = False) -> LLMClient:
    """Build the LLM client to use for ``config``.

    Currently this always yields an ``AnthropicClient``; ``verbose`` is passed
    straight through to it.
    """
    client: LLMClient = AnthropicClient(config, verbose=verbose)
    return client