/**
 * @vitest-environment node
 */
import type Anthropic from '@anthropic-ai/sdk'
import type { TextBlockParam } from '@anthropic-ai/sdk/resources'
import { describe, expect, it, vi } from 'vitest'
import { executeAnthropicProviderRequest } from '@/providers/anthropic/core'
import type { ProviderRequest } from '@/providers/types'

const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate
const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate

/**
 * Drives the real `executeAnthropicProviderRequest` down the streaming/no-tools
 * path and captures the request payload handed to `messages.create`, injecting
 * only the client via the `createClient` seam (real models/utils/attachments run).
 * The streaming path builds its stream lazily, so an empty async iterable suffices.
 *
 * @param request - Fields spread over the default streaming request
 *   (model, a single user message, API key, `stream: true`).
 * @returns The object passed to `messages.create`, or `{}` if it was never called.
 */
async function captureRequestPayload(
  request: Partial<ProviderRequest>
): Promise<Record<string, unknown>> {
  let captured: Record<string, unknown> = {}
  const fakeClient = {
    messages: {
      // Record the outgoing payload and answer with an empty async iterable.
      create: vi.fn(async (payload: Record<string, unknown>) => {
        captured = payload
        return (async function* () {})()
      }),
    },
  } as unknown as Anthropic

  await executeAnthropicProviderRequest(
    {
      model: 'claude-sonnet-4-6',
      messages: [{ role: 'user', content: 'hi' }],
      apiKey: 'test-key',
      stream: true,
      ...request,
    } as ProviderRequest,
    {
      providerId: 'anthropic',
      providerLabel: 'Anthropic',
      logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() } as never,
      createClient: () => fakeClient,
    }
  )

  return captured
}

describe('executeAnthropicProviderRequest prompt caching (request capture)', () => {
  it('emits a cache_control-tagged system block for a large system prompt', async () => {
    const payload = await captureRequestPayload({ systemPrompt: LARGE })

    expect(Array.isArray(payload.system)).toBe(true)
    const blocks = payload.system as TextBlockParam[]
    expect(blocks[0]).toMatchObject({ type: 'text', cache_control: { type: 'ephemeral' } })
  })

  it('leaves a small system prompt as a plain string (no cache_control)', async () => {
    const payload = await captureRequestPayload({ systemPrompt: SMALL })

    expect(typeof payload.system).toBe('string')
    expect(payload.system).toBe(SMALL)
  })
})
/**
 * Test fixture factory: returns a fresh tool definition carrying the given
 * name, a fixed description, and an empty object input schema.
 */
const tool = (name: string): Tool => {
  const definition: Tool = {
    name,
    description: 'does a thing',
    input_schema: { type: 'object', properties: {} },
  }
  return definition
}
+ expect(tools[0].cache_control).toBeUndefined() + expect(tools[1].cache_control).toEqual({ type: 'ephemeral' }) + }) + + it('tags the system block when the system alone is large and there are no tools', () => { + const payload: { system?: string | TextBlockParam[] } = { system: LARGE } + + applyAnthropicPromptCache(payload, undefined, LARGE) + + const blocks = payload.system as TextBlockParam[] + expect(blocks[0].cache_control).toEqual({ type: 'ephemeral' }) + }) + + it('tags the tools even when payload.system was relocated/blanked (gate uses the request prompt)', () => { + // No-messages path: the provider moves the system text into a user message + // and blanks payload.system, but the original prompt is large, so the tools + // prefix is still worth caching. + const payload: { system?: string | TextBlockParam[] } = { system: '' } + const tools = [tool('a')] + + applyAnthropicPromptCache(payload, tools, LARGE) + + expect(payload.system).toBe('') // empty system is never converted + expect(tools[0].cache_control).toEqual({ type: 'ephemeral' }) + }) + + it('caches when payload.system is large from appended schema text even if the request prompt is small', () => { + // Prompt-based structured output appends a large schema to payload.system, + // so the cacheable system block is large even though request.systemPrompt is small. 
/** Mutable view of the parts of the Anthropic payload that carry cache breakpoints. */
interface AnthropicCacheablePayload {
  system?: string | Array<TextBlockParam>
}

/**
 * Marks the static request prefix (system prompt + tools) with an ephemeral
 * cache breakpoint when {@link shouldCacheStaticPrefix} deems it worthwhile, so
 * repeated calls reuse the cached prefix. A no-op when the gate rejects the
 * prefix or there is nothing to tag.
 *
 * System handling:
 * - a non-empty string becomes a single cached text block;
 * - an existing block array gets the breakpoint on its last non-empty text
 *   block (assigned as a new array; the original array is not mutated);
 * - an empty or missing system is left untouched.
 *
 * Tool handling: the last entry of `tools` is replaced in place with a copy
 * that carries the breakpoint; earlier entries are untouched.
 *
 * Call after any structured-output mutation of `payload.system`, since this may
 * replace the string with a block array.
 *
 * The worthiness gate is sized on the LARGER of the final `payload.system`
 * text (which may include appended structured-output schema text) and the
 * original `systemPrompt` (non-empty even when the no-messages path relocates
 * the system text into a user message and blanks `payload.system` — the tools
 * prefix is still tagged there).
 *
 * @param payload - Anthropic request payload; `system` is reassigned in place.
 * @param tools - Anthropic tool definitions; the last entry is replaced in place.
 * @param systemPrompt - The original request system prompt, used only for sizing.
 */
export function applyAnthropicPromptCache(
  payload: AnthropicCacheablePayload,
  tools: Tool[] | undefined,
  systemPrompt: string | null | undefined
): void {
  const { system } = payload
  const systemBlocks = Array.isArray(system) ? system : undefined

  // Flatten whichever representation is present so the gate sees the real size.
  const payloadSystemText =
    typeof system === 'string' ? system : (systemBlocks ?? []).map((block) => block.text).join('')
  const requestSystemText = systemPrompt ?? ''

  // Ties go to the payload text, which is what would actually be cached.
  const gateSystem =
    payloadSystemText.length >= requestSystemText.length ? payloadSystemText : requestSystemText

  const shouldCache = shouldCacheStaticPrefix({
    systemPrompt: gateSystem,
    hasTools: !!tools?.length,
    toolsApproxChars: tools ? JSON.stringify(tools).length : 0,
  })
  if (!shouldCache) {
    return
  }

  if (systemBlocks) {
    // Tag the last block that has text; empty blocks are skipped just as an
    // empty string system is never converted.
    const targetIndex = systemBlocks.map((block) => block.text.length > 0).lastIndexOf(true)
    if (targetIndex >= 0) {
      payload.system = systemBlocks.map(
        (block, index): TextBlockParam =>
          index === targetIndex ? { ...block, cache_control: { type: 'ephemeral' } } : block
      )
    }
  } else if (payloadSystemText.length > 0) {
    payload.system = [
      { type: 'text', text: payloadSystemText, cache_control: { type: 'ephemeral' } },
    ]
  }

  if (tools?.length) {
    // Copy-on-tag: the tool object itself may be shared with the caller's request.
    const lastIndex = tools.length - 1
    tools[lastIndex] = { ...tools[lastIndex], cache_control: { type: 'ephemeral' } }
  }
}
/**
 * Minimum estimated static-prefix size (system + tool definitions) before it is
 * worth marking a prompt-cache breakpoint. This is a rough lower bound across
 * Claude models (some require more); below it, providers silently skip caching
 * anyway, so this only avoids spending a breakpoint on a trivially small prefix.
 */
const MIN_CACHEABLE_PREFIX_TOKENS = 1024

/** Characters-per-token ratio assumed by the rough estimator below. */
const APPROX_CHARS_PER_TOKEN = 4

/**
 * Rough token estimate for a character count (~4 chars/token) — fast and good
 * enough for a gate. Non-positive and NaN counts are treated as zero so a bad
 * size can never subtract from the prefix estimate.
 */
function estimateTokensFromChars(charCount: number): number {
  const safeCount = charCount > 0 ? charCount : 0
  return Math.ceil(safeCount / APPROX_CHARS_PER_TOKEN)
}

/** Rough token estimate for a piece of text (~4 chars/token). */
function estimateTokens(text: string): number {
  return estimateTokensFromChars(text.length)
}

/**
 * Decides whether to inject prompt-cache breakpoints on the static prefix
 * (system prompt + tool definitions) for providers that require explicit cache
 * control (Anthropic, Bedrock, and Anthropic models via OpenRouter).
 *
 * Caching only pays off when the prefix is large enough to be cacheable AND is
 * actually re-read: agent tool-loops re-send the prefix on every iteration, and
 * a large system prompt is typically reused across runs within the cache TTL.
 * A small, tool-less prompt is intentionally skipped so a one-shot call never
 * pays the cache-write surcharge for a prefix that is never read back.
 *
 * Rules, in order:
 * 1. No (or empty) system prompt → never cache.
 * 2. Estimated system + tool tokens below the minimum → never cache.
 * 3. Otherwise cache when tools are present, or when the system prompt alone
 *    reaches the minimum.
 *
 * @param params.systemPrompt - System text used for sizing; null/undefined/empty disables caching.
 * @param params.hasTools - Whether the request carries tool definitions.
 * @param params.toolsApproxChars - Approximate serialized size of the tools in
 *   characters; omitted, NaN, or non-positive values count as zero.
 * @returns `true` when the caller should add cache breakpoints.
 */
export function shouldCacheStaticPrefix(params: {
  systemPrompt: string | null | undefined
  hasTools: boolean
  toolsApproxChars?: number
}): boolean {
  const system = params.systemPrompt ?? ''
  if (system.length === 0) {
    return false
  }

  const systemTokens = estimateTokens(system)
  const toolTokens = estimateTokensFromChars(params.toolsApproxChars ?? 0)

  if (systemTokens + toolTokens < MIN_CACHEABLE_PREFIX_TOKENS) {
    return false
  }

  // Tools imply the prefix is re-sent; without them the system must qualify alone.
  return params.hasTools || systemTokens >= MIN_CACHEABLE_PREFIX_TOKENS
}