diff --git a/.changeset/chat-system-prompt-caching.md b/.changeset/chat-system-prompt-caching.md new file mode 100644 index 0000000000..7e5ba97dcd --- /dev/null +++ b/.changeset/chat-system-prompt-caching.md @@ -0,0 +1,25 @@ +--- +"@trigger.dev/sdk": patch +--- + +Cache your chat agent's system prompt with Anthropic prompt caching. `chat.toStreamTextOptions()` now emits the system prompt as a cacheable message when you opt in, so a large, stable system block is written to the cache once and then billed at cache-read rates on later turns (while the cache entry is still live) instead of full price on every turn. + +```ts +// at the streamText call site (Anthropic sugar) +streamText({ + ...chat.toStreamTextOptions({ cacheControl: { type: "ephemeral" } }), + messages, +}); + +// provider-agnostic equivalent +chat.toStreamTextOptions({ + systemProviderOptions: { anthropic: { cacheControl: { type: "ephemeral" } } }, +}); + +// or where the prompt is defined +chat.prompt.set(SYSTEM_PROMPT, { + providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } }, +}); +``` + +Without an option, `system` stays a plain string. Pairs with a `prepareMessages` cache breakpoint to cache the conversation prefix across turns too. diff --git a/packages/trigger-sdk/src/v3/ai.ts b/packages/trigger-sdk/src/v3/ai.ts index 0d0caf7c96..6d4368f814 100644 --- a/packages/trigger-sdk/src/v3/ai.ts +++ b/packages/trigger-sdk/src/v3/ai.ts @@ -42,6 +42,7 @@ import type { FinishReason, LanguageModelUsage, ModelMessage, + ProviderMetadata, Tool, ToolSet, UIMessage, @@ -3409,11 +3410,40 @@ export type ChatPromptValue = /** @internal */ const chatPromptKey = locals.create("chat.prompt"); +/** + * @internal Provider options attached to the system message that + * `toStreamTextOptions()` builds from the stored prompt — lets a provider cache + * the system block. Stored separately so it works for both the `ResolvedPrompt` + * and plain-string forms without mutating the prompt object. 
+ */ +const chatPromptProviderOptionsKey = locals.create( + "chat.prompt.providerOptions" +); + +/** + * Options for `chat.prompt.set()`. + */ +export type SetChatPromptOptions = { + /** + * Provider options attached to the system prompt so a provider can cache it. + * The most common use is an Anthropic prompt-cache breakpoint on the (large, + * stable) system block — see the prompt-caching guide. Carried through to + * `chat.toStreamTextOptions()` automatically; a `systemProviderOptions` / + * `cacheControl` passed there overrides this. + * + * @example + * chat.prompt.set(SYSTEM_PROMPT, { + * providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } }, + * }); + */ + providerOptions?: ProviderMetadata; +}; + /** * Store a resolved prompt (or plain string) for the current run. * Call from any hook (`onPreload`, `onChatStart`, `onTurnStart`) or `run()`. */ -function setChatPrompt(resolved: ResolvedPrompt | string): void { +function setChatPrompt(resolved: ResolvedPrompt | string, options?: SetChatPromptOptions): void { if (typeof resolved === "string") { locals.set(chatPromptKey, { text: resolved, @@ -3429,6 +3459,10 @@ function setChatPrompt(resolved: ResolvedPrompt | string): void { } else { locals.set(chatPromptKey, resolved); } + + // Always overwrite the slot (even with undefined) so a later prompt.set with + // no options clears a previous prompt's cache opt-in rather than leaking it. + locals.set(chatPromptProviderOptionsKey, options?.providerOptions); } /** @@ -3620,8 +3654,40 @@ export type ToStreamTextOptionsOptions = { * your tools here. */ tools?: Record; + /** + * Provider options attached to the system prompt so a provider can cache it. 
+ * When set (or when {@link cacheControl} or `chat.prompt.set`'s + * `providerOptions` is set), `system` is returned as a structured + * `SystemModelMessage` carrying these options instead of a plain string — + * letting providers like Anthropic apply prompt caching to the (large, + * stable) system block, which is the single highest-value cache target. + * + * Takes precedence over {@link cacheControl} and over any `providerOptions` set on `chat.prompt.set()`; the winning value is used as-is (no deep merge). + * + * @example + * chat.toStreamTextOptions({ + * systemProviderOptions: { anthropic: { cacheControl: { type: "ephemeral" } } }, + * }); + */ + systemProviderOptions?: ProviderMetadata; + /** + * Anthropic-only convenience for {@link systemProviderOptions}: caches the + * system prompt with the given cache breakpoint. Equivalent to + * `systemProviderOptions: { anthropic: { cacheControl } }`. For other + * providers (e.g. Amazon Bedrock's `cachePoint`), use `systemProviderOptions`. Ignored when `systemProviderOptions` is also passed. + * + * @example + * chat.toStreamTextOptions({ cacheControl: { type: "ephemeral" } }); + */ + cacheControl?: SystemCacheControl; }; +/** + * Anthropic prompt-cache breakpoint shape (`providerOptions.anthropic.cacheControl`). + * `ttl` defaults to the 5-minute cache; `"1h"` selects the 1-hour cache. + */ +export type SystemCacheControl = { type: "ephemeral"; ttl?: "5m" | "1h" }; + /** * Returns an options object ready to spread into `streamText()`. * @@ -3642,7 +3708,24 @@ function toStreamTextOptions(options?: ToStreamTextOptionsOptions): Record 0 ? buildSkillsSystemPrompt(skills) : ""; if (promptText || skillsText) { - result.system = [promptText, skillsText].filter(Boolean).join("\n\n"); + const systemText = [promptText, skillsText].filter(Boolean).join("\n\n"); + + // Resolve system-prompt provider options for caching. Precedence (most + // specific wins, no deep merge): explicit `systemProviderOptions` → + // `cacheControl` sugar → `providerOptions` stored on `chat.prompt.set()`. 
+ const systemProviderOptions: ProviderMetadata | undefined = + options?.systemProviderOptions ?? + (options?.cacheControl + ? ({ anthropic: { cacheControl: options.cacheControl } } as ProviderMetadata) + : undefined) ?? + locals.get(chatPromptProviderOptionsKey); + + // A bare string stays a bare string (the unchanged default). With provider + // options, emit a structured `SystemModelMessage` so the provider can cache + // the system block — `streamText`'s `system` accepts string | message. + result.system = systemProviderOptions + ? { role: "system", content: systemText, providerOptions: systemProviderOptions } + : systemText; } // Prompt-related options (only if chat.prompt.set() was called) @@ -10058,6 +10141,9 @@ export const chat = { * Store and retrieve a resolved prompt for the current run. * * - `chat.prompt.set(resolved)` — store a `ResolvedPrompt` or plain string + * - `chat.prompt.set(resolved, { providerOptions })` — also attach provider + * options to the system block so a provider can cache it (e.g. Anthropic + * prompt caching). See the prompt-caching guide. * - `chat.prompt()` — read the stored prompt (throws if not set) */ prompt: Object.assign(getChatPrompt, { set: setChatPrompt }), diff --git a/packages/trigger-sdk/test/promptCaching.test.ts b/packages/trigger-sdk/test/promptCaching.test.ts new file mode 100644 index 0000000000..b20bf01285 --- /dev/null +++ b/packages/trigger-sdk/test/promptCaching.test.ts @@ -0,0 +1,207 @@ +// Import the test harness FIRST so the resource catalog is installed +import { mockChatAgent } from "../src/v3/test/index.js"; + +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import type { LanguageModelV3StreamPart } from "@ai-sdk/provider"; +import { MockLanguageModelV3 } from "ai/test"; +import { simulateReadableStream, streamText } from "ai"; +import { chat } from "../src/v3/ai.js"; + +function userMessage(text: string, id?: string) { + return { + id: id ?? 
`u-${Math.random().toString(36).slice(2)}`, + role: "user" as const, + parts: [{ type: "text" as const, text }], + }; +} + +function textStream(text: string) { + const chunks: LanguageModelV3StreamPart[] = [ + { type: "text-start", id: "t1" }, + { type: "text-delta", id: "t1", delta: text }, + { type: "text-end", id: "t1" }, + { + type: "finish", + finishReason: { unified: "stop", raw: "stop" }, + usage: { + inputTokens: { total: 10, noCache: 10, cacheRead: undefined, cacheWrite: undefined }, + outputTokens: { total: 10, text: 10, reasoning: undefined }, + }, + }, + ]; + return simulateReadableStream({ chunks }); +} + +/** Capture the rendered system message handed to the provider. */ +type Captured = { system?: { role: string; content: unknown; providerOptions?: any } }; + +function makeModel(capture: Captured) { + return new MockLanguageModelV3({ + doStream: async (opts) => { + capture.system = opts.prompt.find((m) => m.role === "system") as Captured["system"]; + return { stream: textStream("ok") }; + }, + }); +} + +/** Poll until the mock model captures the system message (bounded), instead of a fixed sleep. 
*/ +async function waitForSystemCaptured(capture: Captured, timeoutMs = 1000, intervalMs = 5) { + const startedAt = Date.now(); + while (!capture.system) { + if (Date.now() - startedAt > timeoutMs) { + throw new Error("Timed out waiting for system message capture"); + } + await new Promise((r) => setTimeout(r, intervalMs)); + } +} + +const SYSTEM = "You are a helpful assistant for tests."; + +describe("chat prompt caching — system providerOptions", () => { + it("emits a plain system prompt with no providerOptions by default", async () => { + const cap: Captured = {}; + const model = makeModel(cap); + + const agent = chat.agent({ + id: "prompt-caching.default", + onChatStart: async () => { + chat.prompt.set(SYSTEM); + }, + run: async ({ messages, signal }) => + streamText({ model, messages, abortSignal: signal, ...chat.toStreamTextOptions() }), + }); + + const harness = mockChatAgent(agent, { chatId: "pc-default" }); + try { + await harness.sendMessage(userMessage("hi")); + await waitForSystemCaptured(cap); + expect(cap.system?.content).toContain("helpful assistant"); + expect(cap.system?.providerOptions).toBeUndefined(); + } finally { + await harness.close(); + } + }); + + it("attaches cacheControl via the toStreamTextOptions sugar", async () => { + const cap: Captured = {}; + const model = makeModel(cap); + + const agent = chat.agent({ + id: "prompt-caching.sugar", + onChatStart: async () => { + chat.prompt.set(SYSTEM); + }, + run: async ({ messages, signal }) => + streamText({ + model, + messages, + abortSignal: signal, + ...chat.toStreamTextOptions({ cacheControl: { type: "ephemeral" } }), + }), + }); + + const harness = mockChatAgent(agent, { chatId: "pc-sugar" }); + try { + await harness.sendMessage(userMessage("hi")); + await waitForSystemCaptured(cap); + expect(cap.system?.content).toContain("helpful assistant"); + expect(cap.system?.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral" }); + } finally { + await harness.close(); + } + }); + + 
it("attaches systemProviderOptions verbatim", async () => { + const cap: Captured = {}; + const model = makeModel(cap); + + const agent = chat.agent({ + id: "prompt-caching.explicit", + onChatStart: async () => { + chat.prompt.set(SYSTEM); + }, + run: async ({ messages, signal }) => + streamText({ + model, + messages, + abortSignal: signal, + ...chat.toStreamTextOptions({ + systemProviderOptions: { anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } } }, + }), + }), + }); + + const harness = mockChatAgent(agent, { chatId: "pc-explicit" }); + try { + await harness.sendMessage(userMessage("hi")); + await waitForSystemCaptured(cap); + expect(cap.system?.providerOptions?.anthropic?.cacheControl).toEqual({ + type: "ephemeral", + ttl: "1h", + }); + } finally { + await harness.close(); + } + }); + + it("carries providerOptions set on chat.prompt.set()", async () => { + const cap: Captured = {}; + const model = makeModel(cap); + + const agent = chat.agent({ + id: "prompt-caching.prompt-set", + onChatStart: async () => { + chat.prompt.set(SYSTEM, { + providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } }, + }); + }, + run: async ({ messages, signal }) => + streamText({ model, messages, abortSignal: signal, ...chat.toStreamTextOptions() }), + }); + + const harness = mockChatAgent(agent, { chatId: "pc-prompt-set" }); + try { + await harness.sendMessage(userMessage("hi")); + await waitForSystemCaptured(cap); + expect(cap.system?.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral" }); + } finally { + await harness.close(); + } + }); + + it("call-site systemProviderOptions overrides chat.prompt.set providerOptions", async () => { + const cap: Captured = {}; + const model = makeModel(cap); + + const agent = chat.agent({ + id: "prompt-caching.precedence", + onChatStart: async () => { + chat.prompt.set(SYSTEM, { + providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } }, + }); + }, + run: async ({ messages, signal }) => + 
streamText({ + model, + messages, + abortSignal: signal, + ...chat.toStreamTextOptions({ + systemProviderOptions: { anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } } }, + }), + }), + }); + + const harness = mockChatAgent(agent, { chatId: "pc-precedence" }); + try { + await harness.sendMessage(userMessage("hi")); + await waitForSystemCaptured(cap); + // The call-site option wins (ttl: "1h"), not the prompt-set default. + expect(cap.system?.providerOptions?.anthropic?.cacheControl).toEqual({ + type: "ephemeral", + ttl: "1h", + }); + } finally { + await harness.close(); + } + }); +});