diff --git a/.changeset/agent-skills.md b/.changeset/agent-skills.md new file mode 100644 index 00000000000..5ed3b11fc2f --- /dev/null +++ b/.changeset/agent-skills.md @@ -0,0 +1,16 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +"@trigger.dev/build": patch +"trigger.dev": patch +--- + +Add Agent Skills for `chat.agent`. Drop a folder with a `SKILL.md` and any helper scripts/references next to your task code, register it with `skills.define({ id, path })`, and the CLI bundles it into the deploy image automatically — no `trigger.config.ts` changes. The agent gets a one-line summary in its system prompt and discovers full instructions on demand via `loadSkill`, with `bash` and `readFile` tools scoped per-skill (path-traversal guards, output caps, abort-signal propagation). + +```ts +const pdfSkill = skills.define({ id: "pdf-extract", path: "./skills/pdf-extract" }); + +chat.skills.set([await pdfSkill.local()]); +``` + +Built on the [AI SDK cookbook pattern](https://ai-sdk.dev/cookbook/guides/agent-skills) — portable across providers. SDK + CLI only for now; dashboard-editable `SKILL.md` text is on the roadmap. diff --git a/.changeset/ai-prompts.md b/.changeset/ai-prompts.md new file mode 100644 index 00000000000..511aa303097 --- /dev/null +++ b/.changeset/ai-prompts.md @@ -0,0 +1,52 @@ +--- +"@trigger.dev/sdk": minor +--- + +**AI Prompts** — define prompt templates as code alongside your tasks, version them on deploy, and override the text or model from the dashboard without redeploying. Prompts integrate with the Vercel AI SDK via `toAISDKTelemetry()` (links every generation span back to the prompt) and with `chat.agent` via `chat.prompt.set()` + `chat.toStreamTextOptions()`. + +```ts +import { prompts } from "@trigger.dev/sdk"; +import { generateText } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; + +export const supportPrompt = prompts.define({ + id: "customer-support", + model: "gpt-4o", + config: { temperature: 0.7 }, + variables: z.object({ + customerName: z.string(), + plan: z.string(), + issue: z.string(), + }), + content: `You are a support agent for Acme. + +Customer: {{customerName}} ({{plan}} plan) +Issue: {{issue}}`, +}); + +const resolved = await supportPrompt.resolve({ + customerName: "Alice", + plan: "Pro", + issue: "Can't access billing", +}); + +const result = await generateText({ + model: openai(resolved.model ?? "gpt-4o"), + system: resolved.text, + prompt: "Can't access billing", + ...resolved.toAISDKTelemetry(), +}); +``` + +**What you get:** + +- **Code-defined, deploy-versioned templates** — define with `prompts.define({ id, model, config, variables, content })`. Every deploy creates a new version visible in the dashboard. Mustache-style placeholders (`{{var}}`, `{{#cond}}...{{/cond}}`) with Zod / ArkType / Valibot-typed variables. +- **Dashboard overrides** — change a prompt's text or model from the dashboard without redeploying. Overrides take priority over the deployed "current" version and are environment-scoped (dev / staging / production independent). +- **Resolve API** — `prompt.resolve(vars, { version?, label? })` returns the compiled `text`, resolved `model`, `version`, and labels. Standalone `prompts.resolve(slug, vars)` for cross-file resolution with full type inference on slug and variable shape. +- **AI SDK integration** — spread `resolved.toAISDKTelemetry({ ...extra })` into any `generateText` / `streamText` call and every generation span links to the prompt in the dashboard alongside its input variables, model, tokens, and cost. +- **`chat.agent` integration** — `chat.prompt.set(resolved)` stores the resolved prompt run-scoped; `chat.toStreamTextOptions({ registry })` pulls `system`, `model` (resolved via the AI SDK provider registry), `temperature` / `maxTokens` / etc., and telemetry into a single spread for `streamText`. +- **Management SDK** — `prompts.list()`, `prompts.versions(slug)`, `prompts.promote(slug, version)`, `prompts.createOverride(slug, body)`, `prompts.updateOverride(slug, body)`, `prompts.removeOverride(slug)`, `prompts.reactivateOverride(slug, version)`. +- **Dashboard** — prompts list with per-prompt usage sparklines; per-prompt detail with Template / Details / Versions / Generations / Metrics tabs. AI generation spans get a custom inspector showing the linked prompt's metadata, input variables, and template content alongside model, tokens, cost, and the message thread. + +See [/docs/ai/prompts](https://trigger.dev/docs/ai/prompts) for the full reference — template syntax, version resolution order, override workflow, and type utilities (`PromptHandle`, `PromptIdentifier`, `PromptVariables`). diff --git a/.changeset/ai-sdk-7-support.md b/.changeset/ai-sdk-7-support.md new file mode 100644 index 00000000000..9f81c50973f --- /dev/null +++ b/.changeset/ai-sdk-7-support.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Adds AI SDK 7 support. The `ai` peer range now includes v7, and the `chat.agent` / chat surfaces work against v7's ESM-only build. On v7, install `@ai-sdk/otel` alongside `ai` and the SDK registers it for you so `experimental_telemetry` spans keep flowing into your run traces (v7 stopped emitting them from `ai` core). v5 and v6 keep working unchanged. diff --git a/.changeset/ai-tool-helpers.md b/.changeset/ai-tool-helpers.md new file mode 100644 index 00000000000..09e3b612ada --- /dev/null +++ b/.changeset/ai-tool-helpers.md @@ -0,0 +1,15 @@ +--- +"@trigger.dev/sdk": patch +--- + +Add `ai.toolExecute(task)` so you can wire a Trigger subtask in as the `execute` handler of an AI SDK `tool()` while defining `description` and `inputSchema` yourself — useful when you want full control over the tool surface and just need Trigger's subtask machinery for the body. + +```ts +const myTool = tool({ + description: "...", + inputSchema: z.object({ ... }), + execute: ai.toolExecute(mySubtask), +}); +``` + +`ai.tool(task)` (`toolFromTask`) keeps doing the all-in-one wrap and now aligns its return type with AI SDK's `ToolSet`. Minimum `ai` peer raised to `^6.0.116` to avoid cross-version `ToolSet` mismatches in monorepos. diff --git a/.changeset/backpressure-scale-up-freeze.md b/.changeset/backpressure-scale-up-freeze.md new file mode 100644 index 00000000000..b69fad0f262 --- /dev/null +++ b/.changeset/backpressure-scale-up-freeze.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Add optional `shouldPauseScaling` to the supervisor consumer pool scaling options to freeze scale-up while it returns true (scale-down stays allowed). diff --git a/.changeset/bundle-skills-single-pass.md b/.changeset/bundle-skills-single-pass.md new file mode 100644 index 00000000000..30b2c428b22 --- /dev/null +++ b/.changeset/bundle-skills-single-pass.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +Fix `chat.agent` skills silently missing in `trigger dev` for projects whose task files read `process.env` at module top level (e.g. a third-party SDK client initialized at import). Skill folders now bundle into `.trigger/skills/` reliably regardless of which env vars are set when the CLI launches. diff --git a/.changeset/cap-idempotency-key-length.md b/.changeset/cap-idempotency-key-length.md new file mode 100644 index 00000000000..d1360369148 --- /dev/null +++ b/.changeset/cap-idempotency-key-length.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Reject overlong `idempotencyKey` values at the API boundary so they no longer trip an internal size limit on the underlying unique index and surface as a generic 500. Inputs are capped at 2048 characters — well above what `idempotencyKeys.create()` produces (a 64-character hash) and above any realistic raw key. Applies to `tasks.trigger`, `tasks.batchTrigger`, `batch.create` (Phase 1 streaming batches), `wait.createToken`, `wait.forDuration`, and the input/session stream waitpoint endpoints. Over-limit requests now return a structured 400 instead. diff --git a/.changeset/chat-agent-hardening.md b/.changeset/chat-agent-hardening.md new file mode 100644 index 00000000000..0ea82c0617e --- /dev/null +++ b/.changeset/chat-agent-hardening.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Reliability fixes for `chat.agent`. A user message sent while the agent is streaming is no longer delivered twice (which could run a duplicate turn), input appends now carry an idempotency key so a retried send can't duplicate a message, stopping a generation clears the streaming state so a page reload doesn't replay the stopped turn, and runs can now carry the full set of dashboard tags instead of being silently truncated. `onTurnComplete` now fires on errored turns (with the thrown error attached) and the failed turn's user message is persisted so it isn't lost on the next run. Custom agents and manual `chat.writeTurnComplete` callers now trim the output stream, sending a custom action no longer leaves a second stream reader running, and a long-lived `watch` subscription no longer grows its dedupe set without bound. diff --git a/.changeset/chat-agent-on-boot-hook.md b/.changeset/chat-agent-on-boot-hook.md new file mode 100644 index 00000000000..5eaa078e65e --- /dev/null +++ b/.changeset/chat-agent-on-boot-hook.md @@ -0,0 +1,21 @@ +--- +"@trigger.dev/sdk": minor +--- + +Adds `onBoot` to `chat.agent` — a lifecycle hook that fires once per worker process picking up the chat. Runs for the initial run, preloaded runs, AND reactive continuation runs (post-cancel, crash, `endRun`, `requestUpgrade`, OOM retry), before any other hook. Use it to initialize `chat.local`, open per-process resources, or re-hydrate state from your DB on continuation — anywhere the SAME run picking up after suspend/resume isn't enough. + +```ts +const userContext = chat.local<{ name: string; plan: string }>({ id: "userContext" }); + +export const myChat = chat.agent({ + id: "my-chat", + onBoot: async ({ clientData, continuation }) => { + const user = await db.user.findUnique({ where: { id: clientData.userId } }); + userContext.init({ name: user.name, plan: user.plan }); + }, + run: async ({ messages, signal }) => + streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }), +}); +``` + +Use `onBoot` (not `onChatStart`) for state setup that must run every time a worker picks up the chat — `onChatStart` fires once per chat and won't run on continuation, leaving `chat.local` uninitialized when `run()` tries to use it. diff --git a/.changeset/chat-agent-tools.md b/.changeset/chat-agent-tools.md new file mode 100644 index 00000000000..1d44ea2a659 --- /dev/null +++ b/.changeset/chat-agent-tools.md @@ -0,0 +1,15 @@ +--- +"@trigger.dev/sdk": patch +--- + +Add a `tools` option to `chat.agent`. Declaring your tools here threads them into the SDK's internal `convertToModelMessages`, so each tool's `toModelOutput` is re-applied when prior-turn history is re-converted. + +```ts +chat.agent({ + tools: { readFile, search }, + run: async ({ messages, tools, signal }) => + streamText({ model, messages, tools, abortSignal: signal }), +}); +``` + +Also exports `InferChatUIMessageFromTools` to derive the chat `UIMessage` type (typed tool parts) directly from a tool set. diff --git a/.changeset/chat-agent.md b/.changeset/chat-agent.md new file mode 100644 index 00000000000..733a8ab22e4 --- /dev/null +++ b/.changeset/chat-agent.md @@ -0,0 +1,44 @@ +--- +"@trigger.dev/sdk": minor +"@trigger.dev/core": patch +--- + +**AI Agents** — run AI SDK chat completions as durable Trigger.dev agents instead of fragile API routes. Define an agent in one function, point `useChat` at it from React, and the conversation survives page refreshes, network blips, and process restarts. + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; + +export const myChat = chat.agent({ + id: "my-chat", + run: async ({ messages, signal }) => + streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }), +}); +``` + +```tsx +import { useChat } from "@ai-sdk/react"; +import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; + +const transport = useTriggerChatTransport({ task: "my-chat", accessToken, startSession }); +const { messages, sendMessage } = useChat({ transport }); +``` + +**What you get:** + +- **AI SDK `useChat` integration** — a custom [`ChatTransport`](https://sdk.vercel.ai/docs/ai-sdk-ui/transport) (`useTriggerChatTransport`) plugs straight into Vercel AI SDK's `useChat` hook. Text streaming, tool calls, reasoning, and `data-*` parts all work natively over Trigger.dev's realtime streams. No custom API routes needed. +- **First-turn fast path (`chat.headStart`)** — opt-in handler that runs the first turn's `streamText` step in your warm server process while the agent run boots in parallel, cutting cold-start TTFC by roughly half (measured 2801ms → 1218ms on `claude-sonnet-4-6`). The agent owns step 2+ (tool execution, persistence, hooks) so heavy deps stay where they belong. Web Fetch handler works natively in Next.js, Hono, SvelteKit, Remix, Workers, etc.; bridge to Express/Fastify/Koa via `chat.toNodeListener`. New `@trigger.dev/sdk/chat-server` subpath. +- **Multi-turn durability via Sessions** — every chat is backed by a durable Session that outlives any individual run. Conversations resume across page refreshes, idle timeout, crashes, and deploys; `resume: true` reconnects via `lastEventId` so clients only see new chunks. `sessions.list` enumerates chats for inbox-style UIs. +- **Auto-accumulated history, delta-only wire** — the backend accumulates the full conversation across turns; clients only ship the new message each turn. Long chats never hit the 512 KiB body cap. Register `hydrateMessages` to be the source of truth yourself. +- **Lifecycle hooks** — `onPreload`, `onChatStart`, `onValidateMessages`, `hydrateMessages`, `onTurnStart`, `onBeforeTurnComplete`, `onTurnComplete`, `onChatSuspend`, `onChatResume` — for persistence, validation, and post-turn work. +- **Stop generation** — client-driven `transport.stopGeneration(chatId)` aborts mid-stream; the run stays alive for the next message, partial response is captured, and aborted parts (stuck `partial-call` tools, in-progress reasoning) are auto-cleaned. +- **Tool approvals (HITL)** — tools with `needsApproval: true` pause until the user approves or denies via `addToolApprovalResponse`. The runtime reconciles the updated assistant message by ID and continues `streamText`. +- **Steering and background injection** — `pendingMessages` injects user messages between tool-call steps so users can steer the agent mid-execution; `chat.inject()` + `chat.defer()` adds context from background work (self-review, RAG, safety checks) between turns. +- **Actions** — non-turn frontend commands (undo, rollback, regenerate, edit) sent via `transport.sendAction`. Fire `hydrateMessages` + `onAction` only — no turn hooks, no `run()`. `onAction` can return a `StreamTextResult` for a model response, or `void` for side-effect-only. +- **Typed state primitives** — `chat.local` for per-run state accessible from hooks, `run()`, tools, and subtasks (auto-serialized through `ai.toolExecute`); `chat.store` for typed shared data between agent and client; `chat.history` for reading and mutating the message chain; `clientDataSchema` for typed `clientData` in every hook. +- **`chat.toStreamTextOptions()`** — one spread into `streamText` wires up versioned system [Prompts](https://trigger.dev/docs/ai/prompts), model resolution, telemetry metadata, compaction, steering, and background injection. +- **Multi-tab coordination** — `multiTab: true` + `useMultiTabChat` prevents duplicate sends and syncs state across browser tabs via `BroadcastChannel`. Non-active tabs go read-only with live updates. +- **Network resilience** — built-in indefinite retry with bounded backoff, reconnect on `online` / tab refocus / bfcache restore, `Last-Event-ID` mid-stream resume. No app code needed. + +See [/docs/ai-chat](https://trigger.dev/docs/ai-chat/overview) for the full surface — quick start, three backend approaches (`chat.agent`, `chat.createSession`, raw task), persistence and code-sandbox patterns, type-level guides, and API reference. diff --git a/.changeset/chat-boot-cursor.md b/.changeset/chat-boot-cursor.md new file mode 100644 index 00000000000..eb1b7a41c98 --- /dev/null +++ b/.changeset/chat-boot-cursor.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Continuation chat boots no longer stall for around 10 seconds before the first turn. The `session.in` resume cursor is now found with a non-blocking records read instead of draining an SSE long-poll (which always waited out its full 5 second inactivity window, twice per boot), the boot reads run concurrently, and chat snapshots carry the cursor so subsequent boots skip the scan entirely. diff --git a/.changeset/chat-headstart-hydrate.md b/.changeset/chat-headstart-hydrate.md new file mode 100644 index 00000000000..49e9bb926ee --- /dev/null +++ b/.changeset/chat-headstart-hydrate.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Fix `chat.headStart` when `hydrateMessages` is registered. The warm route's step-1 partial now reaches the agent's accumulator on the hydrate path, so `onTurnComplete` carries the full first turn (the head-start user message included), tool-call handovers resume from step 2 instead of re-running step 1, and the assistant `messageId` stays stable across the handover. diff --git a/.changeset/chat-headstart-reasoning.md b/.changeset/chat-headstart-reasoning.md new file mode 100644 index 00000000000..a53d388c561 --- /dev/null +++ b/.changeset/chat-headstart-reasoning.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Preserve reasoning parts across the `chat.headStart` handover. Extended-thinking models' step-1 reasoning now lands in the durable session history (and `onTurnComplete`) under the same assistant `messageId`, with provider metadata intact so Anthropic thinking signatures survive replays. diff --git a/.changeset/chat-history-read-primitives.md b/.changeset/chat-history-read-primitives.md new file mode 100644 index 00000000000..fd26ad8548b --- /dev/null +++ b/.changeset/chat-history-read-primitives.md @@ -0,0 +1,21 @@ +--- +"@trigger.dev/sdk": minor +--- + +Add read primitives to `chat.history` for HITL flows: `getPendingToolCalls()`, `getResolvedToolCalls()`, `extractNewToolResults(message)`, `getChain()`, and `findMessage(messageId)`. These lift the accumulator-walking logic that customers building human-in-the-loop tools were re-implementing into the SDK. + +Use `getPendingToolCalls()` to gate fresh user turns while a tool call is awaiting an answer. Use `extractNewToolResults(message)` to dedup tool results when persisting to your own store — the helper returns only the parts whose `toolCallId` is not already resolved on the chain. + +```ts +const pending = chat.history.getPendingToolCalls(); +if (pending.length > 0) { + // an addToolOutput is expected before a new user message +} + +onTurnComplete: async ({ responseMessage }) => { + const newResults = chat.history.extractNewToolResults(responseMessage); + for (const r of newResults) { + await db.toolResults.upsert({ id: r.toolCallId, output: r.output, errorText: r.errorText }); + } +}; +``` diff --git a/.changeset/chat-session-attributes.md b/.changeset/chat-session-attributes.md new file mode 100644 index 00000000000..ec4c6a54076 --- /dev/null +++ b/.changeset/chat-session-attributes.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Stamp `gen_ai.conversation.id` (the chat id) on every span and metric emitted from inside a `chat.task` or `chat.agent` run. Lets you filter dashboard spans, runs, and metrics by the chat conversation that produced them — independent of the run boundary, so multi-run chats correlate cleanly. No code changes required on the user side. diff --git a/.changeset/chat-slim-wire-merge.md b/.changeset/chat-slim-wire-merge.md new file mode 100644 index 00000000000..19ea48a8cdd --- /dev/null +++ b/.changeset/chat-slim-wire-merge.md @@ -0,0 +1,31 @@ +--- +"@trigger.dev/sdk": patch +--- + +Fix `chat.agent` HITL continuations on reasoning-heavy turns. Two changes that work together: + +- The per-turn merge now overlays the wire copy's tool-part state advancement onto the agent's existing chain — `state` + the matching resolution field (`output` / `errorText` / `approval`) come from the wire, everything else (text, reasoning, tool `input`, provider metadata) stays whatever the snapshot or `hydrateMessages` returned. Previously a full-message replace overwrote those fields with whatever the client shipped, so a slimmed wire copy landed a tool call with no `arguments` on the next LLM call. Covers `output-available` / `output-error` (HITL `addToolOutput`) and `approval-responded` / `output-denied` (approval flow). +- `TriggerChatTransport.sendMessages` and `AgentChat.sendRaw` now slim assistant messages that carry advanced tool parts. The wire payload is just `{ id, role, parts: [] }` for `submit-message` continuations; everything else passes through. Reasoning blobs and full tool inputs no longer ride the wire on every `addToolOutput` / `addToolApproveResponse`, so continuation payloads stay well under the `.in/append` cap on long agent loops. + +Note: `onValidateMessages` receives the slim wire on HITL turns. If you call `validateUIMessages` from `ai` against the full `messages` array it will reject the slim assistant; filter to user messages (or skip on HITL turns) — see the updated docstring on `onValidateMessages` for the recommended pattern. + +For `hydrateMessages` hooks that persist the chain, this release also adds a small helper to the `@trigger.dev/sdk/ai` surface: + +```ts +import { chat, upsertIncomingMessage } from "@trigger.dev/sdk/ai"; + +chat.agent({ + hydrateMessages: async ({ chatId, trigger, incomingMessages }) => { + const record = await db.chat.findUnique({ where: { id: chatId } }); + const stored = record?.messages ?? []; + if (upsertIncomingMessage(stored, { trigger, incomingMessages })) { + await db.chat.update({ where: { id: chatId }, data: { messages: stored } }); + } + return stored; + }, +}); +``` + +It pushes fresh user messages by id, no-ops on HITL continuations (the incoming shares an id with the existing assistant — the runtime overlays the new tool-state advance), and skips on non-`submit-message` triggers. Returns `true` if it mutated `stored` so the caller knows whether to persist. + +Net effect: `chat.addToolOutput(...)` / `chat.addToolApproveResponse(...)` on multi-step reasoning agents (OpenAI Responses with `store: false`, Anthropic extended thinking, etc.) no longer blows the cap and no longer corrupts the LLM input. diff --git a/.changeset/chat-start-session-action-typed-client-data.md b/.changeset/chat-start-session-action-typed-client-data.md new file mode 100644 index 00000000000..acd75037caf --- /dev/null +++ b/.changeset/chat-start-session-action-typed-client-data.md @@ -0,0 +1,22 @@ +--- +"@trigger.dev/sdk": patch +--- + +Type `chat.createStartSessionAction` against your chat agent so `clientData` is typed end-to-end on the first turn: + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import type { myChat } from "@/trigger/chat"; + +export const startChatSession = chat.createStartSessionAction("my-chat"); + +// In the browser, threaded from the transport's typed startSession callback: +const transport = useTriggerChatTransport({ + task: "my-chat", + startSession: ({ chatId, clientData }) => + startChatSession({ chatId, clientData }), + // ... +}); +``` + +`ChatStartSessionParams` gains a typed `clientData` field — folded into the first run's `payload.metadata` so `onPreload` / `onChatStart` see the same shape per-turn `metadata` carries via the transport. The opaque session-level `metadata` field is unchanged. diff --git a/.changeset/chat-transport-recreate-missing-session.md b/.changeset/chat-transport-recreate-missing-session.md new file mode 100644 index 00000000000..dba916e9442 --- /dev/null +++ b/.changeset/chat-transport-recreate-missing-session.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +`useTriggerChatTransport` now recovers when restored session state points at a session that no longer exists in the current environment diff --git a/.changeset/cli-deploy-skip-rewrite-timestamp.md b/.changeset/cli-deploy-skip-rewrite-timestamp.md new file mode 100644 index 00000000000..60e82732dce --- /dev/null +++ b/.changeset/cli-deploy-skip-rewrite-timestamp.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +Add `TRIGGER_BUILD_SKIP_REWRITE_TIMESTAMP=1` escape hatch for local self-hosted builds whose buildx driver doesn't support `rewrite-timestamp` alongside push (e.g. orbstack's default `docker` driver). diff --git a/.changeset/cli-init-ai-tooling.md b/.changeset/cli-init-ai-tooling.md new file mode 100644 index 00000000000..a7a4c8be14a --- /dev/null +++ b/.changeset/cli-init-ai-tooling.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +`trigger init` now sets up your AI coding assistant as part of project setup: pick the MCP server, the agent skills, or both, then scaffold with the CLI or hand off to your assistant. Adds a new `getting-started` agent skill that teaches assistants how to bootstrap Trigger.dev (install the SDK, write `trigger.config.ts`, create a first task, run `trigger dev`), so the AI-driven setup path works end to end. It ships in the CLI alongside the existing skills, version-matched to your SDK. diff --git a/.changeset/coerce-concurrency-key-to-string.md b/.changeset/coerce-concurrency-key-to-string.md new file mode 100644 index 00000000000..faccf7a48bf --- /dev/null +++ b/.changeset/coerce-concurrency-key-to-string.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Coerce numeric `concurrencyKey` values to string at the API boundary across `tasks.trigger`, `tasks.batchTrigger`, and the Phase-2 streaming batch endpoint. diff --git a/.changeset/dequeue-latency-histogram.md b/.changeset/dequeue-latency-histogram.md new file mode 100644 index 00000000000..0b69b8c98a9 --- /dev/null +++ b/.changeset/dequeue-latency-histogram.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Record client-side dequeue API latency in the supervisor consumer pool as a Prometheus histogram (`queue_consumer_pool_dequeue_duration_seconds`, labelled by `outcome`: success/empty/error). diff --git a/.changeset/duplicate-task-ids.md b/.changeset/duplicate-task-ids.md new file mode 100644 index 00000000000..c68724bfedd --- /dev/null +++ b/.changeset/duplicate-task-ids.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/core": patch +"trigger.dev": patch +--- + +`dev` and `deploy` now fail with a clear error when two tasks are defined with the same id, including across different task types (e.g. a scheduled task and a regular task sharing an id). Previously the second definition silently overwrote the first, so one of the tasks would vanish with no warning. Task ids are detected as duplicates during indexing (naming each offending id and the files it was found in), and the same rule is enforced server-side when the background worker is registered. diff --git a/.changeset/env-vars-tracing-forceflush-typecheck.md b/.changeset/env-vars-tracing-forceflush-typecheck.md new file mode 100644 index 00000000000..9d90c0383d7 --- /dev/null +++ b/.changeset/env-vars-tracing-forceflush-typecheck.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Fix `@trigger.dev/core` build: cast the underlying log record exporter when calling `forceFlush` so it typechecks against the updated OpenTelemetry `LogRecordExporter` type (which no longer declares `forceFlush`). diff --git a/.changeset/envvars-import-is-secret.md b/.changeset/envvars-import-is-secret.md new file mode 100644 index 00000000000..5fbe70f43ae --- /dev/null +++ b/.changeset/envvars-import-is-secret.md @@ -0,0 +1,12 @@ +--- +"@trigger.dev/core": patch +--- + +`envvars.upload` now accepts an optional `isSecret` flag, letting you create the imported variables as secret (redacted) environment variables. When omitted, variables default to non-secret. + +```ts +await envvars.upload("proj_1234", "prod", { + variables: { STRIPE_SECRET_KEY: "sk_live_..." }, + isSecret: true, +}); +``` diff --git a/.changeset/large-trigger-payload-offload.md b/.changeset/large-trigger-payload-offload.md new file mode 100644 index 00000000000..e8d87947166 --- /dev/null +++ b/.changeset/large-trigger-payload-offload.md @@ -0,0 +1,8 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +--- + +Offload large trigger payloads to object storage before sending the trigger API request. The SDK uploads packets at or above the existing 128KB limit and sends an `application/store` pointer instead of embedding large JSON in the request body. `TriggerTaskRequestBody` now validates that `application/store` payloads are non-empty storage paths. + +Payload uploads use the same resolved `ApiClient` as the trigger call (including `requestOptions.clientConfig`), not only the global `apiClientManager.client` — so custom `baseURL`, access token, and preview branch apply to both presign and trigger. diff --git a/.changeset/locals-key-dual-package-fix.md b/.changeset/locals-key-dual-package-fix.md new file mode 100644 index 00000000000..38d42e19dfb --- /dev/null +++ b/.changeset/locals-key-dual-package-fix.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Fix `LocalsKey` type incompatibility across dual-package builds. The phantom value-type brand no longer uses a module-level `unique symbol`, so a single TypeScript compilation that resolves the type from both the ESM and CJS outputs (which can happen under certain pnpm hoisting layouts) no longer sees two structurally-incompatible variants of the same type. diff --git a/.changeset/mcp-agent-chat-sessions.md b/.changeset/mcp-agent-chat-sessions.md new file mode 100644 index 00000000000..c3f01aebf28 --- /dev/null +++ b/.changeset/mcp-agent-chat-sessions.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +The CLI MCP server's agent-chat tools (`start_agent_chat`, `send_agent_message`, `close_agent_chat`) now run on the new Sessions primitive, so AI assistants driving a `chat.agent` get the same idempotent-by-`chatId`, durable-across-runs behavior the browser transport gets. Required PAT scopes go from `write:inputStreams` to `read:sessions` + `write:sessions`. diff --git a/.changeset/mcp-list-runs-region.md b/.changeset/mcp-list-runs-region.md new file mode 100644 index 00000000000..b72cfb23c97 --- /dev/null +++ b/.changeset/mcp-list-runs-region.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +MCP `list_runs` tool: add a `region` filter input and surface each run's executing region in the formatted summary. diff --git a/.changeset/mcp-trigger-task-no-default-wait.md b/.changeset/mcp-trigger-task-no-default-wait.md new file mode 100644 index 00000000000..396c68bd005 --- /dev/null +++ b/.changeset/mcp-trigger-task-no-default-wait.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +The MCP server no longer tells the AI agent to wait for a run to complete after every `trigger_task` call. Waiting is now opt-in: the agent only waits when you ask it to (for example "trigger and then wait for it to finish"). This avoids burning tokens polling runs you didn't need to block on and keeps responses clearer. diff --git a/.changeset/mock-chat-agent-test-harness.md b/.changeset/mock-chat-agent-test-harness.md new file mode 100644 index 00000000000..9876e56a9f7 --- /dev/null +++ b/.changeset/mock-chat-agent-test-harness.md @@ -0,0 +1,8 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Unit-test `chat.agent` definitions offline with `mockChatAgent` from `@trigger.dev/sdk/ai/test`. Drives a real agent's turn loop in-process — no network, no task runtime — so you can send messages, actions, and stop signals via driver methods, inspect captured output chunks, and verify hooks fire. Pairs with `MockLanguageModelV3` from `ai/test` for model mocking. `setupLocals` lets you pre-seed `locals` (DB clients, service stubs) before `run()` starts. + +The broader `runInMockTaskContext` harness it's built on lives at `@trigger.dev/core/v3/test` — useful for unit-testing any task code, not just chat. diff --git a/.changeset/mollifier-buffer-pipeline-list-entries.md b/.changeset/mollifier-buffer-pipeline-list-entries.md new file mode 100644 index 00000000000..2c55d9b18a8 --- /dev/null +++ b/.changeset/mollifier-buffer-pipeline-list-entries.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/redis-worker": patch +--- + +Pipeline the per-entry `HGETALL` fetches in `MollifierBuffer.listEntriesForEnv`. The previous serial implementation issued one Redis round-trip per runId returned by `LRANGE`, which dominated stale-sweep wall-time at any meaningful backlog (at the sweep's default maxCount=1000, this is ~1000 RTTs per env per pass). Behaviour is unchanged — entries are still skipped when the entry hash has been torn down by a concurrent drainer ack/fail between the LRANGE and the HGETALL. diff --git a/.changeset/mollifier-configurable-constants.md b/.changeset/mollifier-configurable-constants.md new file mode 100644 index 00000000000..e9943e9b190 --- /dev/null +++ b/.changeset/mollifier-configurable-constants.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/redis-worker": patch +--- + +Make mollifier buffer and drainer internals configurable. `MollifierBuffer` now accepts `ackGraceTtlSeconds`, `maxRetriesPerRequest`, `reconnectStepMs`, and `reconnectMaxMs` options, and `MollifierDrainer` accepts `maxBackoffMs` and `backoffFloorMs`. All default to their previous hardcoded values, so existing behaviour is unchanged. diff --git a/.changeset/mollifier-drain-batch-size.md b/.changeset/mollifier-drain-batch-size.md new file mode 100644 index 00000000000..9e848b5011d --- /dev/null +++ b/.changeset/mollifier-drain-batch-size.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/redis-worker": patch +--- + +`MollifierDrainer` accepts a `drainBatchSize` option (default 1) that controls how many entries are popped per env per tick — in-flight handlers remain capped by the global `concurrency`. `MollifierBuffer` also gains `getDrainingCount()` / `listStaleDraining()`, backed by a new `mollifier:draining` ZSET maintained atomically with pop/ack/fail/requeue (observability-only). diff --git a/.changeset/mollifier-redis-worker-primitives.md b/.changeset/mollifier-redis-worker-primitives.md new file mode 100644 index 00000000000..a209e530c24 --- /dev/null +++ b/.changeset/mollifier-redis-worker-primitives.md @@ -0,0 +1,9 @@ +--- +"@trigger.dev/redis-worker": patch +--- + +Add MollifierBuffer and MollifierDrainer primitives for trigger burst smoothing. + +MollifierBuffer (`accept`, `pop`, `ack`, `requeue`, `fail`, `evaluateTrip`) is a per-env FIFO over Redis with atomic Lua transitions for status tracking. `evaluateTrip` is a sliding-window trip evaluator the webapp gate uses to detect per-env trigger bursts. + +MollifierDrainer pops entries through a polling loop with a user-supplied handler. The loop survives transient Redis errors via capped exponential backoff (up to 5s), and per-env pop failures don't poison the rest of the batch — one env's blip is logged and counted as failed for that tick. Rotation is two-level: orgs at the top, envs within each org. The buffer maintains `mollifier:orgs` and `mollifier:org-envs:${orgId}` atomically with per-env queues, so the drainer walks orgs → envs directly without an in-memory cache. The `maxOrgsPerTick` option (default 500) caps how many orgs are scheduled per tick; for each picked org, one env is popped (rotating round-robin within the org). An org with N envs gets the same per-tick scheduling slot as an org with 1 env, so tenant-level drainage throughput is determined by org count rather than env count. diff --git a/.changeset/mollifier-tag-cap.md b/.changeset/mollifier-tag-cap.md new file mode 100644 index 00000000000..b9057664fa7 --- /dev/null +++ b/.changeset/mollifier-tag-cap.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/redis-worker": patch +--- + +Mollifier `mutateSnapshot` now enforces a tag cap: an `append_tags` patch carrying `maxTags` returns `"limit_exceeded"` (writing nothing) when the deduped tag count would exceed the limit, so a buffered run can't accumulate more tags via the tags API than the trigger validator allows at creation. diff --git a/.changeset/otel-suite-0218.md b/.changeset/otel-suite-0218.md new file mode 100644 index 00000000000..38b71ceeec1 --- /dev/null +++ b/.changeset/otel-suite-0218.md @@ -0,0 +1,7 @@ +--- +"@trigger.dev/core": patch +"trigger.dev": patch +"@trigger.dev/sdk": patch +--- + +Update the bundled OpenTelemetry packages to their latest releases (`@opentelemetry/sdk-node` 0.218.0, `@opentelemetry/core` 2.7.1, `@opentelemetry/host-metrics` 0.38.3). diff --git a/.changeset/plugin-auth-path.md b/.changeset/plugin-auth-path.md new file mode 100644 index 00000000000..7ce08b71a33 --- /dev/null +++ b/.changeset/plugin-auth-path.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/plugins": patch +--- + +The public interfaces for a plugin system. Initially consolidated authentication and authorization interfaces. diff --git a/.changeset/pre.json b/.changeset/pre.json new file mode 100644 index 00000000000..818658e8972 --- /dev/null +++ b/.changeset/pre.json @@ -0,0 +1,64 @@ +{ + "mode": "pre", + "tag": "rc", + "initialVersions": { + "coordinator": "0.0.1", + "docker-provider": "0.0.1", + "kubernetes-provider": "0.0.1", + "supervisor": "0.0.1", + "webapp": "1.0.0", + "@trigger.dev/build": "4.4.6", + "trigger.dev": "4.4.6", + "@trigger.dev/core": "4.4.6", + "@trigger.dev/plugins": "4.4.6", + "@trigger.dev/python": "4.4.6", + "@trigger.dev/react-hooks": "4.4.6", + "@trigger.dev/redis-worker": "4.4.6", + "@trigger.dev/rsc": "4.4.6", + "@trigger.dev/schema-to-json": "4.4.6", + "@trigger.dev/sdk": "4.4.6" + }, + "changesets": [ + "agent-skills", + "ai-prompts", + "ai-sdk-7-support", + "ai-tool-helpers", + "backpressure-scale-up-freeze", + "bundle-skills-single-pass", + "cap-idempotency-key-length", + "chat-agent-on-boot-hook", + "chat-agent-tools", + "chat-agent", + "chat-history-read-primitives", + "chat-session-attributes", + "chat-slim-wire-merge", + "chat-start-session-action-typed-client-data", + "chat-transport-recreate-missing-session", + "cli-deploy-skip-rewrite-timestamp", + "coerce-concurrency-key-to-string", + "env-vars-tracing-forceflush-typecheck", + "envvars-import-is-secret", + "large-trigger-payload-offload", + "locals-key-dual-package-fix", + "mcp-agent-chat-sessions", + "mcp-list-runs-region", + "mcp-trigger-task-no-default-wait", + "mock-chat-agent-test-harness", + "mollifier-buffer-pipeline-list-entries", + "mollifier-configurable-constants", + "mollifier-drain-batch-size", + "mollifier-redis-worker-primitives", + "mollifier-tag-cap", + "otel-suite-0218", + "plugin-auth-path", + "resource-catalog-runtime-registration", + "retry-middleware-errors", + "retry-sigsegv", + "runs-list-region-filter", + "s2-batch-transform-linger-fix", + "sessions-primitive", + "trigger-client", + "unflatten-attributes-conflict", + "warm-start-external-trace-context-leak" + ] +} diff --git a/.changeset/project-environments-endpoint.md b/.changeset/project-environments-endpoint.md new file mode 100644 index 00000000000..9059c548245 --- /dev/null +++ b/.changeset/project-environments-endpoint.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Add `GetProjectEnvironmentsResponseBody` and `ProjectEnvironment` schemas for the new `GET /api/v1/projects/{projectRef}/environments` endpoint, which lists the parent environments (dev, staging, preview, prod) a personal access token can access for a project. Dev is scoped to the token owner and branch (preview child) environments are excluded. diff --git a/.changeset/resource-catalog-runtime-registration.md b/.changeset/resource-catalog-runtime-registration.md new file mode 100644 index 00000000000..5046f09e1f1 --- /dev/null +++ b/.changeset/resource-catalog-runtime-registration.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/core": patch +"trigger.dev": patch +--- + +Fix `COULD_NOT_FIND_EXECUTOR` when a task's definition is loaded via `await import(...)` from inside another task's `run()`. The runtime workers now register such tasks with a sentinel file context, and the catalog logs a one-time warning per task id. diff --git a/.changeset/retry-middleware-errors.md b/.changeset/retry-middleware-errors.md new file mode 100644 index 00000000000..2267b4d724c --- /dev/null +++ b/.changeset/retry-middleware-errors.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Retry `TASK_MIDDLEWARE_ERROR` under the task's retry policy instead of failing the run on the first attempt. The error was already classified as retryable by `shouldRetryError`, but `shouldLookupRetrySettings` did not include it, so the retry flow fell through to `fail_run`. Fixes #3231. diff --git a/.changeset/retry-sigsegv.md b/.changeset/retry-sigsegv.md new file mode 100644 index 00000000000..5a53c351efe --- /dev/null +++ b/.changeset/retry-sigsegv.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Retry `TASK_PROCESS_SIGSEGV` task crashes under the user's retry policy instead of failing the run on the first segfault. SIGSEGV in Node tasks is frequently non-deterministic (native addon races, JIT/GC interaction, near-OOM in native code, host issues), so retrying on a fresh process often succeeds. The retry is gated by the task's existing `retry` config + `maxAttempts` — same path `TASK_PROCESS_SIGTERM` and uncaught exceptions already use — so tasks without a retry policy still fail fast. diff --git a/.changeset/runs-list-region-filter.md b/.changeset/runs-list-region-filter.md new file mode 100644 index 00000000000..c487e2d632c --- /dev/null +++ b/.changeset/runs-list-region-filter.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +--- + +Add `region` to the runs list / retrieve API: filter runs by region (`runs.list({ region: "..." })` / `filter[region]=`) and read each run's executing region from the new `region` field on the response. diff --git a/.changeset/s2-batch-transform-linger-fix.md b/.changeset/s2-batch-transform-linger-fix.md new file mode 100644 index 00000000000..f1e9bab34aa --- /dev/null +++ b/.changeset/s2-batch-transform-linger-fix.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/core": patch +"trigger.dev": patch +--- + +Bump `@s2-dev/streamstore` to `0.22.10` to fix a `TASK_RUN_UNCAUGHT_EXCEPTION` ("Invalid state: Unable to enqueue") when a `chat.agent` turn is aborted mid-stream. diff --git a/.changeset/sessions-primitive.md b/.changeset/sessions-primitive.md new file mode 100644 index 00000000000..79a6ca48f65 --- /dev/null +++ b/.changeset/sessions-primitive.md @@ -0,0 +1,26 @@ +--- +"@trigger.dev/sdk": minor +"@trigger.dev/core": patch +--- + +**Sessions** — a durable, run-aware stream channel keyed on a stable `externalId`. A Session is the unit of state that owns a multi-run conversation: messages flow through `.in`, responses through `.out`, both survive run boundaries. Sessions back the new `chat.agent` runtime, and you can build on them directly for any pattern that needs durable bi-directional streaming across runs. + +```ts +import { sessions, tasks } from "@trigger.dev/sdk"; + +// Trigger a task and subscribe to its session output in one call +const { runId, stream } = await tasks.triggerAndSubscribe("my-task", payload, { + externalId: "user-456", +}); + +for await (const chunk of stream) { + // ... +} + +// Enumerate existing sessions (powers inbox-style UIs without a separate index) +for await (const s of sessions.list({ type: "chat.agent", tag: "user:user-456" })) { + console.log(s.id, s.externalId, s.createdAt, s.closedAt); +} +``` + +See [/docs/ai-chat/overview](https://trigger.dev/docs/ai-chat/overview) for the full surface — Sessions powers the durable, resumable chat runtime described there. diff --git a/.changeset/trigger-client.md b/.changeset/trigger-client.md new file mode 100644 index 00000000000..75699471ba2 --- /dev/null +++ b/.changeset/trigger-client.md @@ -0,0 +1,18 @@ +--- +"@trigger.dev/sdk": patch +--- + +Add `TriggerClient` for running multiple SDK clients side-by-side, each with its own auth, preview branch, and baseURL. Useful when a single process needs to trigger tasks or read runs across multiple projects, environments, or preview branches without mutating shared global state. + +```ts +import { TriggerClient } from "@trigger.dev/sdk"; + +const prod = new TriggerClient({ accessToken: process.env.TRIGGER_PROD_KEY }); +const preview = new TriggerClient({ + accessToken: process.env.TRIGGER_PREVIEW_KEY, + previewBranch: "signup-flow", +}); + +await prod.tasks.trigger("send-email", payload); +await preview.runs.list({ status: ["COMPLETED"] }); +``` diff --git a/.changeset/trigger-skills-installer.md b/.changeset/trigger-skills-installer.md new file mode 100644 index 00000000000..4e05b125919 --- /dev/null +++ b/.changeset/trigger-skills-installer.md @@ -0,0 +1,11 @@ +--- +"trigger.dev": patch +--- + +`trigger skills` installs Trigger.dev agent skills into your coding agent so it knows how to write tasks, schedules, realtime, and chat.agent code. The skills ship with the CLI and are copied into each tool's native skills directory (Claude Code, Cursor, GitHub Copilot, and Codex / AGENTS.md), and `trigger dev` offers to install them on first run. + +```bash +trigger skills --target claude-code +``` + +Replaces the previous `install-rules` command, which stays as an alias. diff --git a/.changeset/unflatten-attributes-conflict.md b/.changeset/unflatten-attributes-conflict.md new file mode 100644 index 00000000000..9df627f2630 --- /dev/null +++ b/.changeset/unflatten-attributes-conflict.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Fix `TypeError` in `unflattenAttributes` when the input attribute map contains conflicting dotted key paths (e.g. both `a.b` set to a scalar and `a.b.c` set to a value). The path-walk loop now applies last-write-wins when a prior key wrote a primitive, null, or array at an intermediate slot, matching the existing precedent in `AttributeFlattener.addAttribute`. Callers no longer crash when handed malformed external attribute inputs. diff --git a/.changeset/warm-start-external-trace-context-leak.md b/.changeset/warm-start-external-trace-context-leak.md new file mode 100644 index 00000000000..84f91de7689 --- /dev/null +++ b/.changeset/warm-start-external-trace-context-leak.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Fix external trace context leaking across runs on warm-started workers with `processKeepAlive` enabled. Every subsequent run's attempt span was being exported with the first run's `traceId` and `parentSpanId`, breaking causal-chain navigation in external APM tools. Runs without an external trace context are unaffected. diff --git a/.claude/REVIEW.md b/.claude/REVIEW.md new file mode 100644 index 00000000000..19edf00a52e --- /dev/null +++ b/.claude/REVIEW.md @@ -0,0 +1,50 @@ +# REVIEW.md — Trigger.dev OSS + +Repo-specific signal for anyone (human or agent) reviewing a PR in this codebase. Calibrates what counts as critical, what to always check, and what to skip. + +## What makes a 🔴 Important finding here + +Reserve 🔴 for things that would page someone or block a rollback. In this codebase, that means: + +- **Rolling-deploy breakage.** Old and new versions of the webapp/supervisor run side-by-side during deploys. A change is broken if: + - A Lua script's behavior changes for a given key set without versioning (rename the script with a behavior-descriptive suffix like `Tracked` rather than `V2` — both versions must coexist safely). + - A Redis data shape used by both versions changes in place. New shapes need a new key namespace. + - A migration is not backward-compatible with the prior image. +- **Schema / migration safety.** Prisma migrations must be backward-compatible with the prior deploy. Adding NOT NULL without a default, dropping a column an old image still reads, renaming a column — all 🔴. +- **ClickHouse migration ordering + idempotency.** Goose runs in strict mode in the deploy pipeline and refuses to apply a missing version below the current version — slotting a new file in below the latest already-applied version blocks the deploy. New ClickHouse migration files MUST use the next available number (`max(files in internal-packages/clickhouse/schema/) + 1`); if main has added migrations while you've been on a branch, renumber yours. DDL must also be idempotent (`ADD COLUMN IF NOT EXISTS`, `DROP COLUMN IF EXISTS`, `CREATE TABLE IF NOT EXISTS`, `ADD INDEX IF NOT EXISTS`) so a partial / `--allow-missing` apply elsewhere doesn't fail on retry. Either fault is 🔴 — both break test/prod deploys. Rules live in `internal-packages/clickhouse/CLAUDE.md`. +- **Queue / concurrency correctness.** RunQueue, MarQS (V1, legacy), redis-worker — any change to enqueue / dequeue / locking semantics. Re-derive the invariant on paper before flagging or accepting. +- **Missing index on a hot table.** New Prisma queries against `TaskRun`, `TaskRunExecutionSnapshot`, `JobRun`, `Project`, etc. must use an existing index. Check `internal-packages/database/prisma/schema.prisma` for the relevant `@@index` lines — don't guess and don't propose `EXPLAIN`. +- **Recovery-path queries.** Any `TaskRun.findFirst` / `findMany` added to a schedule, run-recovery, or restart loop. Recovery fan-outs (Redis crash, restart storms) turn "rare indexed query" into a DB incident. 🔴 even if indexed. +- **Aggregations on hot tables.** No `COUNT` / `GROUP BY` on `TaskRun` or other multi-million-row tables. Use Redis or ClickHouse for counts. +- **Prod Redis blast-radius.** New code paths that `SCAN` with broad patterns (`*foo*`) on prod-shaped Redis, or `EVAL` Lua with `SCAN` loops inside. Both are 🔴. +- **`@trigger.dev/core` direct import** from anywhere outside the SDK package. Always import from `@trigger.dev/sdk`. Core direct imports are 🔴 — they break the public API contract. +- **Heavy execute-deps imported into request-handler bundles.** Specifically `chat.handover` and similar split-bundle entry points must not transitively import the agent task's execute path. Watch for new imports added at module top-level of route files. +- **V1 engine code modified in a "V2 only" PR.** The `apps/webapp/app/v3/` directory contains both. If the PR description says V2-only but it touches `triggerTaskV1`, `cancelTaskRunV1`, `MarQS`, etc. — 🔴. + +## Always check + +- **Tests use testcontainers, not mocks.** Vitest with `redisTest` / `postgresTest` / `containerTest` from `@internal/testcontainers`. Any new `vi.mock(...)` on Redis, Postgres, BullMQ, or other infra is wrong here — 🔴 if added in production-path tests, 🟡 if isolated unit test. +- **Public-package changes have a changeset.** `pnpm run changeset:add` produces `.changeset/*.md`. Required for any edit under `packages/*`. Missing → 🟡; missing on a breaking change → 🔴. +- **Server-only changes have `.server-changes/*.md`.** Required for `apps/webapp/`, `apps/supervisor/` edits with no public-package change. Body should be 1-2 sentences (it has to fit as one bullet in a future changelog). Missing → 🟡. +- **Lua script naming.** Coexisting scripts use behavior-descriptive suffixes (`Tracked`), never `V2`. Old name must keep working until the next deploy clears it. +- **RunQueue payload shape.** V2 run-queue payload's `projectId` is consumed by `workerQueueResolver` for override matching. If a PR drops it from the payload, 🔴. +- **`safeSend` scope.** Defensive IPC wrappers belong on loop / interval / handler contexts, not one-shot terminal sends. If the PR adds `safeSend` to a single terminal call for consistency, 🟡 with a "remove this" suggestion. +- **Zod version.** Pinned to `3.25.76` monorepo-wide. New package adding zod with a different version or range — 🔴. + +## Skip (do NOT flag) + +- Anything Prettier / ESLint catches. CI runs both. +- TypeScript style preferences (`type` vs `interface`) — already covered by repo standards. +- Test coverage exhortations as a generic suggestion. Only flag missing tests when a specific code path is genuinely untested and the path has prior incidents. +- `agentcrumbs` markers (`// @crumbs`, `// #region @crumbs`) and `agentcrumbs` imports — these are temporary debug instrumentation stripped before merge. +- `// removed comments for removed code`, renamed `_unused` vars, re-exported types as "backwards compatibility shims" — also covered by repo standards. +- Suggestions to "add error handling" without naming a specific scenario that breaks. +- Documentation prose nitpicks in `docs/*` MDX files unless factually wrong. + +## Things V1/legacy that should NOT block a PR + +The `apps/webapp/app/v3/` directory name is misleading — most code there is V2. Only specific files are V1-only legacy: `MarQS` queue, `triggerTaskV1`, `cancelTaskRunV1`, and a handful of others (see `apps/webapp/CLAUDE.md` for the exact list). Don't flag "you should refactor this to use V2" on those — they're frozen. + +## Confidence calibration for this repo + +The most common false-positive pattern: speculating about race conditions in code paths the agent doesn't have runtime visibility into. If the only evidence is "this *could* race", drop it. If you can point to a specific interleaving with file:line for each step, surface it. diff --git a/.claude/review-guides/chat-agent-sessions-row-agnostic.md b/.claude/review-guides/chat-agent-sessions-row-agnostic.md new file mode 100644 index 00000000000..7fb9851f308 --- /dev/null +++ b/.claude/review-guides/chat-agent-sessions-row-agnostic.md @@ -0,0 +1,287 @@ +# Review guide — chat.agent on Sessions, row-agnostic addressing + +Scope: the 12 uncommitted files. **No new behaviour beyond the public surface +already on this branch** — this is plumbing cleanup that: + +1. Eliminates the transport's session-creation step +2. Makes `chatId` the universal addressing string everywhere +3. Makes the server-side stream/append/wait routes row-agnostic + +## The two design moves + +**Move 1 — agent owns session lifecycle.** `chat.agent` and +`chat.customAgent` upsert the backing `Session` row at bind, fire-and-forget, +keyed on `externalId = payload.chatId`. The transport, server-side +`AgentChat`, and `chat.createTriggerAction` no longer create sessions at all. +Browsers cannot mint sessions either (`POST /api/v1/sessions` is now +secret-key-only). One owner, one path. + +**Move 2 — `chatId` is the only address.** The transport, server-side +`AgentChat`, JWT scopes, and S2 stream paths all use `chatId` directly. The +Session's friendlyId is informational. To make this safe, the three stream +routes (`.in/.out` PUT, GET, POST append, plus the run-engine `wait` +endpoint) became "row-optional" and derive a *canonical addressing key* +(`row.externalId ?? row.friendlyId`, fallback to the URL param when the row +hasn't been upserted yet). Same canonical key is used to build the S2 stream +path, the waitpoint cache key, and the JWT resource set — so any caller +addressing by either form converges on the same physical stream. + +Together these remove an entire class of "did the row land yet?" races. The +transport can subscribe to `/sessions/{chatId}/out` before the agent boots, +the agent's `void sessions.create({externalId: chatId})` lands a moment +later, and any earlier reads/writes are already on the right S2 key. + +--- + +## Read in this order + +### 1. `apps/webapp/app/services/realtime/sessions.server.ts` (+34 lines) + +The new primitive. Two helpers: + +- `isSessionFriendlyIdForm(value)` — `value.startsWith("session_")`. Used to + decide whether a missing row is a hard 404 (opaque friendlyId) or a soft + "row will land later" (externalId form). +- `canonicalSessionAddressingKey(row, paramSession)` — `row.externalId ?? + row.friendlyId` if the row exists, else `paramSession`. **This is the load- + bearing function.** Read its docstring. + +**Question to ask:** can two callers addressing the "same" session ever get +different canonical keys? Only if the row exists for one and not the other, +*and* the URL forms differ — but in that case the row-less caller used the +externalId form (friendlyId-form would have 404'd earlier), and the row-ful +caller computes `row.externalId ?? row.friendlyId`. If the row's externalId +matches the URL, they converge. If it doesn't, there's no row to find by +that string anyway. The interesting edge is "row exists with no externalId", +addressed via friendlyId — both sides read `row.friendlyId`. ✓ + +### 2. `apps/webapp/app/routes/realtime.v1.sessions.$session.$io.ts` (+47/-12) + +PUT initialize + GET subscribe (SSE). Both use the helper. The interesting +part is the loader's `findResource` + `authorization.resource`: + +```ts +findResource: async (params, auth) => { + const row = await resolveSessionByIdOrExternalId(...); + if (!row && isSessionFriendlyIdForm(params.session)) return undefined; // 404 + return { row, addressingKey: canonicalSessionAddressingKey(row, params.session) }; +}, +authorization: { + resource: ({ row, addressingKey }) => { + const ids = new Set([addressingKey]); + if (row) { + ids.add(row.friendlyId); + if (row.externalId) ids.add(row.externalId); + } + return { sessions: [...ids] }; + }, + superScopes: ["read:sessions", "read:all", "admin"], +}, +``` + +**Why three IDs in the resource set?** `checkAuthorization` is "any-match" +across the resource values. We want a JWT scoped to *either* form to +authorize *either* URL form. Smoke test verified the 4-cell matrix passes. + +**The PUT path** (action handler) is simpler — it just resolves the row, +builds an addressing key, and hands it to `initializeSessionStream`. Worth +noting the `closedAt` check is now `maybeSession?.closedAt` — no row means +no closedAt to enforce. + +### 3. `apps/webapp/app/routes/realtime.v1.sessions.$session.$io.append.ts` (+22/-13) + +POST append (browser writes a record to `.in` or server writes to `.out`). +Same row-optional pattern. Both the S2 append and the waitpoint drain use +`addressingKey`. + +**Question to ask:** what fires the waitpoint? An agent's +`session.in.wait()` registers a waitpoint keyed on `(addressingKey, io)` via +the wait endpoint (file 4). The append handler drains by the *same* key — +even if the agent registered with externalId form and the transport +appended via friendlyId form, both compute the same canonical key, so they +converge. ✓ + +### 4. `apps/webapp/app/routes/api.v1.runs.$runFriendlyId.session-streams.wait.ts` (+18/-13) + +The agent's `.in.wait()` endpoint. Run-engine creates the waitpoint, then +registers it in Redis under `(addressingKey, io)`. The race-check that runs +right after creation reads from S2 by the same key. Three call sites — +`addSessionStreamWaitpoint`, `readSessionStreamRecords`, +`removeSessionStreamWaitpoint` — all consistent. + +### 5. `apps/webapp/app/routes/api.v1.sessions.ts` (+4/-2) + +**Security tightening.** Removed `allowJWT: true` and `corsStrategy: "all"` +from the `POST /api/v1/sessions` action — secret-key only now. + +**Question to ask:** was the JWT path actually used? Until this branch, the +transport called it via `ensureSession` (now deleted). After this branch, +nobody reaches it from the browser. `chat.createTriggerAction` (server +secret key) is the only browser-adjacent path. + +### 6. `packages/trigger-sdk/src/v3/ai.ts` (+62/-39) + +Two near-identical edits — one in `chatAgent`, one in `chatCustomAgent`. +Both bind on `payload.chatId` and fire-and-forget the upsert: + +```ts +locals.set(chatSessionHandleKey, sessions.open(payload.chatId)); +void sessions + .create({ type: "chat.agent", externalId: payload.chatId }) + .catch(() => { /* best effort */ }); +``` + +**Question to ask:** why `void`-and-`catch`? Awaiting the upsert would gate +the agent's bind on a network round-trip that doesn't unblock anything +user-visible — `.in/.out` routes are row-agnostic and the waitpoint cache +is keyed on the addressing string, not the row id. If the upsert genuinely +fails, the next bind retries the same idempotent call (`sessions.create` +upserts on `externalId`, so concurrent triggers on one chatId converge to +one row). The row matters for downstream metadata + listing, not for live +addressing. + +The PAT scope minting in `chatAgent` (two call sites — preload and +sendMessage) now uses `payload.chatId` for the `sessions:` resource. That +matches what the transport/AgentChat use as the JWT resource and what the +JWT's resource set in the loader includes. Cross-form addressing works +either way (smoke-tested), but using `chatId` keeps the chain tight. + +`createChatTriggerAction` is the most visibly trimmed: no pre-create, no +threading `sessionId` into payload, scope mint uses `chatId`. Return type +no longer carries `sessionId` — note `TriggerChatTaskResult.sessionId` was +already declared optional, so this isn't a public-API break. + +**Stale docstring to flag:** `chat.ts:59` and `chat.ts:112` still describe +PAT scopes as `read:sessions:{sessionId}` and +`write:sessions:{sessionId}`. Functionally either ID works (row lookup +canonicalises), but the doc text is now out of date — it should say +`{chatId}`. Worth a tidy-up before merge but not blocking. + +### 7. `packages/trigger-sdk/src/v3/chat.ts` (+63/-117) + +**The biggest mechanical edit.** Net -54 lines from deleting `ensureSession` +and untangling its callers. + +What disappeared: +- `private async ensureSession(chatId)` — gone +- The "lazy upsert from the browser if no triggerTask callback" branch in + `sendMessages` and `preload` — gone +- The "throw if neither path surfaced a sessionId" guard — gone +- All `state.sessionId` URL params replaced with `chatId` +- `subscribeToSessionStream`'s `chatId?` (optional) is now `chatId` (required) + +What stayed: +- `state.sessionId` in `ChatSessionState` — optional, informational +- The `restore from external storage` branch in the constructor still + hydrates `sessionId` if persisted, just doesn't *require* it +- `notifySessionChange` still surfaces `sessionId` if known + +**Question to ask:** does the transport ever still need the friendlyId? The +only place is the `onSessionChange` callback's payload (so consumers +persisting state can save it for later display). The transport itself never +puts it in a URL or a waitpoint key. + +The `sendMessages` path is worth re-reading: when state.runId is set, it +appends to `.in/append` and subscribes to `.out`. If the append fails with +a non-auth error, it falls through to triggering a new run (legacy "run is +dead" detection — unchanged from pre-Sessions, doesn't depend on +addressing). + +### 8. `packages/trigger-sdk/src/v3/chat-client.ts` (+34/-33) + +Server-side `AgentChat`. Mirrors the transport changes — every URL uses +`this.chatId`. `triggerNewRun` no longer pre-creates a session. `ChatSession` +and internal `SessionState` types now have optional `sessionId`. + +The shape of the diff is identical to the transport: delete the upsert, +swap addressing identifiers, optionalise the friendlyId. If you've read +`chat.ts` carefully, this one is mostly mechanical confirmation that both +client surfaces (browser transport + server-side AgentChat) speak the same +addressing protocol. + +### 9. Test infrastructure — `sessions.ts` (+18) + `mock-chat-agent.ts` (+25) + +`__setSessionCreateImplForTests` mirrors the existing +`__setSessionOpenImplForTests`. `mockChatAgent` installs a no-op create stub +returning a synthetic `CreatedSessionResponseBody` so the agent's bind-time +`void sessions.create(...)` doesn't try to hit a real API. Cleanup runs in +the same `.finally` as the open override. + +**Question to ask:** is the synthetic response shape correct? It mirrors +`CreatedSessionResponseBody` — `id`, `externalId`, `type`, `tags`, +`metadata`, `closedAt`, `closedReason`, `expiresAt`, `createdAt`, +`updatedAt`, `isCached`. Tests don't currently assert on this object, so +the bar is "doesn't crash + matches the type". Met. + +### 10. `packages/trigger-sdk/src/v3/chat.test.ts` (+13/-12) + +Three classes of test edits, all consequences: + +- Stream URL assertion: `chat-1` (the chatId) instead of + `session_streamurl` (the friendlyId) +- `renewRunAccessToken` callback: `sessionId: undefined` (was + `DEFAULT_SESSION_ID` because the mocked trigger doesn't surface it) +- Token resolve count: `1` (was `2` — second resolve was for `ensureSession`) +- One `onSessionChange` matchObject loses `sessionId` + +### 11. `apps/webapp/app/routes/_app.../playground/.../route.tsx` (1 line) + +`sessionId: string` → `sessionId?: string` in the playground sidebar prop +to track the transport type change. + +--- + +## Edge cases I checked, so you don't have to + +- **Cross-form JWT auth (curl matrix).** JWT scoped to externalId can call + externalId URL ✓ and friendlyId URL ✓. JWT scoped to friendlyId can call + externalId URL ✓ and friendlyId URL ✓. Smoke-tested. +- **Row materialises after subscribe.** Transport opens + `GET /sessions/{chatId}/out` before agent's bind upsert lands → 200 OK, + `addressingKey = chatId` (paramSession fallback). Once the row lands + with `externalId = chatId`, addressingKey resolves to the same value via + `row.externalId`. Same S2 key throughout. +- **Concurrent triggers on one chatId.** Two browser tabs trigger two runs + → two binds → two `sessions.create({externalId: chatId})` calls. Upsert + semantics: both return the same row. +- **Closed session enforcement.** Still enforced when a row exists. + `maybeSession?.closedAt` is null-safe; no row = no close-state to honour. +- **Agent run cancellation.** Frontend doesn't auto-detect — unchanged from + pre-Sessions; messages sit in S2 until the next trigger (the existing + run-PAT auth-error path is the only reaper). Out of scope for this branch. +- **Idle timeout in dev.** Runs stay `EXECUTING_WITH_WAITPOINTS` past the + configured idle because dev runs don't snapshot/restore; the in-process + idle clock advances locally without touching the row. Expected, not a + regression. + +## Things explicitly **not** in this branch + +- Run-state subscription on the transport side (the "run died, re-trigger + silently" UX gap) +- Session auto-close on agent exit (still client-driven by design) +- Any change to `Session` schema, `sessions.create` semantics, or + `chatAccessTokenTTL` +- Docstring updates for `read:sessions:{sessionId}` / `write:sessions:{sessionId}` + in `chat.ts:59` and `chat.ts:112` (functional but textually stale — + follow-up nit) + +--- + +## What I'd be ready to answer cold + +- Why fire-and-forget upsert (vs. `await`) in the agent's bind step +- Why the route's authorization resource set has three IDs (cross-form JWT + auth) +- Why `POST /api/v1/sessions` lost `allowJWT` (security tightening — no + caller needs it after the transport's `ensureSession` is gone) +- What converges two callers using different URL forms onto the same S2 + stream (`canonicalSessionAddressingKey`, identical computation on both + sides for any given row) +- What makes `sessions.create` race-safe under concurrent triggers + (`externalId` upsert) +- Why `state.sessionId` stayed on `ChatSessionState` at all (pure + informational, surfaced via `onSessionChange` for consumer persistence; + zero addressing role) +- Why the chat-client (server-side AgentChat) and chat (transport) edits + look near-identical (they implement the same client protocol against the + same row-agnostic routes) diff --git a/.claude/rules/package-installation.md b/.claude/rules/package-installation.md new file mode 100644 index 00000000000..310074823c5 --- /dev/null +++ b/.claude/rules/package-installation.md @@ -0,0 +1,22 @@ +--- +paths: + - "**/package.json" +--- + +# Installing Packages + +When adding a new dependency to any package.json in the monorepo: + +1. **Look up the latest version** on npm before adding: + ```bash + pnpm view version + ``` + If unsure which version to use (e.g. major version compatibility), confirm with the user. + +2. **Edit the package.json directly** — do NOT use `pnpm add` as it can cause issues in the monorepo. Add the dependency with the correct version range (typically `^x.y.z`). + +3. **Run `pnpm i` from the repo root** after editing to install and update the lockfile: + ```bash + pnpm i + ``` + Always run from the repo root, not from the package directory. diff --git a/.claude/rules/sdk-packages.md b/.claude/rules/sdk-packages.md index 549eb341809..343be2045f8 100644 --- a/.claude/rules/sdk-packages.md +++ b/.claude/rules/sdk-packages.md @@ -9,4 +9,4 @@ paths: - Default to **patch**. Get maintainer approval for minor. Never select major without explicit approval. - `@trigger.dev/core`: **Never import the root**. Always use subpath imports (e.g., `@trigger.dev/core/v3`). - Do NOT update `rules/` or `.claude/skills/trigger-dev-tasks/` unless explicitly asked. These are maintained in separate dedicated passes. -- Test changes using `references/hello-world` reference project. +- Test changes using the `hello-world` project in the [`triggerdotdev/references`](https://github.com/triggerdotdev/references) repo. diff --git a/.cursor/mcp.json b/.cursor/mcp.json index da39e4ffafe..c4b06a67630 100644 --- a/.cursor/mcp.json +++ b/.cursor/mcp.json @@ -1,3 +1,7 @@ { - "mcpServers": {} + "mcpServers": { + "linear": { + "url": "https://mcp.linear.app/mcp" + } + } } diff --git a/.cursor/rules/webapp.mdc b/.cursor/rules/webapp.mdc index a362f14fe12..f1333febdc0 100644 --- a/.cursor/rules/webapp.mdc +++ b/.cursor/rules/webapp.mdc @@ -4,7 +4,7 @@ globs: apps/webapp/**/*.tsx,apps/webapp/**/*.ts alwaysApply: false --- -The main trigger.dev webapp, which powers it's API and dashboard and makes up the docker image that is produced as an OSS image, is a Remix 2.1.0 app that uses an express server, written in TypeScript. The following subsystems are either included in the webapp or are used by the webapp in another part of the monorepo: +The main trigger.dev webapp, which powers it's API and dashboard and makes up the docker image that is produced as an OSS image, is a Remix 2.17.4 app that uses an express server, written in TypeScript. The following subsystems are either included in the webapp or are used by the webapp in another part of the monorepo: - `@trigger.dev/database` exports a Prisma 6.14.0 client that is used extensively in the webapp to access a PostgreSQL instance. The schema file is [schema.prisma](mdc:internal-packages/database/prisma/schema.prisma) - `@trigger.dev/core` is a published package and is used to share code between the `@trigger.dev/sdk` and the webapp. It includes functionality but also a load of Zod schemas for data validation. When importing from `@trigger.dev/core` in the webapp, we never import the root `@trigger.dev/core` path, instead we favor one of the subpath exports that you can find in [package.json](mdc:packages/core/package.json) diff --git a/.env.example b/.env.example index 69d5acdc560..c6980d7d77a 100644 --- a/.env.example +++ b/.env.example @@ -17,6 +17,11 @@ NODE_ENV=development CLICKHOUSE_URL=http://default:password@localhost:8123 RUN_REPLICATION_CLICKHOUSE_URL=http://default:password@localhost:8123 RUN_REPLICATION_ENABLED=1 +# Store task run spans/traces in ClickHouse so the dashboard trace view is +# populated in local dev. The local stack is ClickHouse-backed (see above), so +# leaving this unset falls back to the "postgres" store and dev run traces show +# up empty even though the run itself appears. +EVENT_REPOSITORY_DEFAULT_STORE=clickhouse_v2 # Set this to UTC because Node.js uses the system timezone TZ="UTC" @@ -29,6 +34,52 @@ REDIS_TLS_DISABLED="true" DEV_OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:3030/otel" DEV_OTEL_BATCH_PROCESSING_ENABLED="0" +# Realtime streams v2 (Sessions, chat.agent, large stream backfills) backed +# by S2 (https://s2.dev). The `s2` service in docker/docker-compose.yml runs +# the open-source s2-lite binary and pre-creates a basin named `trigger-local` +# (see docker/config/s2-spec.json). Comment these out to fall back to v1 +# (Redis-only) streams; Sessions and chat.agent then become unavailable. +REALTIME_STREAMS_S2_BASIN=trigger-local +REALTIME_STREAMS_S2_ACCESS_TOKEN=ignored +REALTIME_STREAMS_S2_ENDPOINT=http://localhost:4566/v1 +REALTIME_STREAMS_S2_SKIP_ACCESS_TOKENS=true +REALTIME_STREAMS_DEFAULT_VERSION=v2 + +# Running multiple instances side by side (worktrees, branch experiments) +# +# Every host port in docker/docker-compose.yml is `${VAR:-default}` and the +# project name comes from `COMPOSE_PROJECT_NAME`. To stand up a second stack +# alongside the default one, uncomment the block below in this clone's `.env` +# (pick any offset that doesn't clash with anything else running), then update +# the URL/PORT vars further up to match. Default values are commented for +# reference. +# +# --- core (pnpm run docker) --- +# COMPOSE_PROJECT_NAME=triggerdotdev-docker-alt +# CONTAINER_PREFIX=alt- +# POSTGRES_HOST_PORT=15432 # default 5432 +# REDIS_HOST_PORT=16379 # default 6379 +# ELECTRIC_HOST_PORT=13060 # default 3060 +# MINIO_API_HOST_PORT=19005 # default 9005 +# MINIO_CONSOLE_HOST_PORT=19006 # default 9006 +# CLICKHOUSE_HTTP_HOST_PORT=18123 # default 8123 +# CLICKHOUSE_TCP_HOST_PORT=19000 # default 9000 +# S2_HOST_PORT=14566 # default 4566 +# REMIX_APP_PORT=13030 # default 3030 +# --- extras (only needed if you also run `pnpm run docker:full`) --- +# ELECTRIC_SHARD_1_HOST_PORT=13061 # default 3061 +# CH_UI_HOST_PORT=15521 # default 5521 +# TOXIPROXY_PROXY_HOST_PORT=40303 # default 30303 +# TOXIPROXY_API_HOST_PORT=18474 # default 8474 +# NGINX_H2_HOST_PORT=18443 # default 8443 +# OTEL_GRPC_HOST_PORT=14317 # default 4317 +# OTEL_HTTP_HOST_PORT=14318 # default 4318 +# OTEL_PROMETHEUS_HOST_PORT=18889 # default 8889 +# PROMETHEUS_HOST_PORT=19090 # default 9090 +# GRAFANA_HOST_PORT=13001 # default 3001 +# (and update DATABASE_URL / CLICKHOUSE_URL / REDIS_PORT / APP_ORIGIN / +# LOGIN_ORIGIN / ELECTRIC_ORIGIN / REALTIME_STREAMS_S2_ENDPOINT to match) + # When the domain is set to `localhost` the CLI deploy command will only --load the image by default and not --push it DEPLOY_REGISTRY_HOST=localhost:5000 @@ -106,7 +157,7 @@ POSTHOG_PROJECT_KEY= # INTERNAL_OTEL_TRACE_LOGGING_ENABLED=1 # INTERNAL_OTEL_TRACE_INSTRUMENT_PRISMA_ENABLED=0 -# Enable local observability stack (requires `pnpm run docker` to start otel-collector) +# Enable local observability stack (requires `pnpm run docker:full` to bring up otel-collector + prometheus + grafana) # Uncomment these to send metrics to the local Prometheus via OTEL Collector: # INTERNAL_OTEL_METRIC_EXPORTER_ENABLED=1 # INTERNAL_OTEL_METRIC_EXPORTER_URL=http://localhost:4318/v1/metrics diff --git a/.github/VOUCHED.td b/.github/VOUCHED.td index ce96548aa6f..922ca13cd65 100644 --- a/.github/VOUCHED.td +++ b/.github/VOUCHED.td @@ -11,10 +11,15 @@ myftija nicktrn samejr isshaddad +# Bots +devin-ai-integration[bot] +dependabot[bot] # Outside contributors gautamsi capaj chengzp bharathkumar39293 bhekanik -jrossi \ No newline at end of file +jrossi +ThullyoCunha +ConProgramming \ No newline at end of file diff --git a/.github/actions/get-image-tag/action.yml b/.github/actions/get-image-tag/action.yml index e0646230463..7f1505a0c11 100644 --- a/.github/actions/get-image-tag/action.yml +++ b/.github/actions/get-image-tag/action.yml @@ -23,35 +23,37 @@ runs: id: get_tag shell: bash run: | - if [[ -n "${{ inputs.tag }}" ]]; then - tag="${{ inputs.tag }}" - elif [[ "${{ github.ref_type }}" == "tag" ]]; then - if [[ "${{ github.ref_name }}" == infra-*-* ]]; then - env=$(echo ${{ github.ref_name }} | cut -d- -f2) - sha=$(echo ${{ github.sha }} | head -c7) + if [[ -n "${INPUTS_TAG}" ]]; then + tag="${INPUTS_TAG}" + elif [[ "${GITHUB_REF_TYPE}" == "tag" ]]; then + if [[ "${GITHUB_REF_NAME}" == infra-*-* ]]; then + env=$(echo ${GITHUB_REF_NAME} | cut -d- -f2) + sha=$(echo "${GITHUB_SHA}" | head -c7) ts=$(date +%s) tag=${env}-${sha}-${ts} - elif [[ "${{ github.ref_name }}" == re2-*-* ]]; then - env=$(echo ${{ github.ref_name }} | cut -d- -f2) - sha=$(echo ${{ github.sha }} | head -c7) + elif [[ "${GITHUB_REF_NAME}" == re2-*-* ]]; then + env=$(echo ${GITHUB_REF_NAME} | cut -d- -f2) + sha=$(echo "${GITHUB_SHA}" | head -c7) ts=$(date +%s) tag=${env}-${sha}-${ts} - elif [[ "${{ github.ref_name }}" == v.docker.* ]]; then + elif [[ "${GITHUB_REF_NAME}" == v.docker.* ]]; then version="${GITHUB_REF_NAME#v.docker.}" tag="v${version}" - elif [[ "${{ github.ref_name }}" == build-* ]]; then + elif [[ "${GITHUB_REF_NAME}" == build-* ]]; then tag="${GITHUB_REF_NAME#build-}" else - echo "Invalid git tag: ${{ github.ref_name }}" + echo "Invalid git tag: ${GITHUB_REF_NAME}" exit 1 fi - elif [[ "${{ github.ref_name }}" == "main" ]]; then + elif [[ "${GITHUB_REF_NAME}" == "main" ]]; then tag="main" else - echo "Invalid git ref: ${{ github.ref }}" + echo "Invalid git ref: ${GITHUB_REF}" exit 1 fi echo "tag=${tag}" >> "$GITHUB_OUTPUT" + env: + INPUTS_TAG: ${{ inputs.tag }} - name: 🔍 Check for validity id: check_validity diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000000..7bb64f36744 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,12 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly + cooldown: + default-days: 7 + groups: + github-actions: + patterns: + - "*" diff --git a/.github/workflows/changesets-pr.yml b/.github/workflows/changesets-pr.yml index c7fc4e07136..e80ab04e7f1 100644 --- a/.github/workflows/changesets-pr.yml +++ b/.github/workflows/changesets-pr.yml @@ -22,20 +22,21 @@ jobs: permissions: contents: write pull-requests: write + checks: write if: github.repository == 'triggerdotdev/trigger.dev' steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 # zizmor: ignore[artipacked] changesets/action pushes the release branch; no artifact upload here so no leak path with: fetch-depth: 0 - name: Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 - name: Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 cache: "pnpm" - name: Install dependencies @@ -43,7 +44,7 @@ jobs: - name: Create release PR id: changesets - uses: changesets/action@v1 + uses: changesets/action@63a615b9cd06ba9a3e6d13796c7fbcb080a60a0b # v1.8.0 with: version: pnpm run changeset:version commit: "chore: release" @@ -73,51 +74,26 @@ jobs: fi fi - update-lockfile: - name: Update lockfile on release PR - runs-on: ubuntu-latest - needs: release-pr - permissions: - contents: write - steps: - - name: Checkout release branch - uses: actions/checkout@v4 - with: - ref: changeset-release/main - - - name: Setup pnpm - uses: pnpm/action-setup@v4 - with: - version: 10.23.0 - - - name: Setup node - uses: buildjet/setup-node@v4 - with: - node-version: 20.20.0 - - - name: Install and update lockfile - run: pnpm install --no-frozen-lockfile - - - name: Clean up consumed .server-changes/ files - run: | - set -e - shopt -s nullglob - files=(.server-changes/*.md) - for f in "${files[@]}"; do - if [ "$(basename "$f")" != "README.md" ]; then - git rm --ignore-unmatch "$f" - fi - done - - - name: Commit and push lockfile + server-changes cleanup + # The changesets bot authors release PRs with GITHUB_TOKEN, which by GitHub + # design cannot trigger downstream workflows. That leaves the required + # "All PR Checks" status permanently Expected and the PR unmergeable. + # The release PR only bumps package.json + lockfile + CHANGELOGs from + # changesets already on main, so we self-report the required check as + # success. If a human ever pushes to changeset-release/main, the real + # pr_checks.yml fires and its result overwrites this one (last write wins + # for the same context on the same SHA). + - name: Self-report "All PR Checks" success on release PR + if: steps.changesets.outputs.published != 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - set -e - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - git add pnpm-lock.yaml - if ! git diff --cached --quiet; then - git commit -m "chore: update lockfile and clean up .server-changes/ for release" - git push origin changeset-release/main - else - echo "No changes to commit" - fi + PR_NUMBER=$(gh pr list --head changeset-release/main --json number --jq '.[0].number') + if [ -z "$PR_NUMBER" ]; then exit 0; fi + HEAD_SHA=$(gh pr view "$PR_NUMBER" --json headRefOid --jq '.headRefOid') + gh api -X POST repos/${{ github.repository }}/check-runs \ + -f name="All PR Checks" \ + -f head_sha="$HEAD_SHA" \ + -f status=completed \ + -f conclusion=success \ + -f 'output[title]=Auto-pass for changeset release PR' \ + -f 'output[summary]=Required check auto-satisfied for changeset-release/main PRs. Full CI ran on the underlying commits before they landed on main.' diff --git a/.github/workflows/check-review-md.yml b/.github/workflows/check-review-md.yml new file mode 100644 index 00000000000..fb093ac9a1c --- /dev/null +++ b/.github/workflows/check-review-md.yml @@ -0,0 +1,92 @@ +name: 🔎 REVIEW.md Drift Audit + +on: + pull_request: + types: [opened, ready_for_review, synchronize] + paths-ignore: + - "docs/**" + - ".changeset/**" + - ".server-changes/**" + +concurrency: + group: review-md-drift-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + audit: + if: >- + github.event.pull_request.draft == false && + github.event.pull_request.head.repo.full_name == github.repository + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + id-token: write + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@787c5a0ce96a9a6cfb050ea0c8f4c05f2447c251 # v1.0.133 + with: + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + use_sticky_comment: true + allowed_bots: "devin-ai-integration[bot]" + + claude_args: | + --max-turns 30 + --allowedTools "Read,Glob,Grep,Bash(git diff:*)" + + prompt: | + You are auditing this PR for drift against `.claude/REVIEW.md`. + + ## Context + + `.claude/REVIEW.md` is the repo's source of truth for what AI / agent code reviewers should treat as critical findings (rolling-deploy safety, hot-table indexes, recovery-path queries, testcontainers usage, Lua versioning, etc.). It is consumed by review agents to calibrate severity. If REVIEW.md goes stale, every future agent review degrades. + + ## Strategy — read this first + + You have a hard turn budget. Spend it on signal, not coverage. The audit is allowed to miss things; it is NOT allowed to time out. + + 1. Read `.claude/REVIEW.md` once, in full. + 2. Run `git diff origin/main...HEAD --name-only` to get the list of changed files. Do NOT read the diff content yet. + 3. Scan the file-list for relevance to REVIEW.md scope. Relevance signals: changes to Prisma schema, Redis / queue / Lua code, hot tables, recovery / restart loops, new packages, deletions of paths REVIEW.md cites. Skim everything else. + 4. Open at most **5 files** total — only the ones most likely to surface a real signal. If nothing in the file-list looks relevant to any REVIEW.md rule, do NOT read any files; go straight to the verdict. + 5. Form a verdict and stop. Do not exhaust the turn budget exploring. + + Large PRs (>50 files changed) are a strong signal to be MORE selective, not more thorough. Pick 3-5 files at most. + + ## What to look for + + - **Stale references** — does any REVIEW.md rule cite a file, directory, function, table, Prisma model, or package name that has been removed or renamed in this PR (or is already gone from `main`)? + - **Contradictions** — does code in this PR clearly violate a current REVIEW.md rule? (Don't re-review the PR. Only flag if REVIEW.md and the PR plainly disagree.) + - **Missing rules** — does this PR introduce a new pattern future reviewers should know about? Examples: a new hot table, a new Lua-script versioning convention, a new safety wrapper, a new "must always check" invariant. + - **Obsolete rules** — has the repo moved past a constraint REVIEW.md still asserts? (e.g. a deprecated path is gone, a pattern is now linted, V1 code is deleted.) + + ## Response format + + If nothing needs changing: + + ✅ REVIEW.md looks current for this PR. + + Otherwise: + + 📝 **REVIEW.md updates suggested:** + + - **[stale]** `` — + - **[contradiction]** `` — + - **[missing]** under `##
` — + - **[obsolete]** `` — + + ## Rules + + - Maximum 3 suggestions per audit. Pick the highest-signal ones. + - Only flag things that would actually mislead a future reviewer. Style and wording do not count. + - Do NOT review the PR itself. Do NOT propose rules outside REVIEW.md's existing sections. + - Do NOT propose rules for one-off PR specifics that don't generalize to future PRs. + - If REVIEW.md does not exist in the repo, respond with `(skip)` and stop. + - When in doubt between "one more file read" and "finish now" — finish now. diff --git a/.github/workflows/claude-md-audit.yml b/.github/workflows/claude-md-audit.yml index a80bbca0f52..32240ba5ea8 100644 --- a/.github/workflows/claude-md-audit.yml +++ b/.github/workflows/claude-md-audit.yml @@ -8,7 +8,6 @@ on: - ".changeset/**" - ".server-changes/**" - "**/*.md" - - "references/**" concurrency: group: claude-md-audit-${{ github.event.pull_request.number }} @@ -16,7 +15,9 @@ concurrency: jobs: audit: - if: github.event.pull_request.draft == false + if: >- + github.event.pull_request.draft == false && + github.event.pull_request.head.repo.full_name == github.repository runs-on: ubuntu-latest permissions: contents: read @@ -25,16 +26,18 @@ jobs: id-token: write steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 + persist-credentials: false - name: Run Claude Code id: claude - uses: anthropics/claude-code-action@v1 + uses: anthropics/claude-code-action@787c5a0ce96a9a6cfb050ea0c8f4c05f2447c251 # v1.0.133 with: - claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} use_sticky_comment: true + allowed_bots: "devin-ai-integration[bot]" claude_args: | --max-turns 15 diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml index cadbe31773f..1c783e7ef6d 100644 --- a/.github/workflows/claude.yml +++ b/.github/workflows/claude.yml @@ -19,26 +19,27 @@ jobs: (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) runs-on: ubuntu-latest permissions: - contents: read - pull-requests: read - issues: read + contents: write + pull-requests: write + issues: write id-token: write actions: read # Required for Claude to read CI results on PRs steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 1 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 cache: "pnpm" - name: 📥 Download deps @@ -49,9 +50,9 @@ jobs: - name: Run Claude Code id: claude - uses: anthropics/claude-code-action@v1 + uses: anthropics/claude-code-action@787c5a0ce96a9a6cfb050ea0c8f4c05f2447c251 # v1.0.133 with: - claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} # This is an optional setting that allows Claude to read CI results on PRs additional_permissions: | diff --git a/.github/workflows/dependabot-critical-alerts.yml b/.github/workflows/dependabot-critical-alerts.yml new file mode 100644 index 00000000000..a71b14bebf9 --- /dev/null +++ b/.github/workflows/dependabot-critical-alerts.yml @@ -0,0 +1,83 @@ +name: Dependabot Critical Alerts + +on: + schedule: + - cron: "0 8 * * *" # Daily 08:00 UTC + workflow_dispatch: + inputs: + severity: + description: "Severity to alert on" + type: choice + options: + - critical + - high + - medium + - low + default: critical + +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: false + +permissions: + contents: read + +jobs: + alert: + name: Post critical alerts + runs-on: ubuntu-latest + environment: dependabot-summary + env: + SEVERITY: ${{ inputs.severity || 'critical' }} + steps: + - name: Fetch alerts + id: alerts + env: + GH_TOKEN: ${{ secrets.DEPENDABOT_ALERTS_TOKEN }} + REPO: ${{ github.repository }} + run: | + set -euo pipefail + gh api -X GET "/repos/$REPO/dependabot/alerts" \ + -F state=open -F severity="$SEVERITY" --paginate > pages.json + jq -s 'add' pages.json > alerts.json + TOTAL=$(jq 'length' alerts.json) + echo "total=$TOTAL" >> "$GITHUB_OUTPUT" + if [ "$TOTAL" = "0" ]; then + exit 0 + fi + LIST=$(jq -r ' + map("• <\(.html_url)|#\(.number)> *\(.dependency.package.name)* - \(.security_advisory.summary)") + | join("\n") + ' alerts.json) + { + echo "list<> "$GITHUB_OUTPUT" + + - name: Build Slack payload + if: steps.alerts.outputs.total != '0' + env: + REPO: ${{ github.repository }} + CHANNEL: ${{ vars.SLACK_CHANNEL_ID }} + TOTAL: ${{ steps.alerts.outputs.total }} + LIST: ${{ steps.alerts.outputs.list }} + run: | + jq -n \ + --arg channel "$CHANNEL" \ + --arg repo "$REPO" \ + --arg total "$TOTAL" \ + --arg list "$LIST" \ + --arg severity "$SEVERITY" \ + '{ + channel: $channel, + text: ":bufo-alarma: `\($repo)` - *\($total) open \($severity) alert(s)*\n\($list)\n\n" + }' > payload.json + + - name: Post Slack alert + if: steps.alerts.outputs.total != '0' + uses: slackapi/slack-github-action@45a88b9581bfab2566dc881e2cd66d334e621e2c # v3.0.3 + with: + method: chat.postMessage + token: ${{ secrets.SLACK_BOT_TOKEN }} + payload-file-path: payload.json diff --git a/.github/workflows/dependabot-weekly-summary.yml b/.github/workflows/dependabot-weekly-summary.yml new file mode 100644 index 00000000000..fb2717e2fb0 --- /dev/null +++ b/.github/workflows/dependabot-weekly-summary.yml @@ -0,0 +1,206 @@ +name: Dependabot Weekly Summary + +on: + schedule: + - cron: "0 8 * * 1" # Mon 08:00 UTC + workflow_dispatch: + +# Single-purpose monitoring workflow; serialise on workflow name only - we never +# want two concurrent summary runs racing to post the same digest. +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: false + +permissions: + contents: read # gh CLI baseline + pull-requests: read # gh pr list (open dependabot PRs) + actions: read # gh run list / view (parse latest dependabot run logs) + +jobs: + summary: + name: Post weekly Dependabot summary + runs-on: ubuntu-latest + environment: dependabot-summary + env: + # Severities surface in the actions list when their remaining TTR drops + # below this many days. Override via repo/env var ACTION_THRESHOLD_DAYS. + THRESHOLD_DAYS: ${{ vars.ACTION_THRESHOLD_DAYS || '7' }} + steps: + - name: Fetch alerts and compute summaries + id: alerts + env: + GH_TOKEN: ${{ secrets.DEPENDABOT_ALERTS_TOKEN }} + REPO: ${{ github.repository }} + run: | + if ! gh api -X GET "/repos/$REPO/dependabot/alerts" --paginate > pages.json 2> err.txt; then + echo "total=?" >> "$GITHUB_OUTPUT" + ERR=$(head -c 200 err.txt | tr '\n' ' ') + echo "by_severity=:x: _failed to fetch alerts: ${ERR}_" >> "$GITHUB_OUTPUT" + echo "actions=:x: _alerts unavailable_" >> "$GITHUB_OUTPUT" + exit 0 + fi + jq -s '[.[][] | select(.state == "open")]' pages.json > open.json + + TOTAL=$(jq 'length' open.json) + echo "total=$TOTAL" >> "$GITHUB_OUTPUT" + + if [ "$TOTAL" = "0" ]; then + echo "by_severity=:white_check_mark: No open alerts." >> "$GITHUB_OUTPUT" + echo "actions=_None_" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Severity breakdown - real newlines so jq --arg in the payload + # builder encodes them as proper \n in JSON (Slack renders as breaks). + BY_SEV=$(jq -r ' + group_by(.security_advisory.severity) + | map({sev: .[0].security_advisory.severity, + count: length, + weight: ({"critical":0,"high":1,"medium":2,"low":3}[.[0].security_advisory.severity])}) + | sort_by(.weight) + | map("• *\(.count)* \(.sev)") + | join("\n") + ' open.json) + { + echo "by_severity<> "$GITHUB_OUTPUT" + + # Actions: alerts within THRESHOLD_DAYS of their TTR (P0=7d, P1=30d, P2=90d, P3=no deadline) + # Grouped by (package, severity); shows earliest deadline per group. + ACTIONS=$(jq -r --argjson threshold "$THRESHOLD_DAYS" ' + [.[] + | (.security_advisory.severity) as $sev + | ({"critical":7,"high":30,"medium":90,"low":null}[$sev]) as $ttr + | select($ttr != null) + | ((now - (.created_at | fromdateiso8601)) / 86400 | floor) as $age + | {pkg: .dependency.package.name, sev: $sev, remaining: ($ttr - $age)} + ] + | group_by([.pkg, .sev]) + | map({pkg: .[0].pkg, sev: .[0].sev, count: length, min_remaining: ([.[].remaining] | min)}) + | map(select(.min_remaining < $threshold)) + | sort_by(.min_remaining) + | if length == 0 then "_None_" + else (map( + "• *\(.pkg)* (\(.sev))" + + (if .count > 1 then " ×\(.count)" else "" end) + " - " + + (if .min_remaining < 0 then "*OVERDUE* by \(-.min_remaining)d" + else "\(.min_remaining)d remaining" end) + ) | join("\n")) + end + ' open.json) + { + echo "actions<> "$GITHUB_OUTPUT" + + - name: Fetch open dependabot PRs + id: prs + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + REPO_URL: https://github.com/${{ github.repository }} + run: | + if ! PR_JSON=$(gh pr list --repo "$REPO" --state open --author "app/dependabot" --json number,title 2> err.txt); then + ERR=$(head -c 200 err.txt | tr '\n' ' ') + echo "list=:x: _failed to fetch PRs: ${ERR}_" >> "$GITHUB_OUTPUT" + exit 0 + fi + LIST=$(echo "$PR_JSON" | jq -r --arg url "$REPO_URL" ' + if length == 0 then "_None_" + else (map("• <\($url)/pull/\(.number)|#\(.number)> \(.title)") | join("\n")) + end + ') + { + echo "list<> "$GITHUB_OUTPUT" + + - name: Find latest npm dependabot run + id: latest + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + run: | + # Repos without a dependabot.yml have no "Dependabot Updates" workflow; + # treat the lookup failure as "no recent run found" rather than failing. + if ! RUN_ID=$(gh run list --repo "$REPO" --workflow "Dependabot Updates" --status success --limit 30 --json databaseId,name --jq 'first(.[] | select(.name | startswith("npm_and_yarn")) | .databaseId) // empty' 2>/dev/null); then + RUN_ID="" + fi + echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT" + + - name: Extract stuck deps (only if actions pending) + id: stuck + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + RUN_ID: ${{ steps.latest.outputs.run_id }} + ACTIONS: ${{ steps.alerts.outputs.actions }} + run: | + # Skip the stuck section entirely when nothing in the actions list + # - keeps the digest tidy when there's nothing to actually act on. + if [ "$ACTIONS" = "_None_" ]; then + echo "section=" >> "$GITHUB_OUTPUT" + exit 0 + fi + HEADER=$'\n\n*Couldn\'t auto-fix (need manual `pnpm.overrides`):*\n' + if [ -z "$RUN_ID" ]; then + { + echo "section<> "$GITHUB_OUTPUT" + exit 0 + fi + gh run view "$RUN_ID" --repo "$REPO" --log > log.txt 2>&1 || true + STUCK=$(grep -oE "No update possible for [^[:space:]]+ [0-9][^[:space:]]*" log.txt | sed 's/No update possible for //' | sort -u || true) + if [ -z "$STUCK" ]; then + { + echo "section<> "$GITHUB_OUTPUT" + exit 0 + fi + LIST=$(echo "$STUCK" | awk 'NR>1{printf "\n"} {printf "• *%s* %s", $1, $2}') + { + echo "section<> "$GITHUB_OUTPUT" + + - name: Build Slack payload + env: + REPO: ${{ github.repository }} + CHANNEL: ${{ vars.SLACK_CHANNEL_ID }} + TOTAL: ${{ steps.alerts.outputs.total }} + BY_SEVERITY: ${{ steps.alerts.outputs.by_severity }} + PRS_LIST: ${{ steps.prs.outputs.list }} + ACTIONS: ${{ steps.alerts.outputs.actions }} + STUCK: ${{ steps.stuck.outputs.section }} + run: | + # Build payload via jq so PR titles or error strings containing + # quotes/backslashes/newlines can't break the JSON. + jq -n \ + --arg channel "$CHANNEL" \ + --arg repo "$REPO" \ + --arg total "$TOTAL" \ + --arg by_severity "$BY_SEVERITY" \ + --arg prs_list "$PRS_LIST" \ + --arg actions "$ACTIONS" \ + --arg stuck "$STUCK" \ + --arg threshold "$THRESHOLD_DAYS" \ + '{ + channel: $channel, + text: ":calendar: *Weekly Dependabot summary* - `\($repo)`\n\n*Open alerts (\($total)):*\n\($by_severity)\n\n*Open Dependabot PRs:*\n\($prs_list)\n\n*Actions needed (<\($threshold)d remaining):*\n\($actions)\($stuck)\n\n" + }' > payload.json + + - name: Post Slack summary + uses: slackapi/slack-github-action@45a88b9581bfab2566dc881e2cd66d334e621e2c # v3.0.3 + with: + method: chat.postMessage + token: ${{ secrets.SLACK_BOT_TOKEN }} + payload-file-path: payload.json diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index bef575c353a..0cac7c8595f 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -26,10 +26,12 @@ jobs: working-directory: ./docs steps: - name: 📥 Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: 📦 Cache npm - uses: actions/cache@v4 + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: path: | ~/.npm diff --git a/.github/workflows/e2e-webapp-auth-full.yml b/.github/workflows/e2e-webapp-auth-full.yml new file mode 100644 index 00000000000..de9d66c07e9 --- /dev/null +++ b/.github/workflows/e2e-webapp-auth-full.yml @@ -0,0 +1,120 @@ +name: "🛡️ E2E Tests: Webapp Auth (full)" + +# Comprehensive RBAC auth test suite — see TRI-8731. Runs separately from +# the smoke e2e-webapp.yml because it covers every route family with a +# pass/fail matrix and would otherwise dominate per-PR CI time. +# +# Triggered: +# - Manually via workflow_dispatch. +# - Nightly via schedule. +# - On pull requests touching auth-relevant files only (paths filter). + +permissions: + contents: read + +on: + workflow_dispatch: + schedule: + - cron: "0 4 * * *" # 04:00 UTC daily + pull_request: + paths: + - "apps/webapp/app/services/routeBuilders/**" + - "apps/webapp/app/services/rbac.server.ts" + - "apps/webapp/app/services/apiAuth.server.ts" + - "apps/webapp/app/services/personalAccessToken.server.ts" + - "apps/webapp/app/services/sessionStorage.server.ts" + - "apps/webapp/app/routes/api.v*.**" + - "apps/webapp/app/routes/realtime.v*.**" + - "apps/webapp/test/**/*.e2e.full.test.ts" + - "apps/webapp/test/setup/global-e2e-full-setup.ts" + - "apps/webapp/test/helpers/sharedTestServer.ts" + - "apps/webapp/test/helpers/seedTestSession.ts" + - "apps/webapp/vitest.e2e.full.config.ts" + - "internal-packages/rbac/**" + - "packages/plugins/**" + - ".github/workflows/e2e-webapp-auth-full.yml" + +jobs: + e2eAuthFull: + name: "🛡️ E2E Auth Tests (full)" + runs-on: ubuntu-latest + timeout-minutes: 30 + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + steps: + - name: 🔧 Disable IPv6 + run: | + sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.lo.disable_ipv6=1 + + - name: 🔧 Configure docker address pool + run: | + CONFIG='{ + "default-address-pools" : [ + { + "base" : "172.17.0.0/12", + "size" : 20 + }, + { + "base" : "192.168.0.0/16", + "size" : 24 + } + ] + }' + mkdir -p /etc/docker + echo "$CONFIG" | sudo tee /etc/docker/daemon.json + + - name: 🔧 Restart docker daemon + run: sudo systemctl restart docker + + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + # Don't leave the GITHUB_TOKEN in .git/config — this job + # doesn't need to push and the persisted creds would be + # readable from any subsequent step (zizmor/artipacked). + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + - name: 🐳 Login to DockerHub + if: ${{ env.DOCKERHUB_USERNAME }} + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: 🐳 Skipping DockerHub login (no secrets available) + if: ${{ !env.DOCKERHUB_USERNAME }} + run: echo "DockerHub login skipped because secrets are not available." + + - name: 🐳 Pre-pull testcontainer images + if: ${{ env.DOCKERHUB_USERNAME }} + run: | + docker pull postgres:14 + docker pull redis:7.2 + docker pull testcontainers/ryuk:0.11.0 + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🏗️ Build Webapp + run: pnpm run build --filter webapp + + - name: 🛡️ Run Webapp Full Auth E2E Tests + run: cd apps/webapp && pnpm exec vitest run --config vitest.e2e.full.config.ts --reporter=default + env: + WEBAPP_TEST_VERBOSE: "1" diff --git a/.github/workflows/e2e-webapp.yml b/.github/workflows/e2e-webapp.yml new file mode 100644 index 00000000000..f306a86cd28 --- /dev/null +++ b/.github/workflows/e2e-webapp.yml @@ -0,0 +1,97 @@ +name: "🧪 E2E Tests: Webapp" + +permissions: + contents: read + +on: + workflow_call: + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false + +jobs: + e2eTests: + name: "🧪 E2E Tests: Webapp" + runs-on: ubuntu-latest + timeout-minutes: 20 + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + steps: + - name: 🔧 Disable IPv6 + run: | + sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.lo.disable_ipv6=1 + + - name: 🔧 Configure docker address pool + run: | + CONFIG='{ + "default-address-pools" : [ + { + "base" : "172.17.0.0/12", + "size" : 20 + }, + { + "base" : "192.168.0.0/16", + "size" : 24 + } + ] + }' + mkdir -p /etc/docker + echo "$CONFIG" | sudo tee /etc/docker/daemon.json + + - name: 🔧 Restart docker daemon + run: sudo systemctl restart docker + + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + # ..to avoid rate limits when pulling images + - name: 🐳 Login to DockerHub + if: ${{ env.DOCKERHUB_USERNAME }} + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: 🐳 Skipping DockerHub login (no secrets available) + if: ${{ !env.DOCKERHUB_USERNAME }} + run: echo "DockerHub login skipped because secrets are not available." + + - name: 🐳 Pre-pull testcontainer images + if: ${{ env.DOCKERHUB_USERNAME }} + run: | + echo "Pre-pulling Docker images with authenticated session..." + docker pull postgres:14 + docker pull redis:7.2 + docker pull testcontainers/ryuk:0.11.0 + echo "Image pre-pull complete" + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🏗️ Build Webapp + run: pnpm run build --filter webapp + + - name: 🧪 Run Webapp E2E Tests + run: cd apps/webapp && pnpm exec vitest run --config vitest.e2e.config.ts --reporter=default + env: + WEBAPP_TEST_VERBOSE: "1" diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 9518ca6157c..a70f0400e0a 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -24,19 +24,20 @@ jobs: package-manager: ["npm", "pnpm"] steps: - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 - name: 📥 Download deps run: pnpm install --frozen-lockfile --filter trigger.dev... @@ -48,7 +49,7 @@ jobs: run: pnpm run build --filter trigger.dev^... - name: 🔧 Build worker template files - run: pnpm --filter trigger.dev run build:workers + run: pnpm --filter trigger.dev run --if-present build:workers - name: Enable corepack run: corepack enable diff --git a/.github/workflows/helm-pr-prerelease.yml b/.github/workflows/helm-pr-prerelease.yml deleted file mode 100644 index 8df045945e6..00000000000 --- a/.github/workflows/helm-pr-prerelease.yml +++ /dev/null @@ -1,138 +0,0 @@ -name: 🧭 Helm Chart PR Prerelease - -on: - pull_request: - types: [opened, synchronize, reopened] - paths: - - "hosting/k8s/helm/**" - -concurrency: - group: helm-prerelease-${{ github.event.pull_request.number }} - cancel-in-progress: true - -env: - REGISTRY: ghcr.io - CHART_NAME: trigger - -jobs: - lint-and-test: - runs-on: ubuntu-latest - permissions: - contents: read - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Helm - uses: azure/setup-helm@v4 - with: - version: "3.18.3" - - - name: Build dependencies - run: helm dependency build ./hosting/k8s/helm/ - - - name: Extract dependency charts - run: | - cd ./hosting/k8s/helm/ - for file in ./charts/*.tgz; do echo "Extracting $file"; tar -xzf "$file" -C ./charts; done - - - name: Lint Helm Chart - run: | - helm lint ./hosting/k8s/helm/ - - - name: Render templates - run: | - helm template test-release ./hosting/k8s/helm/ \ - --values ./hosting/k8s/helm/values.yaml \ - --output-dir ./helm-output - - - name: Validate manifests - uses: docker://ghcr.io/yannh/kubeconform:v0.7.0 - with: - entrypoint: "/kubeconform" - args: "-summary -output json ./helm-output" - - prerelease: - needs: lint-and-test - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - pull-requests: write - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Helm - uses: azure/setup-helm@v4 - with: - version: "3.18.3" - - - name: Build dependencies - run: helm dependency build ./hosting/k8s/helm/ - - - name: Extract dependency charts - run: | - cd ./hosting/k8s/helm/ - for file in ./charts/*.tgz; do echo "Extracting $file"; tar -xzf "$file" -C ./charts; done - - - name: Log in to Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Generate prerelease version - id: version - run: | - BASE_VERSION=$(grep '^version:' ./hosting/k8s/helm/Chart.yaml | awk '{print $2}') - PR_NUMBER=${{ github.event.pull_request.number }} - SHORT_SHA=$(echo "${{ github.event.pull_request.head.sha }}" | cut -c1-7) - PRERELEASE_VERSION="${BASE_VERSION}-pr${PR_NUMBER}.${SHORT_SHA}" - echo "version=$PRERELEASE_VERSION" >> $GITHUB_OUTPUT - echo "Prerelease version: $PRERELEASE_VERSION" - - - name: Update Chart.yaml with prerelease version - run: | - sed -i "s/^version:.*/version: ${{ steps.version.outputs.version }}/" ./hosting/k8s/helm/Chart.yaml - - - name: Package Helm Chart - run: | - helm package ./hosting/k8s/helm/ --destination /tmp/ - - - name: Push Helm Chart to GHCR - run: | - VERSION="${{ steps.version.outputs.version }}" - CHART_PACKAGE="/tmp/${{ env.CHART_NAME }}-${VERSION}.tgz" - - # Push to GHCR OCI registry - helm push "$CHART_PACKAGE" "oci://${{ env.REGISTRY }}/${{ github.repository_owner }}/charts" - - - name: Find existing comment - uses: peter-evans/find-comment@v3 - id: find-comment - with: - issue-number: ${{ github.event.pull_request.number }} - comment-author: "github-actions[bot]" - body-includes: "Helm Chart Prerelease Published" - - - name: Create or update PR comment - uses: peter-evans/create-or-update-comment@v4 - with: - comment-id: ${{ steps.find-comment.outputs.comment-id }} - issue-number: ${{ github.event.pull_request.number }} - body: | - ### 🧭 Helm Chart Prerelease Published - - **Version:** `${{ steps.version.outputs.version }}` - - **Install:** - ```bash - helm upgrade --install trigger \ - oci://ghcr.io/${{ github.repository_owner }}/charts/trigger \ - --version "${{ steps.version.outputs.version }}" - ``` - - > ⚠️ This is a prerelease for testing. Do not use in production. - edit-mode: replace diff --git a/.github/workflows/helm-prerelease.yml b/.github/workflows/helm-prerelease.yml new file mode 100644 index 00000000000..ff2c8f5a614 --- /dev/null +++ b/.github/workflows/helm-prerelease.yml @@ -0,0 +1,200 @@ +name: 🧭 Helm Chart Prerelease + +on: + pull_request: + types: [opened, synchronize, reopened] + paths: + - "hosting/k8s/helm/**" + push: + branches: + - main + paths: + - "hosting/k8s/helm/**" + workflow_dispatch: + inputs: + app_version: + description: "Override appVersion (e.g. 'main', 'v4.4.4'). Leave empty to keep Chart.yaml value." + required: false + type: string + default: "" + +concurrency: + group: helm-prerelease-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + REGISTRY: ghcr.io + CHART_NAME: trigger + +jobs: + lint-and-test: + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Set up Helm + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5.0.0 + with: + version: "3.18.3" + + - name: Build dependencies + run: helm dependency build ./hosting/k8s/helm/ + + - name: Extract dependency charts + run: | + cd ./hosting/k8s/helm/ + for file in ./charts/*.tgz; do echo "Extracting $file"; tar -xzf "$file" -C ./charts; done + + - name: Lint Helm Chart + run: | + helm lint ./hosting/k8s/helm/ + + - name: Render templates + run: | + helm template test-release ./hosting/k8s/helm/ \ + --values ./hosting/k8s/helm/values.yaml \ + --output-dir ./helm-output + + - name: Validate manifests + uses: docker://ghcr.io/yannh/kubeconform:v0.7.0@sha256:85dbef6b4b312b99133decc9c6fc9495e9fc5f92293d4ff3b7e1b30f5611823c + with: + entrypoint: "/kubeconform" + args: "-summary -output json ./helm-output" + + prerelease: + needs: lint-and-test + if: | + (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) || + github.event_name == 'push' || + github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + pull-requests: write + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Set up Helm + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5.0.0 + with: + version: "3.18.3" + + - name: Build dependencies + run: helm dependency build ./hosting/k8s/helm/ + + - name: Extract dependency charts + run: | + cd ./hosting/k8s/helm/ + for file in ./charts/*.tgz; do echo "Extracting $file"; tar -xzf "$file" -C ./charts; done + + - name: Log in to Container Registry + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Generate prerelease version + id: version + run: | + BASE_VERSION=$(grep '^version:' ./hosting/k8s/helm/Chart.yaml | awk '{print $2}') + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + PR_NUMBER=${{ github.event.pull_request.number }} + SHORT_SHA=$(echo "${{ github.event.pull_request.head.sha }}" | cut -c1-7) + PRERELEASE_VERSION="${BASE_VERSION}-pr${PR_NUMBER}.${SHORT_SHA}" + elif [[ "${{ github.event_name }}" == "push" ]]; then + SHORT_SHA=$(echo "${GITHUB_SHA}" | cut -c1-7) + PRERELEASE_VERSION="${BASE_VERSION}-main.${SHORT_SHA}" + else + SHORT_SHA=$(echo "${GITHUB_SHA}" | cut -c1-7) + REF_SLUG=$(echo "${GITHUB_REF_NAME}" | tr '/' '-' | tr -cd 'a-zA-Z0-9-') + if [[ -z "$REF_SLUG" ]]; then + REF_SLUG="manual" + fi + PRERELEASE_VERSION="${BASE_VERSION}-${REF_SLUG}.${SHORT_SHA}" + fi + echo "version=$PRERELEASE_VERSION" >> "$GITHUB_OUTPUT" + echo "Prerelease version: $PRERELEASE_VERSION" + + - name: Update Chart.yaml with prerelease version + run: | + sed -i "s/^version:.*/version: ${STEPS_VERSION_OUTPUTS_VERSION}/" ./hosting/k8s/helm/Chart.yaml + env: + STEPS_VERSION_OUTPUTS_VERSION: ${{ steps.version.outputs.version }} + + - name: Override appVersion + if: github.event_name == 'workflow_dispatch' && inputs.app_version != '' + env: + APP_VERSION: ${{ inputs.app_version }} + run: | + yq -i '.appVersion = strenv(APP_VERSION)' ./hosting/k8s/helm/Chart.yaml + + - name: Package Helm Chart + run: | + helm package ./hosting/k8s/helm/ --destination /tmp/ + + - name: Push Helm Chart to GHCR + run: | + VERSION="${STEPS_VERSION_OUTPUTS_VERSION}" + CHART_PACKAGE="/tmp/${{ env.CHART_NAME }}-${VERSION}.tgz" + + # Push to GHCR OCI registry + helm push "$CHART_PACKAGE" "oci://${{ env.REGISTRY }}/${{ github.repository_owner }}/charts" + env: + STEPS_VERSION_OUTPUTS_VERSION: ${{ steps.version.outputs.version }} + + - name: Write run summary + run: | + { + echo "### 🧭 Helm Chart Prerelease Published" + echo "" + echo "**Version:** \`${STEPS_VERSION_OUTPUTS_VERSION}\`" + echo "" + echo "**Install:**" + echo '```bash' + echo "helm upgrade --install trigger \\" + echo " oci://${{ env.REGISTRY }}/${{ github.repository_owner }}/charts/${{ env.CHART_NAME }} \\" + echo " --version \"${STEPS_VERSION_OUTPUTS_VERSION}\"" + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + env: + STEPS_VERSION_OUTPUTS_VERSION: ${{ steps.version.outputs.version }} + + - name: Find existing comment + if: github.event_name == 'pull_request' + uses: peter-evans/find-comment@b30e6a3c0ed37e7c023ccd3f1db5c6c0b0c23aad # v4.0.0 + id: find-comment + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: "github-actions[bot]" + body-includes: "Helm Chart Prerelease Published" + + - name: Create or update PR comment + if: github.event_name == 'pull_request' + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5.0.0 + with: + comment-id: ${{ steps.find-comment.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body: | + ### 🧭 Helm Chart Prerelease Published + + **Version:** `${{ steps.version.outputs.version }}` + + **Install:** + ```bash + helm upgrade --install trigger \ + oci://ghcr.io/${{ github.repository_owner }}/charts/trigger \ + --version "${{ steps.version.outputs.version }}" + ``` + + > ⚠️ This is a prerelease for testing. Do not use in production. + edit-mode: replace diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index dab18223e35..95805539807 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -3,9 +3,6 @@ name: 🤖 PR Checks on: pull_request: types: [opened, synchronize, reopened] - paths-ignore: - - "docs/**" - - ".changeset/**" concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} @@ -13,23 +10,169 @@ concurrency: permissions: contents: read - id-token: write + pull-requests: read jobs: + changes: + name: Detect changes + runs-on: ubuntu-latest + outputs: + code: ${{ steps.code_filter.outputs.code }} + typecheck_self: ${{ steps.filter.outputs.typecheck_self }} + webapp: ${{ steps.filter.outputs.webapp }} + packages: ${{ steps.filter.outputs.packages }} + internal: ${{ steps.filter.outputs.internal }} + cli: ${{ steps.filter.outputs.cli }} + sdk: ${{ steps.filter.outputs.sdk }} + steps: + # `code` uses `every` semantics so the negation patterns actually subtract. + # With the default `some` quantifier, `**` matches every file and the + # subsequent `!...` patterns are no-ops (each pattern is OR'd, not AND'd). + - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1 + id: code_filter + with: + predicate-quantifier: every + filters: | + code: + - '**' + - '!docs/**' + - '!.changeset/**' + - '!hosting/**' + - '!.github/**' + - '!**/*.md' + - '!**/.env.example' + - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1 + id: filter + with: + filters: | + typecheck_self: + - '.github/workflows/pr_checks.yml' + - '.github/workflows/typecheck.yml' + webapp: + - 'apps/webapp/**' + - 'packages/**' + - 'internal-packages/**' + - '.github/workflows/pr_checks.yml' + - '.github/workflows/unit-tests-webapp.yml' + - '.github/workflows/e2e-webapp.yml' + - '.configs/**' + - 'package.json' + - 'pnpm-lock.yaml' + - 'pnpm-workspace.yaml' + - 'turbo.json' + packages: + - 'packages/**' + - '.github/workflows/pr_checks.yml' + - '.github/workflows/unit-tests-packages.yml' + - '.configs/**' + - 'package.json' + - 'pnpm-lock.yaml' + - 'pnpm-workspace.yaml' + - 'turbo.json' + internal: + - 'internal-packages/**' + - 'packages/**' + - '.github/workflows/pr_checks.yml' + - '.github/workflows/unit-tests-internal.yml' + - '.configs/**' + - 'package.json' + - 'pnpm-lock.yaml' + - 'pnpm-workspace.yaml' + - 'turbo.json' + cli: + - 'packages/cli-v3/**' + - 'packages/build/**' + - 'packages/core/**' + - 'packages/schema-to-json/**' + - '.github/workflows/pr_checks.yml' + - '.github/workflows/e2e.yml' + - '.configs/**' + - 'package.json' + - 'pnpm-lock.yaml' + - 'pnpm-workspace.yaml' + - 'turbo.json' + sdk: + - 'packages/trigger-sdk/**' + - 'packages/core/**' + - '.github/workflows/pr_checks.yml' + - '.github/workflows/sdk-compat.yml' + - '.configs/**' + - 'package.json' + - 'pnpm-lock.yaml' + - 'pnpm-workspace.yaml' + - 'turbo.json' + typecheck: + needs: changes + if: needs.changes.outputs.code == 'true' || needs.changes.outputs.typecheck_self == 'true' uses: ./.github/workflows/typecheck.yml - secrets: inherit - units: - uses: ./.github/workflows/unit-tests.yml - secrets: inherit + webapp: + needs: changes + if: needs.changes.outputs.webapp == 'true' + uses: ./.github/workflows/unit-tests-webapp.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + + e2e-webapp: + needs: changes + if: needs.changes.outputs.webapp == 'true' + uses: ./.github/workflows/e2e-webapp.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + + packages: + needs: changes + if: needs.changes.outputs.packages == 'true' + uses: ./.github/workflows/unit-tests-packages.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + + internal: + needs: changes + if: needs.changes.outputs.internal == 'true' + uses: ./.github/workflows/unit-tests-internal.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} e2e: + needs: changes + if: needs.changes.outputs.cli == 'true' uses: ./.github/workflows/e2e.yml with: package: cli-v3 - secrets: inherit sdk-compat: + needs: changes + if: needs.changes.outputs.sdk == 'true' uses: ./.github/workflows/sdk-compat.yml - secrets: inherit + + all-checks: + name: All PR Checks + needs: + - changes + - typecheck + - webapp + - e2e-webapp + - packages + - internal + - e2e + - sdk-compat + if: always() + runs-on: ubuntu-latest + steps: + - name: Verify all checks + run: | + if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then + echo "One or more checks failed" + exit 1 + fi + if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then + echo "One or more checks were cancelled" + exit 1 + fi + echo "All checks passed or were skipped due to path filters" diff --git a/.github/workflows/preview-dispatch.yml b/.github/workflows/preview-dispatch.yml new file mode 100644 index 00000000000..3f26c66cf33 --- /dev/null +++ b/.github/workflows/preview-dispatch.yml @@ -0,0 +1,76 @@ +name: 🌱 Preview environment dispatch + +# Opt-in per-PR preview environments + +on: + pull_request: + types: [opened, reopened, synchronize, closed, labeled, unlabeled] + +# Serialize a PR's events so dispatches arrive in order. Cloud-side concurrency +# collapses by branch but can't fix out-of-order arrival — e.g. a push racing a +# close could cancel the in-flight destroy and leak the preview. One short API +# call, so queuing is cheap; cancel-in-progress: false lets an in-flight +# dispatch finish (GitHub keeps only the latest pending, the desired behavior). +concurrency: + group: preview-dispatch-${{ github.event.pull_request.number }} + cancel-in-progress: false + +permissions: {} + +jobs: + dispatch: + name: Dispatch preview-deploy to cloud + runs-on: ubuntu-latest + # label added -> create + # new commit while labeled -> update + # label removed / PR closed -> destroy + if: >- + github.event.pull_request.head.repo.full_name == github.repository && + ( + (github.event.action == 'labeled' && github.event.label.name == 'preview') || + (github.event.action == 'unlabeled' && github.event.label.name == 'preview') || + ( + contains(github.event.pull_request.labels.*.name, 'preview') && + contains(fromJSON('["opened","reopened","synchronize","closed"]'), github.event.action) + ) + ) + steps: + - name: Build dispatch payload + id: payload + env: + ACTION: ${{ github.event.action }} + BRANCH: ${{ github.event.pull_request.head.ref }} + COMMIT: ${{ github.event.pull_request.head.sha }} + run: | + set -euo pipefail + # Map the GitHub PR action to the cloud pipeline's lifecycle event. + case "$ACTION" in + labeled | opened | reopened) EVENT=opened ;; + synchronize) EVENT=synchronize ;; + unlabeled | closed) EVENT=closed ;; + *) echo "unexpected action: $ACTION" >&2; exit 1 ;; + esac + # jq --arg JSON-escapes every value, so a branch name containing + # quotes/braces can't break or inject into the client payload. + payload=$(jq -nc \ + --arg b "$BRANCH" \ + --arg c "$COMMIT" \ + --arg e "$EVENT" \ + '{branch_name: $b, commit: $c, pull_request_event: $e}') + { + echo "client_payload=$payload" + echo "summary=$EVENT for $BRANCH @ ${COMMIT:0:7}" + } >> "$GITHUB_OUTPUT" + + - name: Log dispatch + env: + SUMMARY: ${{ steps.payload.outputs.summary }} + run: echo "Dispatching preview-deploy event ($SUMMARY)" + + - name: Send repository_dispatch + uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1 + with: + token: ${{ secrets.CROSS_REPO_PAT }} + repository: triggerdotdev/cloud + event-type: preview-deploy + client-payload: ${{ steps.payload.outputs.client_payload }} diff --git a/.github/workflows/preview-packages.yml b/.github/workflows/preview-packages.yml new file mode 100644 index 00000000000..f4dd5b39930 --- /dev/null +++ b/.github/workflows/preview-packages.yml @@ -0,0 +1,83 @@ +name: 📦 Preview packages (pkg.pr.new) + +# Publishes installable preview builds of the public @trigger.dev/* packages +# for every push to a branch, via https://pkg.pr.new. These are NOT published +# to npm — pkg.pr.new serves them by commit SHA and drops install instructions +# in a comment on the associated PR, e.g. +# npm i https://pkg.pr.new/@trigger.dev/sdk@ +# +# Prerequisites: +# - The pkg.pr.new GitHub App must be installed on triggerdotdev/trigger.dev +# (https://github.com/apps/pkg-pr-new). Publishing fails until it is. +# +# Fork note: pkg.pr.new authenticates with a GitHub Actions OIDC token, which +# GitHub does not issue to pull_request workflows from forks. This `push` +# trigger therefore covers branches pushed to this repo (the core team), not +# external fork PRs. Adding fork coverage would require a workflow_run two-stage +# setup. + +on: + push: + branches-ignore: + - main + - changeset-release/main + paths: + - "package.json" + - "packages/**" + - "pnpm-lock.yaml" + - "pnpm-workspace.yaml" + - "turbo.json" + - ".github/workflows/preview-packages.yml" + - "scripts/stamp-preview-version.mjs" + - "scripts/updateVersion.ts" + +concurrency: + group: preview-packages-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + id-token: write # OIDC token used by pkg.pr.new to authenticate the publish + +jobs: + publish: + name: Build and publish previews + runs-on: ubuntu-latest + if: github.repository == 'triggerdotdev/trigger.dev' + steps: + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.0 + cache: "pnpm" + + - name: 📥 Install dependencies + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma client + run: pnpm run generate + + # Stamp a unique 0.0.0-preview- version before building so it can't + # collide with real npm versions and so updateVersion.ts bakes it into the + # runtime VERSION constant. See scripts/stamp-preview-version.mjs. + - name: 🏷️ Stamp preview version + run: node scripts/stamp-preview-version.mjs + env: + GITHUB_SHA: ${{ github.sha }} + + - name: 🔨 Build packages + run: pnpm run build --filter "@trigger.dev/*" --filter "trigger.dev" + + - name: 🚀 Publish previews to pkg.pr.new + run: pnpm exec pkg-pr-new publish --pnpm --compact --commentWithSha './packages/*' diff --git a/.github/workflows/publish-webapp.yml b/.github/workflows/publish-webapp.yml index 6fcc30209ab..5a604e26082 100644 --- a/.github/workflows/publish-webapp.yml +++ b/.github/workflows/publish-webapp.yml @@ -4,6 +4,7 @@ permissions: contents: read packages: write id-token: write + attestations: write on: workflow_call: @@ -13,6 +14,27 @@ on: type: string required: false default: "" + image_registry: + description: The registry namespace to publish under (e.g. ghcr.io/) + type: string + required: false + default: "" + outputs: + version: + description: The published image tag + value: ${{ jobs.publish.outputs.version }} + short_sha: + description: Short commit SHA of the published build + value: ${{ jobs.publish.outputs.short_sha }} + image_repo: + description: The image repository the build was published to (without tag) + value: ${{ jobs.publish.outputs.image_repo }} + digest: + description: Multi-arch index digest (sha256:...) of the published image + value: ${{ jobs.publish.outputs.digest }} + secrets: + SENTRY_AUTH_TOKEN: + required: false jobs: publish: @@ -22,14 +44,17 @@ jobs: outputs: version: ${{ steps.get_tag.outputs.tag }} short_sha: ${{ steps.get_commit.outputs.sha_short }} + image_repo: ${{ steps.set_tags.outputs.image_repo }} + digest: ${{ steps.build_push.outputs.digest }} steps: - name: 🏭 Setup Depot CLI - uses: depot/setup-action@v1 + uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1.7.1 - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: recursive + persist-credentials: false - name: "#️⃣ Get the image tag" id: get_tag @@ -40,42 +65,57 @@ jobs: - name: 🔢 Get the commit hash id: get_commit run: | - echo "sha_short=$(echo ${{ github.sha }} | cut -c1-7)" >> "$GITHUB_OUTPUT" + echo "sha_short=$(echo "${GITHUB_SHA}" | cut -c1-7)" >> "$GITHUB_OUTPUT" - name: 📛 Set the tags id: set_tags run: | - ref_without_tag=ghcr.io/triggerdotdev/trigger.dev - image_tags=$ref_without_tag:${{ steps.get_tag.outputs.tag }} + # The registry namespace is resolved by the caller (defaulting to + # ghcr.io/, overridable via the IMAGE_REGISTRY repository + # variable); the webapp image lives at /. A fork + # therefore publishes to its own package automatically. + image_tags=$REF_WITHOUT_TAG:${STEPS_GET_TAG_OUTPUTS_TAG} - # if tag is a semver, also tag it as v4 - if [[ "${{ steps.get_tag.outputs.is_semver }}" == true ]]; then - # TODO: switch to v4 tag on GA - image_tags=$image_tags,$ref_without_tag:v4-beta + # when pushing the mutable main tag, also push an immutable-by-convention + # full-commit-sha tag so a commit can be resolved to a specific digest + if [[ "${STEPS_GET_TAG_OUTPUTS_TAG}" == "main" ]]; then + image_tags=$image_tags,$REF_WITHOUT_TAG:${GITHUB_SHA} fi echo "image_tags=${image_tags}" >> "$GITHUB_OUTPUT" + echo "image_repo=${REF_WITHOUT_TAG}" >> "$GITHUB_OUTPUT" + env: + REF_WITHOUT_TAG: ${{ format('{0}/{1}', inputs.image_registry || vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner), github.event.repository.name) }} + STEPS_GET_TAG_OUTPUTS_TAG: ${{ steps.get_tag.outputs.tag }} + STEPS_GET_TAG_OUTPUTS_IS_SEMVER: ${{ steps.get_tag.outputs.is_semver }} - name: 📝 Set the build info id: set_build_info run: | - tag=${{ steps.get_tag.outputs.tag }} - if [[ "${{ steps.get_tag.outputs.is_semver }}" == true ]]; then - echo "BUILD_APP_VERSION=${tag}" >> "$GITHUB_OUTPUT" - fi - echo "BUILD_GIT_SHA=${{ github.sha }}" >> "$GITHUB_OUTPUT" - echo "BUILD_GIT_REF_NAME=${{ github.ref_name }}" >> "$GITHUB_OUTPUT" - echo "BUILD_TIMESTAMP_SECONDS=$(date +%s)" >> "$GITHUB_OUTPUT" + { + tag="${STEPS_GET_TAG_OUTPUTS_TAG}" + if [[ "${STEPS_GET_TAG_OUTPUTS_IS_SEMVER}" == true ]]; then + echo "BUILD_APP_VERSION=${tag}" + fi + echo "BUILD_GIT_SHA=${GITHUB_SHA}" + echo "BUILD_GIT_REF_NAME=${GITHUB_REF_NAME}" + echo "BUILD_TIMESTAMP_SECONDS=$(date +%s)" + echo "BUILD_TIMESTAMP_RFC3339=$(date -u +%Y-%m-%dT%H:%M:%SZ)" + } >> "$GITHUB_OUTPUT" + env: + STEPS_GET_TAG_OUTPUTS_TAG: ${{ steps.get_tag.outputs.tag }} + STEPS_GET_TAG_OUTPUTS_IS_SEMVER: ${{ steps.get_tag.outputs.is_semver }} - name: 🐙 Login to GitHub Container Registry - uses: docker/login-action@v3 + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 with: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - name: 🐳 Build image and push to GitHub Container Registry - uses: depot/build-push-action@v1 + id: build_push + uses: depot/build-push-action@98e78adca7817480b8185f474a400b451d74e287 # v1.18.0 with: file: ./docker/Dockerfile platforms: linux/amd64,linux/arm64 @@ -86,8 +126,20 @@ jobs: BUILD_GIT_SHA=${{ steps.set_build_info.outputs.BUILD_GIT_SHA }} BUILD_GIT_REF_NAME=${{ steps.set_build_info.outputs.BUILD_GIT_REF_NAME }} BUILD_TIMESTAMP_SECONDS=${{ steps.set_build_info.outputs.BUILD_TIMESTAMP_SECONDS }} + BUILD_TIMESTAMP_RFC3339=${{ steps.set_build_info.outputs.BUILD_TIMESTAMP_RFC3339 }} SENTRY_RELEASE=${{ steps.set_build_info.outputs.BUILD_GIT_SHA }} SENTRY_ORG=triggerdev SENTRY_PROJECT=trigger-cloud secrets: | sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }} + + - name: 🪪 Attest build provenance + # Image is already pushed by this point — don't fail releases (and the + # downstream publish-helm job) on a Sigstore/GHCR-referrer hiccup. Real + # config errors still surface as a step warning in the workflow run. + continue-on-error: true + uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0 + with: + subject-name: ${{ steps.set_tags.outputs.image_repo }} + subject-digest: ${{ steps.build_push.outputs.digest }} + push-to-registry: true diff --git a/.github/workflows/publish-worker-v4.yml b/.github/workflows/publish-worker-v4.yml index 4a2853da081..85ca903a8d6 100644 --- a/.github/workflows/publish-worker-v4.yml +++ b/.github/workflows/publish-worker-v4.yml @@ -8,6 +8,11 @@ on: type: string required: false default: "" + image_registry: + description: The registry namespace to publish under (e.g. ghcr.io/) + type: string + required: false + default: "" push: tags: - "re2-test-*" @@ -37,19 +42,22 @@ jobs: DOCKER_BUILDKIT: "1" steps: - name: 🏭 Setup Depot CLI - uses: depot/setup-action@v1 + uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1.7.1 - name: ⬇️ Checkout git repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: 📦 Get image repo id: get_repository + env: + PACKAGE: ${{ matrix.package }} run: | - if [[ "${{ matrix.package }}" == *-provider ]]; then - provider_type=$(echo "${{ matrix.package }}" | cut -d- -f1) - repo=provider/${provider_type} + if [[ "$PACKAGE" == *-provider ]]; then + repo="provider/${PACKAGE%-provider}" else - repo="${{ matrix.package }}" + repo="$PACKAGE" fi echo "repo=${repo}" >> "$GITHUB_OUTPUT" @@ -62,26 +70,28 @@ jobs: - name: 📛 Set tags to push id: set_tags run: | - ref_without_tag=ghcr.io/triggerdotdev/${{ steps.get_repository.outputs.repo }} - image_tags=$ref_without_tag:${{ steps.get_tag.outputs.tag }} - - # if tag is a semver, also tag it as v4 - if [[ "${{ steps.get_tag.outputs.is_semver }}" == true ]]; then - # TODO: switch to v4 tag on GA - image_tags=$image_tags,$ref_without_tag:v4-beta - fi + # Resolved by the caller when invoked from publish.yml; falls back to the + # IMAGE_REGISTRY repository variable (or ghcr.io/) for the direct + # push triggers above, so a fork publishes to its own namespace. + ref_without_tag=${IMAGE_REGISTRY}/${STEPS_GET_REPOSITORY_OUTPUTS_REPO} + image_tags=$ref_without_tag:${STEPS_GET_TAG_OUTPUTS_TAG} echo "image_tags=${image_tags}" >> "$GITHUB_OUTPUT" + env: + IMAGE_REGISTRY: ${{ inputs.image_registry || vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner) }} + STEPS_GET_REPOSITORY_OUTPUTS_REPO: ${{ steps.get_repository.outputs.repo }} + STEPS_GET_TAG_OUTPUTS_TAG: ${{ steps.get_tag.outputs.tag }} + STEPS_GET_TAG_OUTPUTS_IS_SEMVER: ${{ steps.get_tag.outputs.is_semver }} - name: 🐙 Login to GitHub Container Registry - uses: docker/login-action@v3 + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 with: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - name: 🐳 Build image and push to GitHub Container Registry - uses: depot/build-push-action@v1 + uses: depot/build-push-action@98e78adca7817480b8185f474a400b451d74e287 # v1.18.0 with: file: ./apps/${{ matrix.package }}/Containerfile platforms: linux/amd64,linux/arm64 diff --git a/.github/workflows/publish-worker.yml b/.github/workflows/publish-worker.yml index 74a70d83667..f443e5dab1e 100644 --- a/.github/workflows/publish-worker.yml +++ b/.github/workflows/publish-worker.yml @@ -8,6 +8,16 @@ on: type: string required: false default: "" + image_registry: + description: The registry namespace to publish under (e.g. ghcr.io/) + type: string + required: false + default: "" + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false push: tags: - "infra-dev-*" @@ -26,18 +36,22 @@ jobs: runs-on: ubuntu-latest env: DOCKER_BUILDKIT: "1" + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} steps: - name: ⬇️ Checkout git repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: 📦 Get image repo id: get_repository + env: + PACKAGE: ${{ matrix.package }} run: | - if [[ "${{ matrix.package }}" == *-provider ]]; then - provider_type=$(echo "${{ matrix.package }}" | cut -d- -f1) - repo=provider/${provider_type} + if [[ "$PACKAGE" == *-provider ]]; then + repo="provider/${PACKAGE%-provider}" else - repo="${{ matrix.package }}" + repo="$PACKAGE" fi echo "repo=${repo}" >> "$GITHUB_OUTPUT" @@ -47,11 +61,12 @@ jobs: tag: ${{ inputs.image_tag }} - name: 🐋 Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 # ..to avoid rate limits when pulling images - name: 🐳 Login to DockerHub - uses: docker/login-action@v3 + if: ${{ env.DOCKERHUB_USERNAME }} + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} @@ -62,7 +77,7 @@ jobs: # ..to push image - name: 🐙 Login to GitHub Container Registry - uses: docker/login-action@v3 + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 with: registry: ghcr.io username: ${{ github.repository_owner }} @@ -73,7 +88,10 @@ jobs: docker tag infra_image "$REGISTRY/$REPOSITORY:$IMAGE_TAG" docker push "$REGISTRY/$REPOSITORY:$IMAGE_TAG" env: - REGISTRY: ghcr.io/triggerdotdev + # Resolved by the caller when invoked from publish.yml; falls back to the + # IMAGE_REGISTRY repository variable (or ghcr.io/) for the direct + # push triggers above, so a fork publishes to its own namespace. + REGISTRY: ${{ inputs.image_registry || vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner) }} REPOSITORY: ${{ steps.get_repository.outputs.repo }} IMAGE_TAG: ${{ steps.get_tag.outputs.tag }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 6213499c5ad..2f2744c7702 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -8,6 +8,15 @@ on: description: The image tag to publish required: true type: string + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false + SENTRY_AUTH_TOKEN: + required: false + CROSS_REPO_PAT: + required: false push: branches: - main @@ -37,8 +46,6 @@ on: - "tests/**" permissions: - id-token: write - packages: write contents: read concurrency: @@ -50,29 +57,107 @@ env: jobs: typecheck: uses: ./.github/workflows/typecheck.yml - secrets: inherit units: uses: ./.github/workflows/unit-tests.yml - secrets: inherit + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} publish-webapp: needs: [typecheck] + permissions: + contents: read + packages: write + id-token: write + attestations: write uses: ./.github/workflows/publish-webapp.yml - secrets: inherit + secrets: + SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} with: image_tag: ${{ inputs.image_tag }} + # Target registry namespace. Defaults to ghcr.io/ so a fork publishes + # to its own namespace; set the IMAGE_REGISTRY repository variable to override. + image_registry: ${{ vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner) }} publish-worker: needs: [typecheck] + permissions: + contents: read + packages: write uses: ./.github/workflows/publish-worker.yml - secrets: inherit + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} with: image_tag: ${{ inputs.image_tag }} + image_registry: ${{ vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner) }} publish-worker-v4: needs: [typecheck] + permissions: + contents: read + packages: write + id-token: write uses: ./.github/workflows/publish-worker-v4.yml - secrets: inherit with: image_tag: ${{ inputs.image_tag }} + image_registry: ${{ vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner) }} + + # OS-level CVE scan of the image just published above. Report-only (writes to + # the run summary); runs alongside the worker publishes and never blocks them. + scan-webapp: + needs: [publish-webapp] + permissions: + contents: read + packages: read # pull the just-published image from GHCR + uses: ./.github/workflows/trivy-image-webapp.yml + with: + image-ref: ${{ needs.publish-webapp.outputs.image_repo }}:${{ needs.publish-webapp.outputs.version }} + + # Announce the freshly published mutable `main` webapp image to subscriber + # repos via repository_dispatch, handing them a digest-pinned ref to build or + # deploy from. The repo, ref prefix, and dispatch target all default to the + # canonical values and can be overridden by repository variables. + # + # `push` only: release builds reach publish.yml via workflow_call (from + # release.yml) with an explicit image_tag while github.ref_name is still + # `main`, so gate on the event to avoid dispatching — and failing on the + # absent CROSS_REPO_PAT — during a release. + dispatch-main-image: + name: 📣 Dispatch main image + needs: [publish-webapp] + if: github.repository == (vars.MAIN_IMAGE_DISPATCH_REPO || 'triggerdotdev/trigger.dev') && github.event_name == 'push' && startsWith(github.ref_name, vars.MAIN_IMAGE_DISPATCH_REF_PREFIX || 'main') + runs-on: ubuntu-latest + permissions: {} + steps: + - name: Build dispatch payload + id: payload + env: + IMAGE_REPO: ${{ needs.publish-webapp.outputs.image_repo }} + DIGEST: ${{ needs.publish-webapp.outputs.digest }} + COMMIT: ${{ github.sha }} + run: | + set -euo pipefail + # Pin to the exact multi-arch index just pushed so subscribers resolve a + # single immutable artifact rather than chasing the moving `main` tag. + if [[ -z "${DIGEST}" ]]; then + echo "::error::publish-webapp produced no image digest; refusing to dispatch" + exit 1 + fi + image="${IMAGE_REPO}@${DIGEST}" + # jq --arg JSON-escapes every value, so the ref/commit can't break out of + # or inject into the client payload. + payload=$(jq -nc \ + --arg img "$image" \ + --arg c "$COMMIT" \ + '{image: $img, commit: $c}') + echo "client_payload=$payload" >> "$GITHUB_OUTPUT" + + - name: Send repository_dispatch + uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1 + with: + token: ${{ secrets.CROSS_REPO_PAT }} + repository: ${{ vars.MAIN_IMAGE_DISPATCH_TARGET || 'triggerdotdev/cloud' }} + event-type: main-image-published + client-payload: ${{ steps.payload.outputs.client_payload }} diff --git a/.github/workflows/release-helm.yml b/.github/workflows/release-helm.yml index c6efd382ff6..13d28545e7f 100644 --- a/.github/workflows/release-helm.yml +++ b/.github/workflows/release-helm.yml @@ -4,6 +4,12 @@ on: push: tags: - 'helm-v*' + workflow_call: + inputs: + chart_version: + description: 'Chart version to release' + required: true + type: string workflow_dispatch: inputs: chart_version: @@ -22,10 +28,12 @@ jobs: contents: read steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Set up Helm - uses: azure/setup-helm@v4 + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5.0.0 with: version: "3.18.3" @@ -48,7 +56,7 @@ jobs: --output-dir ./helm-output - name: Validate manifests - uses: docker://ghcr.io/yannh/kubeconform:v0.7.0 + uses: docker://ghcr.io/yannh/kubeconform:v0.7.0@sha256:85dbef6b4b312b99133decc9c6fc9495e9fc5f92293d4ff3b7e1b30f5611823c with: entrypoint: '/kubeconform' args: "-summary -output json ./helm-output" @@ -61,10 +69,12 @@ jobs: packages: write steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Set up Helm - uses: azure/setup-helm@v4 + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5.0.0 with: version: "3.18.3" @@ -77,7 +87,7 @@ jobs: for file in ./charts/*.tgz; do echo "Extracting $file"; tar -xzf "$file" -C ./charts; done - name: Log in to Container Registry - uses: docker/login-action@v3 + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} @@ -86,18 +96,20 @@ jobs: - name: Extract version from tag or input id: version run: | - if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then - VERSION="${{ github.event.inputs.chart_version }}" + if [ -n "${INPUTS_CHART_VERSION}" ]; then + VERSION="${INPUTS_CHART_VERSION}" else - VERSION="${{ github.ref_name }}" + VERSION="${GITHUB_REF_NAME}" VERSION="${VERSION#helm-v}" fi - echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "version=$VERSION" >> "$GITHUB_OUTPUT" echo "Releasing version: $VERSION" + env: + INPUTS_CHART_VERSION: ${{ inputs.chart_version }} - name: Check Chart.yaml version matches release version run: | - VERSION="${{ steps.version.outputs.version }}" + VERSION="${STEPS_VERSION_OUTPUTS_VERSION}" CHART_VERSION=$(grep '^version:' ./hosting/k8s/helm/Chart.yaml | awk '{print $2}') echo "Chart.yaml version: $CHART_VERSION" echo "Release version: $VERSION" @@ -106,6 +118,8 @@ jobs: exit 1 fi echo "✅ Chart.yaml version matches release version." + env: + STEPS_VERSION_OUTPUTS_VERSION: ${{ steps.version.outputs.version }} - name: Package Helm Chart run: | @@ -113,18 +127,19 @@ jobs: - name: Push Helm Chart to GHCR run: | - VERSION="${{ steps.version.outputs.version }}" + VERSION="${STEPS_VERSION_OUTPUTS_VERSION}" CHART_PACKAGE="/tmp/${{ env.CHART_NAME }}-${VERSION}.tgz" # Push to GHCR OCI registry helm push "$CHART_PACKAGE" "oci://${{ env.REGISTRY }}/${{ github.repository_owner }}/charts" + env: + STEPS_VERSION_OUTPUTS_VERSION: ${{ steps.version.outputs.version }} - name: Create GitHub Release id: release - uses: softprops/action-gh-release@v1 - if: github.event_name == 'push' + uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0 with: - tag_name: ${{ github.ref_name }} + tag_name: helm-v${{ steps.version.outputs.version }} name: "Helm Chart ${{ steps.version.outputs.version }}" body: | ### Installation diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 79b113b0f2a..e3b339dfca7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -33,6 +33,7 @@ jobs: show-release-summary: name: 📋 Release Summary runs-on: ubuntu-latest + permissions: {} if: | github.repository == 'triggerdotdev/trigger.dev' && github.event_name == 'pull_request' && @@ -43,7 +44,7 @@ jobs: env: PR_BODY: ${{ github.event.pull_request.body }} run: | - echo "$PR_BODY" | sed -n '/^# Releases/,$p' >> $GITHUB_STEP_SUMMARY + echo "$PR_BODY" | sed -n '/^# Releases/,$p' >> "$GITHUB_STEP_SUMMARY" release: name: 🚀 Release npm packages @@ -63,9 +64,10 @@ jobs: published: ${{ steps.changesets.outputs.published }} published_packages: ${{ steps.changesets.outputs.publishedPackages }} published_package_version: ${{ steps.get_version.outputs.package_version }} + is_prerelease: ${{ steps.get_version.outputs.is_prerelease }} steps: - name: Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 # zizmor: ignore[artipacked] needs persisted git creds for tag push; no artifact upload here so no leak path with: fetch-depth: 0 ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.ref || github.sha }} @@ -73,20 +75,22 @@ jobs: - name: Verify ref is on main if: github.event_name == 'workflow_dispatch' run: | - if ! git merge-base --is-ancestor ${{ github.event.inputs.ref }} origin/main; then + if ! git merge-base --is-ancestor "${GITHUB_EVENT_INPUTS_REF}" origin/main; then echo "Error: ref must be an ancestor of main (i.e., already merged)" exit 1 fi + env: + GITHUB_EVENT_INPUTS_REF: ${{ github.event.inputs.ref }} - name: Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 cache: "pnpm" # npm v11.5.1 or newer is required for OIDC support @@ -108,7 +112,7 @@ jobs: - name: Publish id: changesets - uses: changesets/action@v1 + uses: changesets/action@63a615b9cd06ba9a3e6d13796c7fbcb080a60a0b # v1.8.0 with: publish: pnpm run changeset:release createGithubReleases: false @@ -119,28 +123,54 @@ jobs: if: steps.changesets.outputs.published == 'true' id: get_version run: | - package_version=$(echo '${{ steps.changesets.outputs.publishedPackages }}' | jq -r '.[0].version') + package_version=$(echo "${STEPS_CHANGESETS_OUTPUTS_PUBLISHEDPACKAGES}" | jq -r '.[0].version') echo "package_version=${package_version}" >> "$GITHUB_OUTPUT" + # Any semver with a hyphen is a prerelease (e.g. 4.5.0-rc.0, 0.0.0-snapshot-...) + if [[ "${package_version}" == *-* ]]; then + echo "is_prerelease=true" >> "$GITHUB_OUTPUT" + else + echo "is_prerelease=false" >> "$GITHUB_OUTPUT" + fi + env: + STEPS_CHANGESETS_OUTPUTS_PUBLISHEDPACKAGES: ${{ steps.changesets.outputs.publishedPackages }} - name: Create unified GitHub release if: steps.changesets.outputs.published == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} RELEASE_PR_BODY: ${{ github.event.pull_request.body }} + STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION: ${{ steps.get_version.outputs.package_version }} + STEPS_GET_VERSION_OUTPUTS_IS_PRERELEASE: ${{ steps.get_version.outputs.is_prerelease }} run: | - VERSION="${{ steps.get_version.outputs.package_version }}" + VERSION="${STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION}" node scripts/generate-github-release.mjs "$VERSION" > /tmp/release-body.md + PRERELEASE_FLAG="" + if [ "${STEPS_GET_VERSION_OUTPUTS_IS_PRERELEASE}" = "true" ]; then + PRERELEASE_FLAG="--prerelease" + fi gh release create "v${VERSION}" \ --title "trigger.dev v${VERSION}" \ --notes-file /tmp/release-body.md \ - --target main + --target main \ + $PRERELEASE_FLAG - name: Create and push Docker tag if: steps.changesets.outputs.published == 'true' run: | set -e - git tag "v.docker.${{ steps.get_version.outputs.package_version }}" - git push origin "v.docker.${{ steps.get_version.outputs.package_version }}" + git tag "v.docker.${STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION}" + git push origin "v.docker.${STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION}" + env: + STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION: ${{ steps.get_version.outputs.package_version }} + + - name: Create and push Helm chart tag + if: steps.changesets.outputs.published == 'true' + run: | + set -e + git tag "helm-v${STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION}" + git push origin "helm-v${STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION}" + env: + STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION: ${{ steps.get_version.outputs.package_version }} # Trigger Docker builds directly via workflow_call since tags pushed with # GITHUB_TOKEN don't trigger other workflows (GitHub Actions limitation). @@ -148,11 +178,33 @@ jobs: name: 🐳 Publish Docker images needs: release if: needs.release.outputs.published == 'true' + permissions: + contents: read + packages: write + id-token: write + attestations: write uses: ./.github/workflows/publish.yml - secrets: inherit + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} with: image_tag: v${{ needs.release.outputs.published_package_version }} + # Trigger Helm chart release directly via workflow_call (same GITHUB_TOKEN + # limitation as the Docker path). Runs after Docker images are published so + # the chart never references images that don't exist yet. + publish-helm: + name: 🧭 Publish Helm chart + needs: [release, publish-docker] + if: needs.release.outputs.published == 'true' + permissions: + contents: write + packages: write + uses: ./.github/workflows/release-helm.yml + with: + chart_version: ${{ needs.release.outputs.published_package_version }} + # After Docker images are published, update the GitHub release with the exact GHCR tag URL. # The GHCR package version ID is only known after the image is pushed, so we query for it here. update-release: @@ -167,9 +219,10 @@ jobs: - name: Update GitHub release with Docker image link env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NEEDS_RELEASE_OUTPUTS_PUBLISHED_PACKAGE_VERSION: ${{ needs.release.outputs.published_package_version }} run: | set -e - VERSION="${{ needs.release.outputs.published_package_version }}" + VERSION="${NEEDS_RELEASE_OUTPUTS_PUBLISHED_PACKAGE_VERSION}" TAG="v${VERSION}" # Query GHCR for the version ID matching this tag @@ -199,10 +252,11 @@ jobs: dispatch-changelog: name: 📝 Dispatch changelog PR needs: [release, update-release] - if: needs.release.outputs.published == 'true' + if: needs.release.outputs.published == 'true' && needs.release.outputs.is_prerelease != 'true' runs-on: ubuntu-latest + permissions: {} steps: - - uses: peter-evans/repository-dispatch@v3 + - uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1 with: token: ${{ secrets.CROSS_REPO_PAT }} repository: triggerdotdev/trigger.dev-site-v3 @@ -220,20 +274,21 @@ jobs: if: github.repository == 'triggerdotdev/trigger.dev' && github.event_name == 'workflow_dispatch' && github.event.inputs.type == 'prerelease' steps: - name: Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 ref: ${{ github.event.inputs.ref }} + persist-credentials: false - name: Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 cache: "pnpm" # npm v11.5.1 or newer is required for OIDC support @@ -247,10 +302,18 @@ jobs: - name: Generate Prisma Client run: pnpm run generate + - name: Exit changeset pre mode (if active) + run: | + if [ -f .changeset/pre.json ]; then + echo "Repo is in changeset pre mode; exiting so snapshot release can run" + pnpm exec changeset pre exit + fi + - name: Snapshot version - run: pnpm exec changeset version --snapshot ${{ github.event.inputs.prerelease_tag }} + run: pnpm exec changeset version --snapshot "${GITHUB_EVENT_INPUTS_PRERELEASE_TAG}" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_EVENT_INPUTS_PRERELEASE_TAG: ${{ github.event.inputs.prerelease_tag }} - name: Clean run: pnpm run clean --filter "@trigger.dev/*" --filter "trigger.dev" @@ -259,6 +322,7 @@ jobs: run: pnpm run build --filter "@trigger.dev/*" --filter "trigger.dev" - name: Publish prerelease - run: pnpm exec changeset publish --no-git-tag --snapshot --tag ${{ github.event.inputs.prerelease_tag }} + run: pnpm exec changeset publish --no-git-tag --snapshot --tag "${GITHUB_EVENT_INPUTS_PRERELEASE_TAG}" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_EVENT_INPUTS_PRERELEASE_TAG: ${{ github.event.inputs.prerelease_tag }} diff --git a/.github/workflows/sdk-compat.yml b/.github/workflows/sdk-compat.yml index eb347c0f771..1510af23181 100644 --- a/.github/workflows/sdk-compat.yml +++ b/.github/workflows/sdk-compat.yml @@ -18,17 +18,18 @@ jobs: steps: - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: node-version: ${{ matrix.node }} cache: "pnpm" @@ -56,23 +57,24 @@ jobs: runs-on: ubuntu-latest steps: - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 cache: "pnpm" - name: 🥟 Setup Bun - uses: oven-sh/setup-bun@v2 + uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0 with: bun-version: latest @@ -97,23 +99,24 @@ jobs: runs-on: ubuntu-latest steps: - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 cache: "pnpm" - name: 🦕 Setup Deno - uses: denoland/setup-deno@v2 + uses: denoland/setup-deno@667a34cdef165d8d2b2e98dde39547c9daac7282 # v2.0.4 with: deno-version: v2.x @@ -142,19 +145,20 @@ jobs: runs-on: ubuntu-latest steps: - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 cache: "pnpm" - name: 📥 Download deps diff --git a/.github/workflows/trivy-image-webapp.yml b/.github/workflows/trivy-image-webapp.yml new file mode 100644 index 00000000000..7dae65ef2bf --- /dev/null +++ b/.github/workflows/trivy-image-webapp.yml @@ -0,0 +1,75 @@ +name: Trivy Image Scan (webapp) + +# OS-level CVE scan of a published webapp image. Called by the publish pipeline +# (publish.yml) to scan each build right after it's pushed to GHCR — so every +# main build and every release is scanned, not rebuilt. Also runnable ad-hoc +# via workflow_dispatch against any image ref. +# +# Report-only: writes a table to the run summary. No SARIF upload, no gate. +# Library/dependency CVEs are covered by Dependabot, so this is restricted to +# OS packages (`vuln-type: os`) to avoid double-reporting. + +on: + workflow_call: + inputs: + image-ref: + description: "Full image ref to scan (e.g. ghcr.io/triggerdotdev/trigger.dev:main)" + type: string + required: true + workflow_dispatch: + inputs: + image-ref: + description: "Full image ref to scan" + type: string + required: false + default: "ghcr.io/triggerdotdev/trigger.dev:main" + +permissions: {} + +concurrency: + group: trivy-image-webapp-${{ inputs.image-ref }} + cancel-in-progress: true + +jobs: + scan: + name: Scan + runs-on: ubuntu-latest + permissions: + contents: read + packages: read # pull the image from GHCR + steps: + # Authenticate to GHCR so the scan also works for private images + # (GITHUB_TOKEN isn't forwarded to Docker automatically). Harmless for + # public images. Pairs with the packages: read permission above. + - name: Log in to GitHub Container Registry + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Run Trivy image scan + uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0 + with: + scan-type: image + image-ref: ${{ inputs.image-ref }} + # vuln-type maps to --pkg-types: OS packages only (library deps are + # Dependabot's job). ignore-unfixed drops vulns with no patch yet. + vuln-type: os + ignore-unfixed: true + severity: HIGH,CRITICAL + format: table + output: trivy-image-webapp.txt + + - name: Job summary + if: always() + env: + IMAGE_REF: ${{ inputs.image-ref }} + run: | + { + echo "## Trivy Image Scan (webapp) — \`${IMAGE_REF}\`" + echo '```' + # GitHub step summary is capped at 1 MiB; truncate large reports. + head -c 900000 trivy-image-webapp.txt 2>/dev/null || echo "(no report produced)" + echo '```' + } >> "$GITHUB_STEP_SUMMARY" diff --git a/.github/workflows/typecheck.yml b/.github/workflows/typecheck.yml index 665d54b2563..91ec46f3a9a 100644 --- a/.github/workflows/typecheck.yml +++ b/.github/workflows/typecheck.yml @@ -12,19 +12,20 @@ jobs: steps: - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 cache: "pnpm" - name: 📥 Download deps diff --git a/.github/workflows/unit-tests-internal.yml b/.github/workflows/unit-tests-internal.yml index 92b951e8aa0..e2aae11b846 100644 --- a/.github/workflows/unit-tests-internal.yml +++ b/.github/workflows/unit-tests-internal.yml @@ -5,15 +5,22 @@ permissions: on: workflow_call: + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false jobs: unitTests: name: "🧪 Unit Tests: Internal" runs-on: ubuntu-latest strategy: + # one flaky shard shouldn't cancel its siblings - lets us re-run only the failed shard + fail-fast: false matrix: - shardIndex: [1, 2, 3, 4, 5, 6, 7, 8] - shardTotal: [8] + shardIndex: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + shardTotal: [12] env: DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} SHARD_INDEX: ${{ matrix.shardIndex }} @@ -46,25 +53,26 @@ jobs: run: sudo systemctl restart docker - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - fetch-depth: 0 + fetch-depth: 1 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 cache: "pnpm" # ..to avoid rate limits when pulling images - name: 🐳 Login to DockerHub if: ${{ env.DOCKERHUB_USERNAME }} - uses: docker/login-action@v3 + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} @@ -75,12 +83,22 @@ jobs: - name: 🐳 Pre-pull testcontainer images if: ${{ env.DOCKERHUB_USERNAME }} run: | + # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake. + pull() { + for attempt in 1 2 3; do + docker pull "$1" && return 0 + echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s" + sleep 10 + done + echo "::error::docker pull $1 failed after 3 attempts" + return 1 + } echo "Pre-pulling Docker images with authenticated session..." - docker pull postgres:14 - docker pull clickhouse/clickhouse-server:25.4-alpine - docker pull redis:7-alpine - docker pull testcontainers/ryuk:0.11.0 - docker pull electricsql/electric:1.2.4 + pull postgres:14 + pull clickhouse/clickhouse-server:25.4-alpine + pull redis:7.2 + pull testcontainers/ryuk:0.14.0 + pull electricsql/electric:1.2.4 echo "Image pre-pull complete" - name: 📥 Download deps @@ -90,7 +108,7 @@ jobs: run: pnpm run generate - name: 🧪 Run Internal Unit Tests - run: pnpm run test:internal --reporter=default --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} + run: pnpm run test:internal --reporter=default --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --passWithNoTests - name: Gather all reports if: ${{ !cancelled() }} @@ -101,7 +119,7 @@ jobs: - name: Upload blob reports to GitHub Actions Artifacts if: ${{ !cancelled() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: internal-blob-report-${{ matrix.shardIndex }} path: .vitest-reports/* @@ -115,27 +133,28 @@ jobs: runs-on: ubuntu-latest steps: - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - fetch-depth: 0 + fetch-depth: 1 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 # no cache enabled, we're not installing deps - name: Download blob reports from GitHub Actions Artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: path: .vitest-reports pattern: internal-blob-report-* merge-multiple: true - name: Merge reports - run: pnpm dlx vitest@3.1.4 run --merge-reports --pass-with-no-tests + run: pnpm dlx vitest@4.1.7 run --merge-reports --pass-with-no-tests diff --git a/.github/workflows/unit-tests-packages.yml b/.github/workflows/unit-tests-packages.yml index 78474e03f27..6642f2443c4 100644 --- a/.github/workflows/unit-tests-packages.yml +++ b/.github/workflows/unit-tests-packages.yml @@ -5,15 +5,22 @@ permissions: on: workflow_call: + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false jobs: unitTests: name: "🧪 Unit Tests: Packages" runs-on: ubuntu-latest strategy: + # one flaky shard shouldn't cancel its siblings - lets us re-run only the failed shard + fail-fast: false matrix: - shardIndex: [1] - shardTotal: [1] + shardIndex: [1, 2, 3] + shardTotal: [3] env: DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} SHARD_INDEX: ${{ matrix.shardIndex }} @@ -46,25 +53,26 @@ jobs: run: sudo systemctl restart docker - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - fetch-depth: 0 + fetch-depth: 1 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 cache: "pnpm" # ..to avoid rate limits when pulling images - name: 🐳 Login to DockerHub if: ${{ env.DOCKERHUB_USERNAME }} - uses: docker/login-action@v3 + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} @@ -75,12 +83,22 @@ jobs: - name: 🐳 Pre-pull testcontainer images if: ${{ env.DOCKERHUB_USERNAME }} run: | + # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake. + pull() { + for attempt in 1 2 3; do + docker pull "$1" && return 0 + echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s" + sleep 10 + done + echo "::error::docker pull $1 failed after 3 attempts" + return 1 + } echo "Pre-pulling Docker images with authenticated session..." - docker pull postgres:14 - docker pull clickhouse/clickhouse-server:25.4-alpine - docker pull redis:7-alpine - docker pull testcontainers/ryuk:0.11.0 - docker pull electricsql/electric:1.2.4 + pull postgres:14 + pull clickhouse/clickhouse-server:25.4-alpine + pull redis:7.2 + pull testcontainers/ryuk:0.14.0 + pull electricsql/electric:1.2.4 echo "Image pre-pull complete" - name: 📥 Download deps @@ -90,7 +108,7 @@ jobs: run: pnpm run generate - name: 🧪 Run Package Unit Tests - run: pnpm run test:packages --reporter=default --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} + run: pnpm run test:packages --reporter=default --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --passWithNoTests - name: Gather all reports if: ${{ !cancelled() }} @@ -101,7 +119,7 @@ jobs: - name: Upload blob reports to GitHub Actions Artifacts if: ${{ !cancelled() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: packages-blob-report-${{ matrix.shardIndex }} path: .vitest-reports/* @@ -115,27 +133,28 @@ jobs: runs-on: ubuntu-latest steps: - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - fetch-depth: 0 + fetch-depth: 1 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 # no cache enabled, we're not installing deps - name: Download blob reports from GitHub Actions Artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: path: .vitest-reports pattern: packages-blob-report-* merge-multiple: true - name: Merge reports - run: pnpm dlx vitest@3.1.4 run --merge-reports --pass-with-no-tests + run: pnpm dlx vitest@4.1.7 run --merge-reports --pass-with-no-tests diff --git a/.github/workflows/unit-tests-webapp.yml b/.github/workflows/unit-tests-webapp.yml index 523a1887db8..dc1cc978f35 100644 --- a/.github/workflows/unit-tests-webapp.yml +++ b/.github/workflows/unit-tests-webapp.yml @@ -5,15 +5,22 @@ permissions: on: workflow_call: + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false jobs: unitTests: name: "🧪 Unit Tests: Webapp" runs-on: ubuntu-latest strategy: + # one flaky shard shouldn't cancel its siblings - lets us re-run only the failed shard + fail-fast: false matrix: - shardIndex: [1, 2, 3, 4, 5, 6, 7, 8] - shardTotal: [8] + shardIndex: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + shardTotal: [10] env: DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} SHARD_INDEX: ${{ matrix.shardIndex }} @@ -46,25 +53,26 @@ jobs: run: sudo systemctl restart docker - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - fetch-depth: 0 + fetch-depth: 1 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 cache: "pnpm" # ..to avoid rate limits when pulling images - name: 🐳 Login to DockerHub if: ${{ env.DOCKERHUB_USERNAME }} - uses: docker/login-action@v3 + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} @@ -75,12 +83,23 @@ jobs: - name: 🐳 Pre-pull testcontainer images if: ${{ env.DOCKERHUB_USERNAME }} run: | + # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake. + pull() { + for attempt in 1 2 3; do + docker pull "$1" && return 0 + echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s" + sleep 10 + done + echo "::error::docker pull $1 failed after 3 attempts" + return 1 + } echo "Pre-pulling Docker images with authenticated session..." - docker pull postgres:14 - docker pull clickhouse/clickhouse-server:25.4-alpine - docker pull redis:7-alpine - docker pull testcontainers/ryuk:0.11.0 - docker pull electricsql/electric:1.2.4 + pull postgres:14 + pull clickhouse/clickhouse-server:25.4-alpine + pull redis:7.2 + pull testcontainers/ryuk:0.14.0 + pull electricsql/electric:1.2.4 + pull minio/minio:latest echo "Image pre-pull complete" - name: 📥 Download deps @@ -90,7 +109,7 @@ jobs: run: pnpm run generate - name: 🧪 Run Webapp Unit Tests - run: pnpm run test:webapp --reporter=default --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} + run: pnpm run test:webapp --reporter=default --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --passWithNoTests env: DATABASE_URL: postgresql://postgres:postgres@localhost:5432/postgres DIRECT_URL: postgresql://postgres:postgres@localhost:5432/postgres @@ -109,7 +128,7 @@ jobs: - name: Upload blob reports to GitHub Actions Artifacts if: ${{ !cancelled() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: webapp-blob-report-${{ matrix.shardIndex }} path: .vitest-reports/* @@ -123,27 +142,28 @@ jobs: runs-on: ubuntu-latest steps: - name: ⬇️ Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - fetch-depth: 0 + fetch-depth: 1 + persist-credentials: false - name: ⎔ Setup pnpm - uses: pnpm/action-setup@v4 + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: - version: 10.23.0 + version: 10.33.2 - name: ⎔ Setup node - uses: buildjet/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: - node-version: 20.20.0 + node-version: 20.20.2 # no cache enabled, we're not installing deps - name: Download blob reports from GitHub Actions Artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: path: .vitest-reports pattern: webapp-blob-report-* merge-multiple: true - name: Merge reports - run: pnpm dlx vitest@3.1.4 run --merge-reports --pass-with-no-tests + run: pnpm dlx vitest@4.1.7 run --merge-reports --pass-with-no-tests diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 7c90a5a30ad..96e76279c82 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -5,14 +5,30 @@ permissions: on: workflow_call: + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false jobs: webapp: uses: ./.github/workflows/unit-tests-webapp.yml - secrets: inherit + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + e2e-webapp: + uses: ./.github/workflows/e2e-webapp.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} packages: uses: ./.github/workflows/unit-tests-packages.yml - secrets: inherit + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} internal: uses: ./.github/workflows/unit-tests-internal.yml - secrets: inherit + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} diff --git a/.github/workflows/vouch-check-pr.yml b/.github/workflows/vouch-check-pr.yml index 21597cf467a..d854b1e0ce6 100644 --- a/.github/workflows/vouch-check-pr.yml +++ b/.github/workflows/vouch-check-pr.yml @@ -1,17 +1,18 @@ name: Vouch - Check PR on: - pull_request_target: + pull_request_target: # zizmor: ignore[dangerous-triggers] needed to comment/close fork PRs; safe because we never check out PR HEAD ref so no fork-controlled code runs types: [opened, reopened] -permissions: - contents: read - pull-requests: write - issues: read +permissions: {} jobs: check-vouch: runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write # auto-close unvouched PRs + issues: read steps: - uses: mitchellh/vouch/action/check-pr@c6d80ead49839655b61b422700b7a3bc9d0804a9 # v1.4.2 with: @@ -23,11 +24,16 @@ jobs: require-draft: needs: check-vouch + permissions: + pull-requests: write # close non-draft PRs with a comment if: > github.event.pull_request.draft == false && github.event.pull_request.author_association != 'MEMBER' && github.event.pull_request.author_association != 'OWNER' && - github.event.pull_request.author_association != 'COLLABORATOR' + github.event.pull_request.author_association != 'COLLABORATOR' && + github.event.pull_request.user.login != 'devin-ai-integration[bot]' && + github.event.pull_request.user.login != 'dependabot[bot]' && + github.event.pull_request.user.login != 'github-actions[bot]' runs-on: ubuntu-latest steps: - name: Close non-draft PR diff --git a/.github/workflows/workflow-checks.yml b/.github/workflows/workflow-checks.yml new file mode 100644 index 00000000000..a11918c04fe --- /dev/null +++ b/.github/workflows/workflow-checks.yml @@ -0,0 +1,51 @@ +name: Workflow Checks + +on: + push: + branches: [main] + paths: + - '.github/workflows/**' + - '.github/actions/**' + - '.github/zizmor.yml' + pull_request: + paths: + - '.github/workflows/**' + - '.github/actions/**' + - '.github/zizmor.yml' + +permissions: {} + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + actionlint: + name: Actionlint + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Run actionlint + uses: docker://rhysd/actionlint:1.7.12@sha256:b1934ee5f1c509618f2508e6eb47ee0d3520686341fec936f3b79331f9315667 + + zizmor: + name: Zizmor + runs-on: ubuntu-latest + permissions: + security-events: write # Upload SARIF to GitHub Security tab + contents: read # Read workflow files for analysis + actions: read # Read workflow run metadata + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Run zizmor + uses: zizmorcore/zizmor-action@5f14fd08f7cf1cb1609c1e344975f152c7ee938d # v0.5.6 diff --git a/.github/zizmor.yml b/.github/zizmor.yml new file mode 100644 index 00000000000..2fcbb540127 --- /dev/null +++ b/.github/zizmor.yml @@ -0,0 +1,5 @@ +rules: + unpinned-uses: + config: + policies: + '*': hash-pin diff --git a/.gitignore b/.gitignore index 5f6adddba0a..d5f0c945ad1 100644 --- a/.gitignore +++ b/.gitignore @@ -65,7 +65,13 @@ apps/**/public/build /packages/trigger-sdk/src/package.json /packages/python/src/package.json **/.claude/settings.local.json +.claude/architecture/ +.claude/docs-plans/ +.claude/review-guides/ +.claude/scheduled_tasks.lock .mcp.log .mcp.json .cursor/debug.log -ailogger-output.log \ No newline at end of file +ailogger-output.log +# per-package vitest timing capture (transient; merged into root test-timings.json) +.vitest-timing.json diff --git a/.nvmrc b/.nvmrc index 7c663e0a0bd..c675bca8de0 100644 --- a/.nvmrc +++ b/.nvmrc @@ -1 +1 @@ -v20.20.0 \ No newline at end of file +v20.20.2 diff --git a/.server-changes/README.md b/.server-changes/README.md index 82716de981c..2b0eeade36b 100644 --- a/.server-changes/README.md +++ b/.server-changes/README.md @@ -38,6 +38,14 @@ Speed up batch queue processing by removing stalls and fixing retry race The body text (below the frontmatter) is a one-line description of the change. Keep it concise — it will appear in release notes. +### Writing guidance + +These entries are public-facing - they ship verbatim in user-visible release notes. A few rules to keep them clean: + +- **One sentence is usually enough.** The body is the bullet in the changelog. If you need a paragraph, you're probably describing the implementation rather than the change. +- **Describe behavior, not implementation.** Skip internal scopes, middleware names, library specifics, framework internals. Users care about what's different for them, not how it's wired. +- **Never name internal tools or infra.** Observability stacks, internal services, infra components, monitoring backends, CI surfaces, AWS specifics - none of these belong in user-facing notes. + ## Lifecycle 1. Engineer adds a `.server-changes/` file in their PR diff --git a/.server-changes/batch-fast-fail-queue-size-limit.md b/.server-changes/batch-fast-fail-queue-size-limit.md deleted file mode 100644 index 77b926a5a80..00000000000 --- a/.server-changes/batch-fast-fail-queue-size-limit.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -area: webapp -type: fix ---- - -Batch items that hit the environment queue size limit now fast-fail without -retries and without creating pre-failed TaskRuns. diff --git a/.server-changes/bulk-action-cursor-pagination.md b/.server-changes/bulk-action-cursor-pagination.md new file mode 100644 index 00000000000..5f506493d11 --- /dev/null +++ b/.server-changes/bulk-action-cursor-pagination.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Fix run pagination that could duplicate or skip runs: the query orders by `(created_at, run_id)` but the cursor cut on `run_id` alone, which diverges when run_id order doesn't match created_at order (e.g. bulk replay re-processing runs). Cursors now encode the composite key as an opaque token and cut on the matching tuple; legacy bare-run_id cursors stay supported for in-flight pagination. diff --git a/.server-changes/cancel-stale-delayed-snapshots.md b/.server-changes/cancel-stale-delayed-snapshots.md new file mode 100644 index 00000000000..9a167c613b1 --- /dev/null +++ b/.server-changes/cancel-stale-delayed-snapshots.md @@ -0,0 +1,6 @@ +--- +area: supervisor +type: fix +--- + +Cancel pending delayed snapshots when a run completes or disconnects, preventing stale snapshots from pausing microVMs that have moved on to new work. diff --git a/.server-changes/compute-network-labels.md b/.server-changes/compute-network-labels.md new file mode 100644 index 00000000000..874081885d5 --- /dev/null +++ b/.server-changes/compute-network-labels.md @@ -0,0 +1,6 @@ +--- +area: supervisor +type: feature +--- + +Forward per-run identity labels to the compute provider on create and restore, letting network policy select runs (e.g. private link). diff --git a/.server-changes/compute-org-label.md b/.server-changes/compute-org-label.md new file mode 100644 index 00000000000..9306a0e2dc3 --- /dev/null +++ b/.server-changes/compute-org-label.md @@ -0,0 +1,8 @@ +--- +area: supervisor +type: improvement +--- + +Compute workload manager now sets an `org` label on every run (create + +restore) for network-policy selection, instead of a plan-gated label. The +Kubernetes workload manager is unchanged. diff --git a/.server-changes/env-vars-page-scope-values-to-visible-environments.md b/.server-changes/env-vars-page-scope-values-to-visible-environments.md new file mode 100644 index 00000000000..067c04661b7 --- /dev/null +++ b/.server-changes/env-vars-page-scope-values-to-visible-environments.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Speed up the environment variables page for projects with many archived preview branches. The page now only loads variable values for the environments it displays instead of every value ever created, including those left behind by archived branches. diff --git a/.server-changes/hipaa-addon-pricing-cta.md b/.server-changes/hipaa-addon-pricing-cta.md new file mode 100644 index 00000000000..8dc4a41f8b2 --- /dev/null +++ b/.server-changes/hipaa-addon-pricing-cta.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Request a HIPAA BAA add-on directly from any paid pricing tier in the dashboard. diff --git a/.server-changes/include-prisma-cli-in-prod-image.md b/.server-changes/include-prisma-cli-in-prod-image.md new file mode 100644 index 00000000000..888544239fe --- /dev/null +++ b/.server-changes/include-prisma-cli-in-prod-image.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Fix database migrations failing to run in the production image because the Prisma CLI was missing from the build. diff --git a/.server-changes/mollifier-decision-enrolled-org-labels.md b/.server-changes/mollifier-decision-enrolled-org-labels.md new file mode 100644 index 00000000000..b9e8a11f84a --- /dev/null +++ b/.server-changes/mollifier-decision-enrolled-org-labels.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Add bounded `enrolled` and `org` labels to the `mollifier.decisions` metric so per-enrolled-org pass-through vs mollify is visible (the `org` label is attached only for the enrolled cohort to keep cardinality bounded). diff --git a/.server-changes/react-router-route-matching-perf.md b/.server-changes/react-router-route-matching-perf.md new file mode 100644 index 00000000000..a264835af55 --- /dev/null +++ b/.server-changes/react-router-route-matching-perf.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Speed up the dashboard and API under high request load by memoizing react-router's per-request route matching, which previously re-flattened, re-ranked, and recompiled the entire route table on every request. diff --git a/.server-changes/realtime-replica-read-consistency.md b/.server-changes/realtime-replica-read-consistency.md new file mode 100644 index 00000000000..d23c73e682d --- /dev/null +++ b/.server-changes/realtime-replica-read-consistency.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Realtime feed reads now wait out measured read-replica lag and retry stale reads, so subscribers receive each change's current content instead of trailing one change behind when a read replica races the write. diff --git a/.server-changes/realtime-runs-subscription-scalability.md b/.server-changes/realtime-runs-subscription-scalability.md new file mode 100644 index 00000000000..5de00aae675 --- /dev/null +++ b/.server-changes/realtime-runs-subscription-scalability.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Add a new backend for the realtime runs feed (single runs, tags, and batches) that scales under high concurrency, available behind a feature flag diff --git a/.server-changes/require-plugins-fail-fast.md b/.server-changes/require-plugins-fail-fast.md new file mode 100644 index 00000000000..591cd47c402 --- /dev/null +++ b/.server-changes/require-plugins-fail-fast.md @@ -0,0 +1,8 @@ +--- +area: webapp +type: feature +--- + +Add `REQUIRE_PLUGINS=1` env var. When set, the RBAC plugin loader throws instead of silently falling back to the default implementation if the plugin module fails to load (missing, broken transitive dep, etc.). The webapp's `/healthcheck` route now resolves the lazy plugin controller so the throw surfaces during readiness probes — a deploy where the plugin didn't load fails the probe and is rolled back. + +Self-hosters leave `REQUIRE_PLUGINS` unset and continue to use the fallback when no plugin is installed. diff --git a/.server-changes/retry-transient-instance-create-failures.md b/.server-changes/retry-transient-instance-create-failures.md new file mode 100644 index 00000000000..f7b9c7afd11 --- /dev/null +++ b/.server-changes/retry-transient-instance-create-failures.md @@ -0,0 +1,6 @@ +--- +area: supervisor +type: fix +--- + +Retry transient instance create failures during cold starts instead of waiting minutes for the run to be requeued. diff --git a/.server-changes/runs-backward-pagination-slice.md b/.server-changes/runs-backward-pagination-slice.md new file mode 100644 index 00000000000..41695f4e159 --- /dev/null +++ b/.server-changes/runs-backward-pagination-slice.md @@ -0,0 +1,14 @@ +--- +area: webapp +type: fix +--- + +Fix an off-by-one in `ClickHouseRunsRepository.listRunIds` backward pagination. +When paging backward with more rows before the page (`hasMore`), the displayed +page was sliced as `rows.slice(1, size + 1)`, which dropped the row closest to +the cursor and kept the extra "has-more" sentinel — returning a page that +straddled two logical pages (one row from the correct previous page plus one +from the page before it). The result set is always the first `page.size` rows +(the sentinel is the trailing element in both directions), so the slice is now +`rows.slice(0, size)` for forward and backward alike. Forward pagination and the +cursor values were already correct and are unchanged. diff --git a/.server-changes/runs-bulk-action-no-reload.md b/.server-changes/runs-bulk-action-no-reload.md new file mode 100644 index 00000000000..1926ab20b75 --- /dev/null +++ b/.server-changes/runs-bulk-action-no-reload.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Stop reloading the runs list when opening or closing the bulk action inspector diff --git a/.server-changes/sanitize-agent-view-urls.md b/.server-changes/sanitize-agent-view-urls.md new file mode 100644 index 00000000000..c534a03623d --- /dev/null +++ b/.server-changes/sanitize-agent-view-urls.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Sanitize URLs from streamed agent and tool data before rendering them in the dashboard's Agent view, so an unsafe scheme such as `javascript:` can no longer produce a clickable link or image source. diff --git a/.server-changes/scheduled-run-region-display.md b/.server-changes/scheduled-run-region-display.md new file mode 100644 index 00000000000..dca41c4341e --- /dev/null +++ b/.server-changes/scheduled-run-region-display.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Scheduled runs now show under their correct region in the dashboard, run details, and the API, and match region filters, instead of appearing under a separate region. diff --git a/.server-changes/session-route-hardening.md b/.server-changes/session-route-hardening.md new file mode 100644 index 00000000000..2734b35a784 --- /dev/null +++ b/.server-changes/session-route-hardening.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Hardening fixes for realtime sessions: stricter authorization on snapshot URLs and out-channel appends, environment-scoped message delivery for waiting runs, and idempotent appends via the X-Part-Id header. Session creation now rejects expired sessions, externalId can no longer be changed after creation, and the sessions list returns friendly run ids. diff --git a/.server-changes/snapshots-since-replica-primary-fallback.md b/.server-changes/snapshots-since-replica-primary-fallback.md new file mode 100644 index 00000000000..9b8257f6410 --- /dev/null +++ b/.server-changes/snapshots-since-replica-primary-fallback.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Run snapshot polling no longer errors or pays extra latency when the database read replica hasn't yet replicated the snapshot the runner is polling from (`RUN_ENGINE_READ_REPLICA_SNAPSHOTS_SINCE_ENABLED`): the read is briefly retried on the replica and served from the primary if it still hasn't caught up. Polling also now rejects a since-snapshot id that doesn't belong to the run being polled. diff --git a/.server-changes/stop-creating-taskruntag-records.md b/.server-changes/stop-creating-taskruntag-records.md deleted file mode 100644 index 0b068d3c3ac..00000000000 --- a/.server-changes/stop-creating-taskruntag-records.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -area: webapp -type: improvement ---- - -Stop creating TaskRunTag records and _TaskRunToTaskRunTag join table entries during task triggering. The denormalized runTags string array on TaskRun already stores tag names, making the M2M relation redundant write overhead. diff --git a/.server-changes/trace-export-formats.md b/.server-changes/trace-export-formats.md new file mode 100644 index 00000000000..ff15483003f --- /dev/null +++ b/.server-changes/trace-export-formats.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Export a run's full trace from the run page as a downloadable Log, Markdown, or JSON Lines file, or copy it to the clipboard for pasting into an AI assistant. The export streams straight from the store, so even very large runs export reliably. diff --git a/.server-changes/trace-page-payload-diet.md b/.server-changes/trace-page-payload-diet.md new file mode 100644 index 00000000000..9f84e4b22db --- /dev/null +++ b/.server-changes/trace-page-payload-diet.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Shrinks the run trace page loader payload by keeping raw span events server-side and makes large trace trees render more efficiently. Also adds an optional `TRACE_VIEW_EMERGENCY_SPAN_CAP` env var that clamps trace summary and detailed summary span limits on both event store paths. diff --git a/.server-changes/trigger-worker-queue-db-error-leak.md b/.server-changes/trigger-worker-queue-db-error-leak.md new file mode 100644 index 00000000000..9725ef9f2eb --- /dev/null +++ b/.server-changes/trigger-worker-queue-db-error-leak.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Stop `trigger()` from leaking raw database connection errors to API clients during a database outage; infrastructure errors now return a generic, retryable 500. diff --git a/.server-changes/vercel-auto-promote-toggle.md b/.server-changes/vercel-auto-promote-toggle.md deleted file mode 100644 index bb5f25a21c1..00000000000 --- a/.server-changes/vercel-auto-promote-toggle.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -area: webapp -type: feature ---- - -Vercel integration option to disable auto promotions diff --git a/.vouch.yml b/.vouch.yml index 8a9668392d3..ec6e85aa705 100644 --- a/.vouch.yml +++ b/.vouch.yml @@ -1,2 +1,4 @@ vouch: - github: edosrecki + - github: GautamBytes + - github: ConProgramming diff --git a/.vscode/launch.json b/.vscode/launch.json index 71a76904a2b..1044443e197 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -47,78 +47,6 @@ "url": "http://localhost:3030", "webRoot": "${workspaceFolder}/apps/webapp/app" }, - { - "type": "node-terminal", - "request": "launch", - "name": "Debug V3 init CLI", - "command": "pnpm exec trigger init", - "cwd": "${workspaceFolder}/references/init-shell", - "sourceMaps": true - }, - { - "type": "node-terminal", - "request": "launch", - "name": "Debug V3 init dev CLI", - "command": "pnpm exec trigger dev", - "cwd": "${workspaceFolder}/references/init-shell", - "sourceMaps": true - }, - { - "type": "node-terminal", - "request": "launch", - "name": "Debug V3 Dev CLI", - "command": "pnpm exec trigger dev", - "cwd": "${workspaceFolder}/references/hello-world", - "sourceMaps": true - }, - { - "type": "node-terminal", - "request": "launch", - "name": "Debug Dev Next.js Realtime", - "command": "pnpm exec trigger dev", - "cwd": "${workspaceFolder}/references/nextjs-realtime", - "sourceMaps": true - }, - { - "type": "node-terminal", - "request": "launch", - "name": "Debug prisma-catalog deploy CLI", - "command": "pnpm exec trigger deploy --self-hosted --load-image", - "cwd": "${workspaceFolder}/references/prisma-catalog", - "sourceMaps": true - }, - { - "type": "node-terminal", - "request": "launch", - "name": "Debug V3 Deploy CLI", - "command": "pnpm exec trigger deploy --self-hosted --load-image", - "cwd": "${workspaceFolder}/references/hello-world", - "sourceMaps": true - }, - { - "type": "node-terminal", - "request": "launch", - "name": "Debug V3 list-profiles CLI", - "command": "pnpm exec trigger list-profiles --log-level debug", - "cwd": "${workspaceFolder}/references/hello-world", - "sourceMaps": true - }, - { - "type": "node-terminal", - "request": "launch", - "name": "Debug V3 update CLI", - "command": "pnpm exec trigger update", - "cwd": "${workspaceFolder}/references/hello-world", - "sourceMaps": true - }, - { - "type": "node-terminal", - "request": "launch", - "name": "Debug V3 Management", - "command": "pnpm run management", - "cwd": "${workspaceFolder}/references/hello-world", - "sourceMaps": true - }, { "type": "node", "request": "attach", @@ -135,14 +63,6 @@ "cwd": "${workspaceFolder}/packages/cli-v3", "sourceMaps": true }, - { - "type": "node-terminal", - "request": "launch", - "name": "debug v3 hello-world dev", - "command": "pnpm exec trigger dev", - "cwd": "${workspaceFolder}/references/hello-world", - "sourceMaps": true - }, { "type": "node-terminal", "request": "launch", @@ -158,14 +78,6 @@ "command": "pnpm run test ./src/run-queue/index.test.ts --run", "cwd": "${workspaceFolder}/internal-packages/run-engine", "sourceMaps": true - }, - { - "type": "node-terminal", - "request": "launch", - "name": "Debug d3-demo", - "command": "pnpm exec trigger dev", - "cwd": "${workspaceFolder}/references/d3-demo", - "sourceMaps": true } ] } diff --git a/.vscode/settings.json b/.vscode/settings.json index fd9f3dcde0c..f969bb6d5de 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,5 @@ { - "deno.enablePaths": ["references/deno-reference", "runtime_tests/tests/deno"], + "deno.enablePaths": ["runtime_tests/tests/deno"], "debug.toolBarLocation": "commandCenter", "typescript.tsdk": "node_modules/typescript/lib", "search.exclude": { diff --git a/AGENTS.md b/AGENTS.md index 99496f91bde..8ff9f18663c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -7,18 +7,19 @@ This repository is a pnpm monorepo managed with Turbo. It contains multiple apps - `apps/supervisor` – Node application for executing built tasks. - `packages/*` – Published packages such as `@trigger.dev/sdk`, the CLI (`trigger.dev`), and shared libraries. - `internal-packages/*` – Internal-only packages used by the webapp and other apps. -- `references/*` – Example projects for manual testing and development of new features. +- Example/reference projects for manual testing live in a separate repo: [`triggerdotdev/references`](https://github.com/triggerdotdev/references). - `ai/references` – Contains additional documentation including an overview (`repo.md`) and testing guidelines (`tests.md`). See `ai/references/repo.md` for a more complete explanation of the workspaces. ## Development setup -1. Install dependencies with `pnpm i` (pnpm `10.23.0` and Node.js `20.20.0` are required). +1. Install dependencies with `pnpm i` (pnpm `10.33.2` and Node.js `20.20.2` are required). 2. Copy `.env.example` to `.env` and generate a random 16 byte hex string for `ENCRYPTION_KEY` (`openssl rand -hex 16`). Update other secrets if needed. 3. Start the local services with Docker: ```bash pnpm run docker ``` + Add `:full` (`pnpm run docker:full`) for the optional observability + chaos tooling. See `docker/docker-compose.extras.yml`. 4. Run database migrations: ```bash pnpm run db:migrate @@ -64,5 +65,5 @@ Refer to `ai/references/tests.md` for details on writing tests. Tests should avo ```bash pnpm run dev --filter docs ``` -- `references/README.md` explains how to create new reference projects for manual testing. +- The [`triggerdotdev/references`](https://github.com/triggerdotdev/references) repo's README explains how to create new reference projects for manual testing. diff --git a/CLAUDE.md b/CLAUDE.md index 0a54cced672..c0fd82fb368 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,10 +4,13 @@ This file provides guidance to Claude Code when working with this repository. Su ## Build and Development Commands -This is a pnpm 10.23.0 monorepo using Turborepo. Run commands from root with `pnpm run`. +This is a pnpm 10.33.2 monorepo using Turborepo. Run commands from root with `pnpm run`. + +**Adding dependencies:** Edit `package.json` directly instead of using `pnpm add`, then run `pnpm i` from the repo root. See `.claude/rules/package-installation.md` for the full process. ```bash -pnpm run docker # Start Docker services (PostgreSQL, Redis, Electric) +pnpm run docker # Core dev services (Postgres, Redis, Electric, MinIO, ClickHouse, s2-lite) +# pnpm run docker:full # Same + observability stack (Prometheus, Grafana, OTEL) and chaos tooling pnpm run db:migrate # Run database migrations pnpm run db:seed # Seed the database (required for reference projects) @@ -66,6 +69,17 @@ containerTest("should use both", async ({ prisma, redisOptions }) => { }); ``` +## Code Style + +### Imports + +**Prefer static imports over dynamic imports.** Only use dynamic `import()` when: +- Circular dependencies cannot be resolved otherwise +- Code splitting is genuinely needed for performance +- The module must be loaded conditionally at runtime + +Dynamic imports add unnecessary overhead in hot paths and make code harder to analyze. If you find yourself using `await import()`, ask if a regular `import` statement would work instead. + ## Changesets and Server Changes When modifying any public package (`packages/*` or `integrations/*`), add a changeset: @@ -92,7 +106,7 @@ User API call -> Webapp routes -> Services -> RunEngine -> Redis Queue -> Superv ### Apps -- **apps/webapp**: Remix 2.1.0 app - main API, dashboard, orchestration. Uses Express server. +- **apps/webapp**: Remix 2.17.4 app - main API, dashboard, orchestration. Uses Express server. - **apps/supervisor**: Manages task execution containers (Docker/Kubernetes). ### Public Packages @@ -124,7 +138,7 @@ Docs live in `docs/` as a Mintlify site (MDX format). See `docs/CLAUDE.md` for c ### Reference Projects -The `references/` directory contains test workspaces for testing SDK and platform features. Use `references/hello-world` to manually test changes before submitting PRs. +Reference/example projects for testing SDK and platform features live in a separate repo: [`triggerdotdev/references`](https://github.com/triggerdotdev/references). Clone it alongside this repo and use its `projects/hello-world` to manually test changes before submitting PRs. See that repo's README for setup and linking to a local monorepo build. ## Docker Image Guidelines @@ -153,15 +167,17 @@ export const myTask = task({ The `rules/` directory contains versioned SDK documentation distributed via the SDK installer. Current version: `rules/manifest.json`. Do NOT update `rules/` or `.claude/skills/trigger-dev-tasks/` unless explicitly asked - these are maintained in separate dedicated passes. -## Testing with hello-world Reference Project +## Testing with the hello-world Reference Project + +The reference projects live in the separate [`triggerdotdev/references`](https://github.com/triggerdotdev/references) repo - clone it alongside this repo. First-time setup: -1. `pnpm run db:seed` to seed the database -2. Build CLI: `pnpm run build --filter trigger.dev && pnpm i` -3. Authorize: `cd references/hello-world && pnpm exec trigger login -a http://localhost:3030` +1. `pnpm run db:seed` to seed the database (creates the References org + hello-world project) +2. Build the CLI/packages you want to test: `pnpm run build --filter trigger.dev` +3. In your `references` clone, follow its README to link to your local monorepo build, then authorize: `cd projects/hello-world && pnpm exec trigger login -a http://localhost:3030` -Running: `cd references/hello-world && pnpm exec trigger dev` +Running (from your `references` clone): `cd projects/hello-world && pnpm exec trigger dev` ## Local Task Testing Workflow @@ -176,7 +192,8 @@ curl -s http://localhost:3030/healthcheck # Verify running ### Step 2: Start Trigger Dev in Background ```bash -cd references/hello-world && pnpm exec trigger dev +# in your triggerdotdev/references clone +cd projects/hello-world && pnpm exec trigger dev # Wait for "Local worker ready [node]" ``` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 88e24cba4f0..cddb974417d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,8 +29,8 @@ branch are tagged into a release periodically. ### Prerequisites -- [Node.js](https://nodejs.org/en) version 20.20.0 -- [pnpm package manager](https://pnpm.io/installation) version 10.23.0 +- [Node.js](https://nodejs.org/en) version 20.20.2 +- [pnpm package manager](https://pnpm.io/installation) version 10.33.2 - [Docker](https://www.docker.com/get-started/) - [protobuf](https://github.com/protocolbuffers/protobuf) @@ -49,9 +49,9 @@ branch are tagged into a release periodically. ``` cd trigger.dev ``` -3. Ensure you are on the correct version of Node.js (20.20.0). If you are using `nvm`, there is an `.nvmrc` file that will automatically select the correct version of Node.js when you navigate to the repository. +3. Ensure you are on the correct version of Node.js (20.20.2). If you are using `nvm`, there is an `.nvmrc` file that will automatically select the correct version of Node.js when you navigate to the repository. -4. Run `corepack enable` to use the correct version of pnpm (`10.23.0`) as specified in the root `package.json` file. +4. Run `corepack enable` to use the correct version of pnpm (`10.33.2`) as specified in the root `package.json` file. 5. Install the required packages using pnpm. ``` @@ -71,21 +71,27 @@ branch are tagged into a release periodically. Feel free to update `SESSION_SECRET` and `MAGIC_LINK_SECRET` as well using the same method. -8. Start Docker. This starts the required services like Postgres & Redis. If this is your first time using Docker, consider going through this [guide](DOCKER_INSTALLATION.md) +8. Start Docker. This starts the core dev services (Postgres, Redis, Electric, MinIO, ClickHouse, s2-lite) and runs the ClickHouse migrator once on first start. If this is your first time using Docker, consider going through this [guide](DOCKER_INSTALLATION.md). ``` pnpm run docker ``` + For the observability stack (Prometheus, Grafana, OTEL collector) and other optional tooling (Toxiproxy, nginx-h2, ch-ui, extra electric shard), use `pnpm run docker:full` instead. See `docker/docker-compose.extras.yml` for the full list. + 9. Migrate the database ``` pnpm run db:migrate ``` -10. Build everything +10. Build the webapp, CLI, and SDK + ``` + pnpm run build --filter webapp --filter trigger.dev --filter @trigger.dev/sdk ``` - pnpm run build --filter webapp && pnpm run build --filter trigger.dev && pnpm run build --filter @trigger.dev/sdk +11. Seed the database. This creates a local user, a `References` org, and the reference projects (including `hello-world`) with stable IDs. ``` -11. Run the app. See the section below. + pnpm run db:seed + ``` +12. Run the app. See the section below. ## Running @@ -101,29 +107,32 @@ branch are tagged into a release periodically. ## Manual testing using hello-world -We use the `/references/hello-world` subdirectory as a staging ground for testing changes to the SDK (`@trigger.dev/sdk` at `/packages/trigger-sdk`), the Core package (`@trigger.dev/core` at `packages/core`), the CLI (`trigger.dev` at `/packages/cli-v3`) and the platform (The remix app at `/apps/webapp`). The instructions below will get you started on using the `hello-world` for local development of Trigger.dev. +The `hello-world` reference project (and the others) live in a separate repo: +[`triggerdotdev/references`](https://github.com/triggerdotdev/references). Clone it +alongside this repo. It's the staging ground for testing changes to the SDK +(`@trigger.dev/sdk` at `/packages/trigger-sdk`), the Core package +(`@trigger.dev/core` at `/packages/core`), the CLI (`trigger.dev` at +`/packages/cli-v3`) and the platform (the Remix app at `/apps/webapp`). +To exercise your local monorepo changes, the reference project links to your local +build — see the references repo's README for the `pnpm run link` flow. -### First-time setup +> Paths below such as `projects/hello-world` are relative to your `references` +> clone, not this repo. -First, make sure you are running the webapp according to the instructions above. Then: - -1. Visit http://localhost:3030 in your browser and create a new project called "hello-world". +### First-time setup -2. In Postgres go to the "Projects" table and for the project you create change the `externalRef` to `proj_rrkpdguyagvsoktglnod`. +First, make sure you are running the webapp according to the instructions above. The seed step from setup already created a `hello-world` project under the `References` org with the stable ref `proj_rrkpdguyagvsoktglnod` — log in at http://localhost:3030 with any email to access it. Then: -3. Build the CLI +1. Build the CLI and packages (skip if you already ran the build step in setup) ```sh -# Build the CLI -pnpm run build --filter trigger.dev -# Make it accessible to `pnpm exec` -pnpm i +pnpm run build --filter trigger.dev --filter "@trigger.dev/*" ``` -4. Change into the `/references/hello-world` directory and authorize the CLI to the local server: +2. In your `references` clone, link to your local monorepo build (see its README), then change into `projects/hello-world` and authorize the CLI to the local server: ```sh -cd references/hello-world +cd projects/hello-world cp .env.example .env pnpm exec trigger login -a http://localhost:3030 ``` @@ -133,7 +142,7 @@ This will open a new browser window and authorize the CLI against your local use You can optionally pass a `--profile` flag to the `login` command, which will allow you to use the CLI with separate accounts/servers. We suggest using a profile called `local` for your local development: ```sh -cd references/hello-world +cd projects/hello-world pnpm exec trigger login -a http://localhost:3030 --profile local # later when you run the dev or deploy command: pnpm exec trigger dev --profile local @@ -146,46 +155,46 @@ The following steps should be followed any time you start working on a new featu 1. Make sure the webapp is running on localhost:3030 -2. Open a terminal window and build the CLI and packages and watch for changes +2. In this repo, open a terminal window and build the CLI and packages and watch for changes (the reference project links against this build) ```sh pnpm run dev --filter trigger.dev --filter "@trigger.dev/*" ``` -3. Open another terminal window, and change into the `/references/hello-world` directory. +3. Open another terminal window, and change into `projects/hello-world` in your `references` clone. 4. Run the `dev` command, which will register all the local tasks with the platform and allow you to start testing task execution: ```sh -# in /references/hello-world +# in /projects/hello-world pnpm exec trigger dev ``` If you want additional debug logging, you can use the `--log-level debug` flag: ```sh -# in /references/hello-world +# in /projects/hello-world pnpm exec trigger dev --log-level debug ``` -6. If you make any changes in the CLI/Core/SDK, you'll need to `CTRL+C` to exit the `dev` command and restart it to pickup changes. Any changes to the files inside of the `hello-world/src/trigger` dir will automatically be rebuilt by the `dev` command. +5. If you make any changes in the CLI/Core/SDK, you'll need to `CTRL+C` to exit the `dev` command and restart it to pickup changes. Any changes to the files inside the reference project's `src/trigger` dir will automatically be rebuilt by the `dev` command. -7. Navigate to the `hello-world` project in your local dashboard at localhost:3030 and you should see the list of tasks. +6. Navigate to the `hello-world` project in your local dashboard at localhost:3030 and you should see the list of tasks. -8. Go to the "Test" page in the sidebar and select a task. Then enter a payload and click "Run test". You can tell what the payloads should be by looking at the relevant task file inside the `/references/hello-world/src/trigger` folder. Many of them accept an empty payload. +7. Go to the "Test" page in the sidebar and select a task. Then enter a payload and click "Run test". You can tell what the payloads should be by looking at the relevant task file inside the reference project's `src/trigger` folder. Many of them accept an empty payload. -9. Feel free to add additional files in `hello-world/src/trigger` to test out specific aspects of the system, or add in edge cases. +8. Feel free to add additional files in the reference project's `src/trigger` dir to test out specific aspects of the system, or add in edge cases. ## Adding and running migrations -1. Modify internal-packages/database/prisma/schema.prisma file -2. Change directory to the packages/database folder +1. Modify `internal-packages/database/prisma/schema.prisma`. +2. Change directory to the database package: ```sh - cd packages/database + cd internal-packages/database ``` -3. Create a migration +3. Create a migration: ``` pnpm run db:migrate:dev:create @@ -193,50 +202,17 @@ pnpm exec trigger dev --log-level debug This creates a migration file. Check the migration file does only what you want. If you're adding any database indexes they must use `CONCURRENTLY`, otherwise they'll lock the table when executed. -4. Run the migration. - -``` -pnpm run db:migrate:deploy -pnpm run generate -``` - -This executes the migrations against your database and applies changes to the database schema(s), and then regenerates the Prisma client. - -4. Commit generated migrations as well as changes to the schema.prisma file -5. If you're using VSCode you may need to restart the Typescript server in the webapp to get updated type inference. Open a TypeScript file, then open the Command Palette (View > Command Palette) and run `TypeScript: Restart TS server`. - -## Add sample jobs - -The [references/job-catalog](./references/job-catalog/) project defines simple jobs you can get started with. - -1. `cd` into `references/job-catalog` -2. Create a `.env` file with the following content, - replacing `` with an actual key: +4. Run the migration: -```env -TRIGGER_API_KEY=[TRIGGER_DEV_API_KEY] -TRIGGER_API_URL=http://localhost:3030 -``` - -`TRIGGER_API_URL` is used to configure the URL for your Trigger.dev instance, -where the jobs will be registered. - -3. Run one of the the `job-catalog` files: - -```sh -pnpm run events -``` - -This will open up a local server using `express` on port 8080. Then in a new terminal window you can run the trigger-cli dev command: - -```sh -pnpm run dev:trigger -``` + ``` + pnpm run db:migrate:deploy + pnpm run generate + ``` -See the [Job Catalog](./references/job-catalog/README.md) file for more. + This executes the migrations against your database and applies changes to the database schema(s), and then regenerates the Prisma client. -4. Navigate to your trigger.dev instance ([http://localhost:3030](http://localhost:3030/)), to see the jobs. - You can use the test feature to trigger them. +5. Commit the generated migration files as well as the changes to `schema.prisma`. +6. If you're using VSCode you may need to restart the TypeScript server in the webapp to get updated type inference. Open a TypeScript file, then open the Command Palette (View > Command Palette) and run `TypeScript: Restart TS server`. ## Making a pull request @@ -334,3 +310,7 @@ The process running on port `3030` should be destroyed. ```sh sudo kill -9 ``` + +### Running two clones side by side (worktree, branch experiment) + +The default `pnpm run docker` uses the project name `triggerdotdev-docker` and the standard host ports (5432, 6379, 3060, 4566, 8123, 9000, 9005, 9006). To stand up a second instance in another clone without clashing, set a different `COMPOSE_PROJECT_NAME` and the offset host ports in that clone's `.env`. The "Running multiple instances side by side" block in `.env.example` lists every overridable env var with its default for reference; uncomment the lines you need and update `DATABASE_URL` / `CLICKHOUSE_URL` / `REDIS_PORT` / `APP_ORIGIN` / `LOGIN_ORIGIN` / `ELECTRIC_ORIGIN` / `REALTIME_STREAMS_S2_ENDPOINT` to match. diff --git a/ai/references/repo.md b/ai/references/repo.md index 4f67bde2b4b..6e0ff056716 100644 --- a/ai/references/repo.md +++ b/ai/references/repo.md @@ -1,6 +1,6 @@ ## Repo Overview -This is a pnpm 10.23.0 monorepo that uses turborepo @turbo.json. The following workspaces are relevant +This is a pnpm 10.33.2 monorepo that uses turborepo @turbo.json. The following workspaces are relevant ## Apps diff --git a/apps/supervisor/Containerfile b/apps/supervisor/Containerfile index d5bb5862e96..5b3b148a7cb 100644 --- a/apps/supervisor/Containerfile +++ b/apps/supervisor/Containerfile @@ -16,7 +16,7 @@ COPY --from=pruner --chown=node:node /app/out/json/ . COPY --from=pruner --chown=node:node /app/out/pnpm-lock.yaml ./pnpm-lock.yaml COPY --from=pruner --chown=node:node /app/out/pnpm-workspace.yaml ./pnpm-workspace.yaml -RUN corepack enable && corepack prepare pnpm@10.23.0 --activate +RUN corepack enable && corepack prepare pnpm@10.33.2 --activate FROM base AS deps-fetcher RUN apk add --no-cache python3-dev py3-setuptools make g++ gcc linux-headers diff --git a/apps/supervisor/package.json b/apps/supervisor/package.json index 7456d421850..2725fe2b729 100644 --- a/apps/supervisor/package.json +++ b/apps/supervisor/package.json @@ -18,6 +18,7 @@ "@kubernetes/client-node": "^1.0.0", "@trigger.dev/core": "workspace:*", "dockerode": "^4.0.6", + "ioredis": "~5.6.0", "p-limit": "^6.2.0", "prom-client": "^15.1.0", "socket.io": "4.7.4", @@ -25,6 +26,7 @@ "zod": "3.25.76" }, "devDependencies": { + "@internal/testcontainers": "workspace:*", "@types/dockerode": "^3.3.33" } } diff --git a/apps/supervisor/src/backpressure/backpressureMetrics.ts b/apps/supervisor/src/backpressure/backpressureMetrics.ts new file mode 100644 index 00000000000..ffe57628548 --- /dev/null +++ b/apps/supervisor/src/backpressure/backpressureMetrics.ts @@ -0,0 +1,34 @@ +import { Counter, Gauge, type Registry } from "prom-client"; + +/** Prometheus metrics for dequeue backpressure. */ +export class BackpressureMetrics { + /** 1 while backpressure is engaged (computed signal, set even in dry-run). */ + readonly engaged: Gauge; + /** 1 when running in dry-run (gates inert). */ + readonly dryRun: Gauge; + /** Dequeue attempts the gate skipped - or would have, in dry-run (labelled). */ + readonly skipsTotal: Counter; + + constructor(opts: { register: Registry; prefix?: string }) { + const prefix = opts.prefix ?? "supervisor_backpressure"; + + this.engaged = new Gauge({ + name: `${prefix}_engaged`, + help: "1 while dequeue backpressure is engaged (computed signal, regardless of dry-run)", + registers: [opts.register], + }); + + this.dryRun = new Gauge({ + name: `${prefix}_dry_run`, + help: "1 when dequeue backpressure is in dry-run mode (gates inert)", + registers: [opts.register], + }); + + this.skipsTotal = new Counter({ + name: `${prefix}_skipped_dequeues_total`, + help: "Dequeue attempts skipped by backpressure (or would be, in dry-run)", + labelNames: ["dry_run"], + registers: [opts.register], + }); + } +} diff --git a/apps/supervisor/src/backpressure/backpressureMonitor.test.ts b/apps/supervisor/src/backpressure/backpressureMonitor.test.ts new file mode 100644 index 00000000000..7af28ffc9f5 --- /dev/null +++ b/apps/supervisor/src/backpressure/backpressureMonitor.test.ts @@ -0,0 +1,353 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { Registry } from "prom-client"; +import { BackpressureMonitor, type BackpressureSignalSource } from "./backpressureMonitor.js"; +import { BackpressureMetrics } from "./backpressureMetrics.js"; + +function countingSource(verdict: { engaged: boolean } | null): { + source: BackpressureSignalSource; + reads: () => number; +} { + let reads = 0; + return { + source: { + read: async () => { + reads++; + return verdict; + }, + }, + reads: () => reads, + }; +} + +describe("BackpressureMonitor", () => { + beforeEach(() => { + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it("when disabled, never skips dequeue and never reads the signal source", () => { + // Even though the source would report "engaged", a disabled monitor must be + // a complete no-op: this is the backwards-compatibility guarantee. + const { source, reads } = countingSource({ engaged: true }); + const monitor = new BackpressureMonitor({ enabled: false, source }); + + monitor.start(); + + expect(monitor.shouldSkipDequeue()).toBe(false); + expect(reads()).toBe(0); + + monitor.stop(); + }); + + it("when enabled and the source reports engaged, skips dequeue after a refresh", async () => { + const { source } = countingSource({ engaged: true }); + const monitor = new BackpressureMonitor({ enabled: true, source, refreshIntervalMs: 1000 }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); // flush the initial async read + + expect(monitor.shouldSkipDequeue()).toBe(true); + + monitor.stop(); + }); + + it("when enabled and the source reports clear, does not skip dequeue", async () => { + const { source } = countingSource({ engaged: false }); + const monitor = new BackpressureMonitor({ enabled: true, source, refreshIntervalMs: 1000 }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + + expect(monitor.shouldSkipDequeue()).toBe(false); + + monitor.stop(); + }); + + it("fails open (stops skipping) when the source throws", async () => { + let call = 0; + const source: BackpressureSignalSource = { + read: async () => { + call++; + if (call === 1) { + return { engaged: true }; + } + throw new Error("signal source unreachable"); + }, + }; + const monitor = new BackpressureMonitor({ enabled: true, source, refreshIntervalMs: 1000 }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + expect(monitor.shouldSkipDequeue()).toBe(true); // engaged from the first read + + await vi.advanceTimersByTimeAsync(1000); // next refresh throws + expect(monitor.shouldSkipDequeue()).toBe(false); // fail-open: a dead source must not pin the brake + + monitor.stop(); + }); + + it("fails open when the source reports unknown (null)", async () => { + const { source } = countingSource(null); + const monitor = new BackpressureMonitor({ enabled: true, source, refreshIntervalMs: 1000 }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + + expect(monitor.shouldSkipDequeue()).toBe(false); + + monitor.stop(); + }); + + it("fails open when the cached verdict goes stale (older than max age)", async () => { + // Source stops updating (e.g. hangs) after the first read; the verdict ages out. + const source: BackpressureSignalSource = { + read: async () => ({ engaged: true, ts: Date.now() }), + }; + const monitor = new BackpressureMonitor({ + enabled: true, + source, + refreshIntervalMs: 1_000_000, // effectively only the initial read fires + maxVerdictAgeMs: 15_000, + }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + expect(monitor.shouldSkipDequeue()).toBe(true); + + await vi.advanceTimersByTimeAsync(15_001); // verdict now older than max age + expect(monitor.shouldSkipDequeue()).toBe(false); + + monitor.stop(); + }); + + it("does not read the source on the hot path (reads are driven by the refresh tick)", async () => { + const { source, reads } = countingSource({ engaged: true }); + const monitor = new BackpressureMonitor({ enabled: true, source, refreshIntervalMs: 1000 }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + expect(reads()).toBe(1); // just the initial refresh + + for (let i = 0; i < 1000; i++) { + monitor.shouldSkipDequeue(); + } + + expect(reads()).toBe(1); // hot-path calls performed zero I/O + + monitor.stop(); + }); + + it("does not start an overlapping refresh while one is in flight", async () => { + let reads = 0; + const source: BackpressureSignalSource = { + // Never resolves - simulates a hung read. + read: () => { + reads++; + return new Promise<{ engaged: boolean } | null>(() => {}); + }, + }; + const monitor = new BackpressureMonitor({ enabled: true, source, refreshIntervalMs: 1000 }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(3000); // several intervals while the first read hangs + + expect(reads).toBe(1); // in-flight guard prevents stacking + + monitor.stop(); + }); + + it("stops refreshing after stop()", async () => { + const { source, reads } = countingSource({ engaged: true }); + const monitor = new BackpressureMonitor({ enabled: true, source, refreshIntervalMs: 1000 }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + const readsAtStop = reads(); + + monitor.stop(); + await vi.advanceTimersByTimeAsync(5000); + + expect(reads()).toBe(readsAtStop); + }); + + it("isEngaged reflects the hard engaged state (the signal for freezing scale-up)", async () => { + const { source } = countingSource({ engaged: true }); + const monitor = new BackpressureMonitor({ enabled: true, source, refreshIntervalMs: 1000 }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + + expect(monitor.isEngaged()).toBe(true); + + monitor.stop(); + }); + + it("isEngaged is false when clear and when stale", async () => { + const source: BackpressureSignalSource = { + read: async () => ({ engaged: true, ts: Date.now() }), + }; + const monitor = new BackpressureMonitor({ + enabled: true, + source, + refreshIntervalMs: 1_000_000, + maxVerdictAgeMs: 15_000, + }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + expect(monitor.isEngaged()).toBe(true); + + await vi.advanceTimersByTimeAsync(15_001); // stale → fail-open + expect(monitor.isEngaged()).toBe(false); + + monitor.stop(); + }); + + it("ramps the dequeue gate after release instead of resuming instantly", async () => { + let engaged = true; + let rnd = 0.5; + const source: BackpressureSignalSource = { read: async () => ({ engaged }) }; + const monitor = new BackpressureMonitor({ + enabled: true, + source, + refreshIntervalMs: 1000, + rampMs: 10_000, + random: () => rnd, + }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + expect(monitor.shouldSkipDequeue()).toBe(true); // hard engaged + + // Release: the next refresh observes the clear verdict and starts the ramp. + engaged = false; + await vi.advanceTimersByTimeAsync(1000); + expect(monitor.isEngaged()).toBe(false); + + // Just after release (progress ~0): skip probability ~1, so skip regardless. + rnd = 0.99; + expect(monitor.shouldSkipDequeue()).toBe(true); + + // Halfway through the ramp (progress 0.5): skip probability 0.5. + await vi.advanceTimersByTimeAsync(5000); + rnd = 0.4; + expect(monitor.shouldSkipDequeue()).toBe(true); // 0.4 < 0.5 → skip + rnd = 0.6; + expect(monitor.shouldSkipDequeue()).toBe(false); // 0.6 ≥ 0.5 → allow + + // Past the ramp window: never skip. + await vi.advanceTimersByTimeAsync(5000); + rnd = 0.0; + expect(monitor.shouldSkipDequeue()).toBe(false); + + monitor.stop(); + }); + + it("fails open on an engaged verdict with no timestamp when staleness is enforced", async () => { + // A verdict claiming engaged but carrying no ts can't be checked for freshness; + // when maxVerdictAgeMs is set we must not trust it (else a dead producer could + // pin the brake forever). + const { source } = countingSource({ engaged: true }); // no ts + const monitor = new BackpressureMonitor({ + enabled: true, + source, + refreshIntervalMs: 1000, + maxVerdictAgeMs: 15_000, + }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + + expect(monitor.computeEngaged()).toBe(false); + expect(monitor.shouldSkipDequeue()).toBe(false); + + monitor.stop(); + }); + + it("in dry-run, the gates are inert but computeEngaged still reflects the real signal", async () => { + const { source } = countingSource({ engaged: true }); + const monitor = new BackpressureMonitor({ + enabled: true, + source, + refreshIntervalMs: 1000, + dryRun: true, + }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + + expect(monitor.computeEngaged()).toBe(true); // real signal, for observability/metrics + expect(monitor.isEngaged()).toBe(false); // inert: no scale-up freeze + expect(monitor.shouldSkipDequeue()).toBe(false); // inert: no dequeue skip + + monitor.stop(); + }); + + it("logs on verdict transitions", async () => { + let engaged = true; + const source: BackpressureSignalSource = { read: async () => ({ engaged }) }; + const logs: Array<{ message: string; meta?: Record }> = []; + const logger = { + info: (message: string, meta?: Record) => logs.push({ message, meta }), + }; + const monitor = new BackpressureMonitor({ + enabled: true, + source, + refreshIntervalMs: 1000, + logger, + }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + expect(logs.some((l) => l.meta?.engaged === true)).toBe(true); + + engaged = false; + await vi.advanceTimersByTimeAsync(1000); + expect(logs.some((l) => l.meta?.engaged === false)).toBe(true); + + monitor.stop(); + }); + + it("records prometheus metrics", async () => { + const { source } = countingSource({ engaged: true }); + const register = new Registry(); + const metrics = new BackpressureMetrics({ register }); + const monitor = new BackpressureMonitor({ + enabled: true, + source, + refreshIntervalMs: 1000, + metrics, + }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + + expect(await register.metrics()).toContain("supervisor_backpressure_engaged 1"); + + monitor.shouldSkipDequeue(); + expect(await register.metrics()).toMatch( + /supervisor_backpressure_skipped_dequeues_total\{dry_run="false"\} [1-9]/ + ); + + monitor.stop(); + }); + + it("resumes instantly when no ramp is configured", async () => { + let engaged = true; + const source: BackpressureSignalSource = { read: async () => ({ engaged }) }; + const monitor = new BackpressureMonitor({ enabled: true, source, refreshIntervalMs: 1000 }); + + monitor.start(); + await vi.advanceTimersByTimeAsync(0); + expect(monitor.shouldSkipDequeue()).toBe(true); + + engaged = false; + await vi.advanceTimersByTimeAsync(1000); + expect(monitor.shouldSkipDequeue()).toBe(false); // no ramp → instant resume + + monitor.stop(); + }); +}); diff --git a/apps/supervisor/src/backpressure/backpressureMonitor.ts b/apps/supervisor/src/backpressure/backpressureMonitor.ts new file mode 100644 index 00000000000..6b4170697e5 --- /dev/null +++ b/apps/supervisor/src/backpressure/backpressureMonitor.ts @@ -0,0 +1,179 @@ +import type { BackpressureMetrics } from "./backpressureMetrics.js"; + +export interface BackpressureLogger { + info(message: string, meta?: Record): void; +} + +export type BackpressureVerdict = { + engaged: boolean; + /** Epoch ms the verdict was produced. Used for consumer-side staleness fail-open. */ + ts?: number; +}; + +/** + * Source of the current backpressure verdict. `read()` returns `null` when the + * verdict is unknown (missing/unreadable) - the monitor treats unknown as + * "not engaged" (fail-open). + */ +export interface BackpressureSignalSource { + read(): Promise; +} + +export type BackpressureMonitorOptions = { + enabled: boolean; + source: BackpressureSignalSource; + refreshIntervalMs?: number; + /** + * If set, a cached verdict older than this is treated as unknown (fail-open). + * Guards against the source silently going stale (e.g. hanging reads). + */ + maxVerdictAgeMs?: number; + /** + * If set, after backpressure releases the dequeue gate stays partially engaged + * for this long, skipping a linearly-decaying fraction of attempts so the + * aggregate dequeue rate ramps from ~0 to full instead of snapping to full and + * re-flooding a freshly-recovered cluster. 0/unset = instant resume. + */ + rampMs?: number; + /** Injectable RNG for the resume ramp; defaults to Math.random. */ + random?: () => number; + /** + * When true, the gates are inert (never skip dequeues, never freeze scale-up). + * computeEngaged() still reflects the real signal so it can be observed. + */ + dryRun?: boolean; + logger?: BackpressureLogger; + metrics?: BackpressureMetrics; +}; + +const DEFAULT_REFRESH_INTERVAL_MS = 1000; + +export class BackpressureMonitor { + private verdict: BackpressureVerdict | null = null; + private timer?: ReturnType; + private refreshInFlight = false; + private wasEngaged = false; + private releasedAt?: number; + + constructor(private readonly opts: BackpressureMonitorOptions) { + this.opts.metrics?.dryRun.set(this.opts.dryRun ? 1 : 0); + } + + start(): void { + if (!this.opts.enabled) { + return; + } + + void this.refreshTick(); + this.timer = setInterval( + () => void this.refreshTick(), + this.opts.refreshIntervalMs ?? DEFAULT_REFRESH_INTERVAL_MS + ); + } + + /** Skip a tick if the previous refresh is still in flight, so slow/hung reads can't stack. */ + private async refreshTick(): Promise { + if (this.refreshInFlight) { + return; + } + this.refreshInFlight = true; + try { + await this.refresh(); + } finally { + this.refreshInFlight = false; + } + } + + stop(): void { + if (this.timer) { + clearInterval(this.timer); + this.timer = undefined; + } + } + + /** + * Raw hard backpressure state: true while the (fresh) verdict says engaged, + * ignoring dry-run. Used for observability/metrics so the real signal is + * visible even when the gates are inert. + */ + computeEngaged(): boolean { + const verdict = this.verdict; + if (verdict?.engaged !== true) { + return false; + } + + // When staleness enforcement is on, an engaged verdict must carry a fresh + // timestamp. A missing or stale ts can't be trusted (a dead producer could + // otherwise pin the brake forever), so fail open. + const maxAge = this.opts.maxVerdictAgeMs; + if (maxAge !== undefined) { + if (verdict.ts === undefined || Date.now() - verdict.ts > maxAge) { + return false; + } + } + + return true; + } + + /** + * Effective hard state: the signal for freezing consumer-pool scale-up. Inert + * (false) in dry-run. Hot-path read, no I/O. + */ + isEngaged(): boolean { + return this.opts.dryRun ? false : this.computeEngaged(); + } + + /** Hot-path read: synchronous, never performs I/O. Inert (false) in dry-run. */ + shouldSkipDequeue(): boolean { + const wouldSkip = this.computeShouldSkip(); + if (wouldSkip) { + this.opts.metrics?.skipsTotal.inc({ dry_run: this.opts.dryRun ? "true" : "false" }); + } + return this.opts.dryRun ? false : wouldSkip; + } + + private computeShouldSkip(): boolean { + if (this.computeEngaged()) { + return true; + } + + // Post-release ramp: skip a linearly-decaying fraction of attempts so the + // aggregate dequeue rate climbs back to full over rampMs rather than snapping. + const rampMs = this.opts.rampMs; + if (rampMs && this.releasedAt !== undefined) { + const elapsed = Date.now() - this.releasedAt; + if (elapsed < rampMs) { + const skipProbability = 1 - elapsed / rampMs; + return (this.opts.random ?? Math.random)() < skipProbability; + } + } + + return false; + } + + private async refresh(): Promise { + try { + this.verdict = await this.opts.source.read(); + } catch { + // Fail-open: a dead/unreachable source must never pin the brake. Treat as + // unknown (no verdict) so dequeue resumes as if backpressure were off. + this.verdict = null; + } + + // Track the engaged→released transition to anchor the resume ramp. Use the + // staleness-aware state so a stale verdict doesn't pin wasEngaged / the gauge. + const nowEngaged = this.computeEngaged(); + this.opts.metrics?.engaged.set(nowEngaged ? 1 : 0); + + if (nowEngaged !== this.wasEngaged) { + this.opts.logger?.info("backpressure verdict changed", { + engaged: nowEngaged, + dryRun: !!this.opts.dryRun, + }); + } + if (this.wasEngaged && !nowEngaged) { + this.releasedAt = Date.now(); + } + this.wasEngaged = nowEngaged; + } +} diff --git a/apps/supervisor/src/backpressure/redisBackpressureSignalSource.test.ts b/apps/supervisor/src/backpressure/redisBackpressureSignalSource.test.ts new file mode 100644 index 00000000000..77a7457b13c --- /dev/null +++ b/apps/supervisor/src/backpressure/redisBackpressureSignalSource.test.ts @@ -0,0 +1,62 @@ +import { redisTest } from "@internal/testcontainers"; +import { Redis } from "ioredis"; +import { describe, expect } from "vitest"; +import { RedisBackpressureSignalSource } from "./redisBackpressureSignalSource.js"; + +const KEY = "backpressure:test"; + +describe("RedisBackpressureSignalSource", () => { + redisTest("returns null when the key is absent", async ({ redisOptions }) => { + const redis = new Redis(redisOptions); + try { + const source = new RedisBackpressureSignalSource(redis, KEY); + expect(await source.read()).toBeNull(); + } finally { + await redis.quit(); + } + }); + + redisTest("parses a valid engaged verdict", async ({ redisOptions }) => { + const redis = new Redis(redisOptions); + try { + await redis.set(KEY, JSON.stringify({ engaged: true, ts: 1_700_000_000_000 })); + const source = new RedisBackpressureSignalSource(redis, KEY); + expect(await source.read()).toEqual({ engaged: true, ts: 1_700_000_000_000 }); + } finally { + await redis.quit(); + } + }); + + redisTest("parses a clear verdict", async ({ redisOptions }) => { + const redis = new Redis(redisOptions); + try { + await redis.set(KEY, JSON.stringify({ engaged: false })); + const source = new RedisBackpressureSignalSource(redis, KEY); + expect(await source.read()).toEqual({ engaged: false }); + } finally { + await redis.quit(); + } + }); + + redisTest("returns null for malformed JSON (fail-open)", async ({ redisOptions }) => { + const redis = new Redis(redisOptions); + try { + await redis.set(KEY, "not json {"); + const source = new RedisBackpressureSignalSource(redis, KEY); + expect(await source.read()).toBeNull(); + } finally { + await redis.quit(); + } + }); + + redisTest("returns null for valid JSON of the wrong shape (fail-open)", async ({ redisOptions }) => { + const redis = new Redis(redisOptions); + try { + await redis.set(KEY, JSON.stringify({ foo: "bar" })); + const source = new RedisBackpressureSignalSource(redis, KEY); + expect(await source.read()).toBeNull(); + } finally { + await redis.quit(); + } + }); +}); diff --git a/apps/supervisor/src/backpressure/redisBackpressureSignalSource.ts b/apps/supervisor/src/backpressure/redisBackpressureSignalSource.ts new file mode 100644 index 00000000000..4f8a54c6247 --- /dev/null +++ b/apps/supervisor/src/backpressure/redisBackpressureSignalSource.ts @@ -0,0 +1,35 @@ +import type { Redis } from "ioredis"; +import { z } from "zod"; +import type { BackpressureSignalSource, BackpressureVerdict } from "./backpressureMonitor.js"; + +const VerdictSchema = z.object({ + engaged: z.boolean(), + ts: z.number().optional(), +}); + +/** Reads the backpressure verdict from a Redis key written by the cluster-side aggregator. */ +export class RedisBackpressureSignalSource implements BackpressureSignalSource { + constructor( + private readonly redis: Redis, + private readonly key: string + ) {} + + async read(): Promise { + const raw = await this.redis.get(this.key); + if (raw === null) { + return null; + } + + // A malformed or wrong-shaped value is treated as unknown (null) so the + // monitor fails open rather than acting on garbage. + let json: unknown; + try { + json = JSON.parse(raw); + } catch { + return null; + } + + const parsed = VerdictSchema.safeParse(json); + return parsed.success ? parsed.data : null; + } +} diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index b69fb24d73f..3919e73a7ee 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -34,6 +34,10 @@ const Env = z // Dequeue settings (provider mode) TRIGGER_DEQUEUE_ENABLED: BoolEnv.default(true), + // Which worker-queue class this supervisor fleet serves. "default" pulls the + // region queue (standard/agent runs); "scheduled" pulls the dedicated + // scheduled-lineage queue. Run a separate fleet per class for isolation. + TRIGGER_WORKER_QUEUE_CLASS: z.enum(["default", "scheduled"]).default("default"), TRIGGER_DEQUEUE_INTERVAL_MS: z.coerce.number().int().default(250), TRIGGER_DEQUEUE_IDLE_INTERVAL_MS: z.coerce.number().int().default(1000), TRIGGER_DEQUEUE_MAX_RUN_COUNT: z.coerce.number().int().default(1), @@ -47,6 +51,29 @@ const Env = z TRIGGER_DEQUEUE_SCALING_BATCH_WINDOW_MS: z.coerce.number().int().positive().default(1000), // Batch window for metrics processing (ms) TRIGGER_DEQUEUE_SCALING_DAMPING_FACTOR: z.coerce.number().min(0).max(1).default(0.7), // Smooths consumer count changes after EWMA (0=no scaling, 1=immediate) + // Dequeue backpressure - off by default. When enabled, the supervisor reads a + // verdict from Redis (written by the cluster-side aggregator) and pauses dequeues + // while the worker cluster can't schedule pods. Disabled = total no-op: no Redis + // client is created, no reads happen, and the dequeue loop is unaffected. + TRIGGER_DEQUEUE_BACKPRESSURE_ENABLED: BoolEnv.default(false), + // Safety default: even when enabled, backpressure only logs what it would do. + // Set to false to actually skip dequeues / freeze scale-up. + TRIGGER_DEQUEUE_BACKPRESSURE_DRY_RUN: BoolEnv.default(true), + TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_KEY: z.string().default("engine:dequeue:backpressure"), + TRIGGER_DEQUEUE_BACKPRESSURE_REFRESH_MS: z.coerce.number().int().positive().default(1000), + TRIGGER_DEQUEUE_BACKPRESSURE_RAMP_MS: z.coerce.number().int().min(0).default(30_000), // Resume ramp window after release; 0 = instant resume + + TRIGGER_DEQUEUE_BACKPRESSURE_MAX_VERDICT_AGE_MS: z.coerce + .number() + .int() + .positive() + .default(15_000), // Stale verdict → fail-open (treat as not engaged) + TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_HOST: z.string().optional(), + TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_PORT: z.coerce.number().int().optional(), + TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_USERNAME: z.string().optional(), + TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_PASSWORD: z.string().optional(), + TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_TLS_DISABLED: BoolEnv.default(false), + // Optional services TRIGGER_WARM_START_URL: z.string().optional(), TRIGGER_CHECKPOINT_URL: z.string().optional(), @@ -87,6 +114,14 @@ const Env = z COMPUTE_TRACE_OTLP_ENDPOINT: z.string().url().optional(), // Override for span export (derived from TRIGGER_API_URL if unset) COMPUTE_SNAPSHOT_DELAY_MS: z.coerce.number().int().min(0).max(60_000).default(5_000), COMPUTE_SNAPSHOT_DISPATCH_LIMIT: z.coerce.number().int().min(1).max(100).default(10), + // Instance create retries for transient placement failures (1 = no retries) + COMPUTE_INSTANCE_CREATE_MAX_ATTEMPTS: z.coerce.number().int().min(1).max(10).default(3), + COMPUTE_INSTANCE_CREATE_RETRY_BASE_DELAY_MS: z.coerce + .number() + .int() + .min(0) + .max(10_000) + .default(250), // Kubernetes settings KUBERNETES_FORCE_ENABLED: BoolEnv.default(false), @@ -121,6 +156,16 @@ const Env = z KUBERNETES_MEMORY_OVERHEAD_GB: z.coerce.number().min(0).optional(), // Optional memory overhead to add to the limit in GB KUBERNETES_SCHEDULER_NAME: z.string().optional(), // Custom scheduler name for pods + + // Pod DNS config — override the cluster default ndots to `KUBERNETES_POD_DNS_NDOTS`. + // Default k8s ndots is 5: any name with fewer than 5 dots (e.g. `api.example.com`, 2 dots) is first walked + // through every entry in the cluster search list (`.svc.cluster.local`, `svc.cluster.local`, `cluster.local`) + // before being tried as-is, turning one resolution into 4+ CoreDNS queries (×2 with A+AAAA). + // Overriding the default can be useful to cut CoreDNS query amplification for external domains. + // Note: before enabling, make sure no code path relies on search-list expansion for names with dots ≥ the value + // set here — those names will now hit their as-is form first and could resolve externally before falling back. + KUBERNETES_POD_DNS_NDOTS_OVERRIDE_ENABLED: BoolEnv.default(false), + KUBERNETES_POD_DNS_NDOTS: z.coerce.number().int().min(1).max(15).default(2), // Large machine affinity settings - large-* presets prefer a dedicated pool KUBERNETES_LARGE_MACHINE_AFFINITY_ENABLED: BoolEnv.default(false), KUBERNETES_LARGE_MACHINE_AFFINITY_POOL_LABEL_KEY: z @@ -189,7 +234,9 @@ const Env = z if (!validEffects.includes(effect)) { ctx.addIssue({ code: z.ZodIssueCode.custom, - message: `Invalid toleration effect "${effect}" in "${entry}". Must be one of: ${validEffects.join(", ")}`, + message: `Invalid toleration effect "${effect}" in "${entry}". Must be one of: ${validEffects.join( + ", " + )}`, }); return z.NEVER; } @@ -244,6 +291,15 @@ const Env = z // Debug DEBUG: BoolEnv.default(false), SEND_RUN_DEBUG_LOGS: BoolEnv.default(false), + + // Wide-event observability - off by default. Emits one flat-keyed JSON + // line per natural unit of work (dequeue iteration, HTTP request, socket + // lifecycle). High-QPS hotpath, so the kill switch must be honoured. + TRIGGER_WIDE_EVENTS_ENABLED: BoolEnv.default(false), + // When true, also emit wide events for high-frequency HTTP routes + // (heartbeat, snapshots-since, logs/debug). Off in prod to keep event + // volume manageable; on in test environments for full-fidelity debugging. + TRIGGER_WIDE_EVENTS_NOISY_ROUTES: BoolEnv.default(false), }) .superRefine((data, ctx) => { if (data.COMPUTE_SNAPSHOTS_ENABLED && !data.TRIGGER_METADATA_URL) { @@ -260,6 +316,14 @@ const Env = z path: ["TRIGGER_WORKLOAD_API_DOMAIN"], }); } + if (data.TRIGGER_DEQUEUE_BACKPRESSURE_ENABLED && !data.TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_HOST) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: + "TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_HOST is required when TRIGGER_DEQUEUE_BACKPRESSURE_ENABLED is true", + path: ["TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_HOST"], + }); + } }) .transform((data) => ({ ...data, diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts index 6f5913c47ca..e97c4c7bb96 100644 --- a/apps/supervisor/src/index.ts +++ b/apps/supervisor/src/index.ts @@ -28,6 +28,18 @@ import { FailedPodHandler } from "./services/failedPodHandler.js"; import { getWorkerToken } from "./workerToken.js"; import { OtlpTraceService } from "./services/otlpTraceService.js"; import { extractTraceparent, getRestoreRunnerId } from "./util.js"; +import { Redis } from "ioredis"; +import { BackpressureMonitor } from "./backpressure/backpressureMonitor.js"; +import { RedisBackpressureSignalSource } from "./backpressure/redisBackpressureSignalSource.js"; +import { BackpressureMetrics } from "./backpressure/backpressureMetrics.js"; +import { + fromContext, + recordPhaseSince, + runWideEvent, + setExtra, + setMeta, + type WideEventOptions, +} from "./wideEvents/index.js"; if (env.METRICS_COLLECT_DEFAULTS) { collectDefaultMetrics({ register }); @@ -46,15 +58,28 @@ class ManagedSupervisor { private readonly podCleaner?: PodCleaner; private readonly failedPodHandler?: FailedPodHandler; private readonly tracing?: OtlpTraceService; + private readonly backpressureMonitor?: BackpressureMonitor; + private readonly backpressureRedis?: Redis; private readonly isKubernetes = isKubernetesEnvironment(env.KUBERNETES_FORCE_ENABLED); private readonly warmStartUrl = env.TRIGGER_WARM_START_URL; + private readonly wideEventOpts: WideEventOptions = { + service: "supervisor", + env: { nodeId: env.TRIGGER_WORKER_INSTANCE_NAME }, + enabled: env.TRIGGER_WIDE_EVENTS_ENABLED, + }; + private readonly wideEventsNoisyRoutes = env.TRIGGER_WIDE_EVENTS_NOISY_ROUTES; + constructor() { + // Strip secret-like env vars before debug-logging the rest. Add any new + // secret env var here so it never lands in the DEBUG "Starting up" log. const { TRIGGER_WORKER_TOKEN, MANAGED_WORKER_SECRET, COMPUTE_GATEWAY_AUTH_TOKEN, + DOCKER_REGISTRY_PASSWORD, + TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_PASSWORD, ...envWithoutSecrets } = env; @@ -119,6 +144,10 @@ class ManagedSupervisor { otelEndpoint: env.OTEL_EXPORTER_OTLP_ENDPOINT, prettyLogs: env.RUNNER_PRETTY_LOGS, }, + createRetry: { + maxAttempts: env.COMPUTE_INSTANCE_CREATE_MAX_ATTEMPTS, + baseDelayMs: env.COMPUTE_INSTANCE_CREATE_RETRY_BASE_DELAY_MS, + }, }); this.computeManager = computeManager; this.workloadManager = computeManager; @@ -166,6 +195,42 @@ class ManagedSupervisor { ); } + if (env.TRIGGER_DEQUEUE_BACKPRESSURE_ENABLED) { + this.backpressureRedis = new Redis({ + host: env.TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_HOST, + port: env.TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_PORT, + username: env.TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_USERNAME, + password: env.TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_PASSWORD, + ...(env.TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_TLS_DISABLED ? {} : { tls: {} }), + maxRetriesPerRequest: null, + }); + this.backpressureRedis.on("error", (error) => + this.logger.error("Backpressure redis error", { error: error.message }) + ); + + this.backpressureMonitor = new BackpressureMonitor({ + enabled: true, + source: new RedisBackpressureSignalSource( + this.backpressureRedis, + env.TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_KEY + ), + refreshIntervalMs: env.TRIGGER_DEQUEUE_BACKPRESSURE_REFRESH_MS, + maxVerdictAgeMs: env.TRIGGER_DEQUEUE_BACKPRESSURE_MAX_VERDICT_AGE_MS, + rampMs: env.TRIGGER_DEQUEUE_BACKPRESSURE_RAMP_MS, + dryRun: env.TRIGGER_DEQUEUE_BACKPRESSURE_DRY_RUN, + logger: this.logger, + metrics: new BackpressureMetrics({ register }), + }); + + this.logger.log("🛑 Dequeue backpressure enabled", { + key: env.TRIGGER_DEQUEUE_BACKPRESSURE_REDIS_KEY, + refreshIntervalMs: env.TRIGGER_DEQUEUE_BACKPRESSURE_REFRESH_MS, + maxVerdictAgeMs: env.TRIGGER_DEQUEUE_BACKPRESSURE_MAX_VERDICT_AGE_MS, + rampMs: env.TRIGGER_DEQUEUE_BACKPRESSURE_RAMP_MS, + dryRun: env.TRIGGER_DEQUEUE_BACKPRESSURE_DRY_RUN, + }); + } + this.workerSession = new SupervisorSession({ workerToken: getWorkerToken(), apiUrl: env.TRIGGER_API_URL, @@ -175,6 +240,7 @@ class ManagedSupervisor { dequeueIdleIntervalMs: env.TRIGGER_DEQUEUE_IDLE_INTERVAL_MS, queueConsumerEnabled: env.TRIGGER_DEQUEUE_ENABLED, maxRunCount: env.TRIGGER_DEQUEUE_MAX_RUN_COUNT, + queueClass: env.TRIGGER_WORKER_QUEUE_CLASS, metricsRegistry: register, scaling: { strategy: env.TRIGGER_DEQUEUE_SCALING_STRATEGY, @@ -186,18 +252,20 @@ class ManagedSupervisor { ewmaAlpha: env.TRIGGER_DEQUEUE_SCALING_EWMA_ALPHA, batchWindowMs: env.TRIGGER_DEQUEUE_SCALING_BATCH_WINDOW_MS, dampingFactor: env.TRIGGER_DEQUEUE_SCALING_DAMPING_FACTOR, + // Freeze scale-up while backpressure is hard-engaged (not during the resume + // ramp). Undefined when backpressure is disabled → no effect on scaling. + shouldPauseScaling: () => this.backpressureMonitor?.isEngaged() ?? false, }, runNotificationsEnabled: env.TRIGGER_WORKLOAD_API_ENABLED, heartbeatIntervalSeconds: env.TRIGGER_WORKER_HEARTBEAT_INTERVAL_SECONDS, sendRunDebugLogs: env.SEND_RUN_DEBUG_LOGS, preDequeue: async () => { - if (!env.RESOURCE_MONITOR_ENABLED) { - return {}; - } + // Synchronous, hot-path-safe cached read; undefined when backpressure is disabled. + const skipForBackpressure = this.backpressureMonitor?.shouldSkipDequeue() ?? false; - if (this.isKubernetes) { - // Not used in k8s for now - return {}; + if (!env.RESOURCE_MONITOR_ENABLED || this.isKubernetes) { + // Resource monitor is not used in k8s; backpressure is the only gate there. + return { skipDequeue: skipForBackpressure }; } const resources = await this.resourceMonitor.getNodeResources(); @@ -207,7 +275,10 @@ class ManagedSupervisor { cpu: resources.cpuAvailable, memory: resources.memoryAvailable, }, - skipDequeue: resources.cpuAvailable < 0.25 || resources.memoryAvailable < 0.25, + skipDequeue: + skipForBackpressure || + resources.cpuAvailable < 0.25 || + resources.memoryAvailable < 0.25, }; }, preSkip: async () => { @@ -239,149 +310,205 @@ class ManagedSupervisor { async ({ time, message, dequeueResponseMs, pollingIntervalMs }) => { this.logger.verbose(`Received message with timestamp ${time.toLocaleString()}`, message); - if (message.completedWaitpoints.length > 0) { - this.logger.debug("Run has completed waitpoints", { - runId: message.run.id, - completedWaitpoints: message.completedWaitpoints.length, - }); - } - - if (!message.image) { - this.logger.error("Run has no image", { runId: message.run.id }); - return; - } - - const { checkpoint, ...rest } = message; - - // Register trace context early so snapshot spans work for all paths - // (cold create, restore, warm start). Re-registration on restore is safe - // since dequeue always provides fresh context. - if (this.computeManager?.traceSpansEnabled) { - const traceparent = extractTraceparent(message.run.traceContext); - - if (traceparent) { - this.workloadServer.registerRunTraceContext(message.run.friendlyId, { - traceparent, - envId: message.environment.id, - orgId: message.organization.id, - projectId: message.project.id, - }); - } - } + const traceparent = extractTraceparent(message.run.traceContext); + + await runWideEvent( + { + ...this.wideEventOpts, + op: "dequeue", + kind: "inbound", + traceparent, + setup: (state) => { + setMeta(state, "run_id", message.run.friendlyId); + setMeta(state, "env_id", message.environment.id); + setMeta(state, "org_id", message.organization.id); + setMeta(state, "project_id", message.project.id); + if (message.deployment.friendlyId) { + setMeta(state, "deployment_id", message.deployment.friendlyId); + } + setMeta(state, "machine_preset", message.run.machine.name); + state.extras.iteration = "dequeue"; + state.extras.dequeue_response_ms = dequeueResponseMs; + state.extras.polling_interval_ms = pollingIntervalMs; + state.extras.completed_waitpoints = message.completedWaitpoints.length; + }, + }, + async () => { + if (message.completedWaitpoints.length > 0) { + this.logger.debug("Run has completed waitpoints", { + runId: message.run.id, + completedWaitpoints: message.completedWaitpoints.length, + }); + } - if (checkpoint) { - this.logger.debug("Restoring run", { runId: message.run.id }); + if (!message.image) { + setExtra(fromContext(), "path_taken", "skipped_no_image"); + this.logger.error("Run has no image", { runId: message.run.id }); + return; + } - if (this.computeManager) { - try { - const runnerId = getRestoreRunnerId(message.run.friendlyId, checkpoint.id); + const { checkpoint, ...rest } = message; - const didRestore = await this.computeManager.restore({ - snapshotId: checkpoint.location, - runnerId, - runFriendlyId: message.run.friendlyId, - snapshotFriendlyId: message.snapshot.friendlyId, - machine: message.run.machine, - traceContext: message.run.traceContext, + // Register trace context early so snapshot spans work for all paths + // (cold create, restore, warm start). Re-registration on restore is safe + // since dequeue always provides fresh context. + if (this.computeManager?.traceSpansEnabled && traceparent) { + this.workloadServer.registerRunTraceContext(message.run.friendlyId, { + traceparent, envId: message.environment.id, orgId: message.organization.id, projectId: message.project.id, - dequeuedAt: message.dequeuedAt, }); + } - if (didRestore) { - this.logger.debug("Compute restore successful", { - runId: message.run.id, - runnerId, + if (checkpoint) { + setExtra(fromContext(), "path_taken", "restore"); + this.logger.debug("Restoring run", { runId: message.run.id }); + + if (this.computeManager) { + const restoreStart = performance.now(); + try { + const runnerId = getRestoreRunnerId(message.run.friendlyId, checkpoint.id); + + const didRestore = await this.computeManager.restore({ + snapshotId: checkpoint.location, + runnerId, + runFriendlyId: message.run.friendlyId, + snapshotFriendlyId: message.snapshot.friendlyId, + machine: message.run.machine, + traceContext: message.run.traceContext, + envId: message.environment.id, + orgId: message.organization.id, + projectId: message.project.id, + hasPrivateLink: message.organization.hasPrivateLink, + dequeuedAt: message.dequeuedAt, + }); + recordPhaseSince("restore", restoreStart, undefined); + setExtra(fromContext(), "did_restore", didRestore); + + if (didRestore) { + this.logger.debug("Compute restore successful", { + runId: message.run.id, + runnerId, + }); + } else { + this.logger.error("Compute restore failed", { + runId: message.run.id, + runnerId, + }); + } + } catch (error) { + recordPhaseSince( + "restore", + restoreStart, + error instanceof Error ? error : new Error(String(error)) + ); + this.logger.error("Failed to restore run (compute)", { error }); + } + + return; + } + + if (!this.checkpointClient) { + this.logger.error("No checkpoint client", { runId: message.run.id }); + return; + } + + const restoreStart = performance.now(); + try { + const didRestore = await this.checkpointClient.restoreRun({ + runFriendlyId: message.run.friendlyId, + snapshotFriendlyId: message.snapshot.friendlyId, + body: { + ...rest, + checkpoint, + }, }); - } else { - this.logger.error("Compute restore failed", { runId: message.run.id, runnerId }); + recordPhaseSince("restore", restoreStart, undefined); + setExtra(fromContext(), "did_restore", didRestore); + + if (didRestore) { + this.logger.debug("Restore successful", { runId: message.run.id }); + } else { + this.logger.error("Restore failed", { runId: message.run.id }); + } + } catch (error) { + recordPhaseSince( + "restore", + restoreStart, + error instanceof Error ? error : new Error(String(error)) + ); + this.logger.error("Failed to restore run", { error }); } - } catch (error) { - this.logger.error("Failed to restore run (compute)", { error }); + + return; } - return; - } + this.logger.debug("Scheduling run", { runId: message.run.id }); - if (!this.checkpointClient) { - this.logger.error("No checkpoint client", { runId: message.run.id }); - return; - } + const warmStartStart = performance.now(); + const didWarmStart = await this.tryWarmStart(message, traceparent); + const warmStartCheckMs = Math.round(performance.now() - warmStartStart); + recordPhaseSince("warm_start", warmStartStart, undefined); + setExtra(fromContext(), "did_warm_start", didWarmStart); - try { - const didRestore = await this.checkpointClient.restoreRun({ - runFriendlyId: message.run.friendlyId, - snapshotFriendlyId: message.snapshot.friendlyId, - body: { - ...rest, - checkpoint, - }, - }); - - if (didRestore) { - this.logger.debug("Restore successful", { runId: message.run.id }); - } else { - this.logger.error("Restore failed", { runId: message.run.id }); + if (didWarmStart) { + setExtra(fromContext(), "path_taken", "warm_start"); + this.logger.debug("Warm start successful", { runId: message.run.id }); + return; } - } catch (error) { - this.logger.error("Failed to restore run", { error }); - } - - return; - } - this.logger.debug("Scheduling run", { runId: message.run.id }); + setExtra(fromContext(), "path_taken", "cold_create"); - const warmStartStart = performance.now(); - const didWarmStart = await this.tryWarmStart(message); - const warmStartCheckMs = Math.round(performance.now() - warmStartStart); + const createStart = performance.now(); + try { + if (!message.deployment.friendlyId) { + // mostly a type guard, deployments always exists for deployed environments + // a proper fix would be to use a discriminated union schema to differentiate between dequeued runs in dev and in deployed environments. + throw new Error("Deployment is missing"); + } - if (didWarmStart) { - this.logger.debug("Warm start successful", { runId: message.run.id }); - return; - } + await this.workloadManager.create({ + dequeuedAt: message.dequeuedAt, + dequeueResponseMs, + pollingIntervalMs, + warmStartCheckMs, + envId: message.environment.id, + envType: message.environment.type, + image: message.image, + machine: message.run.machine, + orgId: message.organization.id, + projectId: message.project.id, + deploymentFriendlyId: message.deployment.friendlyId, + deploymentVersion: message.backgroundWorker.version, + runId: message.run.id, + runFriendlyId: message.run.friendlyId, + version: message.version, + nextAttemptNumber: message.run.attemptNumber, + snapshotId: message.snapshot.id, + snapshotFriendlyId: message.snapshot.friendlyId, + placementTags: message.placementTags, + traceContext: message.run.traceContext, + annotations: message.run.annotations, + hasPrivateLink: message.organization.hasPrivateLink, + }); + recordPhaseSince("workload_create", createStart, undefined); - try { - if (!message.deployment.friendlyId) { - // mostly a type guard, deployments always exists for deployed environments - // a proper fix would be to use a discriminated union schema to differentiate between dequeued runs in dev and in deployed environments. - throw new Error("Deployment is missing"); + // Disabled for now + // this.resourceMonitor.blockResources({ + // cpu: message.run.machine.cpu, + // memory: message.run.machine.memory, + // }); + } catch (error) { + recordPhaseSince( + "workload_create", + createStart, + error instanceof Error ? error : new Error(String(error)) + ); + this.logger.error("Failed to create workload", { error }); + } } - - await this.workloadManager.create({ - dequeuedAt: message.dequeuedAt, - dequeueResponseMs, - pollingIntervalMs, - warmStartCheckMs, - envId: message.environment.id, - envType: message.environment.type, - image: message.image, - machine: message.run.machine, - orgId: message.organization.id, - projectId: message.project.id, - deploymentFriendlyId: message.deployment.friendlyId, - deploymentVersion: message.backgroundWorker.version, - runId: message.run.id, - runFriendlyId: message.run.friendlyId, - version: message.version, - nextAttemptNumber: message.run.attemptNumber, - snapshotId: message.snapshot.id, - snapshotFriendlyId: message.snapshot.friendlyId, - placementTags: message.placementTags, - traceContext: message.run.traceContext, - annotations: message.run.annotations, - hasPrivateLink: message.organization.hasPrivateLink, - }); - - // Disabled for now - // this.resourceMonitor.blockResources({ - // cpu: message.run.machine.cpu, - // memory: message.run.machine.memory, - // }); - } catch (error) { - this.logger.error("Failed to create workload", { error }); - } + ); } ); @@ -404,6 +531,8 @@ class ManagedSupervisor { checkpointClient: this.checkpointClient, computeManager: this.computeManager, tracing: this.tracing, + wideEventOpts: this.wideEventOpts, + wideEventsNoisyRoutes: this.wideEventsNoisyRoutes, }); this.workloadServer.on("runConnected", this.onRunConnected.bind(this)); @@ -420,19 +549,31 @@ class ManagedSupervisor { this.workerSession.unsubscribeFromRunNotifications([run.friendlyId]); } - private async tryWarmStart(dequeuedMessage: DequeuedMessage): Promise { + private async tryWarmStart( + dequeuedMessage: DequeuedMessage, + traceparent: string | undefined + ): Promise { if (!this.warmStartUrl) { return false; } const warmStartUrlWithPath = new URL("/warm-start", this.warmStartUrl); + const headers: Record = { + "Content-Type": "application/json", + }; + // Propagate the inbound W3C traceparent so the upstream warm-start + // receiver continues the same trace instead of minting a new one. Gated + // by the same kill switch as the wide-event emission so the whole PR is + // a no-op on the wire when disabled. + if (this.wideEventOpts.enabled && traceparent) { + headers.traceparent = traceparent; + } + try { const res = await fetch(warmStartUrlWithPath.href, { method: "POST", - headers: { - "Content-Type": "application/json", - }, + headers, body: JSON.stringify({ dequeuedMessage }), }); @@ -468,6 +609,7 @@ class ManagedSupervisor { this.logger.log("Starting up"); // Optional services + this.backpressureMonitor?.start(); await this.podCleaner?.start(); await this.failedPodHandler?.start(); await this.metricsServer?.start(); @@ -492,6 +634,8 @@ class ManagedSupervisor { await this.workerSession.stop(); // Optional services + this.backpressureMonitor?.stop(); + await this.backpressureRedis?.quit(); await this.podCleaner?.stop(); await this.failedPodHandler?.stop(); await this.metricsServer?.stop(); diff --git a/apps/supervisor/src/services/computeSnapshotService.test.ts b/apps/supervisor/src/services/computeSnapshotService.test.ts new file mode 100644 index 00000000000..b039b63bd4d --- /dev/null +++ b/apps/supervisor/src/services/computeSnapshotService.test.ts @@ -0,0 +1,130 @@ +import { describe, expect, it, vi } from "vitest"; +import { setTimeout as sleep } from "node:timers/promises"; +import { ComputeSnapshotService } from "./computeSnapshotService.js"; +import type { ComputeWorkloadManager } from "../workloadManager/compute.js"; +import type { SupervisorHttpClient } from "@trigger.dev/core/v3/workers"; + +// The TimerWheel ticks every 100ms, so a 200ms delay dispatches within ~300ms. +const DELAY_MS = 200; +// Long enough that a pending snapshot would certainly have dispatched. +const SETTLE_MS = 600; + +function createService() { + const snapshot = vi.fn(async (_opts: { runnerId: string; metadata: Record }) => true); + + const computeManager = { + snapshotDelayMs: DELAY_MS, + snapshotDispatchLimit: 1, + snapshot, + } as unknown as ComputeWorkloadManager; + + const service = new ComputeSnapshotService({ + computeManager, + workerClient: {} as SupervisorHttpClient, + wideEventOpts: { service: "supervisor-test", env: {}, enabled: false }, + }); + + return { service, snapshot }; +} + +function delayedSnapshot(runnerId = "runner-1") { + return { + runnerId, + runFriendlyId: "run_1", + snapshotFriendlyId: "snapshot_1", + }; +} + +describe("ComputeSnapshotService", () => { + it("dispatches a scheduled snapshot after the delay", async () => { + const { service, snapshot } = createService(); + try { + service.schedule("run_1", delayedSnapshot()); + + await vi.waitFor(() => expect(snapshot).toHaveBeenCalledTimes(1), { timeout: 2_000 }); + expect(snapshot).toHaveBeenCalledWith({ + runnerId: "runner-1", + metadata: { runId: "run_1", snapshotFriendlyId: "snapshot_1" }, + }); + } finally { + service.stop(); + } + }); + + it("cancel before the delay expires prevents the dispatch", async () => { + const { service, snapshot } = createService(); + try { + service.schedule("run_1", delayedSnapshot()); + + expect(service.cancel("run_1")).toBe(true); + + await sleep(SETTLE_MS); + expect(snapshot).not.toHaveBeenCalled(); + } finally { + service.stop(); + } + }); + + it("cancel returns false when nothing is pending", () => { + const { service } = createService(); + try { + expect(service.cancel("run_1")).toBe(false); + } finally { + service.stop(); + } + }); + + it("cancel with a matching runnerId cancels the pending snapshot", async () => { + const { service, snapshot } = createService(); + try { + service.schedule("run_1", delayedSnapshot("runner-a")); + + expect(service.cancel("run_1", "runner-a")).toBe(true); + + await sleep(SETTLE_MS); + expect(snapshot).not.toHaveBeenCalled(); + } finally { + service.stop(); + } + }); + + it("cancel with a different runnerId leaves the pending snapshot alone", async () => { + const { service, snapshot } = createService(); + try { + service.schedule("run_1", delayedSnapshot("runner-a")); + + // A stale runner for a reassigned run must not cancel the new runner's snapshot. + expect(service.cancel("run_1", "runner-b")).toBe(false); + + await vi.waitFor(() => expect(snapshot).toHaveBeenCalledTimes(1), { timeout: 2_000 }); + expect(snapshot).toHaveBeenCalledWith( + expect.objectContaining({ runnerId: "runner-a" }) + ); + } finally { + service.stop(); + } + }); + + it("re-scheduling the same run replaces the pending snapshot", async () => { + const { service, snapshot } = createService(); + try { + service.schedule("run_1", delayedSnapshot()); + service.schedule("run_1", { + runnerId: "runner-1", + runFriendlyId: "run_1", + snapshotFriendlyId: "snapshot_2", + }); + + await vi.waitFor(() => expect(snapshot).toHaveBeenCalledTimes(1), { timeout: 2_000 }); + await sleep(SETTLE_MS); + + expect(snapshot).toHaveBeenCalledTimes(1); + expect(snapshot).toHaveBeenCalledWith({ + runnerId: "runner-1", + metadata: { runId: "run_1", snapshotFriendlyId: "snapshot_2" }, + }); + } finally { + service.stop(); + } + }); +}); diff --git a/apps/supervisor/src/services/computeSnapshotService.ts b/apps/supervisor/src/services/computeSnapshotService.ts index 041e2902c75..216753fc12d 100644 --- a/apps/supervisor/src/services/computeSnapshotService.ts +++ b/apps/supervisor/src/services/computeSnapshotService.ts @@ -6,6 +6,15 @@ import { type SnapshotCallbackPayload } from "@internal/compute"; import type { ComputeWorkloadManager } from "../workloadManager/compute.js"; import { TimerWheel } from "./timerWheel.js"; import type { OtlpTraceService } from "./otlpTraceService.js"; +import { + emitOneShot, + fromContext, + recordPhaseSince, + runWideEvent, + setExtra, + setMeta, + type WideEventOptions, +} from "../wideEvents/index.js"; type DelayedSnapshot = { runnerId: string; @@ -24,6 +33,7 @@ export type ComputeSnapshotServiceOptions = { computeManager: ComputeWorkloadManager; workerClient: SupervisorHttpClient; tracing?: OtlpTraceService; + wideEventOpts: WideEventOptions; }; export class ComputeSnapshotService { @@ -37,11 +47,13 @@ export class ComputeSnapshotService { private readonly computeManager: ComputeWorkloadManager; private readonly workerClient: SupervisorHttpClient; private readonly tracing?: OtlpTraceService; + private readonly wideEventOpts: WideEventOptions; constructor(opts: ComputeSnapshotServiceOptions) { this.computeManager = opts.computeManager; this.workerClient = opts.workerClient; this.tracing = opts.tracing; + this.wideEventOpts = opts.wideEventOpts; this.dispatchLimit = pLimit(this.computeManager.snapshotDispatchLimit); this.timerWheel = new TimerWheel({ @@ -62,6 +74,17 @@ export class ComputeSnapshotService { /** Schedule a delayed snapshot for a run. Replaces any pending snapshot for the same run. */ schedule(runFriendlyId: string, data: DelayedSnapshot) { this.timerWheel.submit(runFriendlyId, data); + emitOneShot({ + ...this.wideEventOpts, + op: "snapshot.schedule", + kind: "event", + populate: (state) => { + state.meta.run_id = runFriendlyId; + state.meta.snapshot_id = data.snapshotFriendlyId; + state.extras.runner_id = data.runnerId; + state.extras.delay_ms = this.computeManager.snapshotDelayMs; + }, + }); this.logger.debug("Snapshot scheduled", { runFriendlyId, snapshotFriendlyId: data.snapshotFriendlyId, @@ -69,10 +92,29 @@ export class ComputeSnapshotService { }); } - /** Cancel a pending delayed snapshot. Returns true if one was cancelled. */ - cancel(runFriendlyId: string): boolean { + /** + * Cancel a pending delayed snapshot. Returns true if one was cancelled. + * When `runnerId` is given, only a snapshot scheduled for that same runner + * is cancelled - a stale runner for a run that has since been reassigned + * must not cancel the new runner's pending snapshot. + */ + cancel(runFriendlyId: string, runnerId?: string): boolean { + if (runnerId) { + const pending = this.timerWheel.peek(runFriendlyId); + if (pending && pending.data.runnerId !== runnerId) { + return false; + } + } const cancelled = this.timerWheel.cancel(runFriendlyId); if (cancelled) { + emitOneShot({ + ...this.wideEventOpts, + op: "snapshot.canceled", + kind: "event", + populate: (state) => { + state.meta.run_id = runFriendlyId; + }, + }); this.logger.debug("Snapshot cancelled", { runFriendlyId }); } return cancelled; @@ -81,6 +123,23 @@ export class ComputeSnapshotService { /** Handle the callback from the gateway after a snapshot completes or fails. */ async handleCallback(body: SnapshotCallbackPayload) { const snapshotId = body.status === "completed" ? body.snapshot_id : undefined; + const runId = body.metadata?.runId; + const snapshotFriendlyId = body.metadata?.snapshotFriendlyId; + + // Enrich the wrapping route's wide event with snapshot metadata. The + // `/api/v1/compute/snapshot-complete` route is registered with `wideRoute`, + // so `fromContext()` returns the State of that route and these calls + // become extras/meta on the same wide event - no nested emission. + const state = fromContext(); + if (state) { + state.extras["snapshot.status"] = body.status; + if (body.instance_id) state.extras["snapshot.instance_id"] = body.instance_id; + if (body.duration_ms !== undefined) state.extras["snapshot.duration_ms"] = body.duration_ms; + if (snapshotId) state.extras["snapshot.id"] = snapshotId; + if (body.status === "failed" && body.error) state.extras["snapshot.error"] = body.error; + } + if (runId) setMeta(state, "run_id", runId); + if (snapshotFriendlyId) setMeta(state, "snapshot_id", snapshotFriendlyId); this.logger.debug("Snapshot callback", { snapshotId, @@ -91,9 +150,6 @@ export class ComputeSnapshotService { durationMs: body.duration_ms, }); - const runId = body.metadata?.runId; - const snapshotFriendlyId = body.metadata?.snapshotFriendlyId; - if (!runId || !snapshotFriendlyId) { this.logger.error("Snapshot callback missing metadata", { body }); return { ok: false as const, status: 400 }; @@ -102,6 +158,7 @@ export class ComputeSnapshotService { this.#emitSnapshotSpan(runId, body.duration_ms, snapshotId); if (body.status === "completed") { + const submitStart = performance.now(); const result = await this.workerClient.submitSuspendCompletion({ runId, snapshotId: snapshotFriendlyId, @@ -113,6 +170,11 @@ export class ComputeSnapshotService { }, }, }); + recordPhaseSince( + "submit_completion", + submitStart, + result.success ? undefined : new Error(String(result.error)) + ); if (result.success) { this.logger.debug("Suspend completion submitted", { @@ -121,6 +183,7 @@ export class ComputeSnapshotService { snapshotId: body.snapshot_id, }); } else { + setExtra(state, "submit_completion.error", String(result.error)); this.logger.error("Failed to submit suspend completion", { runId, snapshotFriendlyId, @@ -128,6 +191,7 @@ export class ComputeSnapshotService { }); } } else { + const submitStart = performance.now(); const result = await this.workerClient.submitSuspendCompletion({ runId, snapshotId: snapshotFriendlyId, @@ -136,8 +200,14 @@ export class ComputeSnapshotService { error: body.error ?? "Snapshot failed", }, }); + recordPhaseSince( + "submit_completion", + submitStart, + result.success ? undefined : new Error(String(result.error)) + ); if (!result.success) { + setExtra(state, "submit_completion.error", String(result.error)); this.logger.error("Failed to submit suspend failure", { runId, snapshotFriendlyId, @@ -184,20 +254,31 @@ export class ComputeSnapshotService { /** Dispatch a snapshot request to the gateway. */ private async dispatch(snapshot: DelayedSnapshot): Promise { - const result = await this.computeManager.snapshot({ - runnerId: snapshot.runnerId, - metadata: { - runId: snapshot.runFriendlyId, - snapshotFriendlyId: snapshot.snapshotFriendlyId, + await runWideEvent( + { + ...this.wideEventOpts, + op: "snapshot.dispatch", + kind: "scheduled", + setup: (state) => { + state.meta.run_id = snapshot.runFriendlyId; + state.meta.snapshot_id = snapshot.snapshotFriendlyId; + state.extras.runner_id = snapshot.runnerId; + }, }, - }); + async () => { + const result = await this.computeManager.snapshot({ + runnerId: snapshot.runnerId, + metadata: { + runId: snapshot.runFriendlyId, + snapshotFriendlyId: snapshot.snapshotFriendlyId, + }, + }); - if (!result) { - this.logger.error("Failed to request snapshot", { - runId: snapshot.runFriendlyId, - runnerId: snapshot.runnerId, - }); - } + if (!result) { + throw new Error("Snapshot dispatch returned no result"); + } + } + ); } #emitSnapshotSpan(runFriendlyId: string, durationMs?: number, snapshotId?: string) { diff --git a/apps/supervisor/src/services/timerWheel.test.ts b/apps/supervisor/src/services/timerWheel.test.ts index 3f6bb9aa19b..e685a26b1b4 100644 --- a/apps/supervisor/src/services/timerWheel.test.ts +++ b/apps/supervisor/src/services/timerWheel.test.ts @@ -51,6 +51,23 @@ describe("TimerWheel", () => { wheel.stop(); }); + it("peek returns the pending item without removing it", () => { + const wheel = new TimerWheel({ delayMs: 3000, onExpire: () => {} }); + + wheel.start(); + wheel.submit("run-1", "data"); + + expect(wheel.peek("run-1")).toEqual({ key: "run-1", data: "data" }); + expect(wheel.size).toBe(1); + expect(wheel.peek("run-2")).toBeUndefined(); + + // Dispatched items are no longer peekable + vi.advanceTimersByTime(3100); + expect(wheel.peek("run-1")).toBeUndefined(); + + wheel.stop(); + }); + it("cancel returns false for unknown key", () => { const wheel = new TimerWheel({ delayMs: 3000, diff --git a/apps/supervisor/src/services/timerWheel.ts b/apps/supervisor/src/services/timerWheel.ts index 9584423824d..cab5a5d7a25 100644 --- a/apps/supervisor/src/services/timerWheel.ts +++ b/apps/supervisor/src/services/timerWheel.ts @@ -121,6 +121,12 @@ export class TimerWheel { return true; } + /** Look up a pending item without removing it. */ + peek(key: string): TimerWheelItem | undefined { + const entry = this.entries.get(key); + return entry ? { key, data: entry.data } : undefined; + } + /** Number of pending items in the wheel. */ get size(): number { return this.entries.size; diff --git a/apps/supervisor/src/wideEvents/baggage.test.ts b/apps/supervisor/src/wideEvents/baggage.test.ts new file mode 100644 index 00000000000..0579533345e --- /dev/null +++ b/apps/supervisor/src/wideEvents/baggage.test.ts @@ -0,0 +1,44 @@ +import { describe, it, expect } from "vitest"; +import { encodeBaggage } from "./baggage.js"; + +describe("encodeBaggage", () => { + it("returns empty string for an empty map", () => { + expect(encodeBaggage({})).toBe(""); + }); + + it("encodes a single entry as k=v", () => { + expect(encodeBaggage({ run_id: "run-1" })).toBe("run_id=run-1"); + }); + + it("sorts keys for stable output across hops", () => { + expect(encodeBaggage({ b: "2", a: "1", c: "3" })).toBe("a=1,b=2,c=3"); + }); + + it("skips empty keys and empty values", () => { + expect(encodeBaggage({ "": "v", k: "", real: "x" })).toBe("real=x"); + }); + + it("truncates values longer than the cap", () => { + const long = "x".repeat(1024); + const got = encodeBaggage({ k: long }); + const value = got.slice("k=".length); + expect(value.length).toBe(256); + }); + + it("caps multibyte values by UTF-8 bytes, not code units", () => { + const long = "あ".repeat(512); // 3 UTF-8 bytes each + const got = encodeBaggage({ k: long }); + const value = got.slice("k=".length); + expect(Buffer.byteLength(value, "utf8")).toBeLessThanOrEqual(256); + }); + + it("caps the number of entries", () => { + const meta: Record = {}; + for (let i = 0; i < 50; i++) { + // Sortable two-digit keys so we know which 32 survive. + meta[`k${String(i).padStart(2, "0")}`] = "v"; + } + const got = encodeBaggage(meta); + expect(got.split(",").length).toBe(32); + }); +}); diff --git a/apps/supervisor/src/wideEvents/baggage.ts b/apps/supervisor/src/wideEvents/baggage.ts new file mode 100644 index 00000000000..7750ac79303 --- /dev/null +++ b/apps/supervisor/src/wideEvents/baggage.ts @@ -0,0 +1,45 @@ +/** + * W3C Baggage (https://www.w3.org/TR/baggage/) encoding for outbound peer + * calls. Serialises a State's `meta` map into a `Baggage` header value so + * the downstream service auto-stamps the same labels onto its own wide + * events - even on early-error paths that bail before parsing the request + * body. + * + * Outbound discipline: only call this on peer-to-peer hops within the trust + * boundary. External-endpoint calls (image registries, cloud-provider + * APIs, third-party webhooks) must not include the Baggage header. + */ + +import { truncateUtf8 } from "./truncate.js"; + +/** + * Cap the number of entries serialised onto the header. A misbehaving + * caller's `meta` map shouldn't blow up downstream event width. + */ +const MAX_BAGGAGE_ENTRIES = 32; + +/** + * Cap each value's length. Defense against an upstream that stuffs + * unbounded payloads into a meta value. + */ +const MAX_BAGGAGE_VALUE_BYTES = 256; + +/** + * Encode a `meta` map as a Baggage header value (`k1=v1,k2=v2`). Keys are + * sorted for stable output across hops; an empty input yields the empty + * string so the caller can skip emitting the header entirely. + */ +export function encodeBaggage(meta: Record): string { + const entries = Object.entries(meta).filter(([k, v]) => k && v); + if (entries.length === 0) return ""; + + entries.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0)); + + const out: string[] = []; + for (const [k, raw] of entries) { + if (out.length >= MAX_BAGGAGE_ENTRIES) break; + const v = truncateUtf8(raw, MAX_BAGGAGE_VALUE_BYTES); + out.push(`${k}=${v}`); + } + return out.join(","); +} diff --git a/apps/supervisor/src/wideEvents/context.ts b/apps/supervisor/src/wideEvents/context.ts new file mode 100644 index 00000000000..a89859c2707 --- /dev/null +++ b/apps/supervisor/src/wideEvents/context.ts @@ -0,0 +1,14 @@ +import { AsyncLocalStorage } from "node:async_hooks"; +import type { State } from "./state.js"; + +/** + * AsyncLocalStorage threading per-operation `State` through the call stack. + * Wrappers enter a state via `wideEventStorage.run(state, () => fn())` and + * any code in the async call tree retrieves it via `fromContext()`. + */ +export const wideEventStorage = new AsyncLocalStorage(); + +/** Returns the State attached to the current async context, or null. */ +export function fromContext(): State | null { + return wideEventStorage.getStore() ?? null; +} diff --git a/apps/supervisor/src/wideEvents/emit.test.ts b/apps/supervisor/src/wideEvents/emit.test.ts new file mode 100644 index 00000000000..0daefa64873 --- /dev/null +++ b/apps/supervisor/src/wideEvents/emit.test.ts @@ -0,0 +1,134 @@ +import { describe, it, expect } from "vitest"; +import { emit, EmitMessage } from "./emit.js"; +import { newState } from "./new.js"; + +function captureEmit(state: Parameters[0]): Record { + const captured: string[] = []; + const origWrite = process.stdout.write; + process.stdout.write = ((chunk: unknown) => { + captured.push(String(chunk)); + return true; + }) as typeof process.stdout.write; + try { + emit(state); + } finally { + process.stdout.write = origWrite; + } + expect(captured).toHaveLength(1); + const line = captured[0]; + if (!line) throw new Error("no captured line"); + return JSON.parse(line) as Record; +} + +describe("emit", () => { + it("emits a single line with the stable message + request_id", () => { + const s = newState({ service: "supervisor", env: {} }); + s.statusCode = 200; + s.ok = true; + s.durationMs = 5; + const out = captureEmit(s); + expect(out.msg).toBe(EmitMessage); + expect(out.request_id).toBe(s.requestId); + expect(out.service).toBe("supervisor"); + expect(out.ok).toBe(true); + expect(out.status).toBe(200); + expect(out.duration_ms).toBe(5); + }); + + it("emits start_time as an ISO timestamp set by newState", () => { + const s = newState({ service: "supervisor", env: {} }); + s.statusCode = 200; + s.ok = true; + const out = captureEmit(s); + expect(typeof out.start_time).toBe("string"); + // Microsecond-precision RFC3339 (6 fractional digits), parseable as a date. + expect(out.start_time).toMatch(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z$/); + expect(Number.isNaN(new Date(out.start_time as string).getTime())).toBe(false); + }); + + it("omits start_time when unset", () => { + const s = newState({ service: "supervisor", env: {} }); + delete s.startTime; + s.statusCode = 200; + s.ok = true; + const out = captureEmit(s); + expect(out).not.toHaveProperty("start_time"); + }); + + it("omits empty optional fields", () => { + const s = newState({ service: "supervisor", env: {} }); + s.statusCode = 200; + s.ok = true; + const out = captureEmit(s); + expect(out).not.toHaveProperty("trace_id"); + expect(out).not.toHaveProperty("version"); + expect(out).not.toHaveProperty("commit_sha"); + expect(out).not.toHaveProperty("error.code"); + }); + + it("flattens meta keys as meta.", () => { + const s = newState({ service: "supervisor", env: {} }); + s.statusCode = 200; + s.ok = true; + s.meta.run_id = "run_abc"; + s.meta.deployment_id = "dep_xyz"; + const out = captureEmit(s); + expect(out["meta.run_id"]).toBe("run_abc"); + expect(out["meta.deployment_id"]).toBe("dep_xyz"); + expect(out).not.toHaveProperty("meta"); + }); + + it("flattens phases as phase..", () => { + const s = newState({ service: "supervisor", env: {} }); + s.statusCode = 200; + s.ok = true; + s.phases.push({ name: "warm_start", durationMs: 12, ok: true, attempts: 1 }); + s.phases.push({ + name: "workload_create", + durationMs: 3, + ok: false, + attempts: 2, + errorCode: "Error", + errorMsg: "boom", + sub: { create_ms: 1 }, + }); + const out = captureEmit(s); + expect(out["phase.warm_start.duration_ms"]).toBe(12); + expect(out["phase.warm_start.ok"]).toBe(true); + expect(out["phase.warm_start.attempts"]).toBe(1); + expect(out["phase.workload_create.duration_ms"]).toBe(3); + expect(out["phase.workload_create.ok"]).toBe(false); + expect(out["phase.workload_create.attempts"]).toBe(2); + expect(out["phase.workload_create.error_code"]).toBe("Error"); + expect(out["phase.workload_create.error_message"]).toBe("boom"); + expect(out["phase.workload_create.create_ms"]).toBe(1); + }); + + it("includes error.code/message/kind when state.error is set", () => { + const s = newState({ service: "supervisor", env: {} }); + s.statusCode = 500; + s.error = { code: "InternalError", message: "kaboom", kind: "internal" }; + const out = captureEmit(s); + expect(out["error.code"]).toBe("InternalError"); + expect(out["error.message"]).toBe("kaboom"); + expect(out["error.kind"]).toBe("internal"); + }); + + it("truncates very long error messages", () => { + const s = newState({ service: "supervisor", env: {} }); + s.error = { code: "Big", message: "x".repeat(2000), kind: "internal" }; + const out = captureEmit(s); + expect((out["error.message"] as string).length).toBe(512); + }); + + it("flattens extras at the top level", () => { + const s = newState({ service: "supervisor", env: {} }); + s.statusCode = 200; + s.ok = true; + s.extras.route = "/health"; + s.extras["dispatch.result"] = "hit"; + const out = captureEmit(s); + expect(out.route).toBe("/health"); + expect(out["dispatch.result"]).toBe("hit"); + }); +}); diff --git a/apps/supervisor/src/wideEvents/emit.ts b/apps/supervisor/src/wideEvents/emit.ts new file mode 100644 index 00000000000..bfb03ad36c4 --- /dev/null +++ b/apps/supervisor/src/wideEvents/emit.ts @@ -0,0 +1,84 @@ +import type { State } from "./state.js"; +import { truncateUtf8 } from "./truncate.js"; + +/** + * Stable slog message string for every wide event. Downstream filters (jq, + * Axiom queries, Vector pipelines) pin to this constant. The `service` field + * disambiguates which service emitted it. + */ +export const EmitMessage = "wide_event"; + +const MAX_ERROR_MSG_BYTES = 512; + +/** + * Serializes a State as a single flat-keyed JSON line on stdout. Keys are + * flat (no nested objects) to keep jq filtering and Axiom indexing cheap. + * Empty optional fields are omitted. + */ +export function emit(state: State): void { + // Best-effort: an observability failure (serialization, stdout write) must + // never break or mask the caller's operation. Every call site relies on this. + try { + const out: Record = { + msg: EmitMessage, + request_id: state.requestId, + }; + + if (state.traceId) out.trace_id = state.traceId; + appendIfSet(out, "start_time", state.startTime); + appendIfSet(out, "service", state.service); + appendIfSet(out, "version", state.version); + appendIfSet(out, "commit_sha", state.commitSha); + appendIfSet(out, "region", state.region); + appendIfSet(out, "node_id", state.nodeId); + + appendIfSet(out, "op", state.op); + appendIfSet(out, "kind", state.kind); + + out.ok = state.ok; + if (state.statusCode !== 0) out.status = state.statusCode; + out.duration_ms = state.durationMs; + + if (state.error) { + appendIfSet(out, "error.code", state.error.code); + appendIfSet(out, "error.message", truncateUtf8(state.error.message, MAX_ERROR_MSG_BYTES)); + appendIfSet(out, "error.kind", state.error.kind); + } + + for (const [k, v] of Object.entries(state.meta)) { + out["meta." + k] = v; + } + + for (const p of state.phases) { + const prefix = "phase." + p.name + "."; + out[prefix + "duration_ms"] = p.durationMs; + out[prefix + "ok"] = p.ok; + out[prefix + "attempts"] = p.attempts; + if (p.errorCode) out[prefix + "error_code"] = p.errorCode; + if (p.errorMsg) out[prefix + "error_message"] = p.errorMsg; + if (p.sub) { + for (const [sk, sv] of Object.entries(p.sub)) { + out[prefix + sk] = sv; + } + } + } + + for (const [k, v] of Object.entries(state.extras)) { + out[k] = v; + } + + process.stdout.write(JSON.stringify(out) + "\n"); + } catch (err) { + try { + process.stderr.write( + `wide_event_emit_failed: ${err instanceof Error ? err.message : String(err)}\n` + ); + } catch { + // last resort - drop the event rather than throw + } + } +} + +function appendIfSet(out: Record, key: string, value: string | undefined): void { + if (value) out[key] = value; +} diff --git a/apps/supervisor/src/wideEvents/index.ts b/apps/supervisor/src/wideEvents/index.ts new file mode 100644 index 00000000000..4eda429a50a --- /dev/null +++ b/apps/supervisor/src/wideEvents/index.ts @@ -0,0 +1,29 @@ +/** + * Wide-event observability surface for the supervisor. One flat-keyed JSON + * line per natural unit of work (HTTP request, dequeue iteration, socket + * lifecycle event). Events join across services via `trace_id` (parsed from + * the inbound W3C `traceparent`) and `meta.run_id`. + * + * Off by default behind a kill switch - the dispatch hotpath runs at high + * QPS, so logging pressure must be cleanly removable. + */ +export { type Env, isValidRequestId, newState, type NewStateOptions } from "./new.js"; +export { emit, EmitMessage } from "./emit.js"; +export { parseTraceId } from "./traceparent.js"; +export { fromContext, wideEventStorage } from "./context.js"; +export { + type PhaseOpt, + recordPhase, + recordPhaseSince, + timePhase, +} from "./record.js"; +export { + emitOneShot, + runWideEvent, + setExtra, + setMeta, + type WideEventLifecycleOptions, + type WideEventOptions, +} from "./middleware.js"; +export type { ErrorInfo, PhaseRecord, State } from "./state.js"; +export { encodeBaggage } from "./baggage.js"; diff --git a/apps/supervisor/src/wideEvents/middleware.test.ts b/apps/supervisor/src/wideEvents/middleware.test.ts new file mode 100644 index 00000000000..afb59f43d6e --- /dev/null +++ b/apps/supervisor/src/wideEvents/middleware.test.ts @@ -0,0 +1,207 @@ +import { describe, it, expect } from "vitest"; +import { fromContext } from "./context.js"; +import { emitOneShot, runWideEvent, setMeta } from "./middleware.js"; + +function captureStdout(fn: () => Promise | unknown): Promise { + const captured: string[] = []; + const orig = process.stdout.write; + process.stdout.write = ((chunk: unknown) => { + captured.push(String(chunk)); + return true; + }) as typeof process.stdout.write; + return Promise.resolve(fn()) + .finally(() => { + process.stdout.write = orig; + }) + .then(() => captured); +} + +describe("runWideEvent", () => { + it("emits one event with ok=true when no statusCode is set", async () => { + const lines = await captureStdout(async () => { + await runWideEvent( + { service: "supervisor", env: {}, enabled: true, op: "test", route: "/x", method: "POST" }, + async () => undefined + ); + }); + expect(lines).toHaveLength(1); + const line = lines[0]; + if (!line) throw new Error("no line"); + const ev = JSON.parse(line) as Record; + expect(ev.ok).toBe(true); + expect(ev.service).toBe("supervisor"); + expect(ev.route).toBe("/x"); + expect(ev.method).toBe("POST"); + expect(typeof ev.duration_ms).toBe("number"); + expect(typeof ev.request_id).toBe("string"); + }); + + it("derives ok from statusCode set via finalize", async () => { + const lines = await captureStdout(async () => { + await runWideEvent( + { service: "supervisor", env: {}, enabled: true, op: "test" }, + async () => undefined, + (state) => { + state.statusCode = 200; + } + ); + }); + const line = lines[0]; + if (!line) throw new Error("no line"); + const ev = JSON.parse(line) as Record; + expect(ev.ok).toBe(true); + expect(ev.status).toBe(200); + }); + + it("treats 4xx as ok=false", async () => { + const lines = await captureStdout(async () => { + await runWideEvent( + { service: "supervisor", env: {}, enabled: true, op: "test" }, + async () => undefined, + (state) => { + state.statusCode = 400; + } + ); + }); + const line = lines[0]; + if (!line) throw new Error("no line"); + const ev = JSON.parse(line) as Record; + expect(ev.ok).toBe(false); + expect(ev.status).toBe(400); + }); + + it("emits ok=false with error.kind=internal on throw", async () => { + const lines = await captureStdout(async () => { + await runWideEvent( + { service: "supervisor", env: {}, enabled: true, op: "test" }, + async () => { + throw new Error("boom"); + } + ).catch(() => undefined); + }); + const line = lines[0]; + if (!line) throw new Error("no line"); + const ev = JSON.parse(line) as Record; + expect(ev.ok).toBe(false); + expect(ev.status).toBe(500); + expect(ev["error.kind"]).toBe("internal"); + expect(ev["error.message"]).toBe("boom"); + }); + + it("threads state through AsyncLocalStorage", async () => { + const lines = await captureStdout(async () => { + await runWideEvent( + { service: "supervisor", env: {}, enabled: true, op: "test" }, + async () => { + setMeta(fromContext(), "run_id", "run_abc"); + } + ); + }); + const line = lines[0]; + if (!line) throw new Error("no line"); + const ev = JSON.parse(line) as Record; + expect(ev["meta.run_id"]).toBe("run_abc"); + expect(ev.ok).toBe(true); + }); + + it("picks up inbound traceparent for trace_id", async () => { + const tp = "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"; + const lines = await captureStdout(async () => { + await runWideEvent( + { service: "supervisor", env: {}, enabled: true, op: "test", traceparent: tp }, + async () => undefined + ); + }); + const line = lines[0]; + if (!line) throw new Error("no line"); + const ev = JSON.parse(line) as Record; + expect(ev.trace_id).toBe("4bf92f3577b34da6a3ce929d0e0e4736"); + }); + + it("honours setup() to attach meta and extras before fn runs", async () => { + const lines = await captureStdout(async () => { + await runWideEvent( + { + service: "supervisor", + env: {}, + enabled: true, op: "test", + setup: (state) => { + state.meta.run_id = "run_abc"; + state.extras.iteration = "dequeue"; + }, + }, + async () => undefined + ); + }); + const line = lines[0]; + if (!line) throw new Error("no line"); + const ev = JSON.parse(line) as Record; + expect(ev["meta.run_id"]).toBe("run_abc"); + expect(ev.iteration).toBe("dequeue"); + }); + + it("short-circuits to pass-through when enabled=false", async () => { + let seenState: ReturnType = null; + const lines = await captureStdout(async () => { + await runWideEvent( + { service: "supervisor", env: {}, enabled: false, op: "test" }, + async () => { + seenState = fromContext(); + } + ); + }); + expect(lines).toHaveLength(0); + expect(seenState).toBe(null); + }); + + it("isolates state across concurrent invocations", async () => { + const lines = await captureStdout(async () => { + await Promise.all( + ["a", "b", "c"].map((tag) => + runWideEvent( + { service: "supervisor", env: {}, enabled: true, op: "test" }, + async () => { + const s = fromContext(); + if (!s) throw new Error("no state"); + s.meta.tag = tag; + await new Promise((r) => setTimeout(r, 5)); + expect(s.meta.tag).toBe(tag); + } + ) + ) + ); + }); + const tags = lines.map((l) => (JSON.parse(l) as Record)["meta.tag"]); + expect(tags.sort()).toEqual(["a", "b", "c"]); + }); +}); + +describe("emitOneShot", () => { + it("emits a single event with populated meta when enabled", async () => { + const lines = await captureStdout(() => { + emitOneShot({ + service: "supervisor", + env: {}, + enabled: true, op: "test", + populate: (s) => { + s.meta.run_id = "run_abc"; + s.extras.event = "run:start"; + }, + }); + }); + expect(lines).toHaveLength(1); + const line = lines[0]; + if (!line) throw new Error("no line"); + const ev = JSON.parse(line) as Record; + expect(ev.ok).toBe(true); + expect(ev["meta.run_id"]).toBe("run_abc"); + expect(ev.event).toBe("run:start"); + }); + + it("emits nothing when disabled", async () => { + const lines = await captureStdout(() => { + emitOneShot({ service: "supervisor", env: {}, enabled: false, op: "test" }); + }); + expect(lines).toHaveLength(0); + }); +}); diff --git a/apps/supervisor/src/wideEvents/middleware.ts b/apps/supervisor/src/wideEvents/middleware.ts new file mode 100644 index 00000000000..034c136414f --- /dev/null +++ b/apps/supervisor/src/wideEvents/middleware.ts @@ -0,0 +1,132 @@ +import { emit } from "./emit.js"; +import { newState, type Env } from "./new.js"; +import { wideEventStorage } from "./context.js"; +import type { State } from "./state.js"; + +/** Options common to every wide-event lifecycle. */ +export type WideEventOptions = { + service: string; + env: Env; + /** + * Kill switch. When false, lifecycles degenerate into transparent + * pass-through - no State allocation, no AsyncLocalStorage run, no emit. + * Important for the dispatch hotpath where logging pressure must be + * cleanly removable. + */ + enabled: boolean; +}; + +/** Per-invocation options layered on top of `WideEventOptions`. */ +export type WideEventLifecycleOptions = WideEventOptions & { + /** Operation discriminator (`instance.create`, `dequeue`, ...). Required. */ + op: string; + /** Event shape: `inbound` | `outbound` | `event` | `scheduled`. Optional. */ + kind?: string; + /** Route template (HTTP only) captured into `extras.route`. */ + route?: string; + /** HTTP method captured into `extras.method`. */ + method?: string; + /** Inbound W3C traceparent (HTTP header, queue message field). */ + traceparent?: string; + /** Inbound request id (e.g. `x-request-id` header). */ + inboundRequestId?: string; + /** Runs after the state is built, before the wrapped fn. Use to attach meta. */ + setup?: (state: State) => void; +}; + +/** + * Runs `fn` inside an AsyncLocalStorage state and emits one wide event on + * completion or error. `finalize` runs after `fn` returns but before emit - + * use it to read out-of-band outcome info (e.g. `res.statusCode` for an HTTP + * route) and assign to `state.statusCode`. The wrapper computes `ok` from + * `statusCode` if it's set; otherwise it defaults to true on success. + * + * Returns the original `fn` result. When `enabled=false`, `fn` runs unchanged + * with no event emitted. + */ +export async function runWideEvent( + opts: WideEventLifecycleOptions, + fn: () => Promise | T, + finalize?: (state: State) => void +): Promise { + if (!opts.enabled) { + return fn(); + } + + const state = newState({ + service: opts.service, + env: opts.env, + inboundRequestId: opts.inboundRequestId, + traceparent: opts.traceparent, + op: opts.op, + kind: opts.kind, + }); + if (opts.route) state.extras.route = opts.route; + if (opts.method) state.extras.method = opts.method; + + const start = performance.now(); + try { + if (opts.setup) opts.setup(state); + const result = await wideEventStorage.run(state, () => Promise.resolve(fn())); + state.durationMs = Math.round(performance.now() - start); + if (finalize) finalize(state); + if (state.statusCode !== 0) { + state.ok = state.statusCode >= 200 && state.statusCode < 300; + } else { + state.ok = true; + } + emit(state); + return result; + } catch (err) { + state.durationMs = Math.round(performance.now() - start); + const e = err instanceof Error ? err : new Error(String(err)); + if (state.statusCode === 0) state.statusCode = 500; + state.ok = false; + state.error = { + code: e.name || "Error", + message: e.message, + kind: "internal", + }; + emit(state); + throw err; + } +} + +/** + * One-shot wide event with no wrapped operation. Use for socket lifecycle + * events (`run:start`, `run:stop`) where there is no surrounding async unit + * of work to time. `populate` runs synchronously to attach meta/extras + * before emit. + */ +export function emitOneShot( + opts: WideEventOptions & { + op: string; + kind?: string; + traceparent?: string; + populate?: (state: State) => void; + } +): void { + if (!opts.enabled) return; + const state = newState({ + service: opts.service, + env: opts.env, + traceparent: opts.traceparent, + op: opts.op, + kind: opts.kind, + }); + if (opts.populate) opts.populate(state); + state.ok = true; + emit(state); +} + +/** Convenience accessor for in-handler meta mutation. */ +export function setMeta(state: State | null, key: string, value: string): void { + if (!state) return; + state.meta[key] = value; +} + +/** Convenience for free-form fields (did_warm_start, dispatch.result, ...). */ +export function setExtra(state: State | null, key: string, value: unknown): void { + if (!state) return; + state.extras[key] = value; +} diff --git a/apps/supervisor/src/wideEvents/new.test.ts b/apps/supervisor/src/wideEvents/new.test.ts new file mode 100644 index 00000000000..476c49c3d0e --- /dev/null +++ b/apps/supervisor/src/wideEvents/new.test.ts @@ -0,0 +1,81 @@ +import { describe, it, expect } from "vitest"; +import { isValidRequestId, newState } from "./new.js"; + +describe("isValidRequestId", () => { + it("accepts visible ASCII", () => { + expect(isValidRequestId("req-abc-123_456.7")).toBe(true); + expect(isValidRequestId("a")).toBe(true); + }); + + it("rejects empty string", () => { + expect(isValidRequestId("")).toBe(false); + }); + + it("rejects overlong strings (>128 bytes)", () => { + expect(isValidRequestId("a".repeat(128))).toBe(true); + expect(isValidRequestId("a".repeat(129))).toBe(false); + }); + + it("rejects whitespace, newlines, control chars", () => { + expect(isValidRequestId("has space")).toBe(false); + expect(isValidRequestId("has\ttab")).toBe(false); + expect(isValidRequestId("has\nnewline")).toBe(false); + expect(isValidRequestId("\x00null")).toBe(false); + }); + + it("rejects high-bit / non-ASCII", () => { + expect(isValidRequestId("café")).toBe(false); + expect(isValidRequestId("a\x7f")).toBe(false); + }); +}); + +describe("newState", () => { + const env = { version: "1.0.0", commitSha: "abc123", region: "us-east-1", nodeId: "node-1" }; + + it("populates service identity from env", () => { + const s = newState({ service: "supervisor", env }); + expect(s.service).toBe("supervisor"); + expect(s.version).toBe("1.0.0"); + expect(s.commitSha).toBe("abc123"); + expect(s.region).toBe("us-east-1"); + expect(s.nodeId).toBe("node-1"); + }); + + it("mints a fresh request id when none provided", () => { + const s = newState({ service: "test", env: {} }); + expect(s.requestId).toMatch(/^req-[0-9a-f]{32}$/); + }); + + it("honours a valid inbound request id", () => { + const s = newState({ service: "test", env: {}, inboundRequestId: "trace-abc-123" }); + expect(s.requestId).toBe("trace-abc-123"); + }); + + it("rejects unsafe inbound request id and mints a fresh one", () => { + const s = newState({ service: "test", env: {}, inboundRequestId: "has space" }); + expect(s.requestId).toMatch(/^req-[0-9a-f]{32}$/); + }); + + it("parses traceparent into traceId and preserves the raw header", () => { + const tp = "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"; + const s = newState({ service: "test", env: {}, traceparent: tp }); + expect(s.traceId).toBe("4bf92f3577b34da6a3ce929d0e0e4736"); + expect(s.traceparent).toBe(tp); + }); + + it("leaves traceId empty when no traceparent provided", () => { + const s = newState({ service: "test", env: {} }); + expect(s.traceId).toBe(""); + expect(s.traceparent).toBe(""); + }); + + it("initialises empty meta/extras/phases", () => { + const s = newState({ service: "test", env: {} }); + expect(s.meta).toEqual({}); + expect(s.extras).toEqual({}); + expect(s.phases).toEqual([]); + expect(s.ok).toBe(false); + expect(s.statusCode).toBe(0); + expect(s.durationMs).toBe(0); + }); +}); diff --git a/apps/supervisor/src/wideEvents/new.ts b/apps/supervisor/src/wideEvents/new.ts new file mode 100644 index 00000000000..7a4dba8a09c --- /dev/null +++ b/apps/supervisor/src/wideEvents/new.ts @@ -0,0 +1,96 @@ +import { randomBytes } from "node:crypto"; +import { parseTraceId } from "./traceparent.js"; +import type { State } from "./state.js"; + +const MAX_REQUEST_ID_LEN = 128; + +/** + * Validates an inbound request id. Non-empty, no longer than 128 bytes, + * composed entirely of visible ASCII (0x21..0x7E). Rejects newlines, control + * characters, whitespace, DEL, high-bit bytes - any of which could poison the + * log pipeline if echoed back verbatim. + */ +export function isValidRequestId(s: string): boolean { + if (s.length === 0 || s.length > MAX_REQUEST_ID_LEN) return false; + for (let i = 0; i < s.length; i++) { + const c = s.charCodeAt(i); + if (c < 0x21 || c > 0x7e) return false; + } + return true; +} + +/** + * Service-level identity that's constant for the lifetime of the process. + * Populated once at startup, copied into every State. + */ +export type Env = { + version?: string; + commitSha?: string; + region?: string; + nodeId?: string; +}; + +export type NewStateOptions = { + service: string; + env: Env; + /** Optional inbound request id (e.g. from `x-request-id`). If unsafe or absent, a fresh `req-` is minted. */ + inboundRequestId?: string; + /** Optional inbound W3C traceparent (HTTP header, queue message field). */ + traceparent?: string; + /** Operation discriminator. Dotted `noun.verb`. Defaults to empty (set later). */ + op?: string; + /** Event shape: `inbound` | `outbound` | `event` | `scheduled`. Defaults to empty. */ + kind?: string; +}; + +/** + * Builds a State for a wide-event lifecycle. + * + * - requestId: honours `inboundRequestId` if present and safe; otherwise + * mints a fresh `req-` id. + * - traceId: parsed from the provided traceparent (graceful empty if + * absent or malformed). + * - traceparent: preserved verbatim for downstream propagation. + */ +export function newState(opts: NewStateOptions): State { + const traceparent = opts.traceparent ?? ""; + const inbound = opts.inboundRequestId ?? ""; + const requestId = isValidRequestId(inbound) ? inbound : newRequestId(); + + return { + startTime: nowRfc3339(), + requestId, + traceId: parseTraceId(traceparent), + traceparent, + service: opts.service, + version: opts.env.version, + commitSha: opts.env.commitSha, + region: opts.env.region, + nodeId: opts.env.nodeId, + op: opts.op ?? "", + kind: opts.kind ?? "", + meta: {}, + phases: [], + ok: false, + statusCode: 0, + durationMs: 0, + extras: {}, + }; +} + +function newRequestId(): string { + return "req-" + randomBytes(16).toString("hex"); +} + +/** + * Current wall-clock time as an RFC3339 string with microsecond precision. + * `Date.toISOString()` only has millisecond resolution, which is too coarse to + * order multiple wide events emitted within the same millisecond. + * `performance.timeOrigin + performance.now()` gives a sub-millisecond wall-clock + * reading; we append the microsecond digits to the millisecond ISO string. + */ +function nowRfc3339(): string { + const ms = performance.timeOrigin + performance.now(); + const micros = Math.floor((ms % 1) * 1000); // microseconds within the millisecond (0..999) + return new Date(ms).toISOString().slice(0, -1) + String(micros).padStart(3, "0") + "Z"; +} diff --git a/apps/supervisor/src/wideEvents/record.test.ts b/apps/supervisor/src/wideEvents/record.test.ts new file mode 100644 index 00000000000..beeb0fff221 --- /dev/null +++ b/apps/supervisor/src/wideEvents/record.test.ts @@ -0,0 +1,112 @@ +import { describe, it, expect } from "vitest"; +import { fromContext, wideEventStorage } from "./context.js"; +import { recordPhase, recordPhaseSince, timePhase } from "./record.js"; +import { newState } from "./new.js"; +import type { State } from "./state.js"; + +function makeState(): State { + return newState({ service: "test", env: {} }); +} + +describe("recordPhase", () => { + it("appends a successful phase", () => { + const s = makeState(); + recordPhase(s, "lookup", performance.now() - 50, undefined); + expect(s.phases).toHaveLength(1); + const phase = s.phases[0]; + if (!phase) throw new Error("missing phase"); + expect(phase.name).toBe("lookup"); + expect(phase.ok).toBe(true); + expect(phase.attempts).toBe(1); + expect(phase.durationMs).toBeGreaterThanOrEqual(45); + }); + + it("appends a failed phase with error code/message", () => { + const s = makeState(); + recordPhase(s, "dispatch", performance.now(), new Error("nope")); + const phase = s.phases[0]; + if (!phase) throw new Error("missing phase"); + expect(phase.ok).toBe(false); + expect(phase.errorCode).toBe("Error"); + expect(phase.errorMsg).toBe("nope"); + }); + + it("truncates very long error messages", () => { + const s = makeState(); + recordPhase(s, "x", performance.now(), new Error("y".repeat(2000))); + const phase = s.phases[0]; + if (!phase) throw new Error("missing phase"); + expect(phase.errorMsg?.length).toBe(512); + }); + + it("honours opts.attempts", () => { + const s = makeState(); + recordPhase(s, "retry", performance.now(), undefined, { attempts: 3 }); + expect(s.phases[0]?.attempts).toBe(3); + }); + + it("attaches sub-timings", () => { + const s = makeState(); + recordPhase(s, "complex", performance.now(), undefined, { sub: { setup_ms: 10, work_ms: 5 } }); + expect(s.phases[0]?.sub).toEqual({ setup_ms: 10, work_ms: 5 }); + }); + + it("is a no-op when state is null", () => { + expect(() => recordPhase(null, "x", performance.now(), undefined)).not.toThrow(); + }); +}); + +describe("timePhase + AsyncLocalStorage threading", () => { + it("records via fromContext on success", async () => { + const s = makeState(); + const value = await wideEventStorage.run(s, () => timePhase("work", async () => 42)); + expect(value).toBe(42); + expect(s.phases).toHaveLength(1); + expect(s.phases[0]?.ok).toBe(true); + }); + + it("records via fromContext on error and rethrows", async () => { + const s = makeState(); + await expect( + wideEventStorage.run(s, () => + timePhase("work", async () => { + throw new Error("boom"); + }) + ) + ).rejects.toThrow("boom"); + expect(s.phases).toHaveLength(1); + expect(s.phases[0]?.ok).toBe(false); + expect(s.phases[0]?.errorMsg).toBe("boom"); + }); + + it("runs fn unchanged when no state on context", async () => { + const value = await timePhase("work", async () => "ok"); + expect(value).toBe("ok"); + }); +}); + +describe("recordPhaseSince", () => { + it("records using a caller-captured start time", async () => { + const s = makeState(); + await wideEventStorage.run(s, async () => { + const start = performance.now(); + await new Promise((r) => setTimeout(r, 10)); + recordPhaseSince("spanning", start, undefined); + }); + expect(s.phases).toHaveLength(1); + expect(s.phases[0]?.durationMs).toBeGreaterThanOrEqual(8); + }); +}); + +describe("fromContext", () => { + it("returns null when no state attached", () => { + expect(fromContext()).toBe(null); + }); + + it("returns the state when inside wideEventStorage.run", () => { + const s = makeState(); + wideEventStorage.run(s, () => { + expect(fromContext()).toBe(s); + }); + }); +}); diff --git a/apps/supervisor/src/wideEvents/record.ts b/apps/supervisor/src/wideEvents/record.ts new file mode 100644 index 00000000000..b7b59089a0e --- /dev/null +++ b/apps/supervisor/src/wideEvents/record.ts @@ -0,0 +1,82 @@ +import { fromContext } from "./context.js"; +import type { PhaseRecord, State } from "./state.js"; +import { truncateUtf8 } from "./truncate.js"; + +const MAX_ERROR_MSG_BYTES = 512; + +/** Optional knobs for a phase record. */ +export type PhaseOpt = { + /** Attempt count for the phase (default 1). */ + attempts?: number; + /** Sub-timings to fold into `phase..`. */ + sub?: Record; +}; + +/** + * Appends a phase outcome to `state.phases`. Safe to call on success + * (`err === undefined`) and error paths. `errorMsg` is truncated to 512 bytes + * to keep the wide event compact. No-op if state is null. + */ +export function recordPhase( + state: State | null, + name: string, + startMs: number, + err: Error | undefined, + opts: PhaseOpt = {} +): void { + if (!state) return; + const p: PhaseRecord = { + name, + durationMs: Math.round(performance.now() - startMs), + ok: err === undefined, + attempts: opts.attempts ?? 1, + }; + if (err) { + p.errorCode = err.name || "Error"; + p.errorMsg = truncateUtf8(err.message, MAX_ERROR_MSG_BYTES); + } + if (opts.sub) p.sub = opts.sub; + state.phases.push(p); +} + +/** + * Runs `fn` and appends a phase outcome to the State attached to the current + * async context. If no state is on context (test paths, background work), + * `fn` runs unchanged. The phase is recorded on both success and error paths + * so failed phases still appear in the wide event with duration_ms + + * error_code. + */ +export async function timePhase( + name: string, + fn: () => Promise | T, + opts: PhaseOpt = {} +): Promise { + const start = performance.now(); + try { + const result = await fn(); + recordPhase(fromContext(), name, start, undefined, opts); + return result; + } catch (err) { + recordPhase(fromContext(), name, start, asError(err), opts); + throw err; + } +} + +/** + * Appends a phase outcome to the State attached to the current async context + * using a `startMs` captured by the caller. Use when the phase boundary spans + * multiple calls with intermediate error handling that can't fit inside a + * single `timePhase` closure. Nil-state safe. + */ +export function recordPhaseSince( + name: string, + startMs: number, + err: Error | undefined, + opts: PhaseOpt = {} +): void { + recordPhase(fromContext(), name, startMs, err, opts); +} + +function asError(e: unknown): Error { + return e instanceof Error ? e : new Error(String(e)); +} diff --git a/apps/supervisor/src/wideEvents/state.ts b/apps/supervisor/src/wideEvents/state.ts new file mode 100644 index 00000000000..dece3a3f5fd --- /dev/null +++ b/apps/supervisor/src/wideEvents/state.ts @@ -0,0 +1,84 @@ +/** + * Per-event accumulator backing a single wide event. The supervisor emits one + * flat-keyed JSON line per natural unit of work (dequeue iteration, HTTP + * request, socket lifecycle event). Optional fields are omitted on emit so + * events stay compact. + */ +export type State = { + /** + * Wall-clock time the event began, as an ISO-8601 string. Emitted as + * `start_time` so log collection orders events by when work started rather + * than by the collector's ingestion time. + */ + startTime?: string; + + // Cross-stack correlation. + requestId: string; + traceId: string; + /** + * Raw inbound W3C `traceparent`, preserved verbatim so outbound calls can + * propagate the same trace context without losing the parent span-id. + * Empty when no inbound traceparent was set. + */ + traceparent: string; + + // Service identity (set by `newState` from Env). + service: string; + version?: string; + commitSha?: string; + region?: string; + nodeId?: string; + + /** + * Operation discriminator. Dotted `noun.verb` (e.g. `instance.create`, + * `snapshot.dispatch`). Low cardinality - bounded set per service, not + * unbounded. Empty allowed during construction but expected to be set + * before emit. + */ + op: string; + + /** + * Event shape. `inbound` for received requests, `outbound` for outgoing + * calls, `event` for ambient occurrences with no meaningful duration, + * `scheduled` for timer-driven work. Empty allowed; omitted from emit + * when empty. + */ + kind: string; + + // Caller-attached opaque metadata, flattened to `meta.` on emit. + meta: Record; + + // Per-phase outcomes, in completion order. + phases: PhaseRecord[]; + + // Top-level outcome (set after the wrapped operation returns). + ok: boolean; + statusCode: number; + durationMs: number; + error?: ErrorInfo; + + // Free-form ad-hoc additions (route, method, did_warm_start, ...). + extras: Record; +}; + +/** + * Single named phase outcome. Retries collapse into `attempts > 1` with the + * last error reflected in errorCode/errorMsg. + */ +export type PhaseRecord = { + name: string; + durationMs: number; + ok: boolean; + attempts: number; + errorCode?: string; + errorMsg?: string; + sub?: Record; +}; + +/** Top-level error summary for a failed operation. */ +export type ErrorInfo = { + code: string; + message: string; + /** Coarse classification - "client" | "upstream" | "internal" | "timeout". */ + kind: string; +}; diff --git a/apps/supervisor/src/wideEvents/traceparent.test.ts b/apps/supervisor/src/wideEvents/traceparent.test.ts new file mode 100644 index 00000000000..85ed31c3b6f --- /dev/null +++ b/apps/supervisor/src/wideEvents/traceparent.test.ts @@ -0,0 +1,43 @@ +import { describe, it, expect } from "vitest"; +import { parseTraceId } from "./traceparent.js"; + +describe("parseTraceId", () => { + const validTraceId = "4bf92f3577b34da6a3ce929d0e0e4736"; + const validHeader = `00-${validTraceId}-00f067aa0ba902b7-01`; + + it("extracts the trace-id from a valid W3C traceparent", () => { + expect(parseTraceId(validHeader)).toBe(validTraceId); + }); + + it("returns empty string for empty/null/undefined input", () => { + expect(parseTraceId("")).toBe(""); + expect(parseTraceId(null)).toBe(""); + expect(parseTraceId(undefined)).toBe(""); + }); + + it("returns empty for wrong segment count", () => { + expect(parseTraceId("00-abc-def")).toBe(""); + expect(parseTraceId("00-abc-def-01-extra")).toBe(""); + }); + + it("returns empty for non-zero version byte", () => { + expect(parseTraceId(`01-${validTraceId}-00f067aa0ba902b7-01`)).toBe(""); + }); + + it("returns empty for wrong-length trace-id", () => { + expect(parseTraceId("00-abc-00f067aa0ba902b7-01")).toBe(""); + }); + + it("returns empty for non-hex trace-id", () => { + expect(parseTraceId("00-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz-00f067aa0ba902b7-01")).toBe(""); + }); + + it("returns empty for all-zero trace-id", () => { + expect(parseTraceId("00-00000000000000000000000000000000-00f067aa0ba902b7-01")).toBe(""); + }); + + it("accepts uppercase hex", () => { + const tid = "4BF92F3577B34DA6A3CE929D0E0E4736"; + expect(parseTraceId(`00-${tid}-00f067aa0ba902b7-01`)).toBe(tid); + }); +}); diff --git a/apps/supervisor/src/wideEvents/traceparent.ts b/apps/supervisor/src/wideEvents/traceparent.ts new file mode 100644 index 00000000000..9e84294f067 --- /dev/null +++ b/apps/supervisor/src/wideEvents/traceparent.ts @@ -0,0 +1,39 @@ +/** + * Extracts the trace-id from a W3C `traceparent` header. Returns "" when the + * header is absent, malformed, or carries an all-zero trace-id. + * + * Format: `---` + * version : 2 hex chars, must be "00" + * trace-id: 32 hex chars, non-zero + * span-id : 16 hex chars (not validated - we only need trace-id) + * flags : 2 hex chars (not validated) + */ +export function parseTraceId(header: string | null | undefined): string { + if (!header) return ""; + const parts = header.split("-"); + if (parts.length !== 4) return ""; + if (parts[0] !== "00") return ""; + const tid = parts[1]; + if (!tid || tid.length !== 32) return ""; + if (!isHex(tid)) return ""; + if (isAllZero(tid)) return ""; + return tid; +} + +function isHex(s: string): boolean { + for (let i = 0; i < s.length; i++) { + const c = s.charCodeAt(i); + const isDigit = c >= 0x30 && c <= 0x39; + const isLower = c >= 0x61 && c <= 0x66; + const isUpper = c >= 0x41 && c <= 0x46; + if (!isDigit && !isLower && !isUpper) return false; + } + return true; +} + +function isAllZero(s: string): boolean { + for (let i = 0; i < s.length; i++) { + if (s.charCodeAt(i) !== 0x30) return false; + } + return true; +} diff --git a/apps/supervisor/src/wideEvents/truncate.test.ts b/apps/supervisor/src/wideEvents/truncate.test.ts new file mode 100644 index 00000000000..4eb272f1e60 --- /dev/null +++ b/apps/supervisor/src/wideEvents/truncate.test.ts @@ -0,0 +1,30 @@ +import { describe, it, expect } from "vitest"; +import { truncateUtf8 } from "./truncate.js"; + +describe("truncateUtf8", () => { + it("returns short ASCII unchanged", () => { + expect(truncateUtf8("hello", 512)).toBe("hello"); + }); + + it("truncates ASCII to the byte cap", () => { + expect(truncateUtf8("x".repeat(1024), 256)).toBe("x".repeat(256)); + }); + + it("never exceeds the byte cap for multibyte input", () => { + // "あ" is 3 UTF-8 bytes; 200 of them = 600 bytes. + const got = truncateUtf8("あ".repeat(200), 256); + expect(Buffer.byteLength(got, "utf8")).toBeLessThanOrEqual(256); + }); + + it("does not split a multibyte sequence", () => { + // 256 / 3 bytes = 85 whole chars (255 bytes), the 86th would overflow. + expect(truncateUtf8("あ".repeat(200), 256)).toBe("あ".repeat(85)); + }); + + it("does not split a surrogate pair", () => { + // "😀" is 2 UTF-16 units / 4 UTF-8 bytes; only one fits under a 5-byte cap. + const got = truncateUtf8("😀😀", 5); + expect(got).toBe("😀"); + expect(Buffer.byteLength(got, "utf8")).toBe(4); + }); +}); diff --git a/apps/supervisor/src/wideEvents/truncate.ts b/apps/supervisor/src/wideEvents/truncate.ts new file mode 100644 index 00000000000..417735fe77d --- /dev/null +++ b/apps/supervisor/src/wideEvents/truncate.ts @@ -0,0 +1,20 @@ +/** + * Truncate `value` to at most `maxBytes` UTF-8 bytes without splitting a + * multi-byte sequence or surrogate pair. Plain `.slice()` counts UTF-16 code + * units, so multibyte text can blow past a byte cap and cutting mid-pair + * leaves a lone surrogate that downstream JSON / Postgres consumers reject. + */ +export function truncateUtf8(value: string, maxBytes: number): string { + if (Buffer.byteLength(value, "utf8") <= maxBytes) return value; + + let bytes = 0; + let end = 0; + // `for..of` yields whole code points, so a surrogate pair is never split. + for (const ch of value) { + const size = Buffer.byteLength(ch, "utf8"); + if (bytes + size > maxBytes) break; + bytes += size; + end += ch.length; + } + return value.slice(0, end); +} diff --git a/apps/supervisor/src/workloadManager/compute.test.ts b/apps/supervisor/src/workloadManager/compute.test.ts new file mode 100644 index 00000000000..ea5ddabf285 --- /dev/null +++ b/apps/supervisor/src/workloadManager/compute.test.ts @@ -0,0 +1,56 @@ +import { describe, expect, it } from "vitest"; +import { ComputeClientError } from "@internal/compute"; +import { isRetryableCreateError, runnerNameForAttempt } from "./compute.js"; + +describe("runnerNameForAttempt", () => { + it("keeps the unsuffixed name for the first attempt", () => { + expect(runnerNameForAttempt("runner-abc123", 1)).toBe("runner-abc123"); + }); + + it("suffixes retry attempts deterministically", () => { + expect(runnerNameForAttempt("runner-abc123", 2)).toBe("runner-abc123-r2"); + expect(runnerNameForAttempt("runner-abc123", 3)).toBe("runner-abc123-r3"); + }); +}); + +describe("isRetryableCreateError", () => { + it("retries statuses where the create definitely did not commit", () => { + expect(isRetryableCreateError(new ComputeClientError(500, "tap busy", "http://gw"))).toBe( + true + ); + expect(isRetryableCreateError(new ComputeClientError(503, "no placement", "http://gw"))).toBe( + true + ); + }); + + it("does not retry lost-response statuses (create may have committed)", () => { + expect(isRetryableCreateError(new ComputeClientError(502, "bad gateway", "http://gw"))).toBe( + false + ); + expect( + isRetryableCreateError(new ComputeClientError(504, "gateway timeout", "http://gw")) + ).toBe(false); + }); + + it("does not retry 4xx responses", () => { + expect(isRetryableCreateError(new ComputeClientError(400, "bad request", "http://gw"))).toBe( + false + ); + expect(isRetryableCreateError(new ComputeClientError(409, "conflict", "http://gw"))).toBe( + false + ); + }); + + it("does not retry timeouts (instance may still be provisioning)", () => { + expect(isRetryableCreateError(new DOMException("timed out", "TimeoutError"))).toBe(false); + }); + + it("retries network-level fetch failures", () => { + expect(isRetryableCreateError(new TypeError("fetch failed"))).toBe(true); + }); + + it("does not retry unknown errors", () => { + expect(isRetryableCreateError(new Error("something else"))).toBe(false); + expect(isRetryableCreateError("string error")).toBe(false); + }); +}); diff --git a/apps/supervisor/src/workloadManager/compute.ts b/apps/supervisor/src/workloadManager/compute.ts index 1c00f33aad3..88c7645bbdf 100644 --- a/apps/supervisor/src/workloadManager/compute.ts +++ b/apps/supervisor/src/workloadManager/compute.ts @@ -6,10 +6,57 @@ import { type WorkloadManagerCreateOptions, type WorkloadManagerOptions, } from "./types.js"; -import { ComputeClient, stripImageDigest } from "@internal/compute"; +import { ComputeClient, ComputeClientError, stripImageDigest } from "@internal/compute"; +import { setTimeout as sleep } from "node:timers/promises"; import { extractTraceparent, getRunnerId } from "../util.js"; import type { OtlpTraceService } from "../services/otlpTraceService.js"; import { tryCatch } from "@trigger.dev/core"; +import { encodeBaggage, fromContext } from "../wideEvents/index.js"; + +const DEFAULT_CREATE_MAX_ATTEMPTS = 3; +const DEFAULT_CREATE_RETRY_BASE_DELAY_MS = 250; + +/** + * TEMPORARY (TRI-10293): a failed create can leave its instance name + * registered gateway/fcrun-side until async cleanup runs, so a same-name + * retry can 409 against our own residue. Until the gateway cleans up + * failed-create registrations properly, retry attempts get a deterministic + * suffix. Attempt 1 keeps the unsuffixed name so the non-retry path is + * unchanged; the suffixed name flows into both the instance name and + * TRIGGER_RUNNER_ID, which downstream flows treat as one opaque + * self-reported token. Only attempts following a ComputeClientError are + * suffixed - network-failure retries keep the same name on purpose, because + * the gateway's name-collision 409 is their safety net against + * double-creating an instance whose create response was lost. + */ +export function runnerNameForAttempt(runnerId: string, attempt: number): string { + return attempt === 1 ? runnerId : `${runnerId}-r${attempt}`; +} + +/** + * Whether a failed instance create is worth retrying. Only statuses where + * the create definitely did NOT commit are retried: 500 means the agent or + * fcrun returned a create error (e.g. a netns slot holding the tap busy, a + * full node disk - placement may differ on retry), 503 means the gateway + * had nowhere to place it. 502/504 are excluded: the gateway emits those + * when it fails to reach the node or read its response, which can happen + * AFTER the agent committed the create - and the gateway only records the + * instance name on a clean 201, so a same-name retry would miss the + * collision check and could double-create the VM on another node. 4xx won't + * heal on retry, and timeouts may still be provisioning. Network-level + * fetch failures are safe: if the gateway processed the create, its name + * index is populated and the retry 409s harmlessly. + */ +export function isRetryableCreateError(error: unknown): boolean { + if (error instanceof ComputeClientError) { + return error.status === 500 || error.status === 503; + } + if (error instanceof DOMException && error.name === "TimeoutError") { + return false; + } + // Network-level fetch failures (gateway briefly unreachable) + return error instanceof TypeError; +} type ComputeWorkloadManagerOptions = WorkloadManagerOptions & { gateway: { @@ -29,13 +76,23 @@ type ComputeWorkloadManagerOptions = WorkloadManagerOptions & { otelEndpoint: string; prettyLogs: boolean; }; + createRetry?: { + maxAttempts: number; + baseDelayMs: number; + }; }; export class ComputeWorkloadManager implements WorkloadManager { private readonly logger = new SimpleStructuredLogger("compute-workload-manager"); private readonly compute: ComputeClient; + private readonly createMaxAttempts: number; + private readonly createRetryBaseDelayMs: number; constructor(private opts: ComputeWorkloadManagerOptions) { + this.createMaxAttempts = opts.createRetry?.maxAttempts ?? DEFAULT_CREATE_MAX_ATTEMPTS; + this.createRetryBaseDelayMs = + opts.createRetry?.baseDelayMs ?? DEFAULT_CREATE_RETRY_BASE_DELAY_MS; + if (opts.workloadApiDomain) { this.logger.warn("⚠️ Custom workload API domain", { domain: opts.workloadApiDomain, @@ -46,6 +103,26 @@ export class ComputeWorkloadManager implements WorkloadManager { gatewayUrl: opts.gateway.url, authToken: opts.gateway.authToken, timeoutMs: opts.gateway.timeoutMs, + // Forward the current wide-event scope's traceparent + request_id so the + // downstream service continues the same trace and joins its own wide + // events to ours. Additionally serialize caller-supplied meta labels + // into the W3C Baggage header so the downstream service auto-stamps + // them even on early-error paths that bail before parsing the body. + // When called outside a wide-event scope (or when wide events are + // disabled), `fromContext` returns undefined and propagation is skipped. + getPropagationHeaders: () => { + const state = fromContext(); + if (!state) return {}; + const headers: Record = { "x-request-id": state.requestId }; + if (state.traceparent) { + headers.traceparent = state.traceparent; + } + const baggage = encodeBaggage(state.meta); + if (baggage) { + headers.baggage = baggage; + } + return headers; + }, }); } @@ -112,6 +189,12 @@ export class ComputeWorkloadManager implements WorkloadManager { // Strip image digest - resolve by tag, not digest const imageRef = stripImageDigest(opts.image); + // Labels forwarded to the compute provider for network-policy selection. + // `org` is always set so every run carries its org identity. + const labels: Record = { + org: opts.orgId, + }; + // Wide event: single canonical log line emitted in finally const event: Record = { // High-cardinality identifiers @@ -136,26 +219,71 @@ export class ComputeWorkloadManager implements WorkloadManager { const startMs = performance.now(); try { - const [error, data] = await tryCatch( - this.compute.instances.create({ - name: runnerId, - image: imageRef, - env: envVars, - cpu: opts.machine.cpu, - memory_gb: opts.machine.memory, - metadata: { - runId: opts.runFriendlyId, - envId: opts.envId, - envType: opts.envType, - orgId: opts.orgId, - projectId: opts.projectId, - deploymentVersion: opts.deploymentVersion, - machine: opts.machine.name, - }, - }) - ); + const createRequest = { + name: runnerId, + image: imageRef, + env: envVars, + cpu: opts.machine.cpu, + memory_gb: opts.machine.memory, + metadata: { + runId: opts.runFriendlyId, + envId: opts.envId, + envType: opts.envType, + orgId: opts.orgId, + projectId: opts.projectId, + deploymentVersion: opts.deploymentVersion, + machine: opts.machine.name, + }, + ...(Object.keys(labels).length > 0 ? { labels } : {}), + }; + + // Retry transient placement failures instead of abandoning the run: a + // swallowed create error leaves the run waiting for the run engine's + // PENDING_EXECUTING timeout (minutes) before it is redriven, while a + // retried create typically succeeds in under a second (TRI-10293). + let error: unknown; + let data: Awaited> | null | undefined; + let attempt = 1; + // Set after a ComputeClientError: the failed create may have left its + // name registered, so subsequent attempts use a suffixed name. + let suffixAttempts = false; + for (; attempt <= this.createMaxAttempts; attempt++) { + const attemptRunnerId = suffixAttempts + ? runnerNameForAttempt(runnerId, attempt) + : runnerId; + [error, data] = await tryCatch( + this.compute.instances.create( + attemptRunnerId === runnerId + ? createRequest + : { + ...createRequest, + name: attemptRunnerId, + env: { ...envVars, TRIGGER_RUNNER_ID: attemptRunnerId }, + } + ) + ); + + if (!error) { + event.runnerId = attemptRunnerId; + break; + } + + if (error instanceof ComputeClientError) { + suffixAttempts = true; + } + + this.logger.warn("create instance attempt failed", { + runnerId: attemptRunnerId, + attempt, + error: error instanceof Error ? error.message : String(error), + }); + + if (!isRetryableCreateError(error) || attempt === this.createMaxAttempts) break; + await sleep(this.createRetryBaseDelayMs * attempt); + } + event.createAttempts = attempt; - if (error) { + if (error || !data) { event.error = error instanceof Error ? error.message : String(error); event.errorType = error instanceof DOMException && error.name === "TimeoutError" ? "timeout" : "fetch"; @@ -276,6 +404,7 @@ export class ComputeWorkloadManager implements WorkloadManager { envId?: string; orgId?: string; projectId?: string; + hasPrivateLink?: boolean; dequeuedAt?: Date; }): Promise { const metadata: Record = { @@ -288,6 +417,13 @@ export class ComputeWorkloadManager implements WorkloadManager { TRIGGER_WORKER_INSTANCE_NAME: this.opts.runner.instanceName, }; + // Resupply labels on restore (the provider doesn't persist them across a + // snapshot). orgId is optional on the restore opts type, so guard it. + const labels: Record = {}; + if (opts.orgId) { + labels.org = opts.orgId; + } + this.logger.verbose("restore request body", { snapshotId: opts.snapshotId, runnerId: opts.runnerId, @@ -301,6 +437,7 @@ export class ComputeWorkloadManager implements WorkloadManager { metadata, cpu: opts.machine.cpu, memory_gb: opts.machine.memory, + ...(Object.keys(labels).length > 0 ? { labels } : {}), }) ); diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts index ec089267219..b2ed05c9f11 100644 --- a/apps/supervisor/src/workloadManager/kubernetes.ts +++ b/apps/supervisor/src/workloadManager/kubernetes.ts @@ -321,6 +321,13 @@ export class KubernetesWorkloadManager implements WorkloadManager { }, } : {}), + ...(env.KUBERNETES_POD_DNS_NDOTS_OVERRIDE_ENABLED + ? { + dnsConfig: { + options: [{ name: "ndots", value: `${env.KUBERNETES_POD_DNS_NDOTS}` }], + }, + } + : {}), }; } diff --git a/apps/supervisor/src/workloadServer/index.ts b/apps/supervisor/src/workloadServer/index.ts index bd38cc8700f..bf4b38012d5 100644 --- a/apps/supervisor/src/workloadServer/index.ts +++ b/apps/supervisor/src/workloadServer/index.ts @@ -31,6 +31,14 @@ import { } from "../services/computeSnapshotService.js"; import type { ComputeWorkloadManager } from "../workloadManager/compute.js"; import type { OtlpTraceService } from "../services/otlpTraceService.js"; +import type { ServerResponse } from "node:http"; +import { + emitOneShot, + runWideEvent, + setMeta, + type State, + type WideEventOptions, +} from "../wideEvents/index.js"; // Use the official export when upgrading to socket.io@4.8.0 interface DefaultEventsMap { @@ -43,6 +51,18 @@ const WorkloadActionParams = z.object({ snapshotFriendlyId: z.string(), }); +// Workloads bundled into customer task images before CLI v4.4.4 use a strict +// zod enum for checkpoint type that only allows DOCKER and KUBERNETES. The +// workload never reads this field - it only validates the response shape - so +// rewriting it to a known value keeps older runners working without affecting +// the value stored in the database or seen by internal services. +function legacifyCheckpointType(item: T): T { + if (item.checkpoint?.type === "COMPUTE") { + return { ...item, checkpoint: { ...item.checkpoint, type: "KUBERNETES" } } as T; + } + return item; +} + type WorkloadServerEvents = { runConnected: [ { @@ -67,6 +87,9 @@ type WorkloadServerOptions = { checkpointClient?: CheckpointClient; computeManager?: ComputeWorkloadManager; tracing?: OtlpTraceService; + wideEventOpts: WideEventOptions; + /** When true, high-frequency HTTP routes also emit wide events. */ + wideEventsNoisyRoutes: boolean; }; export class WorkloadServer extends EventEmitter { @@ -74,6 +97,8 @@ export class WorkloadServer extends EventEmitter { private readonly snapshotService?: ComputeSnapshotService; private readonly logger = new SimpleStructuredLogger("workload-server"); + private readonly wideEventOpts: WideEventOptions; + private readonly wideEventsNoisyRoutes: boolean; private readonly httpServer: HttpServer; private readonly websocketServer: Namespace< @@ -103,12 +128,15 @@ export class WorkloadServer extends EventEmitter { this.workerClient = opts.workerClient; this.checkpointClient = opts.checkpointClient; + this.wideEventOpts = opts.wideEventOpts; + this.wideEventsNoisyRoutes = opts.wideEventsNoisyRoutes; if (opts.computeManager?.snapshotsEnabled) { this.snapshotService = new ComputeSnapshotService({ computeManager: opts.computeManager, workerClient: opts.workerClient, tracing: opts.tracing, + wideEventOpts: this.wideEventOpts, }); } @@ -142,6 +170,59 @@ export class WorkloadServer extends EventEmitter { return this.headerValueFromRequest(req, WORKLOAD_HEADERS.PROJECT_REF); } + /** + * Sets common route meta on the wide-event state from URL params. + */ + private attachRouteMeta(state: State, params: unknown): void { + if (!params || typeof params !== "object") return; + const p = params as Record; + if (typeof p.runFriendlyId === "string") setMeta(state, "run_id", p.runFriendlyId); + if (typeof p.snapshotFriendlyId === "string") { + setMeta(state, "snapshot_id", p.snapshotFriendlyId); + } + if (typeof p.deploymentId === "string") setMeta(state, "deployment_id", p.deploymentId); + } + + /** + * Wraps an HTTP route handler body with the wide-event lifecycle. Reads + * `traceparent` and `x-request-id` from `req.headers`, attaches `run_id` / + * `snapshot_id` / `deployment_id` meta from `params` when present, and + * captures the response status from `res.statusCode` after `fn` returns. + * + * Pass `highFrequency: true` for noisy routes (heartbeat, polling). Those + * still go through the wrapper but only emit when + * `TRIGGER_WIDE_EVENTS_NOISY_ROUTES` is on, so prod can keep them dark + * while test envs capture full-fidelity traffic for debugging. + */ + private wideRoute( + ctx: { req: IncomingMessage; res: ServerResponse; params?: unknown }, + op: string, + route: string, + method: string, + fn: () => Promise | T, + routeOpts: { highFrequency?: boolean } = {} + ): Promise { + const enabled = + this.wideEventOpts.enabled && (!routeOpts.highFrequency || this.wideEventsNoisyRoutes); + return runWideEvent( + { + ...this.wideEventOpts, + enabled, + op, + kind: "inbound", + route, + method, + traceparent: this.headerValueFromRequest(ctx.req, "traceparent"), + inboundRequestId: this.headerValueFromRequest(ctx.req, "x-request-id"), + setup: (state) => this.attachRouteMeta(state, ctx.params), + }, + fn, + (state) => { + state.statusCode = ctx.res.statusCode; + } + ); + } + private createHttpServer({ host, port }: { host: string; port: number }) { const httpServer = new HttpServer({ port, @@ -162,26 +243,34 @@ export class WorkloadServer extends EventEmitter { { paramsSchema: WorkloadActionParams, bodySchema: WorkloadRunAttemptStartRequestBody, - handler: async ({ req, reply, params, body }) => { - const startResponse = await this.workerClient.startRunAttempt( - params.runFriendlyId, - params.snapshotFriendlyId, - body, - this.runnerIdFromRequest(req) - ); - - if (!startResponse.success) { - this.logger.error("Failed to start run", { - params, - error: startResponse.error, - }); - reply.empty(500); - return; - } - - reply.json(startResponse.data satisfies WorkloadRunAttemptStartResponseBody); - return; - }, + handler: async (ctx) => + this.wideRoute( + ctx, + "attempt.start", + "/api/v1/workload-actions/runs/:runFriendlyId/snapshots/:snapshotFriendlyId/attempts/start", + "POST", + async () => { + const { req, reply, params, body } = ctx; + const startResponse = await this.workerClient.startRunAttempt( + params.runFriendlyId, + params.snapshotFriendlyId, + body, + this.runnerIdFromRequest(req) + ); + + if (!startResponse.success) { + this.logger.error("Failed to start run", { + params, + error: startResponse.error, + }); + reply.empty(500); + return; + } + + reply.json(startResponse.data satisfies WorkloadRunAttemptStartResponseBody); + return; + } + ), } ) .route( @@ -190,26 +279,50 @@ export class WorkloadServer extends EventEmitter { { paramsSchema: WorkloadActionParams, bodySchema: WorkloadRunAttemptCompleteRequestBody, - handler: async ({ req, reply, params, body }) => { - const completeResponse = await this.workerClient.completeRunAttempt( - params.runFriendlyId, - params.snapshotFriendlyId, - body, - this.runnerIdFromRequest(req) - ); - - if (!completeResponse.success) { - this.logger.error("Failed to complete run", { - params, - error: completeResponse.error, - }); - reply.empty(500); - return; - } - - reply.json(completeResponse.data satisfies WorkloadRunAttemptCompleteResponseBody); - return; - }, + handler: async (ctx) => + this.wideRoute( + ctx, + "attempt.complete", + "/api/v1/workload-actions/runs/:runFriendlyId/snapshots/:snapshotFriendlyId/attempts/complete", + "POST", + async () => { + const { req, reply, params, body } = ctx; + const runnerId = this.runnerIdFromRequest(req); + + // A completion attempt invalidates any pending delayed snapshot + // regardless of outcome: the runner has finished executing, so the + // suspended state the snapshot was scheduled to capture no longer + // exists. Cancel BEFORE the async completion call - the timer + // wheel can tick during the await, so cancelling after it leaves + // a real window for a due snapshot to dispatch and pause a VM + // that has moved on. The runnerId guard keeps a stale duplicate + // runner's completion from cancelling a fresh runner's snapshot, + // and the runner can't schedule a new suspend until it receives + // this route's reply, so nothing legitimate can be cancelled here. + this.snapshotService?.cancel(params.runFriendlyId, runnerId); + + const completeResponse = await this.workerClient.completeRunAttempt( + params.runFriendlyId, + params.snapshotFriendlyId, + body, + runnerId + ); + + if (!completeResponse.success) { + this.logger.error("Failed to complete run", { + params, + error: completeResponse.error, + }); + reply.empty(500); + return; + } + + reply.json( + completeResponse.data satisfies WorkloadRunAttemptCompleteResponseBody + ); + return; + } + ), } ) .route( @@ -218,27 +331,36 @@ export class WorkloadServer extends EventEmitter { { paramsSchema: WorkloadActionParams, bodySchema: WorkloadHeartbeatRequestBody, - handler: async ({ req, reply, params, body }) => { - const heartbeatResponse = await this.workerClient.heartbeatRun( - params.runFriendlyId, - params.snapshotFriendlyId, - body, - this.runnerIdFromRequest(req) - ); - - if (!heartbeatResponse.success) { - this.logger.error("Failed to heartbeat run", { - params, - error: heartbeatResponse.error, - }); - reply.empty(500); - return; - } - - reply.json({ - ok: true, - } satisfies WorkloadHeartbeatResponseBody); - }, + handler: async (ctx) => + this.wideRoute( + ctx, + "heartbeat", + "/api/v1/workload-actions/runs/:runFriendlyId/snapshots/:snapshotFriendlyId/heartbeat", + "POST", + async () => { + const { req, reply, params, body } = ctx; + const heartbeatResponse = await this.workerClient.heartbeatRun( + params.runFriendlyId, + params.snapshotFriendlyId, + body, + this.runnerIdFromRequest(req) + ); + + if (!heartbeatResponse.success) { + this.logger.error("Failed to heartbeat run", { + params, + error: heartbeatResponse.error, + }); + reply.empty(500); + return; + } + + reply.json({ + ok: true, + } satisfies WorkloadHeartbeatResponseBody); + }, + { highFrequency: true } + ), } ) .route( @@ -246,87 +368,95 @@ export class WorkloadServer extends EventEmitter { "GET", { paramsSchema: WorkloadActionParams, - handler: async ({ reply, params, req }) => { - const runnerId = this.runnerIdFromRequest(req); - const deploymentVersion = this.deploymentVersionFromRequest(req); - const projectRef = this.projectRefFromRequest(req); - - this.logger.debug("Suspend request", { - params, - runnerId, - deploymentVersion, - projectRef, - }); - - if (!runnerId || !deploymentVersion || !projectRef) { - this.logger.error("Invalid headers for suspend request", { - ...params, - runnerId, - deploymentVersion, - projectRef, - }); - reply.json( - { - ok: false, - error: "Invalid headers", - } satisfies WorkloadSuspendRunResponseBody, - false, - 400 - ); - return; - } - - if (this.snapshotService) { - // Compute mode: delay snapshot to avoid wasted work on short-lived waitpoints. - // If the run continues before the delay expires, the snapshot is cancelled. - reply.json({ ok: true } satisfies WorkloadSuspendRunResponseBody, false, 202); - - this.snapshotService.schedule(params.runFriendlyId, { - runnerId, - runFriendlyId: params.runFriendlyId, - snapshotFriendlyId: params.snapshotFriendlyId, - }); - - return; - } - - if (!this.checkpointClient) { - reply.json( - { - ok: false, - error: "Checkpoints disabled", - } satisfies WorkloadSuspendRunResponseBody, - false, - 400 - ); - return; - } - - reply.json( - { - ok: true, - } satisfies WorkloadSuspendRunResponseBody, - false, - 202 - ); - - const suspendResult = await this.checkpointClient.suspendRun({ - runFriendlyId: params.runFriendlyId, - snapshotFriendlyId: params.snapshotFriendlyId, - body: { - runnerId, - runId: params.runFriendlyId, - snapshotId: params.snapshotFriendlyId, - projectRef, - deploymentVersion, - }, - }); - - if (!suspendResult) { - this.logger.error("Failed to suspend run", { params }); - return; - } - }, + handler: async (ctx) => + this.wideRoute( + ctx, + "suspend", + "/api/v1/workload-actions/runs/:runFriendlyId/snapshots/:snapshotFriendlyId/suspend", + "GET", + async () => { + const { reply, params, req } = ctx; + const runnerId = this.runnerIdFromRequest(req); + const deploymentVersion = this.deploymentVersionFromRequest(req); + const projectRef = this.projectRefFromRequest(req); + + this.logger.debug("Suspend request", { + params, + runnerId, + deploymentVersion, + projectRef, + }); + + if (!runnerId || !deploymentVersion || !projectRef) { + this.logger.error("Invalid headers for suspend request", { + ...params, + runnerId, + deploymentVersion, + projectRef, + }); + reply.json( + { + ok: false, + error: "Invalid headers", + } satisfies WorkloadSuspendRunResponseBody, + false, + 400 + ); + return; + } + + if (this.snapshotService) { + // Compute mode: delay snapshot to avoid wasted work on short-lived waitpoints. + // If the run continues before the delay expires, the snapshot is cancelled. + reply.json({ ok: true } satisfies WorkloadSuspendRunResponseBody, false, 202); + + this.snapshotService.schedule(params.runFriendlyId, { + runnerId, + runFriendlyId: params.runFriendlyId, + snapshotFriendlyId: params.snapshotFriendlyId, + }); + + return; + } + + if (!this.checkpointClient) { + reply.json( + { + ok: false, + error: "Checkpoints disabled", + } satisfies WorkloadSuspendRunResponseBody, + false, + 400 + ); + return; + } + + reply.json( + { + ok: true, + } satisfies WorkloadSuspendRunResponseBody, + false, + 202 + ); + + const suspendResult = await this.checkpointClient.suspendRun({ + runFriendlyId: params.runFriendlyId, + snapshotFriendlyId: params.snapshotFriendlyId, + body: { + runnerId, + runId: params.runFriendlyId, + snapshotId: params.snapshotFriendlyId, + projectRef, + deploymentVersion, + }, + }); + + if (!suspendResult) { + this.logger.error("Failed to suspend run", { params }); + return; + } + } + ), } ) .route( @@ -334,33 +464,41 @@ export class WorkloadServer extends EventEmitter { "GET", { paramsSchema: WorkloadActionParams, - handler: async ({ req, reply, params }) => { - this.logger.debug("Run continuation request", { params }); - - // Cancel any pending delayed snapshot for this run - this.snapshotService?.cancel(params.runFriendlyId); - - const continuationResult = await this.workerClient.continueRunExecution( - params.runFriendlyId, - params.snapshotFriendlyId, - this.runnerIdFromRequest(req) - ); - - if (!continuationResult.success) { - this.logger.error("Failed to continue run execution", { params }); - reply.json( - { - ok: false, - error: "Failed to continue run execution", - }, - false, - 400 - ); - return; - } - - reply.json(continuationResult.data as WorkloadContinueRunExecutionResponseBody); - }, + handler: async (ctx) => + this.wideRoute( + ctx, + "continue", + "/api/v1/workload-actions/runs/:runFriendlyId/snapshots/:snapshotFriendlyId/continue", + "GET", + async () => { + const { req, reply, params } = ctx; + this.logger.debug("Run continuation request", { params }); + + // Cancel any pending delayed snapshot for this run + this.snapshotService?.cancel(params.runFriendlyId); + + const continuationResult = await this.workerClient.continueRunExecution( + params.runFriendlyId, + params.snapshotFriendlyId, + this.runnerIdFromRequest(req) + ); + + if (!continuationResult.success) { + this.logger.error("Failed to continue run execution", { params }); + reply.json( + { + ok: false, + error: "Failed to continue run execution", + }, + false, + 400 + ); + return; + } + + reply.json(continuationResult.data as WorkloadContinueRunExecutionResponseBody); + } + ), } ) .route( @@ -368,24 +506,35 @@ export class WorkloadServer extends EventEmitter { "GET", { paramsSchema: WorkloadActionParams, - handler: async ({ req, reply, params }) => { - const sinceSnapshotResponse = await this.workerClient.getSnapshotsSince( - params.runFriendlyId, - params.snapshotFriendlyId, - this.runnerIdFromRequest(req) - ); - - if (!sinceSnapshotResponse.success) { - this.logger.error("Failed to get snapshots since", { - runId: params.runFriendlyId, - error: sinceSnapshotResponse.error, - }); - reply.empty(500); - return; - } - - reply.json(sinceSnapshotResponse.data satisfies WorkloadRunSnapshotsSinceResponseBody); - }, + handler: async (ctx) => + this.wideRoute( + ctx, + "snapshots.since", + "/api/v1/workload-actions/runs/:runFriendlyId/snapshots/since/:snapshotFriendlyId", + "GET", + async () => { + const { req, reply, params } = ctx; + const sinceSnapshotResponse = await this.workerClient.getSnapshotsSince( + params.runFriendlyId, + params.snapshotFriendlyId, + this.runnerIdFromRequest(req) + ); + + if (!sinceSnapshotResponse.success) { + this.logger.error("Failed to get snapshots since", { + runId: params.runFriendlyId, + error: sinceSnapshotResponse.error, + }); + reply.empty(500); + return; + } + + reply.json({ + snapshots: sinceSnapshotResponse.data.snapshots.map(legacifyCheckpointType), + } satisfies WorkloadRunSnapshotsSinceResponseBody); + }, + { highFrequency: true } + ), } ) .route("/api/v1/workload-actions/deployments/:deploymentId/dequeue", "GET", { @@ -393,61 +542,90 @@ export class WorkloadServer extends EventEmitter { deploymentId: z.string(), }), - handler: async ({ req, reply, params }) => { - const dequeueResponse = await this.workerClient.dequeueFromVersion( - params.deploymentId, - 1, - this.runnerIdFromRequest(req) - ); - - if (!dequeueResponse.success) { - this.logger.error("Failed to get latest snapshot", { - deploymentId: params.deploymentId, - error: dequeueResponse.error, - }); - reply.empty(500); - return; - } + handler: async (ctx) => + this.wideRoute( + ctx, + "deployment.dequeue", + "/api/v1/workload-actions/deployments/:deploymentId/dequeue", + "GET", + async () => { + const { req, reply, params } = ctx; + const dequeueResponse = await this.workerClient.dequeueFromVersion( + params.deploymentId, + 1, + this.runnerIdFromRequest(req) + ); - reply.json(dequeueResponse.data satisfies WorkloadDequeueFromVersionResponseBody); - }, + if (!dequeueResponse.success) { + this.logger.error("Failed to get latest snapshot", { + deploymentId: params.deploymentId, + error: dequeueResponse.error, + }); + reply.empty(500); + return; + } + + reply.json( + dequeueResponse.data.map(legacifyCheckpointType) satisfies WorkloadDequeueFromVersionResponseBody + ); + } + ), }); if (env.SEND_RUN_DEBUG_LOGS) { httpServer.route("/api/v1/workload-actions/runs/:runFriendlyId/logs/debug", "POST", { paramsSchema: WorkloadActionParams.pick({ runFriendlyId: true }), bodySchema: WorkloadDebugLogRequestBody, - handler: async ({ req, reply, params, body }) => { - reply.empty(204); - - await this.workerClient.sendDebugLog( - params.runFriendlyId, - body, - this.runnerIdFromRequest(req) - ); - }, + handler: async (ctx) => + this.wideRoute( + ctx, + "logs.debug", + "/api/v1/workload-actions/runs/:runFriendlyId/logs/debug", + "POST", + async () => { + const { req, reply, params, body } = ctx; + reply.empty(204); + + await this.workerClient.sendDebugLog( + params.runFriendlyId, + body, + this.runnerIdFromRequest(req) + ); + }, + { highFrequency: true } + ), }); } else { // Lightweight mock route without schemas httpServer.route("/api/v1/workload-actions/runs/:runFriendlyId/logs/debug", "POST", { - handler: async ({ reply }) => { - reply.empty(204); - }, + handler: async (ctx) => + this.wideRoute( + ctx, + "logs.debug", + "/api/v1/workload-actions/runs/:runFriendlyId/logs/debug", + "POST", + async () => { + ctx.reply.empty(204); + }, + { highFrequency: true } + ), }); } - // Compute snapshot callback endpoint + // Snapshot callback endpoint (inbound from compute path) httpServer.route("/api/v1/compute/snapshot-complete", "POST", { bodySchema: SnapshotCallbackPayloadSchema, - handler: async ({ reply, body }) => { - if (!this.snapshotService) { - reply.empty(404); - return; - } + handler: async (ctx) => + this.wideRoute(ctx, "snapshot.callback", "/api/v1/compute/snapshot-complete", "POST", async () => { + const { reply, body } = ctx; + if (!this.snapshotService) { + reply.empty(404); + return; + } - const result = await this.snapshotService.handleCallback(body); - reply.empty(result.status); - }, + const result = await this.snapshotService.handleCallback(body); + reply.empty(result.status); + }), }); return httpServer; @@ -520,6 +698,28 @@ export class WorkloadServer extends EventEmitter { }; }; + const emitSocketLifecycle = ( + event: "run_connected" | "run_disconnected", + friendlyId: string, + disconnectReason?: string + ) => { + emitOneShot({ + ...this.wideEventOpts, + op: event === "run_connected" ? "socket.run.connected" : "socket.run.disconnected", + kind: "event", + populate: (state) => { + state.extras.event = event; + setMeta(state, "run_id", friendlyId); + if (socket.data.deploymentId) { + setMeta(state, "deployment_id", socket.data.deploymentId); + } + if (socket.data.runnerId) setMeta(state, "runner_id", socket.data.runnerId); + state.extras.socket_id = socket.id; + if (disconnectReason) state.extras.disconnect_reason = disconnectReason; + }, + }); + }; + const runConnected = (friendlyId: string) => { socketLogger.debug("runConnected", { ...getSocketMetadata() }); @@ -530,20 +730,35 @@ export class WorkloadServer extends EventEmitter { newRunId: friendlyId, oldRunId: socket.data.runFriendlyId, }); - runDisconnected(socket.data.runFriendlyId); + runDisconnected(socket.data.runFriendlyId, "socket_run_replaced"); } this.runSockets.set(friendlyId, socket); this.emit("runConnected", { run: { friendlyId } }); socket.data.runFriendlyId = friendlyId; + emitSocketLifecycle("run_connected", friendlyId); }; - const runDisconnected = (friendlyId: string) => { + const runDisconnected = (friendlyId: string, reason: string) => { socketLogger.debug("runDisconnected", { ...getSocketMetadata() }); + // The run is gone from this runner (crash, exit, or replaced by a new + // run), so a pending delayed snapshot for it is stale. Genuine + // waitpoint suspensions keep the socket connected, so this doesn't + // cancel a snapshot that's still wanted; the runnerId match guards + // against a stale duplicate runner cancelling a fresh runner's + // snapshot after the run was reassigned. Caveat: socket.data.runnerId + // is frozen at the websocket handshake, so after a same-supervisor + // restore (new runner id, socket not recreated) this guard refuses + // the cancel - a missed cancel, never a wrong one. The + // attempt.complete cancel uses the runner's current HTTP header id + // and is unaffected. + this.snapshotService?.cancel(friendlyId, socket.data.runnerId); + this.runSockets.delete(friendlyId); this.emit("runDisconnected", { run: { friendlyId } }); socket.data.runFriendlyId = undefined; + emitSocketLifecycle("run_disconnected", friendlyId, reason); }; socketLogger.debug("wsServer socket connected", { ...getSocketMetadata() }); @@ -561,7 +776,7 @@ export class WorkloadServer extends EventEmitter { }); if (socket.data.runFriendlyId) { - runDisconnected(socket.data.runFriendlyId); + runDisconnected(socket.data.runFriendlyId, `socket_disconnecting:${reason}`); } }); @@ -606,7 +821,7 @@ export class WorkloadServer extends EventEmitter { log.debug("Handling run:stop"); try { - runDisconnected(message.run.friendlyId); + runDisconnected(message.run.friendlyId, "run_stop_message"); // Don't delete trace context here - run:stop fires after each snapshot/shutdown // but the run may be restored on a new VM and snapshot again. Trace context is // re-populated on dequeue, and entries are small (4 strings per run). diff --git a/apps/webapp/CLAUDE.md b/apps/webapp/CLAUDE.md index b0f5e09b829..a4de6ab57b7 100644 --- a/apps/webapp/CLAUDE.md +++ b/apps/webapp/CLAUDE.md @@ -1,6 +1,6 @@ # Webapp -Remix 2.1.0 app serving as the main API, dashboard, and orchestration engine. Uses an Express server (`server.ts`). +Remix 2.17.4 app serving as the main API, dashboard, and orchestration engine. Uses an Express server (`server.ts`). ## Verifying Changes @@ -59,6 +59,17 @@ Use the `chrome-devtools` MCP server to visually verify local dashboard changes. Routes use Remix flat-file convention with dot-separated segments: `api.v1.tasks.$taskId.trigger.ts` -> `/api/v1/tasks/:taskId/trigger` +## Abort Signals + +**Never use `request.signal`** for detecting client disconnects. It is broken due to a Node.js bug ([nodejs/node#55428](https://github.com/nodejs/node/issues/55428)) where the AbortSignal chain is severed when Remix internally clones the Request object. Instead, use `getRequestAbortSignal()` from `app/services/httpAsyncStorage.server.ts`, which is wired directly to Express `res.on("close")` and fires reliably. + +```typescript +import { getRequestAbortSignal } from "~/services/httpAsyncStorage.server"; + +// In route handlers, SSE streams, or any server-side code: +const signal = getRequestAbortSignal(); +``` + ## Environment Variables Access via `env` export from `app/env.server.ts`. **Never use `process.env` directly.** diff --git a/apps/webapp/app/clientBeforeFirstRender.ts b/apps/webapp/app/clientBeforeFirstRender.ts new file mode 100644 index 00000000000..3275c54423a --- /dev/null +++ b/apps/webapp/app/clientBeforeFirstRender.ts @@ -0,0 +1,38 @@ +/** + * Runs once on the client, synchronously, before React hydrates the app. + * Reserved for housekeeping that must happen before any component mounts. + */ +export function clientBeforeFirstRender() { + cleanupLegacyResizablePanelStorage(); +} + +/** + * Earlier versions of the resizable panel library wrote a per-session + * localStorage entry for every PanelGroup, including ones without an + * `autosaveId`. The keys look like `panel-group-react-aria-::` + * and accumulate without bound across sessions until they exhaust the + * ~5 MB origin quota and break subsequent `setItem` calls. + * + * The library no longer behaves this way, but existing users still carry + * the residue. Evict it (plus the orphaned `panel-run-parent-v2` key from + * the v2→v3 autosaveId bump) once on load. + */ +function cleanupLegacyResizablePanelStorage() { + try { + const toRemove: string[] = []; + for (let i = 0; i < window.localStorage.length; i++) { + const key = window.localStorage.key(i); + if ( + key && + (key.startsWith("panel-group-react-aria") || key === "panel-run-parent-v2") + ) { + toRemove.push(key); + } + } + for (const key of toRemove) { + window.localStorage.removeItem(key); + } + } catch { + // localStorage may be disabled (private browsing, security policy) + } +} diff --git a/apps/webapp/app/components/BlankStatePanels.tsx b/apps/webapp/app/components/BlankStatePanels.tsx index fe39f6785c5..9a4884e09d3 100644 --- a/apps/webapp/app/components/BlankStatePanels.tsx +++ b/apps/webapp/app/components/BlankStatePanels.tsx @@ -1,4 +1,5 @@ import { + ArrowsRightLeftIcon, BeakerIcon, BellAlertIcon, BookOpenIcon, @@ -189,6 +190,28 @@ export function BatchesNone() { ); } +export function SessionsNone() { + return ( + + Sessions docs + + } + > + + You have no sessions in this environment. Sessions are durable, typed, bidirectional I/O + primitives that outlive a single run — used by chat.agent and any + long-running task that needs streaming input and output. + + + ); +} + export function TestHasNoTasks() { const organization = useOrganization(); const project = useProject(); diff --git a/apps/webapp/app/components/BulkActionFilterSummary.tsx b/apps/webapp/app/components/BulkActionFilterSummary.tsx index a230e70b346..a2eabc879de 100644 --- a/apps/webapp/app/components/BulkActionFilterSummary.tsx +++ b/apps/webapp/app/components/BulkActionFilterSummary.tsx @@ -215,6 +215,19 @@ export function BulkActionFilterSummary({ /> ); } + case "regions": { + const values = Array.isArray(value) ? value : [`${value}`]; + return ( + + ); + } case "machines": { const values = Array.isArray(value) ? value : [`${value}`]; return ( @@ -240,6 +253,19 @@ export function BulkActionFilterSummary({ /> ); } + case "sources": { + const values = Array.isArray(value) ? value : [`${value}`]; + return ( + + ); + } default: { assertNever(typedKey); } diff --git a/apps/webapp/app/components/DefinitionTooltip.tsx b/apps/webapp/app/components/DefinitionTooltip.tsx index 5bb3a713997..d91cce92c99 100644 --- a/apps/webapp/app/components/DefinitionTooltip.tsx +++ b/apps/webapp/app/components/DefinitionTooltip.tsx @@ -6,14 +6,16 @@ export function DefinitionTip({ content, children, title, + disableHoverableContent = true, }: { content: React.ReactNode; children: React.ReactNode; title: React.ReactNode; + disableHoverableContent?: boolean; }) { return ( - + {children} diff --git a/apps/webapp/app/components/Feedback.tsx b/apps/webapp/app/components/Feedback.tsx index ecfd4e88c9a..0848359e219 100644 --- a/apps/webapp/app/components/Feedback.tsx +++ b/apps/webapp/app/components/Feedback.tsx @@ -1,10 +1,10 @@ import { conform, useForm } from "@conform-to/react"; import { parse } from "@conform-to/zod"; import { InformationCircleIcon, ArrowUpCircleIcon } from "@heroicons/react/20/solid"; -import { EnvelopeIcon } from "@heroicons/react/24/solid"; +import { EnvelopeIcon, ShieldCheckIcon } from "@heroicons/react/24/solid"; import { Form, useActionData, useLocation, useNavigation, useSearchParams } from "@remix-run/react"; import { type ReactNode, useEffect, useState } from "react"; -import { type FeedbackType, feedbackTypeLabel, schema } from "~/routes/resources.feedback"; +import { type FeedbackType, feedbackTypes, schema } from "~/routes/resources.feedback"; import { Button } from "./primitives/Buttons"; import { Dialog, DialogContent, DialogHeader, DialogTrigger } from "./primitives/Dialog"; import { Fieldset } from "./primitives/Fieldset"; @@ -84,9 +84,12 @@ export function Feedback({ button, defaultValue = "bug", onOpenChange }: Feedbac How can we help? We read every message and will respond as quickly as we can. - {!(type === "feature" || type === "help" || type === "concurrency") && ( -
- )} + {!( + type === "feature" || + type === "help" || + type === "concurrency" || + type === "hipaa" + ) &&
}
@@ -132,6 +135,19 @@ export function Feedback({ button, defaultValue = "bug", onOpenChange }: Feedbac )} + {type === "hipaa" && ( + + + We offer a signed Business Associate Agreement (BAA) as a paid add-on on any + paid plan. To help us get back to you quickly, please include your company + name, and a brief description of the PHI workload you plan to run. + + + )} diff --git a/apps/webapp/app/components/MachineLabelCombo.tsx b/apps/webapp/app/components/MachineLabelCombo.tsx index 3d22ca527d0..485f6094cf0 100644 --- a/apps/webapp/app/components/MachineLabelCombo.tsx +++ b/apps/webapp/app/components/MachineLabelCombo.tsx @@ -31,7 +31,9 @@ export function MachineLabel({ className?: string; }) { return ( - {formatMachinePresetName(preset)} + + {formatMachinePresetName(preset)} + ); } diff --git a/apps/webapp/app/components/admin/backOffice/ApiRateLimitSection.server.ts b/apps/webapp/app/components/admin/backOffice/ApiRateLimitSection.server.ts new file mode 100644 index 00000000000..4855c4c2465 --- /dev/null +++ b/apps/webapp/app/components/admin/backOffice/ApiRateLimitSection.server.ts @@ -0,0 +1,55 @@ +import { prisma } from "~/db.server"; +import { env } from "~/env.server"; +import { logger } from "~/services/logger.server"; +import { type Duration } from "~/services/rateLimiter.server"; +import { API_RATE_LIMIT_INTENT } from "./ApiRateLimitSection"; +import { + handleRateLimitAction, + resolveEffectiveRateLimit, + type RateLimitActionResult, + type RateLimitDomain, +} from "./RateLimitSection.server"; +import type { EffectiveRateLimit } from "./RateLimitSection"; + +export const apiRateLimitDomain: RateLimitDomain = { + intent: API_RATE_LIMIT_INTENT, + systemDefault: () => ({ + type: "tokenBucket", + refillRate: env.API_RATE_LIMIT_REFILL_RATE, + interval: env.API_RATE_LIMIT_REFILL_INTERVAL as Duration, + maxTokens: env.API_RATE_LIMIT_MAX, + }), + apply: async (orgId, next, adminUserId) => { + const existing = await prisma.organization.findFirst({ + where: { id: orgId }, + select: { apiRateLimiterConfig: true }, + }); + if (!existing) { + throw new Response(null, { status: 404 }); + } + await prisma.organization.update({ + where: { id: orgId }, + data: { apiRateLimiterConfig: next as any }, + }); + logger.info("admin.backOffice.apiRateLimit", { + adminUserId, + orgId, + previous: existing.apiRateLimiterConfig, + next, + }); + }, +}; + +export function resolveEffectiveApiRateLimit( + override: unknown +): EffectiveRateLimit { + return resolveEffectiveRateLimit(override, apiRateLimitDomain); +} + +export function handleApiRateLimitAction( + formData: FormData, + orgId: string, + adminUserId: string +): Promise { + return handleRateLimitAction(formData, orgId, adminUserId, apiRateLimitDomain); +} diff --git a/apps/webapp/app/components/admin/backOffice/ApiRateLimitSection.tsx b/apps/webapp/app/components/admin/backOffice/ApiRateLimitSection.tsx new file mode 100644 index 00000000000..b27956f4360 --- /dev/null +++ b/apps/webapp/app/components/admin/backOffice/ApiRateLimitSection.tsx @@ -0,0 +1,17 @@ +import { + RateLimitSection, + type RateLimitWrapperProps, +} from "./RateLimitSection"; + +export const API_RATE_LIMIT_INTENT = "set-rate-limit"; +export const API_RATE_LIMIT_SAVED_VALUE = "rate-limit"; + +export function ApiRateLimitSection(props: RateLimitWrapperProps) { + return ( + + ); +} diff --git a/apps/webapp/app/components/admin/backOffice/BatchRateLimitSection.server.ts b/apps/webapp/app/components/admin/backOffice/BatchRateLimitSection.server.ts new file mode 100644 index 00000000000..83a368094a9 --- /dev/null +++ b/apps/webapp/app/components/admin/backOffice/BatchRateLimitSection.server.ts @@ -0,0 +1,55 @@ +import { prisma } from "~/db.server"; +import { env } from "~/env.server"; +import { logger } from "~/services/logger.server"; +import { type Duration } from "~/services/rateLimiter.server"; +import { BATCH_RATE_LIMIT_INTENT } from "./BatchRateLimitSection"; +import { + handleRateLimitAction, + resolveEffectiveRateLimit, + type RateLimitActionResult, + type RateLimitDomain, +} from "./RateLimitSection.server"; +import type { EffectiveRateLimit } from "./RateLimitSection"; + +export const batchRateLimitDomain: RateLimitDomain = { + intent: BATCH_RATE_LIMIT_INTENT, + systemDefault: () => ({ + type: "tokenBucket", + refillRate: env.BATCH_RATE_LIMIT_REFILL_RATE, + interval: env.BATCH_RATE_LIMIT_REFILL_INTERVAL as Duration, + maxTokens: env.BATCH_RATE_LIMIT_MAX, + }), + apply: async (orgId, next, adminUserId) => { + const existing = await prisma.organization.findFirst({ + where: { id: orgId }, + select: { batchRateLimitConfig: true }, + }); + if (!existing) { + throw new Response(null, { status: 404 }); + } + await prisma.organization.update({ + where: { id: orgId }, + data: { batchRateLimitConfig: next as any }, + }); + logger.info("admin.backOffice.batchRateLimit", { + adminUserId, + orgId, + previous: existing.batchRateLimitConfig, + next, + }); + }, +}; + +export function resolveEffectiveBatchRateLimit( + override: unknown +): EffectiveRateLimit { + return resolveEffectiveRateLimit(override, batchRateLimitDomain); +} + +export function handleBatchRateLimitAction( + formData: FormData, + orgId: string, + adminUserId: string +): Promise { + return handleRateLimitAction(formData, orgId, adminUserId, batchRateLimitDomain); +} diff --git a/apps/webapp/app/components/admin/backOffice/BatchRateLimitSection.tsx b/apps/webapp/app/components/admin/backOffice/BatchRateLimitSection.tsx new file mode 100644 index 00000000000..0e52124d290 --- /dev/null +++ b/apps/webapp/app/components/admin/backOffice/BatchRateLimitSection.tsx @@ -0,0 +1,17 @@ +import { + RateLimitSection, + type RateLimitWrapperProps, +} from "./RateLimitSection"; + +export const BATCH_RATE_LIMIT_INTENT = "set-batch-rate-limit"; +export const BATCH_RATE_LIMIT_SAVED_VALUE = "batch-rate-limit"; + +export function BatchRateLimitSection(props: RateLimitWrapperProps) { + return ( + + ); +} diff --git a/apps/webapp/app/components/admin/backOffice/MaxProjectsSection.server.ts b/apps/webapp/app/components/admin/backOffice/MaxProjectsSection.server.ts new file mode 100644 index 00000000000..ec27234a306 --- /dev/null +++ b/apps/webapp/app/components/admin/backOffice/MaxProjectsSection.server.ts @@ -0,0 +1,48 @@ +import { z } from "zod"; +import { prisma } from "~/db.server"; +import { logger } from "~/services/logger.server"; +import { MAX_PROJECTS_INTENT } from "./MaxProjectsSection"; + +const SetMaxProjectsSchema = z.object({ + intent: z.literal(MAX_PROJECTS_INTENT), + // Capped at PostgreSQL INTEGER max (Prisma Int) so oversized input fails + // validation cleanly instead of crashing the update. + maximumProjectCount: z.coerce.number().int().min(1).max(2_147_483_647), +}); + +export type MaxProjectsActionResult = + | { ok: true } + | { ok: false; errors: Record }; + +export async function handleMaxProjectsAction( + formData: FormData, + orgId: string, + adminUserId: string +): Promise { + const submission = SetMaxProjectsSchema.safeParse(Object.fromEntries(formData)); + if (!submission.success) { + return { ok: false, errors: submission.error.flatten().fieldErrors }; + } + + const existing = await prisma.organization.findFirst({ + where: { id: orgId }, + select: { maximumProjectCount: true }, + }); + if (!existing) { + throw new Response(null, { status: 404 }); + } + + await prisma.organization.update({ + where: { id: orgId }, + data: { maximumProjectCount: submission.data.maximumProjectCount }, + }); + + logger.info("admin.backOffice.maxProjects", { + adminUserId, + orgId, + previous: existing.maximumProjectCount, + next: submission.data.maximumProjectCount, + }); + + return { ok: true }; +} diff --git a/apps/webapp/app/components/admin/backOffice/MaxProjectsSection.tsx b/apps/webapp/app/components/admin/backOffice/MaxProjectsSection.tsx new file mode 100644 index 00000000000..bf8ecf83161 --- /dev/null +++ b/apps/webapp/app/components/admin/backOffice/MaxProjectsSection.tsx @@ -0,0 +1,115 @@ +import { Form } from "@remix-run/react"; +import { useEffect, useState } from "react"; +import { Button } from "~/components/primitives/Buttons"; +import { FormError } from "~/components/primitives/FormError"; +import { Header2 } from "~/components/primitives/Headers"; +import { Input } from "~/components/primitives/Input"; +import { Label } from "~/components/primitives/Label"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import * as Property from "~/components/primitives/PropertyTable"; + +export const MAX_PROJECTS_INTENT = "set-max-projects"; +export const MAX_PROJECTS_SAVED_VALUE = "max-projects"; + +type FieldErrors = Record | null; + +type Props = { + maximumProjectCount: number; + errors: FieldErrors; + savedJustNow: boolean; + isSubmitting: boolean; +}; + +export function MaxProjectsSection({ + maximumProjectCount, + errors, + savedJustNow, + isSubmitting, +}: Props) { + const hasFieldErrors = !!errors && Object.keys(errors).length > 0; + const fieldError = (field: string) => + errors && field in errors ? errors[field]?.[0] : undefined; + + const [isEditing, setIsEditing] = useState(hasFieldErrors); + const [value, setValue] = useState(String(maximumProjectCount)); + + useEffect(() => { + if (hasFieldErrors) setIsEditing(true); + }, [hasFieldErrors]); + + useEffect(() => { + if (savedJustNow && !hasFieldErrors) setIsEditing(false); + }, [savedJustNow, hasFieldErrors]); + + return ( +
+
+ Maximum projects + {!isEditing && ( + + )} +
+ + {savedJustNow && ( +
+ + Saved. + +
+ )} + + {!isEditing ? ( + + + Limit + + {maximumProjectCount.toLocaleString()} + + + + ) : ( + + +
+ + setValue(e.target.value)} + required + /> + {fieldError("maximumProjectCount")} +
+
+ + +
+ + )} +
+ ); +} diff --git a/apps/webapp/app/components/admin/backOffice/RateLimitSection.server.ts b/apps/webapp/app/components/admin/backOffice/RateLimitSection.server.ts new file mode 100644 index 00000000000..799fc3605df --- /dev/null +++ b/apps/webapp/app/components/admin/backOffice/RateLimitSection.server.ts @@ -0,0 +1,76 @@ +import { z } from "zod"; +import { + RateLimitTokenBucketConfig, + RateLimiterConfig, +} from "~/services/authorizationRateLimitMiddleware.server"; +import { + parseDurationToMs, + type EffectiveRateLimit, +} from "./RateLimitSection"; + +export type RateLimitDomain = { + intent: string; + systemDefault: () => RateLimiterConfig; + apply: ( + orgId: string, + next: RateLimitTokenBucketConfig, + adminUserId: string + ) => Promise; +}; + +export function resolveEffectiveRateLimit( + override: unknown, + domain: RateLimitDomain +): EffectiveRateLimit { + if (override == null) { + return { source: "default", config: domain.systemDefault() }; + } + const parsed = RateLimiterConfig.safeParse(override); + if (parsed.success) { + return { source: "override", config: parsed.data }; + } + // Column holds malformed JSON — fall back silently. Admin must investigate + // at the DB level; this UI can't recover it. + return { source: "default", config: domain.systemDefault() }; +} + +export type RateLimitActionResult = + | { ok: true } + | { ok: false; errors: Record }; + +export async function handleRateLimitAction( + formData: FormData, + orgId: string, + adminUserId: string, + domain: RateLimitDomain +): Promise { + const schema = z.object({ + intent: z.literal(domain.intent), + refillRate: z.coerce.number().int().min(1), + interval: z + .string() + .trim() + .refine((v) => parseDurationToMs(v) > 0, { + message: "Must be a duration like 10s, 1m, 500ms.", + }), + maxTokens: z.coerce.number().int().min(1), + }); + + const submission = schema.safeParse(Object.fromEntries(formData)); + if (!submission.success) { + return { ok: false, errors: submission.error.flatten().fieldErrors }; + } + + const built = RateLimitTokenBucketConfig.safeParse({ + type: "tokenBucket", + refillRate: submission.data.refillRate, + interval: submission.data.interval, + maxTokens: submission.data.maxTokens, + }); + if (!built.success) { + return { ok: false, errors: built.error.flatten().fieldErrors }; + } + + await domain.apply(orgId, built.data, adminUserId); + return { ok: true }; +} diff --git a/apps/webapp/app/components/admin/backOffice/RateLimitSection.tsx b/apps/webapp/app/components/admin/backOffice/RateLimitSection.tsx new file mode 100644 index 00000000000..1af8abab3d9 --- /dev/null +++ b/apps/webapp/app/components/admin/backOffice/RateLimitSection.tsx @@ -0,0 +1,306 @@ +import { Form } from "@remix-run/react"; +import { useEffect, useState } from "react"; +import { Button } from "~/components/primitives/Buttons"; +import { FormError } from "~/components/primitives/FormError"; +import { Header2 } from "~/components/primitives/Headers"; +import { Input } from "~/components/primitives/Input"; +import { Label } from "~/components/primitives/Label"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import * as Property from "~/components/primitives/PropertyTable"; + +// Local shape mirrors the server-side discriminated union just enough for this +// view. Decoupled from the .server module so the component stays client-safe. +// Duration fields are always suffixed strings — the server's DurationSchema +// rejects anything else, so non-string overrides fall back to the default. +export type RateLimitConfig = + | { + type: "tokenBucket"; + refillRate: number; + interval: string; + maxTokens: number; + } + | { + type: "fixedWindow" | "slidingWindow"; + window: string; + tokens: number; + }; + +export type EffectiveRateLimit = { + source: "override" | "default"; + config: RateLimitConfig; +}; + +export type FieldErrors = Record | null; + +// Props shared by every per-domain wrapper (Api / Batch / future ones). +export type RateLimitWrapperProps = { + effective: EffectiveRateLimit; + errors: FieldErrors; + savedJustNow: boolean; + isSubmitting: boolean; +}; + +type Props = RateLimitWrapperProps & { + title: string; + intent: string; +}; + +export function RateLimitSection({ + title, + intent, + effective, + errors, + savedJustNow, + isSubmitting, +}: Props) { + const hasFieldErrors = !!errors && Object.keys(errors).length > 0; + const fieldError = (field: string) => + errors && field in errors ? errors[field]?.[0] : undefined; + + const current = + effective.config.type === "tokenBucket" ? effective.config : null; + + const [isEditing, setIsEditing] = useState(hasFieldErrors); + const [refillRate, setRefillRate] = useState( + current ? String(current.refillRate) : "" + ); + const [intervalStr, setIntervalStr] = useState( + current ? String(current.interval) : "" + ); + const [maxTokens, setMaxTokens] = useState( + current ? String(current.maxTokens) : "" + ); + + useEffect(() => { + if (hasFieldErrors) setIsEditing(true); + }, [hasFieldErrors]); + + useEffect(() => { + if (savedJustNow && !hasFieldErrors) setIsEditing(false); + }, [savedJustNow, hasFieldErrors]); + + const currentDescription = current + ? describeRateLimit( + current.refillRate, + parseDurationToMs(String(current.interval)), + current.maxTokens + ) + : null; + + const previewDescription = describeRateLimit( + Number(refillRate) || 0, + parseDurationToMs(intervalStr), + Number(maxTokens) || 0 + ); + + const cancelEdit = () => { + setRefillRate(current ? String(current.refillRate) : ""); + setIntervalStr(current ? String(current.interval) : ""); + setMaxTokens(current ? String(current.maxTokens) : ""); + setIsEditing(false); + }; + + return ( +
+
+ {title} + {!isEditing && ( + + )} +
+ + {savedJustNow && ( +
+ + Saved. + +
+ )} + + + Status:{" "} + {effective.source === "override" + ? "Custom override active." + : "Using system default."} + + + {!isEditing ? ( + <> + + {effective.config.type === "tokenBucket" ? ( + currentDescription ? ( + <> + + Sustained rate + + {currentDescription.sustained} + + + + Burst allowance + {currentDescription.burst} + + + ) : ( + + + Invalid interval on the stored config. + + + ) + ) : ( + <> + + Type + {effective.config.type} + + + Window + {String(effective.config.window)} + + + Tokens + + {effective.config.tokens.toLocaleString()} + + + + )} + + {effective.config.type !== "tokenBucket" && ( + + This override is a {effective.config.type} limit and can't be + edited from this form. Change it in the database directly. + + )} + + ) : ( +
+ + +
+ + setRefillRate(e.target.value)} + required + /> + {fieldError("refillRate")} +
+ +
+ + setIntervalStr(e.target.value)} + required + /> + {fieldError("interval")} +
+ +
+ + setMaxTokens(e.target.value)} + required + /> + {fieldError("maxTokens")} +
+ + + {previewDescription + ? `Preview: ${previewDescription.sustained} · ${previewDescription.burst}.` + : "Preview: enter valid values to see the effective limit."} + + +
+ + +
+
+ )} +
+ ); +} + +export function parseDurationToMs(duration: string): number { + const match = duration.trim().match(/^(\d+)\s*(ms|s|m|h|d)$/); + if (!match) return 0; + const value = parseInt(match[1], 10); + switch (match[2]) { + case "ms": + return value; + case "s": + return value * 1_000; + case "m": + return value * 60_000; + case "h": + return value * 3_600_000; + case "d": + return value * 86_400_000; + default: + return 0; + } +} + +function describeRateLimit( + refillRate: number, + intervalMs: number, + maxTokens: number +): { sustained: string; burst: string } | null { + if (refillRate <= 0 || intervalMs <= 0 || maxTokens <= 0) return null; + const perMin = (refillRate * 60_000) / intervalMs; + let sustained: string; + if (perMin >= 1) { + sustained = `${formatRateValue(perMin)} requests per minute`; + } else { + const perHour = perMin * 60; + if (perHour >= 1) { + sustained = `${formatRateValue(perHour)} requests per hour`; + } else { + const perDay = perHour * 24; + sustained = `${formatRateValue(perDay)} requests per day`; + } + } + return { + sustained, + burst: `${maxTokens.toLocaleString()} request burst allowance`, + }; +} + +function formatRateValue(value: number): string { + return value >= 10 ? Math.round(value).toLocaleString() : value.toFixed(1); +} diff --git a/apps/webapp/app/components/code/AIQueryInput.tsx b/apps/webapp/app/components/code/AIQueryInput.tsx index 0775ec2c2a0..cd5e9db3bd8 100644 --- a/apps/webapp/app/components/code/AIQueryInput.tsx +++ b/apps/webapp/app/components/code/AIQueryInput.tsx @@ -1,25 +1,15 @@ import { CheckIcon, PencilSquareIcon, PlusIcon, XMarkIcon } from "@heroicons/react/20/solid"; import { AnimatePresence, motion } from "framer-motion"; -import { Suspense, lazy, useCallback, useEffect, useRef, useState } from "react"; +import { Suspense, useCallback, useEffect, useRef, useState } from "react"; import { Button } from "~/components/primitives/Buttons"; import { Spinner } from "~/components/primitives/Spinner"; +import { StreamdownRenderer } from "~/components/code/StreamdownRenderer"; import { useEnvironment } from "~/hooks/useEnvironment"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; import type { AITimeFilter } from "~/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/types"; import { cn } from "~/utils/cn"; -// Lazy load streamdown components to avoid SSR issues -const StreamdownRenderer = lazy(() => - import("streamdown").then((mod) => ({ - default: ({ children, isAnimating }: { children: string; isAnimating: boolean }) => ( - - {children} - - ), - })) -); - type StreamEventType = | { type: "thinking"; content: string } | { type: "tool_call"; tool: string; args: unknown } diff --git a/apps/webapp/app/components/code/StreamdownRenderer.tsx b/apps/webapp/app/components/code/StreamdownRenderer.tsx new file mode 100644 index 00000000000..996234ab180 --- /dev/null +++ b/apps/webapp/app/components/code/StreamdownRenderer.tsx @@ -0,0 +1,29 @@ +import { lazy } from "react"; +import type { CodeHighlighterPlugin } from "streamdown"; + +export const StreamdownRenderer = lazy(() => + Promise.all([import("streamdown"), import("@streamdown/code"), import("./shikiTheme")]).then( + ([{ Streamdown }, { createCodePlugin }, { triggerDarkTheme }]) => { + // Type assertion needed: @streamdown/code and streamdown resolve different shiki + // versions under pnpm, causing structurally-identical CodeHighlighterPlugin types + // to be considered incompatible (different BundledLanguage string unions). + const codePlugin = createCodePlugin({ + themes: [triggerDarkTheme, triggerDarkTheme], + }) as unknown as CodeHighlighterPlugin; + + return { + default: ({ + children, + isAnimating = false, + }: { + children: string; + isAnimating?: boolean; + }) => ( + + {children} + + ), + }; + } + ) +); diff --git a/apps/webapp/app/components/code/shikiTheme.ts b/apps/webapp/app/components/code/shikiTheme.ts new file mode 100644 index 00000000000..5d47155b979 --- /dev/null +++ b/apps/webapp/app/components/code/shikiTheme.ts @@ -0,0 +1,222 @@ +import type { ThemeRegistrationAny } from "streamdown"; + +// Custom Shiki theme matching the Trigger.dev VS Code dark theme. +// Colors taken directly from the VS Code extension's tokenColors. +export const triggerDarkTheme: ThemeRegistrationAny = { + name: "trigger-dark", + type: "dark", + colors: { + "editor.background": "#212327", + "editor.foreground": "#878C99", + "editorLineNumber.foreground": "#484c54", + }, + tokenColors: [ + // Control flow keywords: pink-purple + { + scope: [ + "keyword.control", + "keyword.operator.delete", + "keyword.other.using", + "keyword.other.operator", + "entity.name.operator", + ], + settings: { foreground: "#E888F8" }, + }, + // Storage type (const, let, var, function, class): purple + { + scope: "storage.type", + settings: { foreground: "#8271ED" }, + }, + // Storage modifiers (async, export, etc.): purple + { + scope: ["storage.modifier", "keyword.operator.noexcept"], + settings: { foreground: "#8271ED" }, + }, + // Keyword operator expressions (new, typeof, instanceof, etc.): purple + { + scope: [ + "keyword.operator.new", + "keyword.operator.expression", + "keyword.operator.cast", + "keyword.operator.sizeof", + "keyword.operator.instanceof", + "keyword.operator.logical.python", + "keyword.operator.wordlike", + ], + settings: { foreground: "#8271ED" }, + }, + // Types and namespaces: hot pink + { + scope: [ + "support.class", + "support.type", + "entity.name.type", + "entity.name.namespace", + "entity.name.scope-resolution", + "entity.name.class", + "entity.other.inherited-class", + ], + settings: { foreground: "#F770C6" }, + }, + // Functions: lime/yellow-green + { + scope: ["entity.name.function", "support.function"], + settings: { foreground: "#D9F07C" }, + }, + // Variables and parameters: light lavender + { + scope: [ + "variable", + "meta.definition.variable.name", + "support.variable", + "entity.name.variable", + "constant.other.placeholder", + ], + settings: { foreground: "#CCCBFF" }, + }, + // Constants and enums: medium purple + { + scope: ["variable.other.constant", "variable.other.enummember"], + settings: { foreground: "#9C9AF2" }, + }, + // this/self: purple-blue + { + scope: "variable.language", + settings: { foreground: "#9B99FF" }, + }, + // Object literal keys: medium purple-blue + { + scope: "meta.object-literal.key", + settings: { foreground: "#8B89FF" }, + }, + // Strings: sage green + { + scope: ["string", "meta.embedded.assembly"], + settings: { foreground: "#AFEC73" }, + }, + // String interpolation punctuation: blue-purple + { + scope: [ + "punctuation.definition.template-expression.begin", + "punctuation.definition.template-expression.end", + "punctuation.section.embedded", + ], + settings: { foreground: "#7A78EA" }, + }, + // Template expression reset + { + scope: "meta.template.expression", + settings: { foreground: "#d4d4d4" }, + }, + // Operators: gray (same as foreground) + { + scope: "keyword.operator", + settings: { foreground: "#878C99" }, + }, + // Comments: olive gray + { + scope: "comment", + settings: { foreground: "#6f736d" }, + }, + // Language constants (true, false, null, undefined): purple-blue + { + scope: "constant.language", + settings: { foreground: "#9B99FF" }, + }, + // Numeric constants: light green + { + scope: [ + "constant.numeric", + "keyword.operator.plus.exponent", + "keyword.operator.minus.exponent", + ], + settings: { foreground: "#b5cea8" }, + }, + // Regex: dark red + { + scope: "constant.regexp", + settings: { foreground: "#646695" }, + }, + // HTML/JSX tags: purple-blue + { + scope: "entity.name.tag", + settings: { foreground: "#9B99FF" }, + }, + // Tag brackets: dark gray + { + scope: "punctuation.definition.tag", + settings: { foreground: "#5F6570" }, + }, + // HTML/JSX attributes: light purple + { + scope: "entity.other.attribute-name", + settings: { foreground: "#C39EFF" }, + }, + // Escape characters: gold + { + scope: "constant.character.escape", + settings: { foreground: "#d7ba7d" }, + }, + // Regex string: dark red + { + scope: "string.regexp", + settings: { foreground: "#d16969" }, + }, + // Storage: purple-blue + { + scope: "storage", + settings: { foreground: "#9B99FF" }, + }, + // TS-specific: type casts, math/dom/json constants + { + scope: [ + "meta.type.cast.expr", + "meta.type.new.expr", + "support.constant.math", + "support.constant.dom", + "support.constant.json", + ], + settings: { foreground: "#9B99FF" }, + }, + // Markdown headings: purple-blue bold + { + scope: "markup.heading", + settings: { foreground: "#9B99FF", fontStyle: "bold" }, + }, + // Markup bold: purple-blue + { + scope: "markup.bold", + settings: { foreground: "#9B99FF", fontStyle: "bold" }, + }, + // Markup inline raw: sage green + { + scope: "markup.inline.raw", + settings: { foreground: "#AFEC73" }, + }, + // Markup inserted: light green + { + scope: "markup.inserted", + settings: { foreground: "#b5cea8" }, + }, + // Markup deleted: sage green + { + scope: "markup.deleted", + settings: { foreground: "#AFEC73" }, + }, + // Markup changed: purple-blue + { + scope: "markup.changed", + settings: { foreground: "#9B99FF" }, + }, + // Invalid: red + { + scope: "invalid", + settings: { foreground: "#f44747" }, + }, + // JSX text content + { + scope: ["meta.jsx.children"], + settings: { foreground: "#D7D9DD" }, + }, + ], +}; diff --git a/apps/webapp/app/components/environments/RegenerateApiKeyModal.tsx b/apps/webapp/app/components/environments/RegenerateApiKeyModal.tsx index 439fd892f91..52e1f499cbe 100644 --- a/apps/webapp/app/components/environments/RegenerateApiKeyModal.tsx +++ b/apps/webapp/app/components/environments/RegenerateApiKeyModal.tsx @@ -75,8 +75,9 @@ const RegenerateApiKeyModalContent = ({ return (
- {`Regenerating the keys for this environment will temporarily break any live tasks in the - ${title} environment until the new API keys are set in the relevant environment variables.`} + {`A new API key will be issued for the ${title} environment. The previous key stays valid + for 24 hours so you can roll out the new key in your environment variables without downtime. + After 24 hours, the previous key stops working.`} Select a Slack channel} heading="Filter channels…" - defaultValue={selectedSlackChannelValue} + value={selectedSlackChannelValue ?? ""} dropdownIcon variant="tertiary/medium" items={slack.channels} @@ -218,6 +218,15 @@ export function ConfigureErrorAlerts({ > {(matches) => ( <> + +
+ + No channel +
+
{matches?.map((channel) => ( >; autoPromote?: boolean; onAutoPromoteChange?: (value: boolean) => void; + /** The currently pinned TRIGGER_VERSION on Vercel production, if any. Shown under the + * Atomic deployments toggle so the user knows what version is set on Vercel right now. */ + currentTriggerVersion?: string | null; + /** True when the Vercel lookup for TRIGGER_VERSION failed. We show this so the user knows + * the pin status is unknown — distinct from "not set". */ + currentTriggerVersionFetchFailed?: boolean; + /** Hide the section-level master toggles for "Pull env vars" and "Discover new env vars". */ + hideSectionToggles?: boolean; }; export function BuildSettingsFields({ @@ -37,6 +45,9 @@ export function BuildSettingsFields({ disabledEnvSlugs, autoPromote, onAutoPromoteChange, + currentTriggerVersion, + currentTriggerVersionFetchFailed, + hideSectionToggles, }: BuildSettingsFieldsProps) { const isSlugDisabled = (slug: EnvSlug) => !!disabledEnvSlugs?.[slug]; const enabledSlugs = availableEnvSlugs.filter((s) => !isSlugDisabled(s)); @@ -48,7 +59,7 @@ export function BuildSettingsFields({
- {availableEnvSlugs.length > 1 && ( + {!hideSectionToggles && availableEnvSlugs.length > 1 && (
- {availableEnvSlugs.length > 1 && ( + {!hideSectionToggles && availableEnvSlugs.length > 1 && ( . + {currentTriggerVersion && ( + + Currently pinned to{" "} + {currentTriggerVersion} in Vercel + production. + + )} + {!currentTriggerVersion && currentTriggerVersionFetchFailed && ( + + Couldn't read{" "} + TRIGGER_VERSION from Vercel — + check the Vercel dashboard to confirm the production pin. + + )}
{/* Auto promotion — only visible when atomic deployments are on */} diff --git a/apps/webapp/app/components/integrations/VercelOnboardingModal.tsx b/apps/webapp/app/components/integrations/VercelOnboardingModal.tsx index 7ff99d7d448..21734c5c038 100644 --- a/apps/webapp/app/components/integrations/VercelOnboardingModal.tsx +++ b/apps/webapp/app/components/integrations/VercelOnboardingModal.tsx @@ -600,6 +600,20 @@ export function VercelOnboardingModal({ } }, [completeOnboardingFetcher.data, completeOnboardingFetcher.state, state]); + useEffect(() => { + if (state === "github-connection" && isGitHubConnectedForOnboarding) { + trackOnboarding("vercel onboarding github completed"); + if (fromMarketplaceContext && nextUrl) { + const validUrl = safeRedirectUrl(nextUrl); + if (validUrl) { + window.location.href = validUrl; + return; + } + } + setState("completed"); + } + }, [state, isGitHubConnectedForOnboarding, fromMarketplaceContext, nextUrl, trackOnboarding]); + useEffect(() => { if (state === "completed" && !hasTrackedCompletionRef.current) { hasTrackedCompletionRef.current = true; @@ -1114,6 +1128,7 @@ export function VercelOnboardingModal({ redirectParams.set("next", nextUrl); } const redirectUrlWithContext = `${baseSettingsPath}?${redirectParams.toString()}`; + const nextDirectRedirect = nextUrl ? safeRedirectUrl(nextUrl) : null; return gitHubAppInstallations.length === 0 ? (
@@ -1137,7 +1152,10 @@ export function VercelOnboardingModal({ organizationSlug={organizationSlug} projectSlug={projectSlug} environmentSlug={environmentSlug} - redirectUrl={redirectUrlWithContext} + redirectUrl={ + nextDirectRedirect ?? + (fromMarketplaceContext ? redirectUrlWithContext : baseSettingsPath) + } preventDismiss={fromMarketplaceContext} /> diff --git a/apps/webapp/app/components/logs/LogsLevelFilter.tsx b/apps/webapp/app/components/logs/LogsLevelFilter.tsx index 947bef88fcc..c61da4d3084 100644 --- a/apps/webapp/app/components/logs/LogsLevelFilter.tsx +++ b/apps/webapp/app/components/logs/LogsLevelFilter.tsx @@ -53,7 +53,7 @@ export function LogsLevelFilter() { const hasLevels = selectedLevels.length > 0 && selectedLevels.some((v) => v !== ""); if (hasLevels) { - return ; + return ; } return ( @@ -64,19 +64,16 @@ export function LogsLevelFilter() { variant="secondary/small" shortcut={shortcut} tooltipTitle="Filter by level" + className="pl-1.5" > - Level + Level } /> ); } -function LevelDropdown({ - trigger, -}: { - trigger: ReactNode; -}) { +function LevelDropdown({ trigger }: { trigger: ReactNode }) { const { values, replace } = useSearchParams(); const handleChange = (values: string[]) => { diff --git a/apps/webapp/app/components/logs/LogsRunIdFilter.tsx b/apps/webapp/app/components/logs/LogsRunIdFilter.tsx index 857e623d7c9..e23c39534a6 100644 --- a/apps/webapp/app/components/logs/LogsRunIdFilter.tsx +++ b/apps/webapp/app/components/logs/LogsRunIdFilter.tsx @@ -6,11 +6,7 @@ import { Button } from "~/components/primitives/Buttons"; import { FormError } from "~/components/primitives/FormError"; import { Input } from "~/components/primitives/Input"; import { Label } from "~/components/primitives/Label"; -import { - SelectPopover, - SelectProvider, - SelectTrigger, -} from "~/components/primitives/Select"; +import { SelectPopover, SelectProvider, SelectTrigger } from "~/components/primitives/Select"; import { useSearchParams } from "~/hooks/useSearchParam"; import { FilterMenuProvider } from "~/components/runs/v3/SharedFilters"; @@ -34,8 +30,9 @@ export function LogsRunIdFilter() { variant="secondary/small" shortcut={shortcut} tooltipTitle="Filter by run ID" + className="pl-1.5" > - Run ID + Run ID } clearSearchValue={() => setSearch("")} diff --git a/apps/webapp/app/components/logs/LogsTaskFilter.tsx b/apps/webapp/app/components/logs/LogsTaskFilter.tsx index fa64eff7bd3..6c15464cc49 100644 --- a/apps/webapp/app/components/logs/LogsTaskFilter.tsx +++ b/apps/webapp/app/components/logs/LogsTaskFilter.tsx @@ -4,6 +4,8 @@ import { useMemo } from "react"; import * as Ariakit from "@ariakit/react"; import { ComboBox, + SelectGroup, + SelectGroupLabel, SelectItem, SelectList, SelectPopover, @@ -21,6 +23,7 @@ const shortcut = { key: "t" }; type TaskOption = { slug: string; triggerSource: TaskTriggerSource; + isInLatestDeployment: boolean; }; interface LogsTaskFilterProps { @@ -42,8 +45,9 @@ export function LogsTaskFilter({ possibleTasks }: LogsTaskFilterProps) { variant="secondary/small" shortcut={shortcut} tooltipTitle="Filter by task" + className="pl-1.5" > - Tasks + Tasks } searchValue={search} @@ -126,17 +130,44 @@ function TasksDropdown({ > - {filtered.map((item, index) => ( - - } - > - {item.slug} - - ))} + {filtered + .filter((item) => item.isInLatestDeployment) + .map((item) => ( + + } + > + {item.slug} + + ))} + {filtered.some((item) => !item.isInLatestDeployment) && ( + + Archived + {filtered + .filter((item) => !item.isInLatestDeployment) + .map((item) => ( + + + + } + > + {item.slug} + + ))} + + )} diff --git a/apps/webapp/app/components/logs/LogsVersionFilter.tsx b/apps/webapp/app/components/logs/LogsVersionFilter.tsx index 4cc10545060..a5a83f6eda4 100644 --- a/apps/webapp/app/components/logs/LogsVersionFilter.tsx +++ b/apps/webapp/app/components/logs/LogsVersionFilter.tsx @@ -22,8 +22,9 @@ export function LogsVersionFilter() { variant="secondary/small" shortcut={shortcut} tooltipTitle="Filter by version" + className="pl-1.5" > - Versions + Versions } searchValue={search} diff --git a/apps/webapp/app/components/metrics/ModelsFilter.tsx b/apps/webapp/app/components/metrics/ModelsFilter.tsx index e641f826ae3..9b330834c84 100644 --- a/apps/webapp/app/components/metrics/ModelsFilter.tsx +++ b/apps/webapp/app/components/metrics/ModelsFilter.tsx @@ -16,7 +16,7 @@ import { tablerIcons } from "~/utils/tablerIcons"; import tablerSpritePath from "~/components/primitives/tabler-sprite.svg"; import { AnthropicLogoIcon } from "~/assets/icons/AnthropicLogoIcon"; -const shortcut = { key: "l" }; +const shortcut = { key: "m" }; export type ModelOption = { model: string; @@ -38,19 +38,19 @@ function modelIcon(system: string, model: string): ReactNode { // Special case: Anthropic uses a custom SVG icon if (provider === "anthropic") { - return ; + return ; } const iconName = `tabler-brand-${provider}`; if (tablerIcons.has(iconName)) { return ( - + ); } - return ; + return ; } export function ModelsFilter({ possibleModels }: ModelsFilterProps) { @@ -68,8 +68,9 @@ export function ModelsFilter({ possibleModels }: ModelsFilterProps) { variant="secondary/small" shortcut={shortcut} tooltipTitle="Filter by model" + className="pl-1.5" > - Models + Models } searchValue={search} @@ -147,7 +148,7 @@ function ModelsDropdown({ {filtered.map((m) => ( - + {m.model} ))} diff --git a/apps/webapp/app/components/metrics/OperationsFilter.tsx b/apps/webapp/app/components/metrics/OperationsFilter.tsx index 679332fc3c4..679e73ccb7f 100644 --- a/apps/webapp/app/components/metrics/OperationsFilter.tsx +++ b/apps/webapp/app/components/metrics/OperationsFilter.tsx @@ -13,7 +13,7 @@ import { import { useSearchParams } from "~/hooks/useSearchParam"; import { appliedSummary, FilterMenuProvider } from "~/components/runs/v3/SharedFilters"; -const shortcut = { key: "n" }; +const shortcut = { key: "o" }; interface OperationsFilterProps { possibleOperations: string[]; @@ -45,8 +45,9 @@ export function OperationsFilter({ possibleOperations }: OperationsFilterProps) variant="secondary/small" shortcut={shortcut} tooltipTitle="Filter by operation" + className="pl-1.5" > - Operations + Operations } searchValue={search} @@ -125,7 +126,7 @@ function OperationsDropdown({ {filtered.map((op) => ( - }> + }> {formatOperation(op)} ))} diff --git a/apps/webapp/app/components/metrics/PromptsFilter.tsx b/apps/webapp/app/components/metrics/PromptsFilter.tsx index a4ad8a00045..09a91f4f1fd 100644 --- a/apps/webapp/app/components/metrics/PromptsFilter.tsx +++ b/apps/webapp/app/components/metrics/PromptsFilter.tsx @@ -34,8 +34,9 @@ export function PromptsFilter({ possiblePrompts }: PromptsFilterProps) { variant="secondary/small" shortcut={shortcut} tooltipTitle="Filter by prompt" + className="pl-1.5" > - Prompts + Prompts } searchValue={search} @@ -113,7 +114,7 @@ function PromptsDropdown({ {filtered.map((slug) => ( - }> + }> {slug} ))} diff --git a/apps/webapp/app/components/metrics/ProvidersFilter.tsx b/apps/webapp/app/components/metrics/ProvidersFilter.tsx index fe018eefb98..d22bec8f70b 100644 --- a/apps/webapp/app/components/metrics/ProvidersFilter.tsx +++ b/apps/webapp/app/components/metrics/ProvidersFilter.tsx @@ -34,8 +34,9 @@ export function ProvidersFilter({ possibleProviders }: ProvidersFilterProps) { variant="secondary/small" shortcut={shortcut} tooltipTitle="Filter by provider" + className="pl-1.5" > - Providers + Providers } searchValue={search} @@ -111,7 +112,7 @@ function ProvidersDropdown({ {filtered.map((provider) => ( - }> + }> {provider} ))} diff --git a/apps/webapp/app/components/metrics/QueuesFilter.tsx b/apps/webapp/app/components/metrics/QueuesFilter.tsx index 87d7a612547..3da71e0c7d0 100644 --- a/apps/webapp/app/components/metrics/QueuesFilter.tsx +++ b/apps/webapp/app/components/metrics/QueuesFilter.tsx @@ -39,6 +39,7 @@ export function QueuesFilter() { variant="secondary/small" shortcut={shortcut} tooltipTitle="Filter by queue" + className="pl-1.5" > Queues @@ -190,6 +191,7 @@ function QueuesDropdown({ diff --git a/apps/webapp/app/components/metrics/ScopeFilter.tsx b/apps/webapp/app/components/metrics/ScopeFilter.tsx index 1bf6b685676..0cdaa4adb32 100644 --- a/apps/webapp/app/components/metrics/ScopeFilter.tsx +++ b/apps/webapp/app/components/metrics/ScopeFilter.tsx @@ -1,14 +1,17 @@ import * as Ariakit from "@ariakit/react"; -import { EnvironmentLabel } from "~/components/environments/EnvironmentLabel"; +import { FolderIcon } from "@heroicons/react/20/solid"; +import { useRef } from "react"; +import { EnvironmentIcon, EnvironmentLabel } from "~/components/environments/EnvironmentLabel"; import { AppliedFilter } from "~/components/primitives/AppliedFilter"; +import { Avatar } from "~/components/primitives/Avatar"; import { SelectItem, SelectPopover, SelectProvider } from "~/components/primitives/Select"; +import { ShortcutKey } from "~/components/primitives/ShortcutKey"; import { useEnvironment } from "~/hooks/useEnvironment"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; import { useSearchParams } from "~/hooks/useSearchParam"; +import { type ShortcutDefinition, useShortcutKeys } from "~/hooks/useShortcutKeys"; import type { QueryScope } from "~/services/queryService.server"; -import { CubeTransparentIcon, GlobeAltIcon } from "@heroicons/react/20/solid"; -import { IconListLetters } from "@tabler/icons-react"; const scopeOptions = [ { value: "environment", label: "Environment" }, @@ -16,29 +19,76 @@ const scopeOptions = [ { value: "organization", label: "Organization" }, ] as const; -export function ScopeFilter() { - const { value, replace } = useSearchParams(); - const scope = (value("scope") as QueryScope) ?? "environment"; +export type ScopeFilterProps = { + shortcut?: ShortcutDefinition; + /** Controlled value. If provided, the filter uses controlled mode and ignores search params. */ + value?: QueryScope; + /** Called when the user selects a new scope. Required when `value` is provided. */ + onValueChange?: (scope: QueryScope) => void; +}; + +export function ScopeFilter({ shortcut, value, onValueChange }: ScopeFilterProps = {}) { + const { value: paramValue, replace } = useSearchParams(); + const isControlled = value !== undefined; + const scope: QueryScope = isControlled + ? value + : ((paramValue("scope") as QueryScope) ?? "environment"); + const triggerRef = useRef(null); const handleChange = (newScope: string) => { + if (isControlled) { + onValueChange?.(newScope as QueryScope); + return; + } replace({ scope: newScope === "environment" ? undefined : newScope }); }; + useShortcutKeys({ + shortcut, + action: (e) => { + e.preventDefault(); + e.stopPropagation(); + triggerRef.current?.click(); + }, + disabled: !shortcut, + }); + return ( - }> - } - value={} - removable={false} - variant="secondary/small" - /> - + + } + /> + } + > + } + removable={false} + variant="secondary/small" + /> + + {shortcut && ( + +
+ Change scope + +
+
+ )} +
{scopeOptions.map((option) => ( - - + } + > + ))} @@ -46,19 +96,44 @@ export function ScopeFilter() { ); } -function ScopeItem({ scope }: { scope: QueryScope }) { +function ScopeIcon({ scope }: { scope: QueryScope }) { + const organization = useOrganization(); + const environment = useEnvironment(); + + switch (scope) { + case "organization": + return ; + case "project": + return ; + case "environment": + return ; + default: + return null; + } +} + +function ScopeLabel({ scope }: { scope: QueryScope }) { const organization = useOrganization(); const project = useProject(); const environment = useEnvironment(); switch (scope) { case "organization": - return `Org: ${organization.title}`; + return {organization.title}; case "project": - return `Project: ${project.name}`; + return {project.name}; case "environment": - return ; + return ; default: return scope; } } + +function ScopeItem({ scope }: { scope: QueryScope }) { + return ( + + + + + ); +} diff --git a/apps/webapp/app/components/navigation/DashboardDialogs.tsx b/apps/webapp/app/components/navigation/DashboardDialogs.tsx index f0cdd0406e0..6466038400c 100644 --- a/apps/webapp/app/components/navigation/DashboardDialogs.tsx +++ b/apps/webapp/app/components/navigation/DashboardDialogs.tsx @@ -4,6 +4,7 @@ import { motion } from "framer-motion"; import { ArrowUpCircleIcon } from "@heroicons/react/24/outline"; import { PlusIcon } from "@heroicons/react/20/solid"; import { useEffect, useState } from "react"; +import { type ShortcutDefinition } from "~/hooks/useShortcutKeys"; import { type MatchedOrganization, useDashboardLimits } from "~/hooks/useOrganizations"; import { useCurrentPlan } from "~/routes/_app.orgs.$organizationSlug/route"; import { Feedback } from "~/components/Feedback"; @@ -118,17 +119,19 @@ export function CreateDashboardPageButton({ organization, project, environment, + shortcut, }: { organization: { slug: string }; project: { slug: string }; environment: { slug: string }; + shortcut?: ShortcutDefinition; }) { const dashboard = useCreateDashboard({ organization, project, environment }); return ( - @@ -162,7 +165,6 @@ function CreateDashboardUpgradeDialog({ isFreePlan: boolean; organization: { slug: string }; }) { - if (isFreePlan) { return ( diff --git a/apps/webapp/app/components/navigation/NotificationCard.tsx b/apps/webapp/app/components/navigation/NotificationCard.tsx new file mode 100644 index 00000000000..8b03c27fff9 --- /dev/null +++ b/apps/webapp/app/components/navigation/NotificationCard.tsx @@ -0,0 +1,142 @@ +import { XMarkIcon } from "@heroicons/react/20/solid"; +import { useLayoutEffect, useRef, useState } from "react"; +import ReactMarkdown from "react-markdown"; +import { cn } from "~/utils/cn"; + +export function NotificationCard({ + title, + description, + image, + actionUrl, + onDismiss, + onCardClick, + onLinkClick, +}: { + title: string; + description: string; + image?: string; + actionUrl?: string; + onDismiss?: () => void; + onCardClick?: () => void; + onLinkClick?: () => void; +}) { + const [isExpanded, setIsExpanded] = useState(false); + const [isOverflowing, setIsOverflowing] = useState(false); + const descriptionRef = useRef(null); + + useLayoutEffect(() => { + const el = descriptionRef.current; + if (!el) return; + + const check = () => setIsOverflowing(el.scrollHeight - el.clientHeight > 1); + check(); + + const observer = new ResizeObserver(check); + observer.observe(el); + return () => observer.disconnect(); + }, [description]); + + const handleDismiss = (e: React.MouseEvent) => { + e.preventDefault(); + e.stopPropagation(); + onDismiss?.(); + }; + + const handleToggleExpand = (e: React.MouseEvent) => { + e.preventDefault(); + e.stopPropagation(); + setIsExpanded((v) => !v); + }; + + const safeActionUrl = sanitizeUrl(actionUrl); + const safeImage = sanitizeUrl(image); + + return ( + + ); +} + +function getMarkdownComponents(onLinkClick?: () => void) { + return { + p: ({ children }: { children?: React.ReactNode }) => ( +

{children}

+ ), + a: ({ href, children }: { href?: string; children?: React.ReactNode }) => ( +
{ + e.stopPropagation(); + onLinkClick?.(); + }} + > + {children} + + ), + strong: ({ children }: { children?: React.ReactNode }) => ( + {children} + ), + em: ({ children }: { children?: React.ReactNode }) => {children}, + code: ({ children }: { children?: React.ReactNode }) => ( + {children} + ), + }; +} + +const SAFE_URL_PROTOCOLS = new Set(["http:", "https:", "mailto:", "tel:"]); + +/** Sanitize a URL to prevent XSS via javascript: or data: URIs. Returns "" if invalid. */ +function sanitizeUrl(url: string | undefined): string { + if (!url) return ""; + try { + const parsed = new URL(url); + return SAFE_URL_PROTOCOLS.has(parsed.protocol) ? parsed.href : ""; + } catch { + return ""; + } +} diff --git a/apps/webapp/app/components/navigation/NotificationPanel.tsx b/apps/webapp/app/components/navigation/NotificationPanel.tsx index fdfbb2f8742..15af60fde35 100644 --- a/apps/webapp/app/components/navigation/NotificationPanel.tsx +++ b/apps/webapp/app/components/navigation/NotificationPanel.tsx @@ -1,13 +1,12 @@ -import { BellAlertIcon, ChevronRightIcon, XMarkIcon } from "@heroicons/react/20/solid"; +import { BellAlertIcon } from "@heroicons/react/20/solid"; import { useFetcher } from "@remix-run/react"; -import { motion } from "framer-motion"; -import { useCallback, useEffect, useLayoutEffect, useRef, useState } from "react"; -import ReactMarkdown from "react-markdown"; -import { Header3 } from "~/components/primitives/Headers"; +import { useCallback, useEffect, useRef, useState } from "react"; +import simplur from "simplur"; +import { Button } from "~/components/primitives/Buttons"; import { Popover, PopoverContent, PopoverTrigger } from "~/components/primitives/Popover"; import { SimpleTooltip } from "~/components/primitives/Tooltip"; import { usePlatformNotifications } from "~/routes/resources.platform-notifications"; -import { cn } from "~/utils/cn"; +import { NotificationCard } from "./NotificationCard"; type Notification = { id: string; @@ -102,211 +101,57 @@ export function NotificationPanel({ return null; } + const { title, description, image, actionUrl, dismissOnAction } = notification.payload.data; const card = ( handleDismiss(notification.id)} + onCardClick={() => { + fireClickBeacon(notification.id); + if (dismissOnAction) { + handleDismiss(notification.id); + } + }} onLinkClick={() => fireClickBeacon(notification.id)} /> ); return ( -
- {/* Expanded sidebar: show card directly */} - - {card} - - - {/* Collapsed sidebar: show bell icon that opens popover */} - +
+ {isCollapsed ? ( -
- - - {visibleNotifications.length} - -
- +
+ + + + + {visibleNotifications.length} + +
} - content="Notifications" + content={simplur`${visibleNotifications.length} notification[|s]`} side="right" sideOffset={8} disableHoverableContent - asChild /> - + ) : ( + card + )}
- + {card} ); } - -function NotificationCard({ - notification, - onDismiss, - onLinkClick, -}: { - notification: Notification; - onDismiss: (id: string) => void; - onLinkClick: () => void; -}) { - const { title, description, image, actionUrl, dismissOnAction } = notification.payload.data; - const [isExpanded, setIsExpanded] = useState(false); - const [isOverflowing, setIsOverflowing] = useState(false); - const descriptionRef = useRef(null); - - useLayoutEffect(() => { - const el = descriptionRef.current; - if (el) { - setIsOverflowing(el.scrollHeight > el.clientHeight); - } - }, [description]); - - const handleDismiss = (e: React.MouseEvent) => { - e.preventDefault(); - e.stopPropagation(); - onDismiss(notification.id); - }; - - const handleToggleExpand = (e: React.MouseEvent) => { - e.preventDefault(); - e.stopPropagation(); - setIsExpanded((v) => !v); - }; - - const handleCardClick = () => { - onLinkClick(); - if (dismissOnAction) { - onDismiss(notification.id); - } - }; - - const Wrapper = actionUrl ? "a" : "div"; - const wrapperProps = actionUrl - ? { - href: actionUrl, - target: "_blank" as const, - rel: "noopener noreferrer" as const, - onClick: handleCardClick, - } - : {}; - - return ( - - {/* Header: title + dismiss */} -
- - {title} - - -
- - {/* Body: description + chevron */} -
-
-
-
- {description} -
- {(isOverflowing || isExpanded) && ( - - )} -
- {actionUrl && ( -
- -
- )} -
- - {image && ( - - )} -
-
- ); -} - -/** Sanitize image URL to prevent XSS via javascript: or data: URIs. */ -function sanitizeImageUrl(url: string): string { - try { - const parsed = new URL(url); - if (parsed.protocol === "https:" || parsed.protocol === "http:") { - return parsed.href; - } - return ""; - } catch { - return ""; - } -} - -function getMarkdownComponents(onLinkClick: () => void) { - return { - p: ({ children }: { children?: React.ReactNode }) => ( -

{children}

- ), - a: ({ href, children }: { href?: string; children?: React.ReactNode }) => ( - { - e.stopPropagation(); - onLinkClick(); - }} - > - {children} - - ), - strong: ({ children }: { children?: React.ReactNode }) => ( - {children} - ), - em: ({ children }: { children?: React.ReactNode }) => {children}, - code: ({ children }: { children?: React.ReactNode }) => ( - {children} - ), - }; -} diff --git a/apps/webapp/app/components/navigation/OrganizationSettingsSideMenu.tsx b/apps/webapp/app/components/navigation/OrganizationSettingsSideMenu.tsx index c8cd131d962..3c17ff482ba 100644 --- a/apps/webapp/app/components/navigation/OrganizationSettingsSideMenu.tsx +++ b/apps/webapp/app/components/navigation/OrganizationSettingsSideMenu.tsx @@ -4,6 +4,7 @@ import { Cog8ToothIcon, CreditCardIcon, LockClosedIcon, + ShieldCheckIcon, UserGroupIcon, } from "@heroicons/react/20/solid"; import { ArrowLeftIcon } from "@heroicons/react/24/solid"; @@ -14,6 +15,7 @@ import { useFeatures } from "~/hooks/useFeatures"; import { type MatchedOrganization } from "~/hooks/useOrganizations"; import { cn } from "~/utils/cn"; import { + organizationRolesPath, organizationSettingsPath, organizationSlackIntegrationPath, organizationTeamPath, @@ -45,9 +47,11 @@ export type BuildInfo = { export function OrganizationSettingsSideMenu({ organization, buildInfo, + isUsingPlugin, }: { organization: MatchedOrganization; buildInfo: BuildInfo; + isUsingPlugin: boolean; }) { const { isManagedCloud } = useFeatures(); const featureFlags = useFeatureFlags(); @@ -128,6 +132,16 @@ export function OrganizationSettingsSideMenu({ to={organizationTeamPath(organization)} data-action="team" /> + {isUsingPlugin && ( + + )} + + + )} )} - {(user.admin || user.isImpersonating) && ( - - )} + {buttonContent} - + {tooltip} {shortcut && renderShortcutKey()} @@ -318,12 +318,12 @@ export function ButtonContent(props: ButtonContentPropsType) { type ButtonPropsType = Pick< JSX.IntrinsicElements["button"], - "type" | "disabled" | "onClick" | "name" | "value" | "form" | "autoFocus" + "type" | "disabled" | "onClick" | "name" | "value" | "form" | "autoFocus" | "aria-label" > & React.ComponentProps; export const Button = forwardRef( - ({ type, disabled, autoFocus, onClick, ...props }, ref) => { + ({ type, disabled, autoFocus, onClick, "aria-label": ariaLabel, ...props }, ref) => { const innerRef = useRef(null); useImperativeHandle(ref, () => innerRef.current as HTMLButtonElement); @@ -352,8 +352,13 @@ export const Button = forwardRef( ref={innerRef} form={props.form} autoFocus={autoFocus} + aria-label={ariaLabel} > - + ); @@ -362,13 +367,13 @@ export const Button = forwardRef( - + {buttonElement} - + {props.tooltip} {props.shortcut && !props.hideShortcutKey && ( - + )} diff --git a/apps/webapp/app/components/primitives/Input.tsx b/apps/webapp/app/components/primitives/Input.tsx index 3364e48bed2..15a7592c32f 100644 --- a/apps/webapp/app/components/primitives/Input.tsx +++ b/apps/webapp/app/components/primitives/Input.tsx @@ -67,6 +67,7 @@ const variants = { export type InputProps = React.InputHTMLAttributes & { variant?: keyof typeof variants; icon?: RenderIcon; + iconClassName?: string; accessory?: React.ReactNode; fullWidth?: boolean; containerClassName?: string; @@ -81,6 +82,7 @@ const Input = React.forwardRef( fullWidth = true, variant = "medium", icon, + iconClassName, containerClassName, ...props }, @@ -91,7 +93,7 @@ const Input = React.forwardRef( const variantContainerClassName = variants[variant].container; const inputClassName = variants[variant].input; - const iconClassName = variants[variant].iconSize; + const variantIconClassName = variants[variant].iconSize; return (
( > {icon && (
- +
)} + + + + ); +} diff --git a/apps/webapp/app/components/primitives/Resizable.tsx b/apps/webapp/app/components/primitives/Resizable.tsx index 2efaae4258e..b2964c8e958 100644 --- a/apps/webapp/app/components/primitives/Resizable.tsx +++ b/apps/webapp/app/components/primitives/Resizable.tsx @@ -1,7 +1,7 @@ "use client"; import React, { useRef } from "react"; -import { PanelGroup, Panel, PanelResizer } from "react-window-splitter"; +import { PanelGroup, Panel, PanelResizer } from "@window-splitter/react"; import { cn } from "~/utils/cn"; const ResizablePanelGroup = ({ className, ...props }: React.ComponentProps) => ( @@ -69,10 +69,14 @@ const ResizableHandle = ({ ); -const RESIZABLE_PANEL_ANIMATION = { - easing: "ease-in-out" as const, - duration: 200, -}; +// react-window-splitter drives the collapse animation through @react-spring/rafz, +// which has timing/interaction issues with Firefox that produce visual glitches +// (alternating frames, panels stuck at min, panelHasSpace invariant violations). +// Disable the animation on Firefox; it works correctly in Chromium and Safari. +const RESIZABLE_PANEL_ANIMATION = + typeof navigator !== "undefined" && /firefox/i.test(navigator.userAgent) + ? undefined + : ({ easing: "ease-in-out", duration: 300 } as const); const COLLAPSIBLE_HANDLE_CLASSNAME = "transition-opacity duration-200"; diff --git a/apps/webapp/app/components/primitives/SearchInput.tsx b/apps/webapp/app/components/primitives/SearchInput.tsx index 639a4d1a737..3c31cf788b7 100644 --- a/apps/webapp/app/components/primitives/SearchInput.tsx +++ b/apps/webapp/app/components/primitives/SearchInput.tsx @@ -1,13 +1,16 @@ import { MagnifyingGlassIcon, XMarkIcon } from "@heroicons/react/20/solid"; import { motion } from "framer-motion"; -import { useCallback, useEffect, useRef, useState } from "react"; +import { useEffect, useRef, useState } from "react"; import { Input } from "~/components/primitives/Input"; import { ShortcutKey } from "~/components/primitives/ShortcutKey"; +import { SimpleTooltip } from "~/components/primitives/Tooltip"; import { useSearchParams } from "~/hooks/useSearchParam"; import { cn } from "~/utils/cn"; export type SearchInputProps = { placeholder?: string; + /** The URL search param name to read/write. Defaults to "search". */ + paramName?: string; /** Additional URL params to reset when searching or clearing (e.g. pagination). Defaults to ["cursor", "direction"]. */ resetParams?: string[]; autoFocus?: boolean; @@ -15,6 +18,7 @@ export type SearchInputProps = { export function SearchInput({ placeholder = "Search logs…", + paramName = "search", resetParams = ["cursor", "direction"], autoFocus, }: SearchInputProps) { @@ -22,36 +26,31 @@ export function SearchInput({ const { value, replace, del } = useSearchParams(); - const initialSearch = value("search") ?? ""; + const initialSearch = value(paramName) ?? ""; const [text, setText] = useState(initialSearch); const [isFocused, setIsFocused] = useState(false); useEffect(() => { - const urlSearch = value("search") ?? ""; + const urlSearch = value(paramName) ?? ""; if (urlSearch !== text && !isFocused) { setText(urlSearch); } - }, [value, text, isFocused]); + }, [value, text, isFocused, paramName]); - const handleSubmit = useCallback(() => { + const handleSubmit = () => { const resetValues = Object.fromEntries(resetParams.map((p) => [p, undefined])); if (text.trim()) { - replace({ search: text.trim(), ...resetValues }); + replace({ [paramName]: text.trim(), ...resetValues }); } else { - del(["search", ...resetParams]); + del([paramName, ...resetParams]); } - }, [text, replace, del, resetParams]); + }; - const handleClear = useCallback( - (e: React.MouseEvent) => { - e.preventDefault(); - e.stopPropagation(); - setText(""); - del(["search", ...resetParams]); - }, - [del, resetParams] - ); + const handleClear = () => { + setText(""); + del([paramName, ...resetParams]); + }; return (
@@ -81,24 +80,42 @@ export function SearchInput({ handleSubmit(); } if (e.key === "Escape") { - e.currentTarget.blur(); + if (text.length > 0) { + e.stopPropagation(); + handleClear(); + } else { + e.currentTarget.blur(); + } } }} onFocus={() => setIsFocused(true)} onBlur={() => setIsFocused(false)} - icon={} + icon={} accessory={ text.length > 0 ? (
- + e.preventDefault()} + onClick={() => handleClear()} + className="flex size-4.5 items-center justify-center rounded-[2px] border border-text-dimmed/40 text-text-dimmed transition hover:bg-charcoal-600 hover:text-text-bright" + > + + + } + content={ +
+ Clear field + +
+ } + className="px-2 py-1.5 text-xs" + disableHoverableContent + />
) : undefined } diff --git a/apps/webapp/app/components/primitives/Select.tsx b/apps/webapp/app/components/primitives/Select.tsx index d3e4c866891..23c587621c4 100644 --- a/apps/webapp/app/components/primitives/Select.tsx +++ b/apps/webapp/app/components/primitives/Select.tsx @@ -104,6 +104,7 @@ export interface SelectProps open?: boolean; setOpen?: (open: boolean) => void; shortcut?: ShortcutDefinition; + tooltipTitle?: string; allowItemShortcuts?: boolean; clearSearchOnSelection?: boolean; dropdownIcon?: boolean | React.ReactNode; @@ -127,6 +128,7 @@ export function Select({ open, setOpen, shortcut, + tooltipTitle, allowItemShortcuts = true, disabled, clearSearchOnSelection = true, @@ -206,6 +208,7 @@ export function Select({ text={text} placeholder={placeholder} shortcut={shortcut} + tooltipTitle={tooltipTitle} disabled={disabled} dropdownIcon={dropdownIcon} {...props} @@ -354,7 +357,7 @@ export function SelectTrigger({ {showTooltip && (
@@ -460,7 +463,15 @@ export function SelectItem({ ...props }: SelectItemProps) { const combobox = Ariakit.useComboboxContext(); - const render = combobox ? : undefined; + // In a Combobox context we wrap the caller's render in ComboboxItem + // so combobox keyboard nav still works. Outside a Combobox we pass + // the render through verbatim — without this, callers like + // SelectLinkItem (which uses render to swap in a ) get their + // render prop silently dropped, which is why those rows looked + // clickable but didn't navigate. + const render = combobox + ? + : props.render; const ref = React.useRef(null); const select = Ariakit.useSelectContext(); const selectValue = select?.useState("value"); diff --git a/apps/webapp/app/components/primitives/Table.tsx b/apps/webapp/app/components/primitives/Table.tsx index 1a30bc82b8a..bd3a27868aa 100644 --- a/apps/webapp/app/components/primitives/Table.tsx +++ b/apps/webapp/app/components/primitives/Table.tsx @@ -65,6 +65,7 @@ type TableProps = { children: ReactNode; fullWidth?: boolean; showTopBorder?: boolean; + stickyHeader?: boolean; }; // Add TableContext @@ -79,6 +80,7 @@ export const Table = forwardRef { @@ -86,7 +88,8 @@ export const Table = forwardRef
type TableBodyProps = { className?: string; children?: ReactNode; + style?: React.CSSProperties; }; export const TableBody = forwardRef( - ({ className, children }, ref) => { + ({ className, children, style }, ref) => { return ( - + {children} ); diff --git a/apps/webapp/app/components/primitives/Tabs.tsx b/apps/webapp/app/components/primitives/Tabs.tsx index cbc5cf42752..0fb8a020a7b 100644 --- a/apps/webapp/app/components/primitives/Tabs.tsx +++ b/apps/webapp/app/components/primitives/Tabs.tsx @@ -11,6 +11,7 @@ export type TabsProps = { tabs: { label: string; to: string; + end?: boolean; }[]; className?: string; layoutId: string; @@ -21,7 +22,13 @@ export function Tabs({ tabs, className, layoutId, variant = "underline" }: TabsP return ( {tabs.map((tab, index) => ( - + {tab.label} ))} @@ -62,18 +69,20 @@ export function TabLink({ children, layoutId, variant = "underline", + end = true, }: { to: string; children: ReactNode; layoutId: string; variant?: Variants; + end?: boolean; }) { if (variant === "segmented") { return ( {({ isActive, isPending }) => { const active = isActive || isPending; @@ -110,7 +119,7 @@ export function TabLink({ {({ isActive, isPending }) => { const active = isActive || isPending; @@ -131,7 +140,7 @@ export function TabLink({ // underline variant (default) return ( - + {({ isActive, isPending }) => { return ( <> diff --git a/apps/webapp/app/components/primitives/Tooltip.tsx b/apps/webapp/app/components/primitives/Tooltip.tsx index c03492abcfd..92f95babce0 100644 --- a/apps/webapp/app/components/primitives/Tooltip.tsx +++ b/apps/webapp/app/components/primitives/Tooltip.tsx @@ -66,6 +66,7 @@ function SimpleTooltip({ sideOffset, open, onOpenChange, + delayDuration, }: { button: React.ReactNode; content: React.ReactNode; @@ -80,12 +81,13 @@ function SimpleTooltip({ sideOffset?: number; open?: boolean; onOpenChange?: (open: boolean) => void; + delayDuration?: number; }) { return ( - + ({ const virtualItems = virtualizer.getVirtualItems(); + // id -> node lookup so each virtual row resolves in O(1) instead of + // scanning the whole tree per row. + const nodesById = useMemo(() => { + const map = new Map>(); + for (const node of tree) { + map.set(node.id, node); + } + return map; + }, [tree]); + const scrollCallback = useCallback( (event: Event) => { if (!onScroll) return; @@ -99,7 +117,7 @@ export function TreeView({ }} > {virtualItems.map((virtualItem) => { - const node = tree.find((node) => node.id === virtualItem.key); + const node = nodesById.get(virtualItem.key as string); if (!node) return null; const state = nodes[node.id]; if (!state) return null; @@ -197,6 +215,16 @@ export function useTree({ concreteStateFromInput({ tree, selectedId, collapsedIds, filter }) ); + // id -> index lookup so getNodeProps resolves in O(1) instead of scanning + // the whole tree per rendered row. + const treeIndexById = useMemo(() => { + const map = new Map(); + tree.forEach((node, index) => { + map.set(node.id, index); + }); + return map; + }, [tree]); + //sync external selectedId prop into internal state useEffect(() => { const internalSelectedId = selectedIdFromState(state.nodes); @@ -497,7 +525,7 @@ export function useTree({ (id: string) => { const node = state.nodes[id]; if (!node) return {}; - const treeItemIndex = tree.findIndex((node) => node.id === id); + const treeItemIndex = treeIndexById.get(id) ?? -1; const treeItem = tree[treeItemIndex]; return { "aria-expanded": node.expanded, @@ -506,7 +534,7 @@ export function useTree({ tabIndex: node.selected ? -1 : undefined, }; }, - [state] + [state, treeIndexById] ); return { diff --git a/apps/webapp/app/components/query/QueryEditor.tsx b/apps/webapp/app/components/query/QueryEditor.tsx index bb9ed036267..e7098b94c59 100644 --- a/apps/webapp/app/components/query/QueryEditor.tsx +++ b/apps/webapp/app/components/query/QueryEditor.tsx @@ -37,6 +37,7 @@ import { type QueryWidgetData, } from "~/components/metrics/QueryWidget"; import { SaveToDashboardDialog } from "~/components/metrics/SaveToDashboardDialog"; +import { ScopeFilter } from "~/components/metrics/ScopeFilter"; import { Button, LinkButton } from "~/components/primitives/Buttons"; import { Callout } from "~/components/primitives/Callout"; import { @@ -89,12 +90,6 @@ function toISOString(value: Date | string): string { return value.toISOString(); } -const scopeOptions = [ - { value: "environment", label: "Environment" }, - { value: "project", label: "Project" }, - { value: "organization", label: "Organization" }, -] as const; - // Type for the query action response type QueryActionResponse = { error: string | null; @@ -277,7 +272,7 @@ const QueryEditorForm = forwardRef< -
+
{isAdmin && ( + } + content={ +
+ Clear field + +
+ } + className="px-2 py-1.5 text-xs" + disableHoverableContent + /> +
) : undefined } /> diff --git a/apps/webapp/app/components/runs/v3/BatchFilters.tsx b/apps/webapp/app/components/runs/v3/BatchFilters.tsx index e0d417ddec4..2c2f15b4584 100644 --- a/apps/webapp/app/components/runs/v3/BatchFilters.tsx +++ b/apps/webapp/app/components/runs/v3/BatchFilters.tsx @@ -8,25 +8,18 @@ import { } from "@heroicons/react/20/solid"; import { Form } from "@remix-run/react"; import type { BatchTaskRunStatus, RuntimeEnvironment } from "@trigger.dev/database"; -import { ListFilterIcon } from "lucide-react"; -import type { ReactNode } from "react"; -import { useCallback, useMemo, useState } from "react"; +import { type ReactNode, useCallback, useRef, useState } from "react"; import { z } from "zod"; import { AppliedFilter } from "~/components/primitives/AppliedFilter"; -import { FormError } from "~/components/primitives/FormError"; -import { Input } from "~/components/primitives/Input"; -import { Label } from "~/components/primitives/Label"; import { Paragraph } from "~/components/primitives/Paragraph"; import { - ComboBox, - SelectButtonItem, SelectItem, SelectList, SelectPopover, SelectProvider, - SelectTrigger, shortcutFromIndex, } from "~/components/primitives/Select"; +import { ShortcutKey } from "~/components/primitives/ShortcutKey"; import { Tooltip, TooltipContent, @@ -35,6 +28,7 @@ import { } from "~/components/primitives/Tooltip"; import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; import { useSearchParams } from "~/hooks/useSearchParam"; +import { useShortcutKeys } from "~/hooks/useShortcutKeys"; import { Button } from "../../primitives/Buttons"; import { allBatchStatuses, @@ -42,7 +36,13 @@ import { batchStatusTitle, descriptionForBatchStatus, } from "./BatchStatus"; -import { TimeFilter, appliedSummary, FilterMenuProvider } from "./SharedFilters"; +import { + TimeFilter, + appliedSummary, + FilterMenuProvider, + IdFilterDropdown, + type IdFilterDropdownProps, +} from "./SharedFilters"; import { StatusIcon } from "~/assets/icons/StatusIcon"; export const BatchStatus = z.enum(allBatchStatuses); @@ -69,133 +69,33 @@ type BatchFiltersProps = { export function BatchFilters(props: BatchFiltersProps) { const location = useOptimisticLocation(); const searchParams = new URLSearchParams(location.search); - const hasFilters = searchParams.has("statuses") || searchParams.has("id"); + const hasFilters = + searchParams.has("statuses") || + searchParams.has("id") || + searchParams.has("period") || + searchParams.has("from") || + searchParams.has("to"); return ( -
- - - +
+ + + {hasFilters && ( -
-
); } -const filterTypes = [ - { - name: "statuses", - title: "Status", - icon: ( -
-
-
- ), - }, - { name: "batch", title: "Batch ID", icon: }, -] as const; - -type FilterType = (typeof filterTypes)[number]["name"]; - -const shortcut = { key: "f" }; - -function FilterMenu(props: BatchFiltersProps) { - const [filterType, setFilterType] = useState(); - - const filterTrigger = ( - - -
- } - variant={"secondary/small"} - shortcut={shortcut} - tooltipTitle={"Filter batches"} - > - Filter - - ); - - return ( - setFilterType(undefined)}> - {(search, setSearch) => ( - setSearch("")} - trigger={filterTrigger} - filterType={filterType} - setFilterType={setFilterType} - {...props} - /> - )} - - ); -} - -function AppliedFilters() { - return ( - <> - - - - ); -} - -type MenuProps = { - searchValue: string; - clearSearchValue: () => void; - trigger: React.ReactNode; - filterType: FilterType | undefined; - setFilterType: (filterType: FilterType | undefined) => void; -} & BatchFiltersProps; - -function Menu(props: MenuProps) { - switch (props.filterType) { - case undefined: - return ; - case "statuses": - return props.setFilterType(undefined)} {...props} />; - case "batch": - return props.setFilterType(undefined)} {...props} />; - } -} - -function MainMenu({ searchValue, trigger, clearSearchValue, setFilterType }: MenuProps) { - const filtered = useMemo(() => { - return filterTypes.filter((item) => { - return item.title.toLowerCase().includes(searchValue.toLowerCase()); - }); - }, [searchValue]); - - return ( - - {trigger} - - - - {filtered.map((type, index) => ( - { - clearSearchValue(); - setFilterType(type.name); - }} - icon={type.icon} - shortcut={shortcutFromIndex(index, { shortcutsEnabled: true })} - > - {type.title} - - ))} - - - - ); -} - const statuses = allBatchStatuses.map((status) => ({ title: batchStatusTitle(status), value: status, @@ -219,10 +119,6 @@ function StatusDropdown({ replace({ statuses: values, cursor: undefined, direction: undefined }); }; - const filtered = useMemo(() => { - return statuses.filter((item) => item.title.toLowerCase().includes(searchValue.toLowerCase())); - }, [searchValue]); - return ( {trigger} @@ -237,9 +133,8 @@ function StatusDropdown({ return true; }} > - - {filtered.map((item, index) => ( + {statuses.map((item, index) => ( 0; + const triggerRef = useRef(null); + + useShortcutKeys({ + shortcut: statusShortcut, + action: (e) => { + e.preventDefault(); + e.stopPropagation(); + triggerRef.current?.click(); + }, + }); return ( {(search, setSearch) => ( }> - } - value={appliedSummary( - statuses.map((v) => batchStatusTitle(v as BatchTaskRunStatus)) + + } + /> + } + > + {hasStatuses ? ( + } + value={appliedSummary( + statuses.map((v) => batchStatusTitle(v as BatchTaskRunStatus)) + )} + onRemove={() => del(["statuses", "cursor", "direction"])} + variant="secondary/small" + className="pl-1" + /> + ) : ( +
+
+
+
+ Status +
)} - onRemove={() => del(["statuses", "cursor", "direction"])} - variant="secondary/small" - /> - + + +
+ Filter by status + +
+
+ } searchValue={search} clearSearchValue={() => setSearch("")} @@ -298,117 +231,83 @@ function AppliedStatusFilter() { ); } -function BatchIdDropdown({ - trigger, - clearSearchValue, - searchValue, - onClose, -}: { - trigger: ReactNode; - clearSearchValue: () => void; - searchValue: string; - onClose?: () => void; -}) { - const [open, setOpen] = useState(); - const { value, replace } = useSearchParams(); - const batchIdValue = value("id"); - - const [batchId, setBatchId] = useState(batchIdValue); - - const apply = useCallback(() => { - clearSearchValue(); - replace({ - cursor: undefined, - direction: undefined, - id: batchId === "" ? undefined : batchId?.toString(), - }); - - setOpen(false); - }, [batchId, replace]); - - let error: string | undefined = undefined; - if (batchId) { - if (!batchId.startsWith("batch_")) { - error = "Batch IDs start with 'batch_'"; - } else if (batchId.length !== 27 && batchId.length !== 31) { - error = "Batch IDs are 27/32 characters long"; - } - } +function validateBatchId(value: string): string | undefined { + if (!value.startsWith("batch_")) return "Batch IDs start with 'batch_'"; + if (value.length !== 27 && value.length !== 31) return "Batch IDs are 27 or 31 characters long"; +} +function BatchIdDropdown( + props: Omit +) { return ( - - {trigger} - { - if (onClose) { - onClose(); - return false; - } - - return true; - }} - className="max-w-[min(32ch,var(--popover-available-width))]" - > -
-
- - setBatchId(e.target.value)} - variant="small" - className="w-[29ch] font-mono" - spellCheck={false} - /> - {error ? {error} : null} -
-
- - -
-
-
-
+ ); } -function AppliedBatchIdFilter() { - const { value, del } = useSearchParams(); - - if (value("id") === undefined) { - return null; - } +const batchIdShortcut = { key: "b" }; +function PermanentBatchIdFilter() { + const { value, del } = useSearchParams(); const batchId = value("id"); + const hasBatchId = batchId !== undefined; + const triggerRef = useRef(null); + + useShortcutKeys({ + shortcut: batchIdShortcut, + action: (e) => { + e.preventDefault(); + e.stopPropagation(); + triggerRef.current?.click(); + }, + }); return ( {(search, setSearch) => ( }> - } - value={batchId} - onRemove={() => del(["id", "cursor", "direction"])} - variant="secondary/small" - /> - + + } + /> + } + > + {hasBatchId ? ( + } + value={batchId} + onRemove={() => del(["id", "cursor", "direction"])} + variant="secondary/small" + className="pl-1" + /> + ) : ( +
+ + Batch ID +
+ )} +
+ +
+ Filter by batch ID + +
+
+
} searchValue={search} clearSearchValue={() => setSearch("")} diff --git a/apps/webapp/app/components/runs/v3/CancelRunDialog.tsx b/apps/webapp/app/components/runs/v3/CancelRunDialog.tsx index facff746c5e..566bc787daa 100644 --- a/apps/webapp/app/components/runs/v3/CancelRunDialog.tsx +++ b/apps/webapp/app/components/runs/v3/CancelRunDialog.tsx @@ -10,9 +10,18 @@ import { SpinnerWhite } from "~/components/primitives/Spinner"; type CancelRunDialogProps = { runFriendlyId: string; redirectPath: string; + // Fired on submit so the parent can close the Radix Dialog without + // wrapping the submit button in `DialogClose` — that wrapper races + // submit (close fires first, unmounts the form, and the cancel POST + // never lands). Optional so existing call sites still type-check. + onCancelSubmitted?: () => void; }; -export function CancelRunDialog({ runFriendlyId, redirectPath }: CancelRunDialogProps) { +export function CancelRunDialog({ + runFriendlyId, + redirectPath, + onCancelSubmitted, +}: CancelRunDialogProps) { const navigation = useNavigation(); const formAction = `/resources/taskruns/${runFriendlyId}/cancel`; @@ -27,7 +36,11 @@ export function CancelRunDialog({ runFriendlyId, redirectPath }: CancelRunDialog +
onCancelSubmitted?.()} + >
@@ -373,21 +414,17 @@ export function RunsFilters(props: RunFiltersProps) { } const filterTypes = [ - { - name: "statuses", - title: "Status", - icon: , - }, - { name: "tasks", title: "Tasks", icon: }, { name: "tags", title: "Tags", icon: }, { name: "versions", title: "Versions", icon: }, { name: "queues", title: "Queues", icon: }, + { name: "regions", title: "Region", icon: }, { name: "machines", title: "Machines", icon: }, { name: "run", title: "Run ID", icon: }, { name: "batch", title: "Batch ID", icon: }, { name: "schedule", title: "Schedule ID", icon: }, { name: "bulk", title: "Bulk action", icon: }, { name: "error", title: "Error ID", icon: }, + { name: "source", title: "Source", icon: }, ] as const; type FilterType = (typeof filterTypes)[number]["name"]; @@ -401,15 +438,15 @@ function FilterMenu(props: RunFiltersProps) { - +
} variant={"secondary/small"} shortcut={shortcut} - tooltipTitle={"Filter runs"} - className="pr-0.5" + tooltipTitle={"More filters"} + className="pl-1 pr-2" > - <> + More filters ); @@ -429,20 +466,20 @@ function FilterMenu(props: RunFiltersProps) { ); } -function AppliedFilters({ possibleTasks, bulkActions }: RunFiltersProps) { +function AppliedFilters({ bulkActions }: RunFiltersProps) { return ( <> - - + + ); } @@ -459,16 +496,14 @@ function Menu(props: MenuProps) { switch (props.filterType) { case undefined: return ; - case "statuses": - return props.setFilterType(undefined)} {...props} />; - case "tasks": - return props.setFilterType(undefined)} {...props} />; case "bulk": return props.setFilterType(undefined)} {...props} />; case "tags": return props.setFilterType(undefined)} {...props} />; case "queues": return props.setFilterType(undefined)} {...props} />; + case "regions": + return props.setFilterType(undefined)} {...props} />; case "machines": return props.setFilterType(undefined)} {...props} />; case "run": @@ -481,15 +516,20 @@ function Menu(props: MenuProps) { return props.setFilterType(undefined)} {...props} />; case "error": return props.setFilterType(undefined)} {...props} />; + case "source": + return props.setFilterType(undefined)} {...props} />; } } function MainMenu({ searchValue, trigger, clearSearchValue, setFilterType }: MenuProps) { + const environment = useEnvironment(); + const showRegion = environment.type !== "DEVELOPMENT"; const filtered = useMemo(() => { return filterTypes.filter((item) => { + if (item.name === "regions" && !showRegion) return false; return item.title.toLowerCase().includes(searchValue.toLowerCase()); }); - }, [searchValue]); + }, [searchValue, showRegion]); return ( @@ -507,7 +547,7 @@ function MainMenu({ searchValue, trigger, clearSearchValue, setFilterType }: Men icon={type.icon} shortcut={shortcutFromIndex(index, { shortcutsEnabled: true })} > - {type.title} + {type.title} ))} @@ -585,28 +625,67 @@ function StatusDropdown({ ); } -function AppliedStatusFilter() { +const statusShortcut = { key: "s" }; + +function PermanentStatusFilter() { const { values, del } = useSearchParams(); const statuses = values("statuses"); - - if (statuses.length === 0 || statuses.every((v) => v === "")) { - return null; - } + const hasStatuses = statuses.length > 0 && !statuses.every((v) => v === ""); + const triggerRef = useRef(null); + + useShortcutKeys({ + shortcut: statusShortcut, + action: (e) => { + e.preventDefault(); + e.stopPropagation(); + triggerRef.current?.click(); + }, + }); return ( {(search, setSearch) => ( }> - runStatusTitle(v as TaskRunStatus)))} - onRemove={() => del(["statuses", "cursor", "direction"])} - variant="secondary/small" - /> - + + } + /> + } + > + {hasStatuses ? ( + runStatusTitle(v as TaskRunStatus)))} + onRemove={() => del(["statuses", "cursor", "direction"])} + variant="secondary/small" + className="pl-1" + /> + ) : ( +
+
+
+
+ Status +
+ )} + + +
+ Filter by status + +
+
+ } searchValue={search} clearSearchValue={() => setSearch("")} @@ -627,13 +706,27 @@ function TasksDropdown({ clearSearchValue: () => void; searchValue: string; onClose?: () => void; - possibleTasks: { slug: string; triggerSource: TaskTriggerSource }[]; + possibleTasks: { slug: string; triggerSource: TaskTriggerSource; isInLatestDeployment: boolean }[]; }) { const { values, replace } = useSearchParams(); - const handleChange = (values: string[]) => { + const handleChange = (newValues: string[]) => { clearSearchValue(); - replace({ tasks: values, cursor: undefined, direction: undefined }); + const previousTasks = values("tasks"); + const wasEmpty = previousTasks.length === 0 || previousTasks.every((v) => v === ""); + const isEmpty = newValues.length === 0 || newValues.every((v) => v === ""); + // empty -> tasks: temporarily force rootOnly off so child runs of the selected + // task are visible. tasks -> empty: drop rootOnly so the toggle reverts to the + // user's saved session preference. Neither writes to the cookie (see loader). + const transitioningToTasks = wasEmpty && !isEmpty; + const transitioningToNoTasks = !wasEmpty && isEmpty; + replace({ + tasks: newValues, + cursor: undefined, + direction: undefined, + ...(transitioningToTasks ? { rootOnly: "false" } : {}), + ...(transitioningToNoTasks ? { rootOnly: undefined } : {}), + }); }; const filtered = useMemo(() => { @@ -658,49 +751,114 @@ function TasksDropdown({ > - {filtered.map((item, index) => ( - - } - > - - - ))} + {filtered + .filter((item) => item.isInLatestDeployment) + .map((item) => ( + + } + className="text-text-bright" + > + + + ))} + {filtered.some((item) => !item.isInLatestDeployment) && ( + + Archived + {filtered + .filter((item) => !item.isInLatestDeployment) + .map((item) => ( + + + + } + className="text-text-bright" + > + + + ))} + + )} ); } -function AppliedTaskFilter({ possibleTasks }: Pick) { - const { values, del } = useSearchParams(); +const tasksShortcut = { key: "t" }; - if (values("tasks").length === 0 || values("tasks").every((v) => v === "")) { - return null; - } +function PermanentTasksFilter({ possibleTasks }: Pick) { + const { values, del } = useSearchParams(); + const tasks = values("tasks"); + const hasTasks = tasks.length > 0 && !tasks.every((v) => v === ""); + const triggerRef = useRef(null); + + useShortcutKeys({ + shortcut: tasksShortcut, + action: (e) => { + e.preventDefault(); + e.stopPropagation(); + triggerRef.current?.click(); + }, + }); return ( {(search, setSearch) => ( }> - { - const task = possibleTasks.find((task) => task.slug === v); - return task ? task.slug : v; - }) + + } + /> + } + > + {hasTasks ? ( + { + const task = possibleTasks.find((task) => task.slug === v); + return task ? task.slug : v; + }) + )} + onRemove={() => del(["tasks", "cursor", "direction", "rootOnly"])} + variant="secondary/small" + className="pl-1" + /> + ) : ( +
+ {filterIcon("tasks")} + Tasks +
)} - onRemove={() => del(["tasks", "cursor", "direction"])} - variant="secondary/small" - /> - +
+ +
+ Filter by task + +
+
+
} searchValue={search} clearSearchValue={() => setSearch("")} @@ -895,15 +1053,17 @@ function TagsDropdown({ return true; }} > - ( -
- - {fetcher.state === "loading" && } -
- )} - /> + {(filtered.length > 0 || fetcher.state === "loading" || searchValue.length > 0) && ( + ( +
+ + {fetcher.state === "loading" && } +
+ )} + /> + )} {filtered.length > 0 ? filtered.map((tag, index) => ( @@ -1075,6 +1235,7 @@ function QueuesDropdown({ ) } + className="text-text-bright" > {queue.name} @@ -1121,6 +1282,138 @@ function AppliedQueuesFilter() { ); } +function RegionsDropdown({ + trigger, + clearSearchValue, + searchValue, + onClose, +}: { + trigger: ReactNode; + clearSearchValue: () => void; + searchValue: string; + onClose?: () => void; +}) { + const { values, replace } = useSearchParams(); + const regions = useRegions(); + + const handleChange = (values: string[]) => { + clearSearchValue(); + replace({ + regions: values.length > 0 ? values : undefined, + cursor: undefined, + direction: undefined, + }); + }; + + const selected = values("regions").filter((v) => v !== ""); + + const filtered = useMemo(() => { + type RegionItem = { masterQueue: string; name: string; location?: string }; + const items: RegionItem[] = []; + + for (const masterQueue of selected) { + const known = regions.find((r) => r.masterQueue === masterQueue); + if (!known) { + items.push({ masterQueue, name: masterQueue }); + } + } + + for (const region of regions) { + if (!items.some((i) => i.masterQueue === region.masterQueue)) { + items.push({ + masterQueue: region.masterQueue, + name: region.name, + location: region.location, + }); + } + } + + return matchSorter(items, searchValue, { keys: ["name", "masterQueue"] }); + }, [searchValue, regions, selected.join(",")]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + > + ( +
+ +
+ )} + /> + + {filtered.length > 0 + ? filtered.map((region) => ( + + + + )) + : null} + {filtered.length === 0 && No regions found} + +
+
+ ); +} + +function AppliedRegionsFilter() { + const { values, del } = useSearchParams(); + const environment = useEnvironment(); + const knownRegions = useRegions(); + + const regions = values("regions"); + + if (environment.type === "DEVELOPMENT") { + return null; + } + + if (regions.length === 0 || regions.every((v) => v === "")) { + return null; + } + + const labels = regions.map((mq) => { + const match = knownRegions.find((r) => r.masterQueue === mq); + return match?.name ?? mq; + }); + + return ( + + {(search, setSearch) => ( + }> + del(["regions", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + function MachinesDropdown({ trigger, clearSearchValue, @@ -1160,15 +1453,15 @@ function MachinesDropdown({ return true; }} > - {filtered.map((item, index) => ( - + ))} @@ -1316,7 +1609,12 @@ export function VersionsDropdown({ {filtered.length > 0 ? filtered.map((version) => ( - + } + className="text-text-bright" + > {version.version} {version.isCurrent ? Current : null} @@ -1365,118 +1663,67 @@ function AppliedVersionsFilter() { ); } +const rootOnlyShortcut = { key: "o" }; + function RootOnlyToggle({ defaultValue }: { defaultValue: boolean }) { - const { value, values, replace } = useSearchParams(); + const { value, replace } = useSearchParams(); const searchValue = value("rootOnly"); const rootOnly = searchValue !== undefined ? searchValue === "true" : defaultValue; const batchId = value("batchId"); const runId = value("runId"); const scheduleId = value("scheduleId"); - const tasks = values("tasks"); - const disabled = !!batchId || !!runId || !!scheduleId || tasks.length > 0; + const disabled = !!batchId || !!runId || !!scheduleId; return ( - { - replace({ - rootOnly: checked ? "true" : "false", - }); - }} - /> + + }> + { + replace({ + rootOnly: checked ? "true" : "false", + cursor: undefined, + direction: undefined, + }); + }} + /> + + +
+ Toggle root only + +
+
+
); } -function RunIdDropdown({ - trigger, - clearSearchValue, - searchValue, - onClose, -}: { - trigger: ReactNode; - clearSearchValue: () => void; - searchValue: string; - onClose?: () => void; -}) { - const [open, setOpen] = useState(); - const { value, replace } = useSearchParams(); - const runIdValue = value("runId"); - - const [runId, setRunId] = useState(runIdValue); - - const apply = useCallback(() => { - clearSearchValue(); - replace({ - cursor: undefined, - direction: undefined, - runId: runId === "" ? undefined : runId?.toString(), - }); - - setOpen(false); - }, [runId, replace]); - - let error: string | undefined = undefined; - if (runId) { - if (!runId.startsWith("run_")) { - error = "Run IDs start with 'run_'"; - } else if (runId.length !== 25 && runId.length !== 29) { - error = "Run IDs are 25/30 characters long"; - } - } +function validateRunId(value: string): string | undefined { + if (!value.startsWith("run_")) return "Run IDs start with 'run_'"; + if (value.length !== 25 && value.length !== 29) return "Run IDs are 25 or 29 characters long"; +} +function RunIdDropdown( + props: Omit< + IdFilterDropdownProps, + "label" | "placeholder" | "paramKey" | "validate" | "inputWidth" + > +) { return ( - - {trigger} - { - if (onClose) { - onClose(); - return false; - } - - return true; - }} - className="max-w-[min(32ch,var(--popover-available-width))]" - > -
-
- - setRunId(e.target.value)} - variant="small" - className="w-[27ch] font-mono" - spellCheck={false} - /> - {error ? {error} : null} -
-
- - -
-
-
-
+ ); } @@ -1512,91 +1759,22 @@ function AppliedRunIdFilter() { ); } -function BatchIdDropdown({ - trigger, - clearSearchValue, - searchValue, - onClose, -}: { - trigger: ReactNode; - clearSearchValue: () => void; - searchValue: string; - onClose?: () => void; -}) { - const [open, setOpen] = useState(); - const { value, replace } = useSearchParams(); - const batchIdValue = value("batchId"); - - const [batchId, setBatchId] = useState(batchIdValue); - - const apply = useCallback(() => { - clearSearchValue(); - replace({ - cursor: undefined, - direction: undefined, - batchId: batchId === "" ? undefined : batchId?.toString(), - }); - - setOpen(false); - }, [batchId, replace]); - - let error: string | undefined = undefined; - if (batchId) { - if (!batchId.startsWith("batch_")) { - error = "Batch IDs start with 'batch_'"; - } else if (batchId.length !== 27 && batchId.length !== 31) { - error = "Batch IDs are 27 or 31 characters long"; - } - } +function validateBatchId(value: string): string | undefined { + if (!value.startsWith("batch_")) return "Batch IDs start with 'batch_'"; + if (value.length !== 27 && value.length !== 31) return "Batch IDs are 27 or 31 characters long"; +} +function BatchIdDropdown( + props: Omit +) { return ( - - {trigger} - { - if (onClose) { - onClose(); - return false; - } - - return true; - }} - className="max-w-[min(32ch,var(--popover-available-width))]" - > -
-
- - setBatchId(e.target.value)} - variant="small" - className="w-[29ch] font-mono" - spellCheck={false} - /> - {error ? {error} : null} -
-
- - -
-
-
-
+ ); } @@ -1632,91 +1810,22 @@ function AppliedBatchIdFilter() { ); } -function ScheduleIdDropdown({ - trigger, - clearSearchValue, - searchValue, - onClose, -}: { - trigger: ReactNode; - clearSearchValue: () => void; - searchValue: string; - onClose?: () => void; -}) { - const [open, setOpen] = useState(); - const { value, replace } = useSearchParams(); - const scheduleIdValue = value("scheduleId"); - - const [scheduleId, setScheduleId] = useState(scheduleIdValue); - - const apply = useCallback(() => { - clearSearchValue(); - replace({ - cursor: undefined, - direction: undefined, - scheduleId: scheduleId === "" ? undefined : scheduleId?.toString(), - }); - - setOpen(false); - }, [scheduleId, replace]); - - let error: string | undefined = undefined; - if (scheduleId) { - if (!scheduleId.startsWith("sched")) { - error = "Schedule IDs start with 'sched_'"; - } else if (scheduleId.length !== 27) { - error = "Schedule IDs are 27 characters long"; - } - } +function validateScheduleId(value: string): string | undefined { + if (!value.startsWith("sched_")) return "Schedule IDs start with 'sched_'"; + if (value.length !== 27) return "Schedule IDs are 27 characters long"; +} +function ScheduleIdDropdown( + props: Omit +) { return ( - - {trigger} - { - if (onClose) { - onClose(); - return false; - } - - return true; - }} - className="max-w-[min(32ch,var(--popover-available-width))]" - > -
-
- - setScheduleId(e.target.value)} - variant="small" - className="w-[29ch] font-mono" - spellCheck={false} - /> - {error ? {error} : null} -
-
- - -
-
-
-
+ ); } @@ -1752,7 +1861,63 @@ function AppliedScheduleIdFilter() { ); } -function ErrorIdDropdown({ +function validateErrorId(value: string): string | undefined { + if (!value.startsWith("error_")) return "Error IDs start with 'error_'"; +} + +function ErrorIdDropdown( + props: Omit +) { + return ( + + ); +} + +function AppliedErrorIdFilter() { + const { value, del } = useSearchParams(); + + if (value("errorId") === undefined) { + return null; + } + + const errorId = value("errorId"); + + return ( + + {(search, setSearch) => ( + }> + del(["errorId", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + +const sourceOptions: { value: TaskTriggerSource; title: string }[] = [ + { value: "STANDARD", title: "Standard" }, + { value: "SCHEDULED", title: "Scheduled" }, + { value: "AGENT", title: "Agent" }, +]; + +function SourceDropdown({ trigger, clearSearchValue, searchValue, @@ -1763,101 +1928,75 @@ function ErrorIdDropdown({ searchValue: string; onClose?: () => void; }) { - const [open, setOpen] = useState(); - const { value, replace } = useSearchParams(); - const errorIdValue = value("errorId"); - - const [errorId, setErrorId] = useState(errorIdValue); + const { values, replace } = useSearchParams(); - const apply = useCallback(() => { + const handleChange = (values: string[]) => { clearSearchValue(); - replace({ - cursor: undefined, - direction: undefined, - errorId: errorId === "" ? undefined : errorId?.toString(), - }); - - setOpen(false); - }, [errorId, replace]); + replace({ sources: values, cursor: undefined, direction: undefined }); + }; - let error: string | undefined = undefined; - if (errorId) { - if (!errorId.startsWith("error_")) { - error = "Error IDs start with 'error_'"; - } - } + const filtered = useMemo(() => { + return sourceOptions.filter((item) => + item.title.toLowerCase().includes(searchValue.toLowerCase()) + ); + }, [searchValue]); return ( - + {trigger} { if (onClose) { onClose(); return false; } - return true; }} - className="max-w-[min(32ch,var(--popover-available-width))]" > -
-
- - setErrorId(e.target.value)} - variant="small" - className="w-[29ch] font-mono" - spellCheck={false} - /> - {error ? {error} : null} -
-
- - -
-
+ {item.title} +
+ ))} +
); } -function AppliedErrorIdFilter() { - const { value, del } = useSearchParams(); +function AppliedSourceFilter() { + const { values, del } = useSearchParams(); + const sources = values("sources"); - if (value("errorId") === undefined) { + if (sources.length === 0 || sources.every((v) => v === "")) { return null; } - const errorId = value("errorId"); - return ( {(search, setSearch) => ( - }> del(["errorId", "cursor", "direction"])} + label="Source" + icon={} + value={appliedSummary( + sources.map( + (v) => sourceOptions.find((o) => o.value === v)?.title ?? v + ) + )} + onRemove={() => del(["sources", "cursor", "direction"])} variant="secondary/small" /> diff --git a/apps/webapp/app/components/runs/v3/RunStatusCellTooltip.tsx b/apps/webapp/app/components/runs/v3/RunStatusCellTooltip.tsx new file mode 100644 index 00000000000..fad6b15bcf7 --- /dev/null +++ b/apps/webapp/app/components/runs/v3/RunStatusCellTooltip.tsx @@ -0,0 +1,249 @@ +import { useFetcher } from "@remix-run/react"; +import { AnimatePresence, motion } from "framer-motion"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { SimpleTooltip } from "~/components/primitives/Tooltip"; +import type { NextRunListItem } from "~/presenters/v3/NextRunListPresenter.server"; +import type { loader as childStatusesLoader } from "~/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.children-statuses"; +import { isFinalRunStatus } from "~/v3/taskStatus"; +import { + allTaskRunStatuses, + descriptionForTaskRunStatus, + TaskRunStatusCombo, +} from "./TaskRunStatus"; + +const TOOLTIP_OPEN_DELAY_MS = 400; +const TOOLTIP_POLL_INTERVAL_MS = 3000; + +type ChildStatusEntry = { status: NextRunListItem["status"]; count: number }; + +// Compare status/count pairs so unchanged polling responses don't +// re-render or re-animate the tooltip. +function childStatusesKey(statuses: ChildStatusEntry[]) { + return [...statuses] + .sort((a, b) => a.status.localeCompare(b.status)) + .map((entry) => `${entry.status}:${entry.count}`) + .join("|"); +} + +function areChildStatusesEqual(previous: ChildStatusEntry[] | undefined, next: ChildStatusEntry[]) { + if (previous === undefined) return false; + return childStatusesKey(previous) === childStatusesKey(next); +} + +function hasActiveChildStatuses(statuses: ChildStatusEntry[] | undefined) { + if (statuses === undefined) return false; + + return statuses.some((entry) => entry.count > 0 && !isFinalRunStatus(entry.status)); +} + +function shouldPollWhileTooltipOpen( + statuses: ChildStatusEntry[] | undefined, + rootHasFinished: boolean +) { + if (statuses === undefined) return true; + // Empty child statuses while the root is still running can mean + // children have not been created yet, so keep polling. + if (statuses.length === 0) return !rootHasFinished; + + // All current children may be final while the root is still running — more + // dependents can still be created. + return hasActiveChildStatuses(statuses) || !rootHasFinished; +} + +function ChildStatusBreakdown({ + orderedChildStatuses, +}: { + orderedChildStatuses: { status: NextRunListItem["status"]; count: number }[]; +}) { + return ( +
+

Child run statuses

+ + {orderedChildStatuses.map((entry) => ( + + + + {entry.count} + + + ))} + +
+ ); +} + +function useChildRunStatusesTooltip({ + friendlyId, + hasFinished, + childrenStatusesBasePath, +}: { + friendlyId: string; + hasFinished: boolean; + childrenStatusesBasePath: string; +}) { + const fetcher = useFetcher({ + key: `child-statuses-${friendlyId}`, + }); + const fetcherStateRef = useRef(fetcher.state); + fetcherStateRef.current = fetcher.state; + + const [childStatuses, setChildStatuses] = useState(); + const isOpenRef = useRef(false); + const pollIntervalRef = useRef>(); + const prevHasFinishedRef = useRef(hasFinished); + + const childrenStatusesUrl = useMemo( + () => `${childrenStatusesBasePath}/children-statuses?runIds=${encodeURIComponent(friendlyId)}`, + [childrenStatusesBasePath, friendlyId] + ); + + const loadChildStatuses = useCallback(() => { + if (fetcherStateRef.current !== "idle") return; + fetcher.load(childrenStatusesUrl); + }, [childrenStatusesUrl, fetcher]); + + // Keep the latest loader callback available to the polling interval + // without recreating the interval on every render. + const loadChildStatusesRef = useRef(loadChildStatuses); + loadChildStatusesRef.current = loadChildStatuses; + + const stopPolling = useCallback(() => { + if (pollIntervalRef.current) { + clearInterval(pollIntervalRef.current); + pollIntervalRef.current = undefined; + } + }, []); + + const startPolling = useCallback(() => { + if (pollIntervalRef.current) return; + + pollIntervalRef.current = setInterval(() => { + if (document.visibilityState !== "visible") return; + loadChildStatusesRef.current(); + }, TOOLTIP_POLL_INTERVAL_MS); + }, []); + + useEffect(() => { + if (!fetcher.data?.runs) return; + + const entry = fetcher.data.runs.find((run) => run.friendlyId === friendlyId); + if (!entry) return; + + setChildStatuses((previous) => + areChildStatusesEqual(previous, entry.statuses) ? previous : entry.statuses + ); + + if (isOpenRef.current && !shouldPollWhileTooltipOpen(entry.statuses, hasFinished)) { + stopPolling(); + } + }, [fetcher.data, friendlyId, hasFinished, stopPolling]); + + const onOpenChange = useCallback( + (open: boolean) => { + isOpenRef.current = open; + if (open) { + loadChildStatuses(); + startPolling(); + } else { + stopPolling(); + } + }, + [loadChildStatuses, startPolling, stopPolling] + ); + + useEffect(() => { + prevHasFinishedRef.current = hasFinished; + stopPolling(); + setChildStatuses(undefined); + if (isOpenRef.current) { + loadChildStatuses(); + startPolling(); + } + // Only reset when the hovered run changes, not when hasFinished toggles. + // eslint-disable-next-line react-hooks/exhaustive-deps -- friendlyId + }, [friendlyId]); + + useEffect(() => { + if (!isOpenRef.current) return; + if (prevHasFinishedRef.current === hasFinished) return; + + prevHasFinishedRef.current = hasFinished; + loadChildStatuses(); + }, [hasFinished, loadChildStatuses]); + + useEffect(() => () => stopPolling(), [stopPolling]); + + return { + childStatuses, + onOpenChange, + }; +} + +export function RunStatusCellTooltip({ + friendlyId, + status, + hasFinished, + childrenStatusesBasePath, +}: { + friendlyId: string; + status: NextRunListItem["status"]; + hasFinished: boolean; + childrenStatusesBasePath: string; +}) { + const { childStatuses, onOpenChange } = useChildRunStatusesTooltip({ + friendlyId, + hasFinished, + childrenStatusesBasePath, + }); + + const orderedChildStatuses = useMemo(() => { + const childStatusesMap = new Map( + (childStatuses ?? []).map((entry) => [entry.status, entry.count]) + ); + + return allTaskRunStatuses + .map((s) => ({ + status: s, + count: childStatusesMap.get(s) ?? 0, + })) + .filter((entry) => entry.count > 0); + }, [childStatuses]); + + const hasChildStatuses = orderedChildStatuses.length > 0; + + return ( + + ) : ( + descriptionForTaskRunStatus(status) + ) + } + disableHoverableContent + button={ + + + + } + /> + ); +} diff --git a/apps/webapp/app/components/runs/v3/ScheduleFilters.tsx b/apps/webapp/app/components/runs/v3/ScheduleFilters.tsx index 3a30e0cb37e..e0fb819c8d1 100644 --- a/apps/webapp/app/components/runs/v3/ScheduleFilters.tsx +++ b/apps/webapp/app/components/runs/v3/ScheduleFilters.tsx @@ -1,21 +1,22 @@ -import { MagnifyingGlassIcon, XMarkIcon } from "@heroicons/react/20/solid"; +import * as Ariakit from "@ariakit/react"; +import { ClockIcon, XMarkIcon } from "@heroicons/react/20/solid"; import { useNavigate } from "@remix-run/react"; -import { useCallback } from "react"; +import { useCallback, useRef } from "react"; import { z } from "zod"; -import { Input } from "~/components/primitives/Input"; -import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; -import { useThrottle } from "~/hooks/useThrottle"; -import { Button } from "../../primitives/Buttons"; -import { Paragraph } from "../../primitives/Paragraph"; +import { AppliedFilter } from "~/components/primitives/AppliedFilter"; +import { SearchInput } from "~/components/primitives/SearchInput"; import { - Select, - SelectContent, - SelectGroup, SelectItem, - SelectTrigger, - SelectValue, -} from "../../primitives/SimpleSelect"; -import { ScheduleTypeCombo } from "./ScheduleType"; + SelectList, + SelectPopover, + SelectProvider, +} from "~/components/primitives/Select"; +import { ShortcutKey } from "~/components/primitives/ShortcutKey"; +import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; +import { useShortcutKeys } from "~/hooks/useShortcutKeys"; +import { Button } from "../../primitives/Buttons"; +import { ScheduleTypeIcon, scheduleTypeName } from "./ScheduleType"; +import { FilterMenuProvider } from "./SharedFilters"; export const ScheduleListFilters = z.object({ page: z.coerce.number().default(1), @@ -29,120 +30,232 @@ export const ScheduleListFilters = z.object({ export type ScheduleListFilters = z.infer; -const All = "ALL"; - type ScheduleFiltersProps = { possibleTasks: string[]; }; export function ScheduleFilters({ possibleTasks }: ScheduleFiltersProps) { - const navigate = useNavigate(); const location = useOptimisticLocation(); const searchParams = new URLSearchParams(location.search); - const { tasks, page, search, type } = ScheduleListFilters.parse( - Object.fromEntries(searchParams.entries()) + const hasFilters = + searchParams.has("tasks") || searchParams.has("search") || searchParams.has("type"); + + return ( +
+ + + + {hasFilters && } +
); +} - const hasFilters = searchParams.has("tasks") || searchParams.has("search"); +function ScheduleSearchInput() { + return ; +} - const handleFilterChange = useCallback((filterType: string, value: string | undefined) => { - if (value) { - searchParams.set(filterType, value); - } else { - searchParams.delete(filterType); - } - searchParams.delete("page"); - navigate(`${location.pathname}?${searchParams.toString()}`); - }, []); +const typeShortcut = { key: "y" }; - const handleTaskChange = useCallback((value: string | typeof All) => { - handleFilterChange("tasks", value === "ALL" ? undefined : value); - }, []); +function PermanentTypeFilter() { + const navigate = useNavigate(); + const location = useOptimisticLocation(); + const searchParams = new URLSearchParams(location.search); + const currentType = searchParams.get("type") ?? undefined; + const triggerRef = useRef(null); - const handleTypeChange = useCallback((value: string | typeof All) => { - handleFilterChange("type", value === "ALL" ? undefined : value); - }, []); + useShortcutKeys({ + shortcut: typeShortcut, + action: (e) => { + e.preventDefault(); + e.stopPropagation(); + triggerRef.current?.click(); + }, + }); - const handleSearchChange = useThrottle((value: string) => { - handleFilterChange("search", value.length === 0 ? undefined : value); - }, 300); + const handleChange = useCallback( + (value: string | string[]) => { + const selected = Array.isArray(value) ? value[0] : value; + const params = new URLSearchParams(location.search); + if (!selected || selected === "ALL") { + params.delete("type"); + } else { + params.set("type", selected); + } + params.delete("page"); + navigate(`${location.pathname}?${params.toString()}`); + }, + [location, navigate] + ); - const clearFilters = useCallback(() => { - searchParams.delete("page"); - searchParams.delete("enabled"); - searchParams.delete("tasks"); - searchParams.delete("search"); - navigate(`${location.pathname}?${searchParams.toString()}`); - }, []); + const typeLabel = currentType + ? scheduleTypeName(currentType.toUpperCase() as "IMPERATIVE" | "DECLARATIVE") + : "All types"; return ( -
- handleSearchChange(e.target.value)} - /> - - - - - - - - - {hasFilters && ( - + + ))} + + + )} + + ); +} + +function ClearFiltersButton() { + const navigate = useNavigate(); + const location = useOptimisticLocation(); + + const clearFilters = useCallback(() => { + const params = new URLSearchParams(location.search); + params.delete("page"); + params.delete("tasks"); + params.delete("search"); + params.delete("type"); + navigate(`${location.pathname}?${params.toString()}`); + }, [location, navigate]); + + return ( +
+
); } diff --git a/apps/webapp/app/components/runs/v3/SharedFilters.tsx b/apps/webapp/app/components/runs/v3/SharedFilters.tsx index 3e24f601f2a..0bdd7c4ac5f 100644 --- a/apps/webapp/app/components/runs/v3/SharedFilters.tsx +++ b/apps/webapp/app/components/runs/v3/SharedFilters.tsx @@ -1,5 +1,4 @@ import * as Ariakit from "@ariakit/react"; -import type { RuntimeEnvironment } from "@trigger.dev/database"; import { endOfDay, endOfMonth, @@ -11,20 +10,23 @@ import { subWeeks, } from "date-fns"; import parse from "parse-duration"; -import { startTransition, useCallback, useEffect, useRef, useState, type ReactNode } from "react"; +import { type ReactNode, startTransition, useCallback, useEffect, useRef, useState } from "react"; import simplur from "simplur"; import { AppliedFilter } from "~/components/primitives/AppliedFilter"; import { Callout } from "~/components/primitives/Callout"; import { DateTime } from "~/components/primitives/DateTime"; import { DateTimePicker } from "~/components/primitives/DateTimePicker"; +import { FormError } from "~/components/primitives/FormError"; +import { Header3 } from "~/components/primitives/Headers"; +import { Input } from "~/components/primitives/Input"; import { Label } from "~/components/primitives/Label"; import { Paragraph } from "~/components/primitives/Paragraph"; import { RadioButtonCircle } from "~/components/primitives/RadioButton"; import { ComboboxProvider, SelectPopover, SelectProvider } from "~/components/primitives/Select"; +import { ShortcutKey } from "~/components/primitives/ShortcutKey"; import { useOptionalOrganization } from "~/hooks/useOrganizations"; import { useSearchParams } from "~/hooks/useSearchParam"; import { type ShortcutDefinition, useShortcutKeys } from "~/hooks/useShortcutKeys"; -import { ShortcutKey } from "~/components/primitives/ShortcutKey"; import { cn } from "~/utils/cn"; import { organizationBillingPath } from "~/utils/pathBuilder"; import { Button, LinkButton } from "../../primitives/Buttons"; @@ -422,11 +424,7 @@ export function TimeFilter({
Filter by time period - +
)} @@ -1005,3 +1003,102 @@ function QuickDateButton({ ); } + +export type IdFilterDropdownProps = { + trigger: ReactNode; + clearSearchValue: () => void; + searchValue: string; + onClose?: () => void; + label: string; + placeholder: string; + paramKey: string; + validate?: (value: string) => string | undefined; + inputWidth?: string; +}; + +export function IdFilterDropdown({ + trigger, + clearSearchValue, + onClose, + label, + placeholder, + paramKey, + validate, + inputWidth = "w-[29ch]", +}: IdFilterDropdownProps) { + const [open, setOpen] = useState(); + const { value, replace } = useSearchParams(); + const currentValue = value(paramKey); + + const [inputValue, setInputValue] = useState(currentValue); + const [prevOpen, setPrevOpen] = useState(open); + if (open !== prevOpen) { + setPrevOpen(open); + if (open) setInputValue(currentValue); + } + + const apply = () => { + clearSearchValue(); + replace({ + cursor: undefined, + direction: undefined, + [paramKey]: inputValue === "" ? undefined : inputValue?.toString(), + }); + + setOpen(false); + }; + + const error = inputValue ? validate?.(inputValue) : undefined; + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + + return true; + }} + className="max-w-[min(32ch,var(--popover-available-width))]" + > +
+
+ + {label} + + setInputValue(e.target.value)} + variant="small" + className={cn(inputWidth, "font-mono")} + spellCheck={false} + /> + {error ? {error} : null} +
+
+ + +
+
+
+
+ ); +} diff --git a/apps/webapp/app/components/runs/v3/SpanTitle.tsx b/apps/webapp/app/components/runs/v3/SpanTitle.tsx index 4c25fc7b9ae..0b9273cd481 100644 --- a/apps/webapp/app/components/runs/v3/SpanTitle.tsx +++ b/apps/webapp/app/components/runs/v3/SpanTitle.tsx @@ -55,20 +55,24 @@ function SpanAccessory({ case "pills": { return ( - {accessory.items.map((item, index) => ( - - ))} + {accessory.items + .filter((item) => typeof item.text === "string") + .map((item, index) => ( + + ))} ); } default: { return ( - {accessory.items.map((item, index) => ( - - {item.text} - - ))} + {accessory.items + .filter((item) => typeof item.text === "string") + .map((item, index) => ( + + {item.text} + + ))} ); } @@ -104,16 +108,18 @@ export function SpanCodePathAccessory({ className )} > - {accessory.items.map((item, index) => ( - - {item.text} - {index < accessory.items.length - 1 && ( - - - - )} - - ))} + {accessory.items + .filter((item) => typeof item.text === "string") + .map((item, index, filtered) => ( + + {item.text} + {index < filtered.length - 1 && ( + + + + )} + + ))} ); } diff --git a/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx b/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx index fbede0e7cec..9c670864900 100644 --- a/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx +++ b/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx @@ -23,6 +23,7 @@ import { useSelectedItems } from "~/components/primitives/SelectedItemsProvider" import { SimpleTooltip } from "~/components/primitives/Tooltip"; import { TruncatedCopyableValue } from "~/components/primitives/TruncatedCopyableValue"; import { useEnvironment } from "~/hooks/useEnvironment"; +import { useRegions } from "~/hooks/useRegions"; import { useFeatures } from "~/hooks/useFeatures"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; @@ -31,7 +32,7 @@ import { type NextRunListItem, } from "~/presenters/v3/NextRunListPresenter.server"; import { formatCurrencyAccurate } from "~/utils/numberFormatter"; -import { docsPath, v3RunSpanPath, v3TestPath,v3TestTaskPath } from "~/utils/pathBuilder"; +import { docsPath, v3RunSpanPath, v3TestPath, v3TestTaskPath } from "~/utils/pathBuilder"; import { DateTime } from "../../primitives/DateTime"; import { Paragraph } from "../../primitives/Paragraph"; import { Spinner } from "../../primitives/Spinner"; @@ -47,6 +48,7 @@ import { type TableVariant, } from "../../primitives/Table"; import { CancelRunDialog } from "./CancelRunDialog"; +import { RegionLabel } from "./RegionLabel"; import { LiveTimer } from "./LiveTimer"; import { ReplayRunDialog } from "./ReplayRunDialog"; import { RunTag } from "./RunTag"; @@ -55,8 +57,11 @@ import { filterableTaskRunStatuses, TaskRunStatusCombo, } from "./TaskRunStatus"; +import { RunStatusCellTooltip } from "./RunStatusCellTooltip"; +import { TaskTriggerSourceIcon } from "./TaskTriggerSource"; import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; import { useSearchParams } from "~/hooks/useSearchParam"; +import type { TaskTriggerSource } from "@trigger.dev/database"; type RunsTableProps = { total: number; @@ -70,6 +75,7 @@ type RunsTableProps = { variant?: TableVariant; disableAdjacentRows?: boolean; additionalTableState?: Record; + childrenStatusesBasePath?: string; }; export function TaskRunsTable({ @@ -83,9 +89,13 @@ export function TaskRunsTable({ allowSelection = false, variant = "dimmed", additionalTableState, + childrenStatusesBasePath, }: RunsTableProps) { + const regions = useRegions(); + const regionByMasterQueue = new Map(regions.map((r) => [r.masterQueue, r] as const)); const organization = useOrganization(); const project = useProject(); + const environment = useEnvironment(); const checkboxes = useRef<(HTMLInputElement | null)[]>([]); const { has, hasAll, select, deselect, toggle } = useSelectedItems(allowSelection); const { isManagedCloud } = useFeatures(); @@ -102,9 +112,10 @@ export function TaskRunsTable({ } const search = params.toString(); /** TableState has to be encoded as a separate URI component, so it's merged under one, 'tableState' param */ - const tableStateParam = disableAdjacentRows ? '' : encodeURIComponent(search); + const tableStateParam = disableAdjacentRows ? "" : encodeURIComponent(search); const showCompute = isManagedCloud; + const showRegion = environment.type !== "DEVELOPMENT"; const navigateCheckboxes = useCallback( (event: React.KeyboardEvent, index: number) => { @@ -162,6 +173,7 @@ export function TaskRunsTable({ Task Version {filterableTaskRunStatuses.map((status) => ( @@ -185,6 +197,7 @@ export function TaskRunsTable({ Started
@@ -229,6 +242,7 @@ export function TaskRunsTable({ Machine Queue + {showRegion && Region} Test Created at {total === 0 && !hasFilters ? ( - + {!isLoading && } ) : runs.length === 0 ? ( - + ) : ( runs.map((run, index) => { const searchParams = new URLSearchParams(); if (tableStateParam) { searchParams.set("tableState", tableStateParam); } - const path = v3RunSpanPath(organization, project, run.environment, run, { - spanId: run.spanId, - }, searchParams); + const path = v3RunSpanPath( + organization, + project, + run.environment, + run, + { + spanId: run.spanId, + }, + searchParams + ); return ( {allowSelection && ( @@ -343,17 +364,30 @@ export function TaskRunsTable({ + {run.taskIdentifier} {run.rootTaskRunId === null ? Root : null} {run.version ?? "–"} - } - /> + {run.rootTaskRunId === null && childrenStatusesBasePath ? ( + + ) : ( + } + /> + )} {run.startedAt ? : "–"} @@ -426,8 +460,26 @@ export function TaskRunsTable({ {run.queue.name} + {showRegion && ( + + {run.region ? ( + + ) : ( + "–" + )} + + )} - {run.isTest ? : "–"} + {run.isTest ? ( + + ) : ( + "–" + )} {run.createdAt ? : "–"} @@ -448,8 +500,8 @@ export function TaskRunsTable({ )} {isLoading && ( Loading… @@ -584,15 +636,22 @@ function NoRuns({ title }: { title: string }) { ); } -function BlankState({ isLoading, filters }: Pick) { +function BlankState({ + isLoading, + filters, + showRegion, +}: Pick & { showRegion: boolean }) { const organization = useOrganization(); const project = useProject(); const environment = useEnvironment(); - if (isLoading) return ; + const colSpan = showRegion ? 16 : 15; + if (isLoading) return ; const { tasks, from, to, ...otherFilters } = filters; const singleTaskFromFilters = filters.tasks.length === 1 ? filters.tasks[0] : null; - const testPath = singleTaskFromFilters ? v3TestTaskPath(organization, project, environment, {taskIdentifier: singleTaskFromFilters}) : v3TestPath(organization, project, environment); + const testPath = singleTaskFromFilters + ? v3TestTaskPath(organization, project, environment, { taskIdentifier: singleTaskFromFilters }) + : v3TestPath(organization, project, environment); if ( filters.tasks.length === 1 && @@ -601,7 +660,7 @@ function BlankState({ isLoading, filters }: Pick filterArray.length === 0) ) { return ( - + There are no runs for {filters.tasks[0]} @@ -629,7 +688,7 @@ function BlankState({ isLoading, filters }: Pick +
No runs match your filters. Try refreshing, modifying your filters or run a test. @@ -645,11 +704,7 @@ function BlankState({ isLoading, filters }: Pick or - + Run a test
diff --git a/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx b/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx index 8d81e2f36c3..cb06c8e2a92 100644 --- a/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx +++ b/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx @@ -1,4 +1,4 @@ -import { ClockIcon } from "@heroicons/react/20/solid"; +import { ClockIcon, CpuChipIcon } from "@heroicons/react/20/solid"; import type { TaskTriggerSource } from "@trigger.dev/database"; import { TaskIconSmall } from "~/assets/icons/TaskIcon"; import { cn } from "~/utils/cn"; @@ -12,13 +12,20 @@ export function TaskTriggerSourceIcon({ }) { switch (source) { case "STANDARD": { - return ; + return ( + + ); } case "SCHEDULED": { return ( ); } + case "AGENT": { + return ( + + ); + } } } @@ -30,5 +37,8 @@ export function taskTriggerSourceDescription(source: TaskTriggerSource) { case "SCHEDULED": { return "Scheduled task"; } + case "AGENT": { + return "Agent"; + } } } diff --git a/apps/webapp/app/components/runs/v3/WaitpointTokenFilters.tsx b/apps/webapp/app/components/runs/v3/WaitpointTokenFilters.tsx index ae416394147..3868a496d79 100644 --- a/apps/webapp/app/components/runs/v3/WaitpointTokenFilters.tsx +++ b/apps/webapp/app/components/runs/v3/WaitpointTokenFilters.tsx @@ -1,28 +1,24 @@ import * as Ariakit from "@ariakit/react"; -import { CalendarIcon, FingerPrintIcon, TagIcon, TrashIcon } from "@heroicons/react/20/solid"; +import { FingerPrintIcon, TagIcon, XMarkIcon } from "@heroicons/react/20/solid"; import { Form, useFetcher } from "@remix-run/react"; import { WaitpointTokenStatus, waitpointTokenStatuses } from "@trigger.dev/core/v3"; -import { ListChecks, ListFilterIcon } from "lucide-react"; +import { ListChecks } from "lucide-react"; import { matchSorter } from "match-sorter"; -import { type ReactNode, useCallback, useEffect, useMemo, useState } from "react"; +import { type ReactNode, useCallback, useEffect, useMemo, useRef, useState } from "react"; import { z } from "zod"; import { StatusIcon } from "~/assets/icons/StatusIcon"; import { AppliedFilter } from "~/components/primitives/AppliedFilter"; import { Button } from "~/components/primitives/Buttons"; -import { FormError } from "~/components/primitives/FormError"; -import { Input } from "~/components/primitives/Input"; -import { Label } from "~/components/primitives/Label"; import { Paragraph } from "~/components/primitives/Paragraph"; import { ComboBox, - SelectButtonItem, SelectItem, SelectList, SelectPopover, SelectProvider, - SelectTrigger, shortcutFromIndex, } from "~/components/primitives/Select"; +import { ShortcutKey } from "~/components/primitives/ShortcutKey"; import { Spinner } from "~/components/primitives/Spinner"; import { Tooltip, @@ -35,8 +31,15 @@ import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; import { useSearchParams } from "~/hooks/useSearchParam"; +import { useShortcutKeys } from "~/hooks/useShortcutKeys"; import { type loader as tagsLoader } from "~/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.waitpoints.tags"; -import { TimeFilter, appliedSummary, FilterMenuProvider } from "./SharedFilters"; +import { + IdFilterDropdown, + type IdFilterDropdownProps, + appliedSummary, + FilterMenuProvider, + TimeFilter, +} from "./SharedFilters"; import { WaitpointStatusCombo, waitpointStatusTitle } from "./WaitpointStatus"; export const WaitpointSearchParamsSchema = z.object({ @@ -66,136 +69,33 @@ export function WaitpointTokenFilters(props: WaitpointTokenFiltersProps) { searchParams.has("statuses") || searchParams.has("tags") || searchParams.has("id") || - searchParams.has("idempotencyKey"); + searchParams.has("idempotencyKey") || + searchParams.has("period") || + searchParams.has("from") || + searchParams.has("to"); return ( -
- - - +
+ + + + + {hasFilters && ( -
-
); } -const filterTypes = [ - { - name: "statuses", - title: "Status", - icon: , - }, - { name: "tags", title: "Tags", icon: }, - { name: "id", title: "Waitpoint ID", icon: }, - { name: "idempotencyKey", title: "Idempotency key", icon: }, -] as const; - -type FilterType = (typeof filterTypes)[number]["name"]; - -const shortcut = { key: "f" }; - -function FilterMenu() { - const [filterType, setFilterType] = useState(); - - const filterTrigger = ( - - -
- } - variant={"secondary/small"} - shortcut={shortcut} - tooltipTitle={"Filter runs"} - > - Filter - - ); - - return ( - setFilterType(undefined)}> - {(search, setSearch) => ( - setSearch("")} - trigger={filterTrigger} - filterType={filterType} - setFilterType={setFilterType} - /> - )} - - ); -} - -function AppliedFilters() { - return ( - <> - - - - - - ); -} - -type MenuProps = { - searchValue: string; - clearSearchValue: () => void; - trigger: React.ReactNode; - filterType: FilterType | undefined; - setFilterType: (filterType: FilterType | undefined) => void; -}; - -function Menu(props: MenuProps) { - switch (props.filterType) { - case undefined: - return ; - case "statuses": - return props.setFilterType(undefined)} {...props} />; - case "tags": - return props.setFilterType(undefined)} {...props} />; - case "id": - return props.setFilterType(undefined)} {...props} />; - case "idempotencyKey": - return props.setFilterType(undefined)} {...props} />; - } -} - -function MainMenu({ searchValue, trigger, clearSearchValue, setFilterType }: MenuProps) { - const filtered = useMemo(() => { - return filterTypes.filter((item) => { - return item.title.toLowerCase().includes(searchValue.toLowerCase()); - }); - }, [searchValue]); - - return ( - - {trigger} - - - - {filtered.map((type, index) => ( - { - clearSearchValue(); - setFilterType(type.name); - }} - icon={type.icon} - shortcut={shortcutFromIndex(index, { shortcutsEnabled: true })} - > - {type.title} - - ))} - - - - ); -} - const statuses = waitpointTokenStatuses.map((status) => ({ title: waitpointStatusTitle(status), value: status, @@ -237,7 +137,6 @@ function StatusDropdown({ return true; }} > - {filtered.map((item, index) => { return ( @@ -249,7 +148,7 @@ function StatusDropdown({ - + @@ -267,30 +166,68 @@ function StatusDropdown({ ); } -function AppliedStatusFilter() { - const { values, del } = useSearchParams(); - const statuses = values("statuses"); +const statusShortcut = { key: "s" }; - if (statuses.length === 0) { - return null; - } +function PermanentStatusFilter() { + const { values, del } = useSearchParams(); + const selectedStatuses = values("statuses"); + const hasStatuses = selectedStatuses.length > 0 && !selectedStatuses.every((v) => v === ""); + const triggerRef = useRef(null); + + useShortcutKeys({ + shortcut: statusShortcut, + action: (e) => { + e.preventDefault(); + e.stopPropagation(); + triggerRef.current?.click(); + }, + }); return ( {(search, setSearch) => ( }> - } - value={appliedSummary( - statuses.map((v) => waitpointStatusTitle(v as WaitpointTokenStatus)) + + } + /> + } + > + {hasStatuses ? ( + } + value={appliedSummary( + selectedStatuses.map((v) => waitpointStatusTitle(v as WaitpointTokenStatus)) + )} + onRemove={() => del(["statuses", "cursor", "direction"])} + variant="secondary/small" + className="pl-1" + /> + ) : ( +
+
+
+
+ Status +
)} - onRemove={() => del(["statuses", "cursor", "direction"])} - variant="secondary/small" - /> - + + +
+ Filter by status + +
+
+ } searchValue={search} clearSearchValue={() => setSearch("")} @@ -366,19 +303,21 @@ function TagsDropdown({ return true; }} > - ( -
- - {fetcher.state === "loading" && } -
- )} - /> + {!(filtered.length === 0 && fetcher.state !== "loading" && searchValue === "") && ( + ( +
+ + {fetcher.state === "loading" && } +
+ )} + /> + )} {filtered.length > 0 - ? filtered.map((tag, index) => ( - + ? filtered.map((tag) => ( + {tag} )) @@ -392,29 +331,64 @@ function TagsDropdown({ ); } -function AppliedTagsFilter() { - const { values, del } = useSearchParams(); +const tagsShortcut = { key: "g" }; +function PermanentTagsFilter() { + const { values, del } = useSearchParams(); const tags = values("tags"); - - if (tags.length === 0) { - return null; - } + const hasTags = tags.length > 0 && !tags.every((v) => v === ""); + const triggerRef = useRef(null); + + useShortcutKeys({ + shortcut: tagsShortcut, + action: (e) => { + e.preventDefault(); + e.stopPropagation(); + triggerRef.current?.click(); + }, + }); return ( {(search, setSearch) => ( }> - } - value={appliedSummary(values("tags"))} - onRemove={() => del(["tags", "cursor", "direction"])} - variant="secondary/small" - /> - + + } + /> + } + > + {hasTags ? ( + } + value={appliedSummary(tags)} + onRemove={() => del(["tags", "cursor", "direction"])} + variant="secondary/small" + className="pl-1" + /> + ) : ( +
+ + Tags +
+ )} +
+ +
+ Filter by tags + +
+
+
} searchValue={search} clearSearchValue={() => setSearch("")} @@ -424,117 +398,82 @@ function AppliedTagsFilter() { ); } -function WaitpointIdDropdown({ - trigger, - clearSearchValue, - searchValue, - onClose, -}: { - trigger: ReactNode; - clearSearchValue: () => void; - searchValue: string; - onClose?: () => void; -}) { - const [open, setOpen] = useState(); - const { value, replace } = useSearchParams(); - const idValue = value("id"); - - const [id, setId] = useState(idValue); - - const apply = useCallback(() => { - clearSearchValue(); - replace({ - cursor: undefined, - direction: undefined, - id: id === "" ? undefined : id?.toString(), - }); - - setOpen(false); - }, [id, replace]); - - let error: string | undefined = undefined; - if (id) { - if (!id.startsWith("waitpoint_")) { - error = "Waitpoint IDs start with 'waitpoint_'"; - } else if (id.length !== 35) { - error = "Waitpoint IDs are 35 characters long"; - } - } - +function WaitpointIdDropdown( + props: Omit +) { return ( - - {trigger} - { - if (onClose) { - onClose(); - return false; - } - - return true; - }} - className="max-w-[min(32ch,var(--popover-available-width))]" - > -
-
- - setId(e.target.value)} - variant="small" - className="w-[27ch] font-mono" - spellCheck={false} - /> - {error ? {error} : null} -
-
- - -
-
-
-
+ { + if (!v.startsWith("waitpoint_")) return "Waitpoint IDs start with 'waitpoint_'"; + if (v.length !== 35) return "Waitpoint IDs are 35 characters long"; + return undefined; + }} + /> ); } -function AppliedWaitpointIdFilter() { - const { value, del } = useSearchParams(); - - if (value("id") === undefined) { - return null; - } +const waitpointIdShortcut = { key: "w" }; +function PermanentWaitpointIdFilter() { + const { value, del } = useSearchParams(); const id = value("id"); + const hasId = id !== undefined; + const triggerRef = useRef(null); + + useShortcutKeys({ + shortcut: waitpointIdShortcut, + action: (e) => { + e.preventDefault(); + e.stopPropagation(); + triggerRef.current?.click(); + }, + }); return ( {(search, setSearch) => ( }> - } - value={id} - onRemove={() => del(["id", "cursor", "direction"])} - variant="secondary/small" - /> - + + } + /> + } + > + {hasId ? ( + } + value={id} + onRemove={() => del(["id", "cursor", "direction"])} + variant="secondary/small" + className="pl-1" + /> + ) : ( +
+ + Waitpoint ID +
+ )} +
+ +
+ Filter by waitpoint ID + +
+
+
} searchValue={search} clearSearchValue={() => setSearch("")} @@ -544,115 +483,81 @@ function AppliedWaitpointIdFilter() { ); } -function IdempotencyKeyDropdown({ - trigger, - clearSearchValue, - searchValue, - onClose, -}: { - trigger: ReactNode; - clearSearchValue: () => void; - searchValue: string; - onClose?: () => void; -}) { - const [open, setOpen] = useState(); - const { value, replace } = useSearchParams(); - const idValue = value("idempotencyKey"); - - const [idempotencyKey, setIdempotencyKey] = useState(idValue); - - const apply = useCallback(() => { - clearSearchValue(); - replace({ - cursor: undefined, - direction: undefined, - idempotencyKey: idempotencyKey === "" ? undefined : idempotencyKey?.toString(), - }); - - setOpen(false); - }, [idempotencyKey, replace]); - - let error: string | undefined = undefined; - if (idempotencyKey) { - if (idempotencyKey.length === 0) { - error = "Idempotency keys need to be at least 1 character in length"; - } - } - +function IdempotencyKeyDropdown( + props: Omit +) { return ( - - {trigger} - { - if (onClose) { - onClose(); - return false; - } - - return true; - }} - className="max-w-[min(32ch,var(--popover-available-width))]" - > -
-
- - setIdempotencyKey(e.target.value)} - variant="small" - className="w-[27ch] font-mono" - spellCheck={false} - /> - {error ? {error} : null} -
-
- - -
-
-
-
+ { + if (v.length === 0) return "Idempotency keys need to be at least 1 character in length"; + return undefined; + }} + /> ); } -function AppliedIdempotencyKeyFilter() { - const { value, del } = useSearchParams(); - - if (value("idempotencyKey") === undefined) { - return null; - } +const idempotencyKeyShortcut = { key: "i" }; +function PermanentIdempotencyKeyFilter() { + const { value, del } = useSearchParams(); const idempotencyKey = value("idempotencyKey"); + const hasKey = idempotencyKey !== undefined; + const triggerRef = useRef(null); + + useShortcutKeys({ + shortcut: idempotencyKeyShortcut, + action: (e) => { + e.preventDefault(); + e.stopPropagation(); + triggerRef.current?.click(); + }, + }); return ( {(search, setSearch) => ( }> - } - value={idempotencyKey} - onRemove={() => del(["idempotencyKey", "cursor", "direction"])} - variant="secondary/small" - /> - + + } + /> + } + > + {hasKey ? ( + } + value={idempotencyKey} + onRemove={() => del(["idempotencyKey", "cursor", "direction"])} + variant="secondary/small" + className="pl-1" + /> + ) : ( +
+ + Idempotency key +
+ )} +
+ +
+ Filter by idempotency key + +
+
+
} searchValue={search} clearSearchValue={() => setSearch("")} diff --git a/apps/webapp/app/components/runs/v3/agent/AgentMessageView.tsx b/apps/webapp/app/components/runs/v3/agent/AgentMessageView.tsx new file mode 100644 index 00000000000..fbd7faf2298 --- /dev/null +++ b/apps/webapp/app/components/runs/v3/agent/AgentMessageView.tsx @@ -0,0 +1,306 @@ +import type { UIMessage } from "@ai-sdk/react"; +import { memo } from "react"; +import { + AssistantResponse, + ChatBubble, + ToolUseRow, +} from "~/components/runs/v3/ai/AIChatMessages"; +import { Popover, PopoverContent, PopoverTrigger } from "~/components/primitives/Popover"; + +// --------------------------------------------------------------------------- +// AgentMessageView — renders an AI SDK UIMessage[] conversation. +// +// Extracted from the playground route so it can be reused on the run details +// page when the user picks the Agent view. +// +// UIMessage part types (AI SDK): +// text — markdown text content +// reasoning — model reasoning/thinking +// tool-{name} — tool call with input/output/state +// source-url — citation link +// source-document — citation document reference +// file — file attachment (image, etc.) +// step-start — visual separator between steps +// data-{name} — custom data parts (rendered as a small popover) +// --------------------------------------------------------------------------- + +export function AgentMessageView({ messages }: { messages: UIMessage[] }) { + return ( +
+ {messages.map((msg) => ( + + ))} +
+ ); +} + +// Memoized so stable messages (anything older than the one currently +// streaming) don't re-render on every chunk. This matters a lot during +// `resumeStream()` history replay, where each re-render would otherwise +// re-run Prism highlighting on every tool-call CodeBlock in the list. +// +// Default shallow prop comparison is fine: AI SDK's useChat keeps stable +// references for messages that haven't changed, so only the last message +// (the one receiving new chunks) re-renders. +export const MessageBubble = memo(function MessageBubble({ + message, +}: { + message: UIMessage; +}) { + if (message.role === "user") { + const text = + message.parts + ?.filter((p) => p.type === "text") + .map((p) => (p as { type: "text"; text: string }).text) + .join("") ?? ""; + + return ( +
+
+
{text}
+
+
+ ); + } + + if (message.role === "assistant") { + const hasContent = message.parts && message.parts.length > 0; + if (!hasContent) return null; + + return ( +
+ {message.parts?.map((part, i) => renderPart(part, i))} +
+ ); + } + + return null; +}); + +// URLs in `source-url`/`file` parts come from streamed agent/tool data, so an +// unsafe scheme like `javascript:` would become a clickable XSS payload once it +// reaches an href/src. Allow only http(s)/blob (and data: for inline images), +// and return null for anything else so the caller can skip the link/image. +export function toSafeUrl(value: unknown, allowDataImage = false): string | null { + if (typeof value !== "string") return null; + let parsed: URL; + try { + parsed = new URL(value); + } catch { + return null; + } + if (parsed.protocol === "http:" || parsed.protocol === "https:" || parsed.protocol === "blob:") { + return value; + } + if (allowDataImage && parsed.protocol === "data:" && /^data:image\//i.test(value)) { + return value; + } + return null; +} + +export function renderPart(part: UIMessage["parts"][number], i: number) { + const p = part as any; + const type = part.type as string; + + // Text — markdown rendered via AssistantResponse + if (type === "text") { + return p.text ? : null; + } + + // Reasoning — amber-bordered italic block + if (type === "reasoning") { + return ( +
+ +
+ {p.text ?? ""} +
+
+
+ ); + } + + // Tool call — type: "tool-{name}" with toolCallId, input, output, state + if (type.startsWith("tool-")) { + const toolName = type.slice(5); + + // Sub-agent tool: output is a UIMessage with parts + const isSubAgent = + p.output != null && typeof p.output === "object" && Array.isArray(p.output.parts); + + // For sub-agent tools, show the last text part as the "output" tab + // (mirrors what toModelOutput typically sends to the parent LLM) + // instead of dumping the full UIMessage JSON. + let resultOutput: string | undefined; + if (isSubAgent) { + const lastText = (p.output.parts as any[]) + .filter((part: any) => part.type === "text" && part.text) + .pop(); + resultOutput = lastText?.text ?? undefined; + } else if (p.output != null) { + resultOutput = + typeof p.output === "string" ? p.output : JSON.stringify(p.output, null, 2); + } + + // Status label for the tool row. AI SDK 7 HITL adds the + // approval-requested / approval-responded states between input-available + // and output-available, so surface those alongside the existing states. + let resultSummary: string | undefined; + if (p.state === "input-streaming" || p.state === "input-available") { + resultSummary = "calling..."; + } else if (p.state === "approval-requested") { + resultSummary = "awaiting approval"; + } else if (p.state === "approval-responded" || p.state === "output-denied") { + resultSummary = p.approval?.approved + ? "approved" + : `denied${p.approval?.reason ? `: ${p.approval.reason}` : ""}`; + } else if (p.state === "output-error") { + resultSummary = `error: ${p.errorText ?? "unknown"}`; + } + + return ( + + ); + } + + // Source URL — clickable citation link + if (type === "source-url") { + const safeUrl = toSafeUrl(p.url); + const label = p.title || p.url; + // Unsafe scheme: render the citation text without a clickable link. + if (!safeUrl) { + return label ? ( +
+ {label} +
+ ) : null; + } + return ( + + ); + } + + // Source document — citation label + if (type === "source-document") { + return ( +
+ {p.title} + {p.mediaType ? ` (${p.mediaType})` : ""} +
+ ); + } + + // File — render as image if image type, otherwise as download link + if (type === "file") { + const isImage = typeof p.mediaType === "string" && p.mediaType.startsWith("image/"); + if (isImage) { + const safeSrc = toSafeUrl(p.url, true); // allow data: URIs for inline images + // Unsafe scheme: fall back to the filename, matching the non-image branch. + if (!safeSrc) { + return p.filename ? ( +
+ {p.filename} +
+ ) : null; + } + return ( + {p.filename + ); + } + const safeUrl = toSafeUrl(p.url); + // Unsafe scheme: show the filename without a clickable download link. + if (!safeUrl) { + return p.filename ? ( +
+ {p.filename} +
+ ) : null; + } + return ( + + ); + } + + // Step start — subtle dashed separator with centered label + if (type === "step-start") { + return ( +
+
+ step +
+
+ ); + } + + // Data parts — type: "data-{name}", show as labeled JSON popover + if (type.startsWith("data-")) { + const dataName = type.slice(5); + return ; + } + + return null; +} + +function DataPartPopover({ name, data }: { name: string; data: unknown }) { + const formatted = JSON.stringify(data, null, 2); + + return ( + + + + + +
+ data-{name} +
+
+
{formatted}
+
+
+
+ ); +} diff --git a/apps/webapp/app/components/runs/v3/agent/AgentView.tsx b/apps/webapp/app/components/runs/v3/agent/AgentView.tsx new file mode 100644 index 00000000000..49b34db14c3 --- /dev/null +++ b/apps/webapp/app/components/runs/v3/agent/AgentView.tsx @@ -0,0 +1,932 @@ +import type { UIMessage } from "@ai-sdk/react"; +import { ChatSnapshotV1Schema, SSEStreamSubscription } from "@trigger.dev/core/v3"; +import { useEffect, useMemo, useRef, useState } from "react"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { Spinner } from "~/components/primitives/Spinner"; +import { AgentMessageView } from "~/components/runs/v3/agent/AgentMessageView"; +import { useAutoScrollToBottom } from "~/hooks/useAutoScrollToBottom"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; + +export type AgentViewAuth = { + publicAccessToken: string; + apiOrigin: string; + /** + * Session identifier the AgentView uses to address the backing + * {@link Session} when subscribing to `.in` / `.out`. Accepts either + * a `session_*` friendlyId or the transport-supplied externalId + * (typically the browser's `chatId`) — the dashboard resource route + * resolves either form via `resolveSessionByIdOrExternalId`. + */ + sessionId: string; + /** + * User messages extracted from the run's task payload at load time. + * Empty array for runs started with `trigger: "preload"` — in that + * case the first user message arrives over the session's `.in` + * channel and is merged in by the AgentView subscription. + */ + initialMessages: UIMessage[]; + /** + * Presigned GET URL for the session's chat-snapshot S3 blob (written + * by the agent after each turn-complete; see `ChatSnapshotV1`). + * Optional — sessions that registered a `hydrateMessages` hook skip + * snapshot writes and the URL fetch will 404. In that case the + * dashboard falls back to seq=0 SSE (which, post-trim, shows only the + * most recent turn). Generated server-side by `SessionPresenter`. + */ + snapshotPresignedUrl?: string; +}; + +/** + * Max state-update interval while assistant chunks are streaming. Matches + * the `experimental_throttle: 100` we previously passed to `useChat`. + * Chunks mutate a staging ref synchronously; a throttled flush copies the + * ref into React state at most ~10x/sec so tool-call Prism highlighting + * etc. doesn't re-run on every single text-delta. + */ +const STATE_FLUSH_THROTTLE_MS = 100; + +/** + * Sentinel timestamp for messages that came from the run's initial task + * payload — they predate any stream activity, so 0 guarantees they sort + * first regardless of stream race order. + */ +const INITIAL_PAYLOAD_TIMESTAMP = 0; + +/** + * Renders a Session's chat conversation as it unfolds. + * + * Subscribes to both channels of the {@link Session}: + * - **`.out`** delivers assistant `UIMessageChunk`s (text deltas, tool + * calls, reasoning, etc.) produced by the agent's + * `chatStream.writer(...)` calls — objects, already parsed by the S2 + * SSE reader. + * - **`.in`** delivers {@link ChatInputChunk}s sent by + * {@link TriggerChatTransport} (or any other session writer). Each + * chunk is a tagged union (`{kind: "message", payload}` for user + * turns, `{kind: "stop"}` for stop signals) — the AgentView only + * cares about `kind: "message"` and pulls `.payload.messages`. + * + * Both streams are read directly via `SSEStreamSubscription` through the + * dashboard's session-authed resource routes — not through `useChat` or + * `TriggerChatTransport`. This gives us per-chunk server-side timestamps + * (S2 sequence numbers) from both streams, which we use to produce a + * chronologically correct merged message list that works for replays, + * multi-message turns, cross-run session resumes, and steering messages. + * + * Intended to be mounted inside a scrollable container — the component + * does not own its own scrollbar. + */ +export function AgentView({ agentView }: { agentView: AgentViewAuth }) { + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + + const messages = useAgentSessionMessages({ + sessionId: agentView.sessionId, + apiOrigin: agentView.apiOrigin, + orgSlug: organization.slug, + projectSlug: project.slug, + envSlug: environment.slug, + initialMessages: agentView.initialMessages, + snapshotPresignedUrl: agentView.snapshotPresignedUrl, + }); + + // Sticky-bottom auto-scroll: walks up to find the inspector's scroll + // container, then scrolls to bottom whenever `messages` changes — but + // only if the user was at (or near) the bottom at the time. Scrolling + // away pauses auto-scroll; scrolling back resumes it. + const rootRef = useAutoScrollToBottom([messages]); + + return ( +
+ {messages.length === 0 ? ( +
+
+ + + Loading conversation… + +
+
+ ) : ( + + )} +
+ ); +} + +// --------------------------------------------------------------------------- +// useAgentSessionMessages — reads both realtime streams for a session and +// maintains a chronologically ordered, merged message list. +// --------------------------------------------------------------------------- + +/** + * Shape of each chunk on the session's `.in` channel. Mirrors the + * `ChatInputChunk` tagged union produced by {@link TriggerChatTransport}: + * - `kind: "message"` carries a `ChatTaskWirePayload` in `.payload` + * (user-submitted messages or regenerate calls); we dedupe by id. + * - `kind: "stop"` is a stop signal — no messages, nothing to render + * here, so it's filtered. + * + * Wire payloads are slim-wire (one new UIMessage per record, on + * `payload.message`). The legacy `payload.messages` array shape is kept + * here as a fallback so any historical records on a long-lived session + * still render. + * + * The server wraps records in `{data, id}` and writes `data` as a JSON + * string; SSE v2 delivers the parsed string back. {@link parseChunkPayload} + * re-parses to recover the object. + */ +type InputStreamChunk = { + kind?: "message" | "stop"; + payload?: { + message?: { id?: string; role?: string; parts?: unknown[] }; + messages?: Array<{ id?: string; role?: string; parts?: unknown[] }>; + trigger?: string; + }; + message?: string; +}; + +/** + * Minimal typing for the chunks we care about on the chat output stream. + * Covers the AI SDK `UIMessageChunk` variants that `renderPart` actually + * knows how to display, plus the Trigger.dev control chunks that we filter. + */ +type OutputChunk = { type: string; [key: string]: unknown }; + +/** + * Per-message orchestration state for the output stream accumulator. Mirrors + * the active-part tracking that AI SDK's `processUIMessageStream` keeps in + * its `state` object: a registry of streaming text/reasoning parts so deltas + * can be matched to the right part by id, plus a way to clear them at step + * boundaries (`finish-step`) so the next step's `text-start`/`reasoning-start` + * with the same id starts a fresh part instead of appending to the previous + * step's part. + */ +/** + * Per-message orchestration state — index-based active-part tracking. + * + * Each map points from a part id (text or reasoning) to **the index of the + * currently-streaming part with that id in `message.parts`**. We need + * indexes (not just a `Set` of "active ids") because part ids are *only + * unique within a step*: the SDK happily reuses `text-start id="0"` after + * a `finish-step` boundary. Without index tracking, a `text-delta` for the + * reused id would have to find the right part by id alone — and a search + * would match BOTH the previous step's frozen part and the current step's + * fresh one, which produces a duplication where the previous text gets + * the new content appended to it AND a fresh part with the same content + * also appears. + * + * Mirrors AI SDK's `processUIMessageStream`'s `state.activeTextParts` / + * `state.activeReasoningParts` (which hold direct references in the + * mutating canonical impl). We use indexes here because we do immutable + * updates and need indices that survive `parts.map()` rewrites — adding + * new parts and updating existing ones never reorders, so an index is + * stable for the lifetime of the part. + */ +type MessageOrchestrationState = { + activeTextPartIndexes: Map; + activeReasoningPartIndexes: Map; +}; + +/** + * `SSEStreamSubscription`'s v2 batch path delivers `parsedBody.data` as-is + * — but session channels diverge by direction: + * + * - `.in`: {@link TriggerChatTransport.serializeInputChunk} writes the + * `ChatInputChunk` as a JSON **string**, so `data` is a string that + * needs a second `JSON.parse` to recover the tagged union. + * - `.out`: the agent's `chatStream.writer(...)` writes + * {@link UIMessageChunk} **objects** directly; `data` arrives + * already-parsed. + * + * This helper accepts both shapes defensively: a string is parsed; an + * object is returned as-is. Returns `null` for unparseable payloads. + */ +function parseChunkPayload(raw: unknown): Record | null { + if (raw == null) return null; + if (typeof raw === "string") { + try { + const parsed = JSON.parse(raw); + return parsed && typeof parsed === "object" ? (parsed as Record) : null; + } catch { + return null; + } + } + if (typeof raw === "object") return raw as Record; + return null; +} + +function createOrchestrationState(): MessageOrchestrationState { + return { + activeTextPartIndexes: new Map(), + activeReasoningPartIndexes: new Map(), + }; +} + +function useAgentSessionMessages({ + sessionId, + apiOrigin, + orgSlug, + projectSlug, + envSlug, + initialMessages, + snapshotPresignedUrl, +}: { + sessionId: string; + apiOrigin: string; + orgSlug: string; + projectSlug: string; + envSlug: string; + initialMessages: UIMessage[]; + snapshotPresignedUrl?: string; +}): UIMessage[] { + // Seed with the user messages from the run's task payload. + const seedMessages = useMemo( + () => initialMessages.filter((m) => m.role === "user"), + [initialMessages] + ); + + // `pendingRef` is the authoritative, eagerly-updated message state: + // chunks mutate this synchronously as they arrive. A throttled flush + // copies it into React state so UI updates are capped at ~10x/sec. + const pendingRef = useRef>( + new Map(seedMessages.map((m) => [m.id, m])) + ); + const timestampsRef = useRef>( + new Map(seedMessages.map((m) => [m.id, INITIAL_PAYLOAD_TIMESTAMP])) + ); + // Side-table of orchestration state, keyed by assistant message id. Lives + // outside the UIMessage so React doesn't see it as a renderable prop. + const orchestrationRef = useRef>(new Map()); + + // Buffered HITL resolutions keyed by toolCallId. `addToolApprovalResponse` / + // `addToolOutput` send a slim assistant message on the `.in` channel carrying + // just the resolved tool part; the agent never echoes these on `.out`. We + // stash them here and overlay onto the matching tool part once it exists, so + // a denial/approval lands regardless of which stream arrives first. + const pendingResolutionsRef = useRef>>(new Map()); + + // React state snapshot of pendingRef. Only updated via the throttled + // `scheduleFlush`. The Map *reference* changes on every flush so React + // detects the state update and the downstream `useMemo` recomputes. + const [messagesById, setMessagesById] = useState>( + () => new Map(pendingRef.current) + ); + + // Throttled flush scheduler — leading edge within a single throttle + // window: the first chunk after a quiet period flushes immediately, then + // subsequent chunks coalesce until the next window opens. + const lastFlushAtRef = useRef(0); + const pendingTimerRef = useRef | null>(null); + const scheduleFlush = useRef<() => void>(() => {}); + scheduleFlush.current = () => { + if (pendingTimerRef.current !== null) return; // already scheduled + const now = Date.now(); + const sinceLast = now - lastFlushAtRef.current; + const delay = Math.max(0, STATE_FLUSH_THROTTLE_MS - sinceLast); + pendingTimerRef.current = setTimeout(() => { + pendingTimerRef.current = null; + lastFlushAtRef.current = Date.now(); + setMessagesById(new Map(pendingRef.current)); + }, delay); + }; + + useEffect(() => { + const abort = new AbortController(); + + // Overlay a buffered HITL resolution (approval/output delivered on `.in`) + // onto the matching tool part. Returns true if a part changed. Safe to call + // repeatedly — after each `.out` tool chunk and whenever a `.in` resolution + // arrives — so the resolution lands regardless of cross-stream ordering. + // Never downgrades a part that already reached a terminal output state (so + // an approved-then-executed tool keeps its `output-available` + output). + const applyToolResolution = (toolCallId: string): boolean => { + const res = pendingResolutionsRef.current.get(toolCallId); + if (!res) return false; + for (const [mid, msg] of pendingRef.current) { + const parts = (msg.parts ?? []) as Array>; + const idx = parts.findIndex((p) => (p as { toolCallId?: string }).toolCallId === toolCallId); + if (idx < 0) continue; + const cur = parts[idx]!; + const terminal = + cur.state === "output-available" || + cur.state === "output-error" || + cur.state === "output-denied"; + const nextState = res.state != null && !terminal ? res.state : cur.state; + const sameApproval = JSON.stringify(cur.approval) === JSON.stringify(res.approval); + if ( + nextState === cur.state && + sameApproval && + res.output === undefined && + res.errorText === undefined + ) { + return false; // already applied + } + const next = parts.slice(); + next[idx] = { + ...cur, + ...(res.approval != null ? { approval: res.approval } : {}), + ...(res.output !== undefined ? { output: res.output } : {}), + ...(res.errorText !== undefined ? { errorText: res.errorText } : {}), + state: nextState, + }; + pendingRef.current.set(mid, { ...msg, parts: next } as UIMessage); + return true; + } + return false; + }; + + const encodedSession = encodeURIComponent(sessionId); + // Always use the page's own origin to avoid CORS preflight failures + // when the configured `apiOrigin` (e.g. `localhost`) differs from the + // origin the dashboard was loaded from (e.g. `127.0.0.1`). The dashboard + // resource route is same-origin by construction. + const origin = typeof window !== "undefined" ? window.location.origin : apiOrigin; + const sessionBase = + `${origin}/resources/orgs/${orgSlug}/projects/${projectSlug}/env/${envSlug}` + + `/sessions/${encodedSession}/realtime/v1`; + + const outputUrl = `${sessionBase}/out`; + const inputUrl = `${sessionBase}/in`; + + /** + * Try to seed `pendingRef` from the agent's S3 snapshot blob and return + * the snapshot's `lastOutEventId` so the `.out` SSE subscription resumes + * just past the snapshot. Returns undefined for sessions that don't + * have a snapshot (e.g. `hydrateMessages` customers, or sessions that + * have never completed a turn). + */ + const loadSnapshot = async (): Promise => { + if (!snapshotPresignedUrl) return undefined; + try { + const resp = await fetch(snapshotPresignedUrl, { signal: abort.signal }); + if (!resp.ok) return undefined; + const json = (await resp.json()) as unknown; + const parsed = ChatSnapshotV1Schema.safeParse(json); + if (!parsed.success) return undefined; + const snapshot = parsed.data; + // Preserve the snapshot's array order in the final render by + // giving each message a unique, monotonically increasing + // timestamp from `(savedAt - count + index)`. Real chunk + // timestamps from the SSE path use S2 arrival ms (positive + // numbers in the present), so anything below `savedAt` sorts + // before live chunks while preserving snapshot order among + // themselves. + const count = snapshot.messages.length; + snapshot.messages.forEach((raw, i) => { + const message = raw as UIMessage; + if (!message?.id) return; + // The snapshot's seed wins over the task-payload seed for any + // overlapping ids (the snapshot represents the agent's + // canonical accumulator, post-turn). + pendingRef.current.set(message.id, message); + if (!timestampsRef.current.has(message.id)) { + timestampsRef.current.set(message.id, snapshot.savedAt - count + i); + } + }); + scheduleFlush.current(); + return snapshot.lastOutEventId; + } catch { + // 404 / network / parse / abort — fall back to seq=0 SSE + return undefined; + } + }; + + const outputSubOptions = (lastEventId: string | undefined) => + ({ + signal: abort.signal, + timeoutInSeconds: 120, + ...(lastEventId !== undefined ? { lastEventId } : {}), + }) as const; + + const commonSubOptions = { + signal: abort.signal, + timeoutInSeconds: 120, + } as const; + + // ---- Output stream: assistant messages --------------------------------- + // + // The output stream delivers data records (UIMessageChunks) interleaved + // with Trigger control records (`turn-complete`, `upgrade-required`) and + // S2 command records (`trim`). Control + command records ride on + // `record.headers` with empty bodies; the SSE parser strips S2 command + // records entirely, and control records arrive with `value.chunk === + // undefined`, which `parseChunkPayload` drops below. + // + // We fold everything else into an assistant `UIMessage` via our own + // `applyOutputChunk` accumulator — the AI SDK's `readUIMessageStream` + // helper is only available in `ai@6`, and the webapp is pinned to + // `ai@4`, so we re-implement just the chunk types that `renderPart` + // actually displays. + // + // We capture the **server timestamp of each assistant message's first + // `start` chunk** so later sort-by-timestamp merges with the input + // stream correctly. + const runOutput = async () => { + try { + // Seed messages from the snapshot first (if available), then + // resume the SSE from the snapshot's last event id so we don't + // re-stream chunks already represented in the snapshot. If no + // snapshot exists (no URL, 404, parse failure), the SSE opens + // at seq=0 — which, post-trim, contains roughly one turn of + // records (acceptable fallback for `hydrateMessages` sessions + // and fresh sessions). + const snapshotLastEventId = await loadSnapshot(); + if (abort.signal.aborted) return; + + const sub = new SSEStreamSubscription(outputUrl, outputSubOptions(snapshotLastEventId)); + const raw = await sub.subscribe(); + const reader = raw.getReader(); + + let currentMessageId: string | null = null; + + try { + while (!abort.signal.aborted) { + const { done, value } = await reader.read(); + if (done) return; + + const chunk = parseChunkPayload(value.chunk) as OutputChunk | null; + if (!chunk || typeof chunk.type !== "string") continue; + // Legacy belt-and-suspenders: prior versions of the SDK + // emitted `trigger:turn-complete` / `trigger:upgrade-required` + // as data records (`type` field). Current versions use + // header-form control records, which `parseChunkPayload` + // drops above. Keep this filter to handle any in-flight + // sessions whose `.out` was populated by the older SDK. + if (chunk.type.startsWith("trigger:")) continue; + + if (chunk.type === "start") { + const messageId = + typeof chunk.messageId === "string" && chunk.messageId.length > 0 + ? chunk.messageId + : `asst-${crypto.randomUUID()}`; + currentMessageId = messageId; + + if (!timestampsRef.current.has(messageId)) { + timestampsRef.current.set(messageId, value.timestamp); + } + + const existing = pendingRef.current.get(messageId); + if (existing) { + // Same message id seen again — merge metadata only, keep + // existing parts (canonical `processUIMessageStream` does + // the same on a repeated `start`). + if (chunk.messageMetadata != null) { + pendingRef.current.set(messageId, { + ...existing, + metadata: { + ...((existing as { metadata?: Record }).metadata ?? {}), + ...(chunk.messageMetadata as Record), + }, + } as UIMessage); + scheduleFlush.current(); + } + } else { + const message: UIMessage = { + id: messageId, + role: "assistant", + parts: [], + ...(chunk.messageMetadata != null + ? { metadata: chunk.messageMetadata as UIMessage["metadata"] } + : {}), + } as UIMessage; + pendingRef.current.set(messageId, message); + orchestrationRef.current.set(messageId, createOrchestrationState()); + scheduleFlush.current(); + } + continue; + } + + if (currentMessageId === null) continue; + const existing = pendingRef.current.get(currentMessageId); + if (!existing) continue; + let orchestration = orchestrationRef.current.get(currentMessageId); + if (!orchestration) { + // Defensive: a chunk arrived for a message we never saw a + // `start` for. Lazily create orchestration state so we can + // still display the parts. + orchestration = createOrchestrationState(); + orchestrationRef.current.set(currentMessageId, orchestration); + } + + const updated = applyOutputChunk(existing, chunk, orchestration); + if (updated !== existing) { + pendingRef.current.set(currentMessageId, updated); + scheduleFlush.current(); + } + + // A `.out` chunk just established/updated a tool part — (re)apply any + // buffered `.in` resolution for it. Covers the `.in`-before-`.out` + // order and corrects a `.out` chunk that downgraded the state (e.g. + // a replayed `tool-approval-request` arriving after the denial). + const outToolCallId = (chunk as { toolCallId?: string }).toolCallId; + if (typeof outToolCallId === "string" && applyToolResolution(outToolCallId)) { + scheduleFlush.current(); + } + } + } finally { + try { + reader.releaseLock(); + } catch { + // Lock may already be released. + } + } + } catch (err) { + if (abort.signal.aborted) return; + // eslint-disable-next-line no-console + console.debug("[AgentView] output stream subscription failed", err); + } + }; + + // ---- Input channel: user messages (`ChatInputChunk`) ------------------- + // + // The transport appends a `{kind: "message", payload}` ChatInputChunk + // for every user turn (and `{kind: "stop"}` for stop signals). We pull + // user messages out of `payload.messages` for `kind: "message"` chunks + // and ignore the rest. + const runInput = async () => { + try { + const sub = new SSEStreamSubscription(inputUrl, commonSubOptions); + const raw = await sub.subscribe(); + const reader = raw.getReader(); + try { + while (!abort.signal.aborted) { + const { done, value } = await reader.read(); + if (done) return; + + const chunk = parseChunkPayload(value.chunk) as InputStreamChunk | null; + if (!chunk || chunk.kind !== "message") continue; + const payload = chunk.payload; + if (!payload) continue; + + // Slim-wire is one UIMessage on `payload.message`; legacy + // payloads carried an array on `payload.messages`. Accept + // either so historical records on a long-lived session still + // render. + const candidates = Array.isArray(payload.messages) + ? payload.messages + : payload.message + ? [payload.message] + : []; + + let changed = false; + + // New user turns — merge in (dedupe by id). + for (const m of candidates) { + if (m == null || (m as { role?: string }).role !== "user" || typeof m.id !== "string") { + continue; + } + if (pendingRef.current.has(m.id)) continue; + pendingRef.current.set(m.id, m as UIMessage); + timestampsRef.current.set(m.id, value.timestamp); + changed = true; + } + + // HITL resolutions ride on `.in` as a slim *assistant* message + // carrying just the resolved tool part (state + approval/output). + // Buffer each by toolCallId and overlay onto the matching tool part + // (which usually arrived on `.out` as `tool-approval-request`). + for (const m of candidates) { + if (m == null || (m as { role?: string }).role !== "assistant") continue; + const parts = (m as { parts?: unknown[] }).parts; + if (!Array.isArray(parts)) continue; + for (const sp of parts) { + const part = sp as Record; + if (typeof part.type !== "string" || !part.type.startsWith("tool-")) continue; + const tcId = (part as { toolCallId?: string }).toolCallId; + if (typeof tcId !== "string") continue; + pendingResolutionsRef.current.set(tcId, part); + if (applyToolResolution(tcId)) changed = true; + } + } + + if (changed) scheduleFlush.current(); + } + } finally { + try { + reader.releaseLock(); + } catch { + // Lock may already be released. + } + } + } catch (err) { + if (abort.signal.aborted) return; + // eslint-disable-next-line no-console + console.debug("[AgentView] input stream subscription failed", err); + } + }; + + void runOutput(); + void runInput(); + + return () => { + abort.abort(); + if (pendingTimerRef.current !== null) { + clearTimeout(pendingTimerRef.current); + pendingTimerRef.current = null; + } + }; + }, [sessionId, apiOrigin, orgSlug, projectSlug, envSlug, snapshotPresignedUrl]); + + return useMemo(() => { + const timestamps = timestampsRef.current; + const arr = Array.from(messagesById.values()); + arr.sort((a, b) => { + const ta = timestamps.get(a.id) ?? 0; + const tb = timestamps.get(b.id) ?? 0; + if (ta !== tb) return ta - tb; + // Tie-breaker for messages sharing a stream ID bucket (rare): fall + // back to message id string order so the output is deterministic. + return a.id < b.id ? -1 : a.id > b.id ? 1 : 0; + }); + return arr; + }, [messagesById]); +} + +// --------------------------------------------------------------------------- +// applyOutputChunk — minimal UIMessageChunk → UIMessage accumulator. +// --------------------------------------------------------------------------- +// +// A pared-down re-implementation of AI SDK's `processUIMessageStream` (in +// `ai@6`'s `index.mjs`). The webapp is pinned to `ai@4`, which doesn't ship +// the v5+ chunk-stream helpers, so we vendor the bits we actually use. +// +// Scope vs. canonical: +// - We render only the chunk shapes that `AgentMessageView`/`renderPart` +// actually display: text, reasoning, tool-* (input-{start,delta,available} +// + output-{available,error}), source-url, source-document, file, +// step-start/finish-step, data-*, plus metadata/finish lifecycle. +// - Unknown chunk types fall through as no-ops — defensive on purpose for a +// read-only viewer. +// - We **do not parse partial JSON for streaming tool inputs.** Canonical +// uses `parsePartialJson` (which depends on a 300-line `fixJson` state +// machine to repair incomplete JSON) so users see the input growing +// character-by-character. We skip it: tool inputs stay `undefined` +// throughout streaming and snap to the final value when +// `tool-input-available` lands. Acceptable for a viewer; can be added +// later by vendoring `fixJson` if the UX warrants it. +// +// `orchestration` carries per-message active-part trackers that mirror +// canonical's `state.activeTextParts` / `state.activeReasoningParts`. They +// let `text-delta` find the right text part by id and let `finish-step` +// clear them so a new step can re-use the same id without colliding. +// +// Returns the same object reference when nothing changes so the caller can +// skip unnecessary state flushes + React re-renders. + +type AnyPart = { [key: string]: unknown; type: string }; + +function applyOutputChunk( + msg: UIMessage, + chunk: OutputChunk, + orchestration: MessageOrchestrationState +): UIMessage { + const type = chunk.type; + + // Text parts --------------------------------------------------------------- + // + // Track each streaming text part by its index in `msg.parts`. Part ids + // are only unique *within a step* — the SDK happily reuses `text-start + // id="0"` after a `finish-step` boundary — so a delta arriving for a + // reused id needs to land on the *current* part, not every prior part + // that ever shared that id. The index map gives us O(1) "which slot is + // currently streaming this id" without any id-based search. + if (type === "text-start") { + const id = chunk.id as string; + const newIndex = (msg.parts ?? []).length; // index AFTER push + orchestration.activeTextPartIndexes.set(id, newIndex); + return withNewPart(msg, { + type: "text", + id, + text: "", + state: "streaming", + }); + } + if (type === "text-delta") { + const id = chunk.id as string; + const index = orchestration.activeTextPartIndexes.get(id); + if (index === undefined) return msg; // delta with no start — drop. + return updatePartAt(msg, index, (p) => ({ + ...p, + text: ((p as { text?: string }).text ?? "") + String(chunk.delta ?? ""), + })); + } + if (type === "text-end") { + const id = chunk.id as string; + const index = orchestration.activeTextPartIndexes.get(id); + if (index === undefined) return msg; + orchestration.activeTextPartIndexes.delete(id); + return updatePartAt(msg, index, (p) => ({ ...p, state: "done" })); + } + + // Reasoning parts ---------------------------------------------------------- + if (type === "reasoning-start") { + const id = chunk.id as string; + const newIndex = (msg.parts ?? []).length; + orchestration.activeReasoningPartIndexes.set(id, newIndex); + return withNewPart(msg, { + type: "reasoning", + id, + text: "", + state: "streaming", + }); + } + if (type === "reasoning-delta") { + const id = chunk.id as string; + const index = orchestration.activeReasoningPartIndexes.get(id); + if (index === undefined) return msg; + return updatePartAt(msg, index, (p) => ({ + ...p, + text: ((p as { text?: string }).text ?? "") + String(chunk.delta ?? ""), + })); + } + if (type === "reasoning-end") { + const id = chunk.id as string; + const index = orchestration.activeReasoningPartIndexes.get(id); + if (index === undefined) return msg; + orchestration.activeReasoningPartIndexes.delete(id); + return updatePartAt(msg, index, (p) => ({ ...p, state: "done" })); + } + + // Tool call parts ---------------------------------------------------------- + if (type === "tool-input-start") { + const toolName = String(chunk.toolName ?? ""); + return withNewPart(msg, { + type: `tool-${toolName}`, + toolCallId: chunk.toolCallId, + toolName, + state: "input-streaming", + input: undefined, + }); + } + if (type === "tool-input-delta") { + // We don't parse partial JSON, so streaming tool input deltas are a + // no-op. The full input snaps in when `tool-input-available` arrives. + return msg; + } + if (type === "tool-input-available") { + const toolName = String(chunk.toolName ?? ""); + const existingIdx = indexOfPart( + msg, + (p) => (p as { toolCallId?: string }).toolCallId === chunk.toolCallId + ); + if (existingIdx >= 0) { + return updatePartAt(msg, existingIdx, (p) => ({ + ...p, + state: "input-available", + input: chunk.input, + })); + } + // Tool input arrived without a preceding tool-input-start (some + // providers do this for fast tools) — synthesize a new part. + return withNewPart(msg, { + type: `tool-${toolName}`, + toolCallId: chunk.toolCallId, + toolName, + state: "input-available", + input: chunk.input, + }); + } + if (type === "tool-output-available") { + return updatePart(msg, (p) => + (p as { toolCallId?: string }).toolCallId === chunk.toolCallId + ? { + ...p, + state: "output-available", + output: chunk.output, + ...(chunk.preliminary === true ? { preliminary: true } : {}), + } + : null + ); + } + if (type === "tool-output-error") { + return updatePart(msg, (p) => + (p as { toolCallId?: string }).toolCallId === chunk.toolCallId + ? { ...p, state: "output-error", errorText: chunk.errorText } + : null + ); + } + + // HITL approval (AI SDK 7) ------------------------------------------------- + // + // v7 added human-in-the-loop tool approval. A `needsApproval` tool emits a + // `tool-approval-request` after its input is available; the tool then waits + // for a `tool-approval-response` (approve/deny) before executing. Mirror AI + // SDK 7's `processUIMessageStream`: the request marks the matching part + // `approval-requested` and records `approval.id`; the response (matched by + // that id) marks it `approval-responded` with the verdict. An approved tool + // then proceeds to `tool-output-available` as usual. + if (type === "tool-approval-request") { + return updatePart(msg, (p) => + (p as { toolCallId?: string }).toolCallId === chunk.toolCallId + ? { + ...p, + state: "approval-requested", + approval: { + id: chunk.approvalId, + ...(chunk.isAutomatic === true ? { isAutomatic: true } : {}), + }, + } + : null + ); + } + if (type === "tool-approval-response") { + return updatePart(msg, (p) => { + const approval = (p as { approval?: { id?: string; isAutomatic?: boolean } }).approval; + if (!approval || approval.id !== chunk.approvalId) return null; + return { + ...p, + state: "approval-responded", + approval: { + ...approval, + id: chunk.approvalId, + approved: chunk.approved, + ...(chunk.reason != null ? { reason: chunk.reason } : {}), + }, + }; + }); + } + + // Source / file / step / data parts — pass through as a whole ------------- + if (type === "source-url" || type === "source-document" || type === "file") { + return withNewPart(msg, chunk as unknown as AnyPart); + } + if (type === "start-step") { + return withNewPart(msg, { type: "step-start" }); + } + if (type === "finish-step") { + // Step boundary — canonical clears the active part trackers so a new + // step can re-use the same text/reasoning part IDs cleanly. The + // message itself doesn't structurally change; the previous step's + // parts stay frozen at their indexes in `msg.parts`. + orchestration.activeTextPartIndexes.clear(); + orchestration.activeReasoningPartIndexes.clear(); + return msg; + } + if (type.startsWith("data-")) { + return withNewPart(msg, chunk as unknown as AnyPart); + } + + // Metadata / lifecycle ----------------------------------------------------- + if (type === "finish" || type === "message-metadata") { + if (chunk.messageMetadata == null) return msg; + return { + ...msg, + metadata: { + ...((msg as { metadata?: Record }).metadata ?? {}), + ...(chunk.messageMetadata as Record), + }, + } as UIMessage; + } + + // Abort / error / unknown — no structural change. (`start` is handled at + // the orchestration level in the output reader, not here.) + return msg; +} + +// --- Small immutable helpers for UIMessage.parts mutation ------------------- + +function withNewPart(msg: UIMessage, part: AnyPart): UIMessage { + return { + ...msg, + parts: [...((msg.parts ?? []) as AnyPart[]), part], + } as UIMessage; +} + +function updatePart( + msg: UIMessage, + updater: (part: AnyPart) => AnyPart | null +): UIMessage { + const parts = (msg.parts ?? []) as AnyPart[]; + let changed = false; + const next = parts.map((p) => { + const updated = updater(p); + if (updated === null) return p; + changed = true; + return updated; + }); + return changed ? ({ ...msg, parts: next } as UIMessage) : msg; +} + +function indexOfPart(msg: UIMessage, predicate: (part: AnyPart) => boolean): number { + const parts = (msg.parts ?? []) as AnyPart[]; + for (let i = 0; i < parts.length; i++) { + if (predicate(parts[i]!)) return i; + } + return -1; +} + +function updatePartAt( + msg: UIMessage, + index: number, + updater: (part: AnyPart) => AnyPart +): UIMessage { + const parts = (msg.parts ?? []) as AnyPart[]; + if (index < 0 || index >= parts.length) return msg; + const next = parts.slice(); + next[index] = updater(parts[index]!); + return { ...msg, parts: next } as UIMessage; +} diff --git a/apps/webapp/app/components/runs/v3/ai/AIChatMessages.tsx b/apps/webapp/app/components/runs/v3/ai/AIChatMessages.tsx index 297234b8d05..72539cd7910 100644 --- a/apps/webapp/app/components/runs/v3/ai/AIChatMessages.tsx +++ b/apps/webapp/app/components/runs/v3/ai/AIChatMessages.tsx @@ -5,24 +5,14 @@ import { ClipboardDocumentIcon, CodeBracketSquareIcon, } from "@heroicons/react/20/solid"; -import { lazy, Suspense, useState } from "react"; +import { Suspense, useEffect, useState } from "react"; import { CodeBlock } from "~/components/code/CodeBlock"; +import { StreamdownRenderer } from "~/components/code/StreamdownRenderer"; import { Button, LinkButton } from "~/components/primitives/Buttons"; import { Header3 } from "~/components/primitives/Headers"; import tablerSpritePath from "~/components/primitives/tabler-sprite.svg"; import type { DisplayItem, ToolUse } from "./types"; -// Lazy load streamdown to avoid SSR issues -const StreamdownRenderer = lazy(() => - import("streamdown").then((mod) => ({ - default: ({ children }: { children: string }) => ( - - {children} - - ), - })) -); - export type PromptLink = { slug: string; version?: string; @@ -221,7 +211,7 @@ export function AssistantResponse({ /> {mode === "rendered" ? ( -
+
{text}}> {text} @@ -257,30 +247,59 @@ function ToolUseSection({ tools }: { tools: ToolUse[] }) { ); } -type ToolTab = "input" | "output" | "details"; +type ToolTab = "input" | "output" | "details" | "agent"; -function ToolUseRow({ tool }: { tool: ToolUse }) { +export function ToolUseRow({ tool }: { tool: ToolUse }) { const hasInput = tool.inputJson !== "{}"; const hasResult = !!tool.resultOutput; const hasDetails = !!tool.description || !!tool.parametersJson; + const hasSubAgent = !!tool.subAgent; const availableTabs: ToolTab[] = [ + ...(hasSubAgent ? (["agent"] as const) : []), ...(hasInput ? (["input"] as const) : []), ...(hasResult ? (["output"] as const) : []), ...(hasDetails ? (["details"] as const) : []), ]; - const defaultTab: ToolTab | null = hasInput ? "input" : null; - const [activeTab, setActiveTab] = useState(defaultTab); + const [activeTab, setActiveTab] = useState( + hasSubAgent ? "agent" : hasInput ? "input" : null + ); + + // Auto-select input tab when input arrives after initial render (e.g. streaming tool calls) + useEffect(() => { + if (!hasSubAgent && hasInput && activeTab === null) { + setActiveTab("input"); + } + }, [hasInput, hasSubAgent]); function handleTabClick(tab: ToolTab) { setActiveTab(activeTab === tab ? null : tab); } return ( -
+
- {tool.toolName} + {hasSubAgent && ( + + + + )} + + {tool.toolName} + + {hasSubAgent && tool.subAgent?.isStreaming && ( + + + streaming + + )} {tool.resultSummary && ( {tool.resultSummary} )} @@ -288,7 +307,11 @@ function ToolUseRow({ tool }: { tool: ToolUse }) { {availableTabs.length > 0 && ( <> -
+
{availableTabs.map((tab) => (
); } + +function SubAgentContent({ parts }: { parts: any[] }) { + // Extract sub-agent run ID from injected metadata part + const runPart = parts.find( + (p: any) => p.type === "data-subagent-run" && p.data?.runId + ); + const subAgentRunId = runPart?.data?.runId as string | undefined; + + return ( +
+ {subAgentRunId && ( +
+ + View sub-agent run + +
+ )} + {parts.map((part: any, j: number) => { + const partType = part.type as string; + + // Skip the injected metadata part — already rendered above + if (partType === "data-subagent-run") return null; + + if (partType === "text" && part.text) { + return ; + } + + if (partType === "step-start") { + return ( +
+
+ step +
+
+ ); + } + + if (partType.startsWith("tool-")) { + const subToolName = partType.slice(5); + return ( + + ); + } + + if (partType === "reasoning" && part.text) { + return ( +
+
+ {part.text} +
+
+ ); + } + + return null; + })} +
+ ); +} diff --git a/apps/webapp/app/components/runs/v3/ai/AISpanDetails.tsx b/apps/webapp/app/components/runs/v3/ai/AISpanDetails.tsx index 5e8bb65688f..c243a1e4d9b 100644 --- a/apps/webapp/app/components/runs/v3/ai/AISpanDetails.tsx +++ b/apps/webapp/app/components/runs/v3/ai/AISpanDetails.tsx @@ -1,6 +1,7 @@ import { CheckIcon, ClipboardDocumentIcon } from "@heroicons/react/20/solid"; -import { lazy, Suspense, useState } from "react"; +import { Suspense, useState } from "react"; import { Button } from "~/components/primitives/Buttons"; +import { StreamdownRenderer } from "~/components/code/StreamdownRenderer"; import { Header3 } from "~/components/primitives/Headers"; import { Paragraph } from "~/components/primitives/Paragraph"; import { TabButton, TabContainer } from "~/components/primitives/Tabs"; @@ -20,16 +21,6 @@ import type { AISpanData, DisplayItem } from "./types"; import type { PromptSpanData } from "~/presenters/v3/SpanPresenter.server"; import { SpanHorizontalTimeline } from "~/components/runs/v3/SpanHorizontalTimeline"; -const StreamdownRenderer = lazy(() => - import("streamdown").then((mod) => ({ - default: ({ children }: { children: string }) => ( - - {children} - - ), - })) -); - type AITab = "overview" | "messages" | "tools" | "prompt"; export function AISpanDetails({ diff --git a/apps/webapp/app/components/runs/v3/ai/extractAISpanData.ts b/apps/webapp/app/components/runs/v3/ai/extractAISpanData.ts index 73c5418e488..853f75c493f 100644 --- a/apps/webapp/app/components/runs/v3/ai/extractAISpanData.ts +++ b/apps/webapp/app/components/runs/v3/ai/extractAISpanData.ts @@ -26,6 +26,10 @@ export function extractAISpanData( const gRequest = rec(g.request); const gUsage = rec(g.usage); const gOperation = rec(g.operation); + const gProvider = rec(g.provider); + const gInput = rec(g.input); + const gOutput = rec(g.output); + const gTool = rec(g.tool); const aiModel = rec(ai.model); const aiResponse = rec(ai.response); const aiPrompt = rec(ai.prompt); @@ -49,7 +53,17 @@ export function extractAISpanData( ? Math.round((outputTokens / (durationMs / 1000)) * 10) / 10 : undefined); - const toolDefs = parseToolDefinitions(aiPrompt.tools); + // AI SDK 7 moves span emission into `@ai-sdk/otel`, which emits OTel GenAI + // semantic-convention attributes (gen_ai.input/output.messages, gen_ai.provider.name, + // gen_ai.tool.definitions, gen_ai.response.finish_reasons). AI SDK 6 emits the older + // `ai.*` keys (ai.prompt.messages, ai.response.text/toolCalls/finishReason). Customers + // run either major, so detect the shape and read both. + const isV7 = + typeof gInput.messages === "string" || + typeof gOutput.messages === "string" || + typeof gProvider.name === "string"; + + const toolDefs = parseToolDefinitions(isV7 ? gTool.definitions : aiPrompt.tools); const providerMeta = parseProviderMetadata(aiResponse.providerMetadata); const aiTelemetry = rec(ai.telemetry); const telemetryMetaRaw = rec(aiTelemetry.metadata); @@ -63,15 +77,15 @@ export function extractAISpanData( return { model, - provider: str(g.system) ?? "unknown", + provider: str(gProvider.name) ?? str(g.system) ?? str(aiModel.provider) ?? "unknown", operationName: str(gOperation.name) ?? str(ai.operationId) ?? "", responseId: str(gResponse.id) || undefined, - finishReason: str(aiResponse.finishReason), + finishReason: isV7 ? firstFinishReason(gResponse.finish_reasons) : str(aiResponse.finishReason), serviceTier: providerMeta?.serviceTier, resolvedProvider: providerMeta?.resolvedProvider, toolChoice: parseToolChoice(aiPrompt.toolChoice), toolCount: toolDefs?.length, - messageCount: countMessages(aiPrompt.messages), + messageCount: isV7 ? countGenAiMessages(gInput.messages) : countMessages(aiPrompt.messages), telemetryMetadata: telemetryMeta, promptSlug: promptSlug || undefined, promptVersion: promptVersion || undefined, @@ -81,9 +95,14 @@ export function extractAISpanData( inputTokens, outputTokens, totalTokens, - cachedTokens: num(aiUsage.cachedInputTokens) ?? num(gUsage.cache_read_input_tokens), + cachedTokens: + num(aiUsage.cachedInputTokens) ?? + num(gUsage.cache_read_input_tokens) ?? + num(rec(gUsage.cache_read).input_tokens), cacheCreationTokens: - num(aiUsage.cacheCreationInputTokens) ?? num(gUsage.cache_creation_input_tokens), + num(aiUsage.cacheCreationInputTokens) ?? + num(gUsage.cache_creation_input_tokens) ?? + num(rec(gUsage.cache_creation).input_tokens), reasoningTokens: num(aiUsage.reasoningTokens) ?? num(gUsage.reasoning_tokens), tokensPerSecond, msToFirstChunk: num(aiResponse.msToFirstChunk), @@ -91,10 +110,14 @@ export function extractAISpanData( inputCost: num(triggerLlm.input_cost), outputCost: num(triggerLlm.output_cost), totalCost: num(triggerLlm.total_cost), - responseText: str(aiResponse.text) || undefined, + responseText: isV7 + ? extractGenAiAssistantText(gOutput.messages) || undefined + : str(aiResponse.text) || undefined, responseObject: str(aiResponse.object) || undefined, toolDefinitions: toolDefs, - items: buildDisplayItems(aiPrompt.messages, aiResponse.toolCalls, toolDefs), + items: isV7 + ? buildGenAiDisplayItems(g.system_instructions, gInput.messages, gOutput.messages, toolDefs) + : buildDisplayItems(aiPrompt.messages, aiResponse.toolCalls, toolDefs), }; } @@ -450,3 +473,213 @@ function countMessages(raw: unknown): number | undefined { } } +// --------------------------------------------------------------------------- +// AI SDK 7 — @ai-sdk/otel GenAI semantic-convention shape +// --------------------------------------------------------------------------- +// +// v7 moved span emission out of `ai` core into `@ai-sdk/otel`, which emits OTel +// GenAI semantic-convention attributes instead of the v6 `ai.*` keys: +// gen_ai.input.messages JSON string: [{ role, parts: [...] }] +// gen_ai.output.messages JSON string: [{ role:"assistant", parts, finish_reason }] +// gen_ai.system_instructions system prompt (plain string, or [{ type:"text", content }]) +// Message parts (per @ai-sdk/otel's convertMessagePartToSemConv): +// { type:"text" | "reasoning", content } +// { type:"tool_call", id, name, arguments } +// { type:"tool_call_response", id, response } // response already unwrapped from the AI SDK envelope +// Media / approval / custom parts are not surfaced in the display yet. + +type GenAiMessage = { role: string; parts: Record[]; finishReason?: string }; + +function parseGenAiMessages(raw: unknown): GenAiMessage[] | undefined { + if (typeof raw !== "string") return undefined; + try { + const parsed = JSON.parse(raw); + if (!Array.isArray(parsed)) return undefined; + return parsed.map((m) => { + const o = rec(m); + return { + role: str(o.role) ?? "user", + parts: Array.isArray(o.parts) ? o.parts.map(rec) : [], + finishReason: str(o.finish_reason), + }; + }); + } catch { + return undefined; + } +} + +/** `gen_ai.response.finish_reasons` arrives as a JSON array string (e.g. `["stop"]`); take the first. */ +function firstFinishReason(raw: unknown): string | undefined { + if (typeof raw !== "string") return undefined; + try { + const parsed = JSON.parse(raw); + if (Array.isArray(parsed)) { + const first = parsed.find((r) => typeof r === "string"); + return typeof first === "string" ? first : undefined; + } + if (typeof parsed === "string") return parsed || undefined; + } catch { + return raw || undefined; + } + return undefined; +} + +function countGenAiMessages(raw: unknown): number | undefined { + const msgs = parseGenAiMessages(raw); + return msgs && msgs.length > 0 ? msgs.length : undefined; +} + +/** Concatenated text of all `text` parts across the assistant output messages. */ +function extractGenAiAssistantText(outputRaw: unknown): string { + const msgs = parseGenAiMessages(outputRaw); + if (!msgs) return ""; + const texts: string[] = []; + for (const m of msgs) { + if (m.role !== "assistant") continue; + for (const p of m.parts) { + if (p.type === "text" && typeof p.content === "string") texts.push(p.content); + } + } + return texts.join("\n"); +} + +/** Plain text of a message's `text` parts (reasoning parts aren't surfaced yet). */ +function genAiMessageText(parts: Record[]): string { + const texts: string[] = []; + for (const p of parts) { + if (p.type === "text" && typeof p.content === "string") texts.push(p.content); + } + return texts.join("\n"); +} + +/** Parse `gen_ai.system_instructions` (plain string, or a JSON array of `{ type:"text", content }`). */ +function parseSystemInstructions(raw: unknown): string | undefined { + if (typeof raw !== "string") return undefined; + const trimmed = raw.trim(); + if (trimmed.startsWith("[")) { + try { + const parsed = JSON.parse(trimmed); + if (Array.isArray(parsed)) { + const text = parsed + .map((p) => str(rec(p).content)) + .filter((t): t is string => Boolean(t)) + .join("\n"); + return text || undefined; + } + } catch { + // fall through to raw + } + } + return raw || undefined; +} + +/** + * Build display items from the v7 GenAI message attributes: the system prompt, + * the input message history, and the assistant's output for this span. Assistant + * tool_call parts are paired with tool_call_response parts from following tool messages. + */ +function buildGenAiDisplayItems( + systemInstructionsRaw: unknown, + inputMessagesRaw: unknown, + outputMessagesRaw: unknown, + toolDefs?: ToolDefinition[] +): DisplayItem[] | undefined { + const items: DisplayItem[] = []; + + const systemText = parseSystemInstructions(systemInstructionsRaw); + if (systemText) items.push({ type: "system", text: systemText }); + + const messages = [ + ...(parseGenAiMessages(inputMessagesRaw) ?? []), + ...(parseGenAiMessages(outputMessagesRaw) ?? []), + ]; + appendGenAiMessages(items, messages); + + if (toolDefs && toolDefs.length > 0) { + const defsByName = new Map(toolDefs.map((d) => [d.name, d])); + for (const item of items) { + if (item.type === "tool-use") { + for (const tool of item.tools) { + const def = defsByName.get(tool.toolName); + if (def) { + tool.description = def.description; + tool.parametersJson = def.parametersJson; + } + } + } + } + } + + return items.length > 0 ? items : undefined; +} + +function appendGenAiMessages(items: DisplayItem[], messages: GenAiMessage[]): void { + let i = 0; + while (i < messages.length) { + const msg = messages[i]; + + if (msg.role === "system") { + const text = genAiMessageText(msg.parts); + if (text) items.push({ type: "system", text }); + i++; + continue; + } + + if (msg.role === "user") { + const text = genAiMessageText(msg.parts); + if (text) items.push({ type: "user", text }); + i++; + continue; + } + + if (msg.role === "assistant") { + const text = genAiMessageText(msg.parts); + if (text) items.push({ type: "assistant", text }); + + const toolCalls = msg.parts.filter((p) => p.type === "tool_call"); + if (toolCalls.length > 0) { + // Collect tool_call_response parts from the tool messages that follow. + const responsesById = new Map(); + let j = i + 1; + while (j < messages.length && messages[j].role === "tool") { + for (const p of messages[j].parts) { + if (p.type === "tool_call_response") { + const id = str(p.id); + if (id) responsesById.set(id, p.response); + } + } + j++; + } + + const tools: ToolUse[] = toolCalls.map((tc) => { + const id = str(tc.id) ?? ""; + let resultSummary: string | undefined; + let resultOutput: string | undefined; + if (id && responsesById.has(id)) { + const summarized = summarizeToolOutput(responsesById.get(id)); + resultSummary = summarized.summary; + resultOutput = summarized.formattedOutput; + } + return { + toolCallId: id, + toolName: str(tc.name) ?? "", + inputJson: JSON.stringify(tc.arguments ?? {}, null, 2), + resultSummary, + resultOutput, + }; + }); + + items.push({ type: "tool-use", tools }); + i = j; + continue; + } + + i++; + continue; + } + + // tool-role messages are consumed via the assistant pairing above; skip stragglers. + i++; + } +} + diff --git a/apps/webapp/app/components/runs/v3/ai/types.ts b/apps/webapp/app/components/runs/v3/ai/types.ts index bb0fd7e74b1..c59c87865d2 100644 --- a/apps/webapp/app/components/runs/v3/ai/types.ts +++ b/apps/webapp/app/components/runs/v3/ai/types.ts @@ -22,6 +22,11 @@ export type ToolUse = { resultSummary?: string; /** Full formatted result for display in a code block */ resultOutput?: string; + /** Sub-agent output — when the tool result is a UIMessage with parts */ + subAgent?: { + parts: any[]; + isStreaming: boolean; + }; }; // --------------------------------------------------------------------------- diff --git a/apps/webapp/app/components/sessions/v1/CloseSessionDialog.tsx b/apps/webapp/app/components/sessions/v1/CloseSessionDialog.tsx new file mode 100644 index 00000000000..7feba8e6db5 --- /dev/null +++ b/apps/webapp/app/components/sessions/v1/CloseSessionDialog.tsx @@ -0,0 +1,72 @@ +import { XCircleIcon } from "@heroicons/react/24/solid"; +import { DialogClose } from "@radix-ui/react-dialog"; +import { Form, useNavigation } from "@remix-run/react"; +import { Button } from "~/components/primitives/Buttons"; +import { DialogContent, DialogHeader } from "~/components/primitives/Dialog"; +import { FormButtons } from "~/components/primitives/FormButtons"; +import { Input } from "~/components/primitives/Input"; +import { Label } from "~/components/primitives/Label"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { SpinnerWhite } from "~/components/primitives/Spinner"; + +type CloseSessionDialogProps = { + sessionParam: string; + environmentId: string; + redirectPath: string; +}; + +export function CloseSessionDialog({ + sessionParam, + environmentId, + redirectPath, +}: CloseSessionDialogProps) { + const navigation = useNavigation(); + + const formAction = `/resources/sessions/${encodeURIComponent(sessionParam)}/close`; + const isLoading = navigation.formAction === formAction; + + return ( + + Close this session? +
+ + Closing a session is permanent. The session will no longer accept new input or trigger + new runs. Any in-flight run continues until it finishes on its own. + +
+ + +
+ + +
+ + {isLoading ? "Closing..." : "Close session"} + + } + cancelButton={ + + + + } + /> + +
+
+ ); +} diff --git a/apps/webapp/app/components/sessions/v1/SessionFilters.tsx b/apps/webapp/app/components/sessions/v1/SessionFilters.tsx new file mode 100644 index 00000000000..9c13b7b4b3f --- /dev/null +++ b/apps/webapp/app/components/sessions/v1/SessionFilters.tsx @@ -0,0 +1,764 @@ +import * as Ariakit from "@ariakit/react"; +import { + CpuChipIcon, + FingerPrintIcon, + TagIcon, + XMarkIcon, +} from "@heroicons/react/20/solid"; +import { Form } from "@remix-run/react"; +import { ListFilterIcon } from "lucide-react"; +import { type ReactNode, useCallback, useMemo, useState } from "react"; +import { z } from "zod"; +import { StatusIcon } from "~/assets/icons/StatusIcon"; +import { TaskIcon } from "~/assets/icons/TaskIcon"; +import { AppliedFilter } from "~/components/primitives/AppliedFilter"; +import { Input } from "~/components/primitives/Input"; +import { Label } from "~/components/primitives/Label"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { + ComboBox, + SelectButtonItem, + SelectItem, + SelectList, + SelectPopover, + SelectProvider, + SelectTrigger, + shortcutFromIndex, +} from "~/components/primitives/Select"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "~/components/primitives/Tooltip"; +import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; +import { useSearchParams } from "~/hooks/useSearchParam"; +import { Button } from "../../primitives/Buttons"; +import { + appliedSummary, + FilterMenuProvider, + TimeFilter, +} from "../../runs/v3/SharedFilters"; +import { + allSessionStatuses, + descriptionForSessionStatus, + SessionStatusCombo, + sessionStatusTitle, +} from "./SessionStatus"; + +const StringOrStringArray = z.preprocess( + (value) => (typeof value === "string" ? [value] : value), + z.array(z.string()).optional() +); + +export const SessionStatus = z.enum(allSessionStatuses); + +export const SessionListSearchFilters = z.object({ + cursor: z.string().optional(), + direction: z.enum(["forward", "backward"]).optional(), + statuses: z.preprocess( + (value) => (typeof value === "string" ? [value] : value), + SessionStatus.array().optional() + ), + types: StringOrStringArray, + taskIdentifiers: StringOrStringArray, + externalId: z.string().optional(), + tags: StringOrStringArray, + period: z.preprocess((value) => (value === "all" ? undefined : value), z.string().optional()), + from: z.coerce.number().optional(), + to: z.coerce.number().optional(), +}); + +export type SessionListSearchFilters = z.infer; +export type SessionListSearchFilterKey = keyof SessionListSearchFilters; + +export function getSessionFiltersFromSearchParams( + searchParams: URLSearchParams +): SessionListSearchFilters { + function listOrUndefined(key: string) { + const values = searchParams.getAll(key).filter((v) => v.length > 0); + return values.length > 0 ? values : undefined; + } + + const params = { + cursor: searchParams.get("cursor") ?? undefined, + direction: searchParams.get("direction") ?? undefined, + statuses: listOrUndefined("statuses"), + types: listOrUndefined("types"), + taskIdentifiers: listOrUndefined("taskIdentifiers"), + externalId: searchParams.get("externalId") ?? undefined, + tags: listOrUndefined("tags"), + period: searchParams.get("period") ?? undefined, + from: searchParams.get("from") ?? undefined, + to: searchParams.get("to") ?? undefined, + }; + + const parsed = SessionListSearchFilters.safeParse(params); + if (!parsed.success) { + return {}; + } + return parsed.data; +} + +type SessionFiltersProps = { + hasFilters: boolean; + possibleTypes?: string[]; +}; + +export function SessionFilters(props: SessionFiltersProps) { + const location = useOptimisticLocation(); + const searchParams = new URLSearchParams(location.search); + const hasFilters = + searchParams.has("statuses") || + searchParams.has("types") || + searchParams.has("taskIdentifiers") || + searchParams.has("externalId") || + searchParams.has("tags"); + + return ( +
+ + + + {hasFilters && ( +
+
+ ); +} + +const filterTypes = [ + { + name: "statuses", + title: "Status", + icon: , + }, + { name: "types", title: "Type", icon: }, + { + name: "taskIdentifiers", + title: "Task", + icon: , + }, + { + name: "externalId", + title: "External ID", + icon: , + }, + { name: "tags", title: "Tags", icon: }, +] as const; + +type FilterType = (typeof filterTypes)[number]["name"]; + +const shortcut = { key: "f" }; + +function FilterMenu(props: SessionFiltersProps) { + const [filterType, setFilterType] = useState(); + + const filterTrigger = ( + + +
+ } + variant={"secondary/small"} + shortcut={shortcut} + tooltipTitle={"Filter sessions"} + > + Filter + + ); + + return ( + setFilterType(undefined)}> + {(search, setSearch) => ( + setSearch("")} + trigger={filterTrigger} + filterType={filterType} + setFilterType={setFilterType} + {...props} + /> + )} + + ); +} + +function AppliedFilters() { + return ( + <> + + + + + + + ); +} + +type MenuProps = { + searchValue: string; + clearSearchValue: () => void; + trigger: React.ReactNode; + filterType: FilterType | undefined; + setFilterType: (filterType: FilterType | undefined) => void; +} & SessionFiltersProps; + +function Menu(props: MenuProps) { + switch (props.filterType) { + case undefined: + return ; + case "statuses": + return props.setFilterType(undefined)} {...props} />; + case "types": + return props.setFilterType(undefined)} {...props} />; + case "taskIdentifiers": + return ( + props.setFilterType(undefined)} {...props} /> + ); + case "externalId": + return props.setFilterType(undefined)} {...props} />; + case "tags": + return props.setFilterType(undefined)} {...props} />; + } +} + +function MainMenu({ searchValue, trigger, clearSearchValue, setFilterType }: MenuProps) { + const filtered = useMemo(() => { + return filterTypes.filter((item) => + item.title.toLowerCase().includes(searchValue.toLowerCase()) + ); + }, [searchValue]); + + return ( + + {trigger} + + + + {filtered.map((type, index) => ( + { + clearSearchValue(); + setFilterType(type.name); + }} + icon={type.icon} + shortcut={shortcutFromIndex(index, { shortcutsEnabled: true })} + > + {type.title} + + ))} + + + + ); +} + +const statusItems = allSessionStatuses.map((status) => ({ + title: sessionStatusTitle(status), + value: status, +})); + +function StatusDropdown({ + trigger, + clearSearchValue, + searchValue, + onClose, +}: { + trigger: ReactNode; + clearSearchValue: () => void; + searchValue: string; + onClose?: () => void; +}) { + const { values, replace } = useSearchParams(); + + const handleChange = (next: string[]) => { + clearSearchValue(); + replace({ statuses: next, cursor: undefined, direction: undefined }); + }; + + const filtered = useMemo(() => { + return statusItems.filter((item) => + item.title.toLowerCase().includes(searchValue.toLowerCase()) + ); + }, [searchValue]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + > + + + {filtered.map((item, index) => ( + + + + + + + + + {descriptionForSessionStatus(item.value)} + + + + + + ))} + + + + ); +} + +function AppliedStatusFilter() { + const { values, del } = useSearchParams(); + const statuses = values("statuses"); + + if (statuses.length === 0) return null; + + return ( + + {(search, setSearch) => ( + }> + } + value={appliedSummary( + statuses.map((v) => sessionStatusTitle(v as (typeof allSessionStatuses)[number])) + )} + onRemove={() => del(["statuses", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + +function TypeDropdown({ + trigger, + searchValue, + clearSearchValue, + possibleTypes, + onClose, +}: { + trigger: ReactNode; + searchValue: string; + clearSearchValue: () => void; + possibleTypes?: string[]; + onClose?: () => void; +}) { + const { values, replace } = useSearchParams(); + + const handleChange = (next: string[]) => { + clearSearchValue(); + replace({ types: next, cursor: undefined, direction: undefined }); + }; + + const items = useMemo(() => { + const all = possibleTypes && possibleTypes.length > 0 ? possibleTypes : ["chat"]; + const seen = new Set(all); + for (const v of values("types")) { + if (!seen.has(v)) { + all.push(v); + seen.add(v); + } + } + return all.filter((t) => t.toLowerCase().includes(searchValue.toLowerCase())); + }, [possibleTypes, searchValue, values]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + > + + + {items.map((value, index) => ( + + {value} + + ))} + + + + ); +} + +function AppliedTypeFilter() { + const { values, del } = useSearchParams(); + const types = values("types"); + if (types.length === 0) return null; + + return ( + + {(search, setSearch) => ( + }> + } + value={appliedSummary(types)} + onRemove={() => del(["types", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + +function TaskIdentifierDropdown({ + trigger, + searchValue, + clearSearchValue, + onClose, +}: { + trigger: ReactNode; + searchValue: string; + clearSearchValue: () => void; + onClose?: () => void; +}) { + const [open, setOpen] = useState(); + const { value, replace } = useSearchParams(); + const current = value("taskIdentifiers"); + const [draft, setDraft] = useState(current ?? ""); + + const apply = useCallback(() => { + clearSearchValue(); + replace({ + taskIdentifiers: draft.trim() === "" ? undefined : [draft.trim()], + cursor: undefined, + direction: undefined, + }); + setOpen(false); + }, [clearSearchValue, draft, replace]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + className="max-w-[min(32ch,var(--popover-available-width))]" + > +
+
+ + setDraft(e.target.value)} + variant="small" + className="w-[29ch] font-mono" + spellCheck={false} + /> +
+
+ + +
+
+
+
+ ); +} + +function AppliedTaskIdentifierFilter() { + const { values, del } = useSearchParams(); + const taskIdentifiers = values("taskIdentifiers"); + if (taskIdentifiers.length === 0) return null; + + return ( + + {(search, setSearch) => ( + }> + } + value={appliedSummary(taskIdentifiers)} + onRemove={() => del(["taskIdentifiers", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + +function ExternalIdDropdown({ + trigger, + searchValue, + clearSearchValue, + onClose, +}: { + trigger: ReactNode; + searchValue: string; + clearSearchValue: () => void; + onClose?: () => void; +}) { + const [open, setOpen] = useState(); + const { value, replace } = useSearchParams(); + const current = value("externalId"); + const [draft, setDraft] = useState(current ?? ""); + + const apply = useCallback(() => { + clearSearchValue(); + replace({ + externalId: draft.trim() === "" ? undefined : draft.trim(), + cursor: undefined, + direction: undefined, + }); + setOpen(false); + }, [clearSearchValue, draft, replace]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + className="max-w-[min(36ch,var(--popover-available-width))]" + > +
+
+ + setDraft(e.target.value)} + variant="small" + className="w-[33ch] font-mono" + spellCheck={false} + /> +
+
+ + +
+
+
+
+ ); +} + +function AppliedExternalIdFilter() { + const { value, del } = useSearchParams(); + const externalId = value("externalId"); + if (!externalId) return null; + + return ( + + {(search, setSearch) => ( + }> + } + value={externalId} + onRemove={() => del(["externalId", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + +function TagsDropdown({ + trigger, + searchValue, + clearSearchValue, + onClose, +}: { + trigger: ReactNode; + searchValue: string; + clearSearchValue: () => void; + onClose?: () => void; +}) { + const [open, setOpen] = useState(); + const { values, replace } = useSearchParams(); + const current = values("tags"); + const [draft, setDraft] = useState(current.join(", ")); + + const apply = useCallback(() => { + clearSearchValue(); + const next = draft + .split(/[,\n]/) + .map((t) => t.trim()) + .filter((t) => t.length > 0); + replace({ + tags: next.length === 0 ? undefined : next, + cursor: undefined, + direction: undefined, + }); + setOpen(false); + }, [clearSearchValue, draft, replace]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + className="max-w-[min(40ch,var(--popover-available-width))]" + > +
+
+ + setDraft(e.target.value)} + variant="small" + className="w-[37ch] font-mono" + spellCheck={false} + /> + + Comma-separated. Matches sessions with any of these tags. + +
+
+ + +
+
+
+
+ ); +} + +function AppliedTagsFilter() { + const { values, del } = useSearchParams(); + const tags = values("tags"); + if (tags.length === 0) return null; + + return ( + + {(search, setSearch) => ( + }> + } + value={appliedSummary(tags)} + onRemove={() => del(["tags", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + diff --git a/apps/webapp/app/components/sessions/v1/SessionStatus.tsx b/apps/webapp/app/components/sessions/v1/SessionStatus.tsx new file mode 100644 index 00000000000..a4e17affd83 --- /dev/null +++ b/apps/webapp/app/components/sessions/v1/SessionStatus.tsx @@ -0,0 +1,89 @@ +import { CheckCircleIcon, ClockIcon } from "@heroicons/react/20/solid"; +import assertNever from "assert-never"; +import { type SessionStatus } from "~/services/sessionsRepository/sessionsRepository.server"; +import { cn } from "~/utils/cn"; + +export const allSessionStatuses = ["ACTIVE", "CLOSED", "EXPIRED"] as const satisfies Readonly< + Array +>; + +const descriptions: Record = { + ACTIVE: "The session is open and can receive input or schedule new runs.", + CLOSED: "The session was closed; no further input or runs can be triggered against it.", + EXPIRED: "The session passed its expiry time without being closed explicitly.", +}; + +export function descriptionForSessionStatus(status: SessionStatus): string { + return descriptions[status]; +} + +export function sessionStatusTitle(status: SessionStatus): string { + switch (status) { + case "ACTIVE": + return "Active"; + case "CLOSED": + return "Closed"; + case "EXPIRED": + return "Expired"; + default: + assertNever(status); + } +} + +export function sessionStatusColor(status: SessionStatus): string { + switch (status) { + case "ACTIVE": + return "text-pending"; + case "CLOSED": + return "text-success"; + case "EXPIRED": + return "text-text-dimmed"; + default: + assertNever(status); + } +} + +export function SessionStatusIcon({ + status, + className, +}: { + status: SessionStatus; + className: string; +}) { + switch (status) { + case "ACTIVE": + return ( + + + + ); + case "CLOSED": + return ; + case "EXPIRED": + return ; + default: + assertNever(status); + } +} + +export function SessionStatusLabel({ status }: { status: SessionStatus }) { + return {sessionStatusTitle(status)}; +} + +export function SessionStatusCombo({ + status, + className, + iconClassName, +}: { + status: SessionStatus; + className?: string; + iconClassName?: string; +}) { + return ( + + + + + ); +} + diff --git a/apps/webapp/app/components/sessions/v1/SessionsTable.tsx b/apps/webapp/app/components/sessions/v1/SessionsTable.tsx new file mode 100644 index 00000000000..fb83f2d03eb --- /dev/null +++ b/apps/webapp/app/components/sessions/v1/SessionsTable.tsx @@ -0,0 +1,224 @@ +import { ArrowRightIcon } from "@heroicons/react/20/solid"; +import { useLocation, useNavigation } from "@remix-run/react"; +import { formatDuration } from "@trigger.dev/core/v3/utils/durations"; +import { ListBulletIcon } from "~/assets/icons/ListBulletIcon"; +import { MiddleTruncate } from "~/components/primitives/MiddleTruncate"; +import { DateTime } from "~/components/primitives/DateTime"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { PopoverMenuItem } from "~/components/primitives/Popover"; +import { Spinner } from "~/components/primitives/Spinner"; +import { + Table, + TableBlankRow, + TableBody, + TableCell, + TableCellMenu, + TableHeader, + TableHeaderCell, + TableRow, +} from "~/components/primitives/Table"; +import { SimpleTooltip } from "~/components/primitives/Tooltip"; +import { LiveTimer } from "~/components/runs/v3/LiveTimer"; +import { RunTag } from "~/components/runs/v3/RunTag"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { + type SessionListItem, + type SessionList, +} from "~/presenters/v3/SessionListPresenter.server"; +import { v3RunPath, v3RunsPath, v3SessionPath } from "~/utils/pathBuilder"; +import { + descriptionForSessionStatus, + SessionStatusCombo, + allSessionStatuses, +} from "./SessionStatus"; + +type SessionsTableProps = Pick; + +export function SessionsTable({ sessions, hasFilters }: SessionsTableProps) { + const navigation = useNavigation(); + const location = useLocation(); + const isLoading = + navigation.state !== "idle" && navigation.location?.pathname === location.pathname; + + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + + return ( + + + + ID + + {allSessionStatuses.map((status) => ( +
+
+ +
+ + {descriptionForSessionStatus(status)} + +
+ ))} + + } + > + Status +
+ Type + Task + Tags + Created + Duration + + Actions + +
+
+ + {sessions.length === 0 ? ( + +
+ + {hasFilters + ? "No sessions match these filters" + : "No sessions in this environment yet"} + +
+
+ ) : ( + sessions.map((session) => { + const runPath = session.currentRunFriendlyId + ? v3RunPath(organization, project, environment, { + friendlyId: session.currentRunFriendlyId, + }) + : undefined; + + const displayId = session.externalId ?? session.friendlyId; + const sessionPath = v3SessionPath(organization, project, environment, { + friendlyId: session.friendlyId, + }); + const allRunsPath = v3RunsPath(organization, project, environment, { + tags: [`chat:${displayId}`], + }); + + return ( + + +
+ +
+
+ + } + /> + + + {session.type} + + +
+ +
+
+ + {session.tags.length > 0 ? ( +
+ {session.tags.map((tag) => ( + + ))} +
+ ) : ( + + )} +
+ + + + + + + +
+ ); + }) + )} + {isLoading && ( + + Loading… + + )} +
+
+ ); +} + +function SessionDuration({ session }: { session: SessionListItem }) { + // Active sessions tick live; closed/expired sessions freeze at the + // moment they ended (closedAt for explicit closes, expiresAt when the + // TTL ran out without a close call). + const endedAt = + session.status === "CLOSED" + ? session.closedAt + : session.status === "EXPIRED" + ? session.expiresAt + : undefined; + + if (endedAt) { + return <>{formatDuration(new Date(session.createdAt), new Date(endedAt), { style: "short" })}; + } + + return ; +} + +function SessionActionsCell({ + runPath, + allRunsPath, +}: { + runPath?: string; + allRunsPath: string; +}) { + return ( + + {runPath && ( + + )} + + + } + /> + ); +} diff --git a/apps/webapp/app/db.server.ts b/apps/webapp/app/db.server.ts index 4668b58fb02..96f6307f576 100644 --- a/apps/webapp/app/db.server.ts +++ b/apps/webapp/app/db.server.ts @@ -13,8 +13,8 @@ import { env } from "./env.server"; import { logger } from "./services/logger.server"; import { isValidDatabaseUrl } from "./utils/db"; import { singleton } from "./utils/singleton"; -import { startActiveSpan } from "./v3/tracer.server"; -import { Span } from "@opentelemetry/api"; +import { DATASOURCE_CONTEXT_KEY, startActiveSpan } from "./v3/tracer.server"; +import { context, Span, trace } from "@opentelemetry/api"; import { queryPerformanceMonitor } from "./utils/queryPerformanceMonitor.server"; export type { @@ -98,12 +98,30 @@ export async function $transaction( export { Prisma }; -export const prisma = singleton("prisma", getClient); +function tagDatasource( + datasource: "writer" | "replica", + client: T +): T { + return client.$extends({ + name: "datasource-tagger", + query: { + $allOperations: ({ query, args }) => { + trace.getActiveSpan()?.setAttribute("db.datasource", datasource); + return context.with( + context.active().setValue(DATASOURCE_CONTEXT_KEY, datasource), + async () => await query(args) + ); + }, + }, + }) as unknown as T; +} -export const $replica: PrismaReplicaClient = singleton( - "replica", - () => getReplicaClient() ?? prisma -); +export const prisma = singleton("prisma", () => tagDatasource("writer", getClient())); + +export const $replica: PrismaReplicaClient = singleton("replica", () => { + const replica = getReplicaClient(); + return replica ? tagDatasource("replica", replica) : prisma; +}); function getClient() { const { DATABASE_URL } = process.env; diff --git a/apps/webapp/app/entry.client.tsx b/apps/webapp/app/entry.client.tsx index 46c2919b832..e0d0f0ac076 100644 --- a/apps/webapp/app/entry.client.tsx +++ b/apps/webapp/app/entry.client.tsx @@ -1,8 +1,11 @@ import { RemixBrowser } from "@remix-run/react"; import { hydrateRoot } from "react-dom/client"; +import { clientBeforeFirstRender } from "./clientBeforeFirstRender"; import { LocaleContextProvider } from "./components/primitives/LocaleProvider"; import { OperatingSystemContextProvider } from "./components/primitives/OperatingSystemProvider"; +clientBeforeFirstRender(); + hydrateRoot( document, ;` as a pure expression statement and tree-shakes +// the entire import — the singleton's initializer never fires and the +// sessions→ClickHouse logical replication slot stops being consumed. Assigning +// to globalThis is an unambiguous side effect the bundler must preserve. See +// TRI-9864 for the incident write-up. +import { sessionsReplicationInstance } from "./services/sessionsReplicationInstance.server"; +(globalThis as Record).__sessionsReplicationInstance = + sessionsReplicationInstance; const ABORT_DELAY = 30000; @@ -83,6 +103,10 @@ function handleBotRequest( ) { return new Promise((resolve, reject) => { let shellRendered = false; + // Timer handle is cleared in every terminal callback so the abort closure + // (which captures the full React render tree + remixContext) doesn't pin + // memory for 30s per successful request. See react-router PR #14200. + let abortTimer: NodeJS.Timeout | undefined; const { pipe, abort } = renderToPipeableStream( @@ -105,8 +129,10 @@ function handleBotRequest( ); pipe(body); + clearTimeout(abortTimer); }, onShellError(error: unknown) { + clearTimeout(abortTimer); reject(error); }, onError(error: unknown) { @@ -121,7 +147,7 @@ function handleBotRequest( } ); - setTimeout(abort, ABORT_DELAY); + abortTimer = setTimeout(abort, ABORT_DELAY); }); } @@ -135,6 +161,10 @@ function handleBrowserRequest( ) { return new Promise((resolve, reject) => { let shellRendered = false; + // Timer handle is cleared in every terminal callback so the abort closure + // (which captures the full React render tree + remixContext) doesn't pin + // memory for 30s per successful request. See react-router PR #14200. + let abortTimer: NodeJS.Timeout | undefined; const { pipe, abort } = renderToPipeableStream( @@ -157,8 +187,10 @@ function handleBrowserRequest( ); pipe(body); + clearTimeout(abortTimer); }, onShellError(error: unknown) { + clearTimeout(abortTimer); reject(error); }, onError(error: unknown) { @@ -173,7 +205,7 @@ function handleBrowserRequest( } ); - setTimeout(abort, ABORT_DELAY); + abortTimer = setTimeout(abort, ABORT_DELAY); }); } @@ -197,6 +229,9 @@ Worker.init().catch((error) => { logError(error); }); +initMollifierDrainerWorker(); +initMollifierStaleSweepWorker(); + bootstrap().catch((error) => { logError(error); }); @@ -235,10 +270,28 @@ process.on("uncaughtException", (error, origin) => { singleton("RunEngineEventBusHandlers", registerRunEngineEventBusHandlers); singleton("SetupBatchQueueCallbacks", setupBatchQueueCallbacks); +// Attach the realtime run-changed publish delegations to the engine event bus. +// No-ops (registers nothing) unless REALTIME_BACKEND_NATIVE_ENABLED=1. +singleton("RunChangeNotifierHandlers", registerRunChangeNotifierHandlers); + +// Wrapped in singleton() so Remix's dev-mode CJS reloads don't append +// duplicate copies of the processor — Sentry's processor list lives in +// node_modules and persists across module reloads. Idempotent at runtime +// (the processor is a pure read+stamp), but the pattern matches the rest +// of this file. +singleton("SentryTenantContextProcessor", () => { + if (env.SENTRY_DSN) { + Sentry.addEventProcessor(addTenantContextToEvent); + } + // Return a truthy value — `singleton()` uses `??=` so a `void` + // callback would re-execute (and re-register) on every dev reload. + return true; +}); export { apiRateLimiter } from "./services/apiRateLimit.server"; export { engineRateLimiter } from "./services/engineRateLimit.server"; export { runWithHttpContext } from "./services/httpAsyncStorage.server"; +export { tenantContextMiddleware } from "./services/tenantContextResolver.server"; export { socketIo } from "./v3/handleSocketIo.server"; export { wss } from "./v3/handleWebsockets.server"; diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index 8d72b2e51b2..d9c97711940 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -1,7 +1,50 @@ import { z } from "zod"; +import { MachinePresetName } from "@trigger.dev/core/v3"; import { BoolEnv } from "./utils/boolEnv"; import { isValidDatabaseUrl } from "./utils/db"; import { isValidRegex } from "./utils/regex"; +import { isValidDuration } from "./services/realtime/duration.server"; + +// `z.string()` constrained to a `parseDuration`-parseable string (e.g. +// `7d`, `1h`). Validated at boot so a typo'd duration fails fast. +function durationString() { + return z + .string() + .refine(isValidDuration, "must be a duration like 7d, 30d, 365d, 1h, 1y"); +} + +// Parses a CSV of machine preset names (e.g. "small-1x,small-2x") into a +// non-empty array of MachinePresetName. Used by COMPUTE_TEMPLATE_MACHINE_PRESETS +// and its _REQUIRED variant. Adds zod issues for empty input or unknown names. +const parseMachinePresetCsv = ( + raw: string, + ctx: z.RefinementCtx +): MachinePresetName[] => { + const names = raw + .split(",") + .map((s) => s.trim()) + .filter(Boolean); + if (names.length === 0) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: "must list at least one machine preset", + }); + return z.NEVER; + } + const out: MachinePresetName[] = []; + for (const name of names) { + const parsed = MachinePresetName.safeParse(name); + if (!parsed.success) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `unknown machine preset: "${name}"`, + }); + return z.NEVER; + } + out.push(parsed.data); + } + return out; +}; const GithubAppEnvSchema = z.preprocess( (val) => { @@ -91,6 +134,7 @@ const EnvironmentSchema = z ELECTRIC_ORIGIN_SHARDS: z.string().optional(), APP_ENV: z.string().default(process.env.NODE_ENV), SERVICE_NAME: z.string().default("trigger.dev webapp"), + SENTRY_DSN: z.string().optional(), POSTHOG_PROJECT_KEY: z.string().default("phc_LFH7kJiGhdIlnO22hTAKgHpaKhpM8gkzWAFvHmf5vfS"), TRIGGER_TELEMETRY_DISABLED: z.string().optional(), AUTH_GITHUB_CLIENT_ID: z.string().optional(), @@ -108,6 +152,9 @@ const EnvironmentSchema = z SMTP_PASSWORD: z.string().optional(), PLAIN_API_KEY: z.string().optional(), + PLAIN_CUSTOMER_CARDS_SECRET: z.string().optional(), + PLAIN_CUSTOMER_CARDS_KEY: z.string().optional(), + PLAIN_CUSTOMER_CARDS_HEADERS: z.string().optional(), WORKER_SCHEMA: z.string().default("graphile_worker"), WORKER_CONCURRENCY: z.coerce.number().int().default(10), WORKER_POLL_INTERVAL: z.coerce.number().int().default(1000), @@ -189,6 +236,30 @@ const EnvironmentSchema = z CACHE_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"), CACHE_REDIS_CLUSTER_MODE_ENABLED: z.string().default("0"), + TASK_META_CACHE_REDIS_HOST: z + .string() + .optional() + .transform((v) => v ?? process.env.REDIS_HOST), + TASK_META_CACHE_REDIS_PORT: z.coerce + .number() + .optional() + .transform( + (v) => v ?? (process.env.REDIS_PORT ? parseInt(process.env.REDIS_PORT) : undefined) + ), + TASK_META_CACHE_REDIS_USERNAME: z + .string() + .optional() + .transform((v) => v ?? process.env.REDIS_USERNAME), + TASK_META_CACHE_REDIS_PASSWORD: z + .string() + .optional() + .transform((v) => v ?? process.env.REDIS_PASSWORD), + TASK_META_CACHE_REDIS_TLS_DISABLED: z + .string() + .default(process.env.REDIS_TLS_DISABLED ?? "false"), + TASK_META_CACHE_CURRENT_ENV_TTL_SECONDS: z.coerce.number().default(86400), + TASK_META_CACHE_BY_WORKER_TTL_SECONDS: z.coerce.number().default(2592000), + REALTIME_STREAMS_REDIS_HOST: z .string() .optional() @@ -229,6 +300,67 @@ const EnvironmentSchema = z .int() .default(24 * 60 * 60 * 1000), // 1 day in milliseconds + // Master switch for the native realtime backend; off = Electric serves everything, publishes no-op. + REALTIME_BACKEND_NATIVE_ENABLED: z.string().default("0"), + // Live long-poll backstop hold (ms); matches Electric's ~20s cadence. + REALTIME_BACKEND_NATIVE_LIVE_POLL_TIMEOUT_MS: z.coerce.number().int().default(20_000), + // Jitter ratio on the live-poll hold (0.15 = ±15%) to avoid synchronized refetch herds. + REALTIME_BACKEND_NATIVE_LIVE_POLL_JITTER_RATIO: z.coerce.number().default(0.15), + // Hard cap on the tag-list snapshot size. + REALTIME_BACKEND_NATIVE_MAX_LIST_RESULTS: z.coerce.number().int().default(1_000), + // TTL/size of the coalescing cache for the multi-run resolve+hydrate (same-filter feeds share one query). + REALTIME_BACKEND_NATIVE_RUNSET_CACHE_TTL_MS: z.coerce.number().int().default(1_000), + REALTIME_BACKEND_NATIVE_RUNSET_CACHE_MAX_ENTRIES: z.coerce.number().int().default(5_000), + // Size/TTL of the per-handle working-set cache used to diff multi-run live polls. + REALTIME_BACKEND_NATIVE_WORKING_SET_MAX_ENTRIES: z.coerce.number().int().default(10_000), + REALTIME_BACKEND_NATIVE_WORKING_SET_TTL_MS: z.coerce.number().int().default(300_000), + // Bucket (ms) the tag-list createdAt floor is quantized to so same-tag feeds share a cache entry; 0 disables. + REALTIME_BACKEND_NATIVE_RUNSET_CREATED_AT_BUCKET_MS: z.coerce.number().int().default(60_000), + // Leading-edge throttle (ms) on per-env wake delivery; 0 wakes on every change. + REALTIME_BACKEND_NATIVE_ENV_WAKE_COALESCE_WINDOW_MS: z.coerce.number().int().default(250), + // "1" shares per-connection replay cursors fleet-wide via Redis, so a load-balancer hop reads the connection's true inter-poll gap instead of cold-resolving. + REALTIME_BACKEND_NATIVE_SHARED_REPLAY_CURSORS: z.string().default("1"), + // "1" holds a multi-run live poll open on a non-matching wake instead of replying up-to-date. + REALTIME_BACKEND_NATIVE_HOLD_ON_EMPTY: z.string().default("1"), + // Max concurrent fresh ClickHouse resolves per instance (reconnect-stampede gate); 0 disables. + REALTIME_BACKEND_NATIVE_RESOLVE_ADMISSION_LIMIT: z.coerce.number().int().default(16), + // Replay window (ms) for buffered change records delivered to newly-armed feeds; 0 disables. + REALTIME_BACKEND_NATIVE_REPLAY_WINDOW_MS: z.coerce.number().int().default(2_000), + // Cap on buffered recent records per env (latest record per run). + REALTIME_BACKEND_NATIVE_REPLAY_MAX_RUNS: z.coerce.number().int().default(512), + // Keep an env subscribed + buffering this long (ms) after its last feed closes; 0 disables. + REALTIME_BACKEND_NATIVE_UNSUBSCRIBE_LINGER_MS: z.coerce.number().int().default(5_000), + // Fallback per-env concurrent-connection limit when the org has none configured. + REALTIME_BACKEND_NATIVE_DEFAULT_CONCURRENCY_LIMIT: z.coerce.number().int().default(100_000), + // TTL/size of the single-run read-through cache that collapses duplicate refetch bursts. + REALTIME_BACKEND_NATIVE_RUN_CACHE_TTL_MS: z.coerce.number().int().default(250), + REALTIME_BACKEND_NATIVE_RUN_CACHE_MAX_ENTRIES: z.coerce.number().int().default(5_000), + // TTL/size of the per-org realtimeBackend flag cache used to pick the serving backend. + REALTIME_BACKEND_FLAG_CACHE_TTL_MS: z.coerce.number().int().default(30_000), + REALTIME_BACKEND_FLAG_CACHE_MAX_ENTRIES: z.coerce.number().int().default(50_000), + // "1" enables the read-your-writes gate: wake hydrates wait out the measured replica lag + // (anchored to the change record's updatedAtMs) and stale reads are retried. + REALTIME_BACKEND_NATIVE_REPLICA_LAG_GATE_ENABLED: z.string().default("1"), + // Reader-side lag probe cadence while the router is active; probing pauses when idle. + REALTIME_BACKEND_NATIVE_REPLICA_LAG_SAMPLE_INTERVAL_MS: z.coerce.number().int().default(250), + REALTIME_BACKEND_NATIVE_REPLICA_LAG_IDLE_AFTER_MS: z.coerce.number().int().default(30_000), + // The lag estimate is the max sample inside this window (spikes widen it immediately). + REALTIME_BACKEND_NATIVE_REPLICA_LAG_WINDOW_MS: z.coerce.number().int().default(5_000), + // Estimate before the first sample lands (and the floor when probing is unavailable). + REALTIME_BACKEND_NATIVE_REPLICA_LAG_DEFAULT_MS: z.coerce.number().int().default(30), + // Safety margin (clock skew + scheduling) added on top of the lag estimate. + REALTIME_BACKEND_NATIVE_REPLICA_LAG_MARGIN_MS: z.coerce.number().int().default(10), + // Hard cap on any single gate delay — a sick replica degrades freshness, never liveness. + REALTIME_BACKEND_NATIVE_REPLICA_LAG_MAX_DELAY_MS: z.coerce.number().int().default(1_000), + // Re-hydrate attempts for rows the tripwire still finds stale after the delay. + REALTIME_BACKEND_NATIVE_STALE_HYDRATE_RETRIES: z.coerce.number().int().default(3), + // How long a tripwire-observed staleness floors the lag estimate (vanilla-PG replicas + // can't measure mid-apply lag, so observations carry the estimate between races). + REALTIME_BACKEND_NATIVE_REPLICA_LAG_OBSERVED_FLOOR_TTL_MS: z.coerce + .number() + .int() + .default(60_000), + PUBSUB_REDIS_HOST: z .string() .optional() @@ -261,6 +393,36 @@ const EnvironmentSchema = z PUBSUB_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"), PUBSUB_REDIS_CLUSTER_MODE_ENABLED: z.string().default("0"), + // Dedicated pub/sub Redis for the native realtime backend; falls back to PUBSUB_REDIS_* then REDIS_*. + REALTIME_BACKEND_NATIVE_PUBSUB_REDIS_HOST: z + .string() + .optional() + .transform((v) => v ?? process.env.PUBSUB_REDIS_HOST ?? process.env.REDIS_HOST), + REALTIME_BACKEND_NATIVE_PUBSUB_REDIS_PORT: z.coerce + .number() + .optional() + .transform((v) => { + if (v !== undefined) return v; + const raw = process.env.PUBSUB_REDIS_PORT ?? process.env.REDIS_PORT; + return raw ? parseInt(raw) : undefined; + }), + REALTIME_BACKEND_NATIVE_PUBSUB_REDIS_USERNAME: z + .string() + .optional() + .transform((v) => v ?? process.env.PUBSUB_REDIS_USERNAME ?? process.env.REDIS_USERNAME), + REALTIME_BACKEND_NATIVE_PUBSUB_REDIS_PASSWORD: z + .string() + .optional() + .transform((v) => v ?? process.env.PUBSUB_REDIS_PASSWORD ?? process.env.REDIS_PASSWORD), + REALTIME_BACKEND_NATIVE_PUBSUB_REDIS_TLS_DISABLED: z + .string() + .default(process.env.PUBSUB_REDIS_TLS_DISABLED ?? process.env.REDIS_TLS_DISABLED ?? "false"), + REALTIME_BACKEND_NATIVE_PUBSUB_REDIS_CLUSTER_MODE_ENABLED: z + .string() + .default(process.env.PUBSUB_REDIS_CLUSTER_MODE_ENABLED ?? "0"), + // Use sharded pub/sub (SSUBSCRIBE/SPUBLISH) in cluster mode; "0" forces classic pub/sub. + REALTIME_BACKEND_NATIVE_PUBSUB_REDIS_SHARDED_ENABLED: z.string().default("1"), + DEFAULT_ENV_EXECUTION_CONCURRENCY_LIMIT: z.coerce.number().int().default(100), DEFAULT_ENV_EXECUTION_CONCURRENCY_BURST_FACTOR: z.coerce.number().default(1.0), DEFAULT_ORG_EXECUTION_CONCURRENCY_LIMIT: z.coerce.number().int().default(300), @@ -300,6 +462,7 @@ const EnvironmentSchema = z DEPLOY_REGISTRY_ECR_TAGS: z.string().optional(), // csv, for example: "key1=value1,key2=value2" DEPLOY_REGISTRY_ECR_ASSUME_ROLE_ARN: z.string().optional(), DEPLOY_REGISTRY_ECR_ASSUME_ROLE_EXTERNAL_ID: z.string().optional(), + DEPLOY_REGISTRY_ECR_DEFAULT_REPOSITORY_POLICY: z.string().optional(), // raw IAM policy JSON applied to every repo created by the webapp // Deployment registry (v4) - falls back to v3 registry if not specified V4_DEPLOY_REGISTRY_HOST: z @@ -332,11 +495,34 @@ const EnvironmentSchema = z .string() .optional() .transform((v) => v ?? process.env.DEPLOY_REGISTRY_ECR_ASSUME_ROLE_EXTERNAL_ID), + V4_DEPLOY_REGISTRY_ECR_DEFAULT_REPOSITORY_POLICY: z + .string() + .optional() + .transform((v) => v ?? process.env.DEPLOY_REGISTRY_ECR_DEFAULT_REPOSITORY_POLICY), // Compute gateway (template creation during deploy finalize) COMPUTE_GATEWAY_URL: z.string().optional(), COMPUTE_GATEWAY_AUTH_TOKEN: z.string().optional(), COMPUTE_TEMPLATE_SHADOW_ROLLOUT_PCT: z.string().optional(), + // Comma-separated machine preset names to build boot snapshots for on + // deploy (e.g. "small-1x,small-2x,medium-1x"). Default: "small-1x". + COMPUTE_TEMPLATE_MACHINE_PRESETS: z + .string() + .default("small-1x") + .transform(parseMachinePresetCsv), + // Subset of COMPUTE_TEMPLATE_MACHINE_PRESETS that must succeed for a + // required-mode deploy to be considered successful. Failures of presets + // outside this list are logged but don't fail the deploy. Defaults to the + // full COMPUTE_TEMPLATE_MACHINE_PRESETS list when unset (everything required). + COMPUTE_TEMPLATE_MACHINE_PRESETS_REQUIRED: z + .string() + .optional() + .transform((v, ctx) => + parseMachinePresetCsv( + v ?? process.env.COMPUTE_TEMPLATE_MACHINE_PRESETS ?? "small-1x", + ctx + ) + ), DEPLOY_IMAGE_PLATFORM: z.string().default("linux/amd64"), DEPLOY_TIMEOUT_MS: z.coerce @@ -348,6 +534,12 @@ const EnvironmentSchema = z .int() .default(60 * 1000 * 15), // 15 minutes + // When enabled, reject deploys made by v3 CLI versions (i.e. payloads that + // omit the `type` field). v4 CLI versions always send `type` ("MANAGED" or "V1"), + // so they are unaffected. Defaults to off so detection can run in + // log-only mode before enforcement. + DEPRECATE_V3_CLI_DEPLOYS_ENABLED: z.string().default("0"), + OBJECT_STORE_BASE_URL: z.string().optional(), OBJECT_STORE_BUCKET: z.string().optional(), OBJECT_STORE_ACCESS_KEY_ID: z.string().optional(), @@ -359,7 +551,10 @@ const EnvironmentSchema = z // If specified, you must configure the corresponding provider using OBJECT_STORE_{PROTOCOL}_* env vars. // Example: OBJECT_STORE_DEFAULT_PROTOCOL=s3 requires OBJECT_STORE_S3_BASE_URL, OBJECT_STORE_S3_ACCESS_KEY_ID, etc. // Enables zero-downtime migration between providers (old data keeps working, new data uses new provider). - OBJECT_STORE_DEFAULT_PROTOCOL: z.string().regex(/^[a-z0-9]+$/).optional(), + OBJECT_STORE_DEFAULT_PROTOCOL: z + .string() + .regex(/^[a-z0-9]+$/) + .optional(), ARTIFACTS_OBJECT_STORE_BUCKET: z.string().optional(), ARTIFACTS_OBJECT_STORE_BASE_URL: z.string().optional(), @@ -432,6 +627,7 @@ const EnvironmentSchema = z INTERNAL_OTEL_TRACE_SAMPLING_RATE: z.string().default("20"), INTERNAL_OTEL_TRACE_INSTRUMENT_PRISMA_ENABLED: z.string().default("0"), INTERNAL_OTEL_TRACE_DISABLED: z.string().default("0"), + DISABLE_HTTP_INSTRUMENTATION: BoolEnv.default(false), INTERNAL_OTEL_LOG_EXPORTER_URL: z.string().optional(), INTERNAL_OTEL_METRIC_EXPORTER_URL: z.string().optional(), @@ -552,6 +748,9 @@ const EnvironmentSchema = z MAXIMUM_LIVE_RELOADING_EVENTS: z.coerce.number().int().default(1000), MAXIMUM_TRACE_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(25_000), MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(10_000), + // Emergency circuit breaker: when set, clamps the trace summary and detailed + // summary span limits on both event store paths to this value. Unset = disabled. + TRACE_VIEW_EMERGENCY_SPAN_CAP: z.coerce.number().int().positive().optional(), TASK_PAYLOAD_OFFLOAD_THRESHOLD: z.coerce.number().int().default(524_288), // 512KB BATCH_PAYLOAD_OFFLOAD_THRESHOLD: z.coerce.number().int().optional(), // Defaults to TASK_PAYLOAD_OFFLOAD_THRESHOLD if not set TASK_PAYLOAD_MAXIMUM_SIZE: z.coerce.number().int().default(3_145_728), // 3MB @@ -659,6 +858,21 @@ const EnvironmentSchema = z .int() .default(60_000 * 60), // 1 hour + /** + * Bucket size in milliseconds used to quantize the newly computed `delayUntil` + * in the debounce system. Quantization collapses concurrent triggers on the + * same hot debounce key onto the same target time so the unlocked fast-path + * skip is effective. Set to 0 to disable. Default: 1000ms (1s). + */ + RUN_ENGINE_DEBOUNCE_QUANTIZE_NEW_DELAY_UNTIL_MS: z.coerce.number().int().min(0).default(1000), + + /** + * Whether the unlocked fast-path skip is enabled in the debounce system. + * Acts as a kill switch in case the fast-path needs to be disabled in + * production without a redeploy. Default: "1" (enabled). + */ + RUN_ENGINE_DEBOUNCE_FAST_PATH_SKIP_ENABLED: z.string().default("1"), + RUN_ENGINE_WORKER_REDIS_HOST: z .string() .optional() @@ -829,6 +1043,10 @@ const EnvironmentSchema = z .enum(["log", "error", "warn", "info", "debug"]) .default("info"), RUN_ENGINE_TREAT_PRODUCTION_EXECUTION_STALLS_AS_OOM: z.string().default("0"), + RUN_ENGINE_READ_REPLICA_SNAPSHOTS_SINCE_ENABLED: z.string().default("0"), + RUN_ENGINE_SNAPSHOTS_SINCE_REPLICA_RETRY_MIN_MS: z.coerce.number().int().default(50), + RUN_ENGINE_SNAPSHOTS_SINCE_REPLICA_RETRY_MAX_MS: z.coerce.number().int().default(200), + RUN_ENGINE_DEBOUNCE_USE_REPLICA_FOR_FAST_PATH_READ: z.string().default("0"), /** How long should the presence ttl last */ DEV_PRESENCE_SSE_TIMEOUT: z.coerce.number().int().default(30_000), @@ -894,6 +1112,7 @@ const EnvironmentSchema = z LEGACY_RUN_ENGINE_WAITING_FOR_DEPLOY_BATCH_SIZE: z.coerce.number().int().default(100), LEGACY_RUN_ENGINE_WAITING_FOR_DEPLOY_BATCH_STAGGER_MS: z.coerce.number().int().default(1_000), + LEGACY_RUN_ENGINE_WAITING_FOR_DEPLOY_DISABLED: z.string().default("0"), COMMON_WORKER_ENABLED: z.string().default(process.env.WORKER_ENABLED ?? "true"), COMMON_WORKER_CONCURRENCY_WORKERS: z.coerce.number().int().default(2), @@ -936,6 +1155,167 @@ const EnvironmentSchema = z COMMON_WORKER_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"), COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED: z.string().default("0"), + // Global default for the scheduled worker-queue split. When "1", runs in a + // scheduled lineage (rootTriggerSource === "schedule") are routed to a + // dedicated `:scheduled` worker queue so a separate consumer fleet + // can dequeue them independently of standard/agent runs. The per-org + // `workerQueueScheduledSplitEnabled` feature flag overrides this default in + // BOTH directions (an org set to false stays on the single queue even when + // this is "1"; an org set to true splits even when this is "0"). Never + // applies to DEVELOPMENT environments. + TRIGGER_WORKER_QUEUE_SCHEDULED_SPLIT_ENABLED: z.string().default("0"), + + TRIGGER_MOLLIFIER_ENABLED: z.string().default("0"), + // Separate switch for the drainer (consumer side) so it can be split + // off onto a dedicated worker service. Unset → inherits + // TRIGGER_MOLLIFIER_ENABLED, so single-container self-hosters don't have to + // flip two switches. Multi-replica drainers are correct — `popAndMarkDraining` + // is an atomic RPOP + status flip in one Lua call, so only one replica + // can win any given entry — but inefficient: polling load (SMEMBERS + + // per-env scans) multiplies by N, and `TRIGGER_MOLLIFIER_DRAIN_CONCURRENCY` + // is per-process so engine load also multiplies. Splitting the drainer + // onto a dedicated worker keeps that traffic off the request-serving + // replicas. `TRIGGER_MOLLIFIER_ENABLED` is still the master kill switch; + // setting this to "1" while `TRIGGER_MOLLIFIER_ENABLED` is "0" is a + // no-op because the gate-side singleton refuses to construct a buffer + // when the system is off. + TRIGGER_MOLLIFIER_DRAINER_ENABLED: z.string().default(process.env.TRIGGER_MOLLIFIER_ENABLED ?? "0"), + TRIGGER_MOLLIFIER_SHADOW_MODE: z.string().default("0"), + TRIGGER_MOLLIFIER_REDIS_HOST: z + .string() + .optional() + .transform((v) => v ?? process.env.REDIS_HOST), + TRIGGER_MOLLIFIER_REDIS_PORT: z.coerce + .number() + .optional() + .transform( + (v) => v ?? (process.env.REDIS_PORT ? parseInt(process.env.REDIS_PORT) : undefined), + ), + TRIGGER_MOLLIFIER_REDIS_USERNAME: z + .string() + .optional() + .transform((v) => v ?? process.env.REDIS_USERNAME), + TRIGGER_MOLLIFIER_REDIS_PASSWORD: z + .string() + .optional() + .transform((v) => v ?? process.env.REDIS_PASSWORD), + TRIGGER_MOLLIFIER_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"), + TRIGGER_MOLLIFIER_TRIP_WINDOW_MS: z.coerce.number().int().positive().default(200), + TRIGGER_MOLLIFIER_TRIP_THRESHOLD: z.coerce.number().int().positive().default(100), + TRIGGER_MOLLIFIER_HOLD_MS: z.coerce.number().int().positive().default(500), + TRIGGER_MOLLIFIER_DRAIN_CONCURRENCY: z.coerce.number().int().positive().default(50), + TRIGGER_MOLLIFIER_DRAIN_MAX_ATTEMPTS: z.coerce.number().int().positive().default(3), + TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS: z.coerce.number().int().positive().default(30_000), + TRIGGER_MOLLIFIER_DRAIN_MAX_ORGS_PER_TICK: z.coerce.number().int().positive().default(500), + // Per-env per-tick pop cap. The drainer rotates one env per org per + // tick; this bounds how many entries it pops from that env before + // dispatching them through the shared `DRAIN_CONCURRENCY`-bounded + // limiter. Default matches `DRAIN_CONCURRENCY` so a single-env burst + // uses the full handler-parallelism budget — for 20k buffered on one + // env this is the difference between ~17m (one-pop-per-tick × ~50ms) + // and ~20s (400 ticks × concurrent engine.trigger). Org/env fairness + // is preserved because the per-tick env selection is unchanged; only + // the in-env pop count grows. + TRIGGER_MOLLIFIER_DRAIN_BATCH_SIZE: z.coerce.number().int().positive().default(50), + // Periodic sweep that scans buffer queue LISTs for entries whose + // dwell exceeds the stale threshold. Independent of the drainer — + // its job is exactly to make a stuck/offline drainer visible to + // ops. Defaults: explicitly opt-in (a separate kill switch from + // the mollifier itself), run every 5 minutes, alert on anything + // that's been dwelling for 5+ minutes (matches the sweep interval + // — "anything still here when we check" is the simplest threshold + // that converges). + // + // The sweep was previously defaulting to inherit + // `TRIGGER_MOLLIFIER_ENABLED`, which meant any deployment already + // running with the mollifier on would auto-start the sweep worker + // on upgrade — turning on new background load with no explicit + // rollout step. Hard-defaulting to "0" preserves the intent of + // exposing the sweep as a separate switch. + TRIGGER_MOLLIFIER_STALE_SWEEP_ENABLED: z.string().default("0"), + TRIGGER_MOLLIFIER_STALE_SWEEP_INTERVAL_MS: z.coerce + .number() + .int() + .positive() + .default(5 * 60_000), + TRIGGER_MOLLIFIER_STALE_SWEEP_THRESHOLD_MS: z.coerce + .number() + .int() + .positive() + .default(5 * 60_000), + // Bounds for one stale-sweep pass (see mollifierStaleSweep.server.ts). + // Max entries scanned per env and max orgs visited per tick — together + // they cap the Redis traffic / wall-time of a single sweep pass. + TRIGGER_MOLLIFIER_STALE_SWEEP_MAX_ENTRIES_PER_ENV: z.coerce + .number() + .int() + .positive() + .default(1000), + TRIGGER_MOLLIFIER_STALE_SWEEP_MAX_ORGS_PER_PASS: z.coerce + .number() + .int() + .positive() + .default(100), + + // --- Mollifier buffer internals (wired into MollifierBuffer in + // mollifierBuffer.server.ts). --- + // Grace TTL applied to the entry hash on drainer ack so direct reads + // (retrieve, trace) have a safety net while PG replica lag settles. + TRIGGER_MOLLIFIER_ACK_GRACE_TTL_SECONDS: z.coerce.number().int().positive().default(30), + // ioredis per-request retry limit on the buffer's Redis client. + TRIGGER_MOLLIFIER_REDIS_MAX_RETRIES_PER_REQUEST: z.coerce + .number() + .int() + .positive() + .default(20), + // ioredis reconnect backoff envelope for the buffer client: the base + // grows by `STEP_MS` per attempt, capped at `MAX_MS`, then equal-jittered. + TRIGGER_MOLLIFIER_REDIS_RECONNECT_STEP_MS: z.coerce.number().int().positive().default(50), + TRIGGER_MOLLIFIER_REDIS_RECONNECT_MAX_MS: z.coerce.number().int().positive().default(1000), + + // --- Mollifier drainer loop internals (wired into MollifierDrainer in + // mollifierDrainer.server.ts). --- + // Tick gap when a tick drained nothing; under backlog ticks run back-to-back. + TRIGGER_MOLLIFIER_DRAIN_POLL_INTERVAL_MS: z.coerce.number().int().positive().default(100), + // Cap on the drainer's exponential backoff after consecutive runOnce errors. + TRIGGER_MOLLIFIER_DRAIN_MAX_BACKOFF_MS: z.coerce.number().int().positive().default(5_000), + // Floor for the drainer's backoff base (so a tiny poll interval doesn't + // collapse the backoff to near-zero during a sustained outage). + TRIGGER_MOLLIFIER_DRAIN_BACKOFF_FLOOR_MS: z.coerce.number().int().positive().default(100), + // Required margin between the drainer's shutdown deadline and + // GRACEFUL_SHUTDOWN_TIMEOUT; boot fails loud if the timeout leaves less. + TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_MARGIN_MS: z.coerce.number().int().positive().default(1_000), + + // How often the draining-tracker ZSET cardinality is polled into the gauge. + TRIGGER_MOLLIFIER_DRAINING_GAUGE_INTERVAL_MS: z.coerce + .number() + .int() + .positive() + .default(15_000), + + // --- Pre-gate idempotency claim (idempotencyClaim.server.ts). --- + // TTL on the claim key (and the upper clamp on the customer-derived + // claim TTL), how long a waiter blocks before timing out, and the + // waiter poll interval. + TRIGGER_MOLLIFIER_CLAIM_TTL_SECONDS: z.coerce.number().int().positive().default(30), + TRIGGER_MOLLIFIER_CLAIM_WAIT_MS: z.coerce.number().int().positive().default(5_000), + TRIGGER_MOLLIFIER_CLAIM_POLL_MS: z.coerce.number().int().positive().default(25), + + // --- Buffered-run mutate-with-fallback wait loop (mutateWithFallback.server.ts). --- + // Ceiling on the wait-for-materialisation loop, initial poll gap, the + // backoff ceiling, and the exponential growth factor (a float). + TRIGGER_MOLLIFIER_MUTATE_SAFETY_NET_MS: z.coerce.number().int().positive().default(2_000), + TRIGGER_MOLLIFIER_MUTATE_POLL_STEP_MS: z.coerce.number().int().positive().default(20), + TRIGGER_MOLLIFIER_MUTATE_MAX_POLL_STEP_MS: z.coerce.number().int().positive().default(250), + TRIGGER_MOLLIFIER_MUTATE_BACKOFF_FACTOR: z.coerce.number().gt(1).default(1.7), + + // --- Buffered-run metadata CAS retry loop (applyMetadataMutation.server.ts). --- + // Retry budget for concurrent metadata writers, and the jittered + // conflict-backoff envelope: random in [0, base + attempt * step) ms. + TRIGGER_MOLLIFIER_METADATA_MAX_RETRIES: z.coerce.number().int().positive().default(12), + TRIGGER_MOLLIFIER_METADATA_BACKOFF_BASE_MS: z.coerce.number().int().positive().default(5), + TRIGGER_MOLLIFIER_METADATA_BACKOFF_STEP_MS: z.coerce.number().int().positive().default(5), + BATCH_TRIGGER_PROCESS_JOB_VISIBILITY_TIMEOUT_MS: z.coerce .number() .int() @@ -1213,6 +1593,38 @@ const EnvironmentSchema = z RUN_REPLICATION_DISABLE_PAYLOAD_INSERT: z.string().default("0"), RUN_REPLICATION_DISABLE_ERROR_FINGERPRINTING: z.string().default("0"), + // Session replication (Postgres → ClickHouse sessions_v1). Shares Redis + // with the runs replicator for leader locking but has its own slot and + // publication so the two consume independently. + SESSION_REPLICATION_CLICKHOUSE_URL: z.string().optional(), + SESSION_REPLICATION_ENABLED: z.string().default("0"), + SESSION_REPLICATION_SLOT_NAME: z.string().default("sessions_to_clickhouse_v1"), + SESSION_REPLICATION_PUBLICATION_NAME: z + .string() + .default("sessions_to_clickhouse_v1_publication"), + SESSION_REPLICATION_MAX_FLUSH_CONCURRENCY: z.coerce.number().int().default(1), + SESSION_REPLICATION_FLUSH_INTERVAL_MS: z.coerce.number().int().default(1000), + SESSION_REPLICATION_FLUSH_BATCH_SIZE: z.coerce.number().int().default(100), + SESSION_REPLICATION_LEADER_LOCK_TIMEOUT_MS: z.coerce.number().int().default(30_000), + SESSION_REPLICATION_LEADER_LOCK_EXTEND_INTERVAL_MS: z.coerce.number().int().default(10_000), + SESSION_REPLICATION_LEADER_LOCK_ADDITIONAL_TIME_MS: z.coerce.number().int().default(10_000), + SESSION_REPLICATION_LEADER_LOCK_RETRY_INTERVAL_MS: z.coerce.number().int().default(500), + SESSION_REPLICATION_ACK_INTERVAL_SECONDS: z.coerce.number().int().default(10), + SESSION_REPLICATION_LOG_LEVEL: z + .enum(["log", "error", "warn", "info", "debug"]) + .default("info"), + SESSION_REPLICATION_CLICKHOUSE_LOG_LEVEL: z + .enum(["log", "error", "warn", "info", "debug"]) + .default("info"), + SESSION_REPLICATION_WAIT_FOR_ASYNC_INSERT: z.string().default("0"), + SESSION_REPLICATION_KEEP_ALIVE_ENABLED: z.string().default("0"), + SESSION_REPLICATION_KEEP_ALIVE_IDLE_SOCKET_TTL_MS: z.coerce.number().int().optional(), + SESSION_REPLICATION_MAX_OPEN_CONNECTIONS: z.coerce.number().int().default(10), + SESSION_REPLICATION_INSERT_STRATEGY: z.enum(["insert", "insert_async"]).default("insert"), + SESSION_REPLICATION_INSERT_MAX_RETRIES: z.coerce.number().int().default(3), + SESSION_REPLICATION_INSERT_BASE_DELAY_MS: z.coerce.number().int().default(100), + SESSION_REPLICATION_INSERT_MAX_DELAY_MS: z.coerce.number().int().default(2000), + // Clickhouse CLICKHOUSE_URL: z.string(), CLICKHOUSE_KEEP_ALIVE_ENABLED: z.string().default("1"), @@ -1277,6 +1689,33 @@ const EnvironmentSchema = z EVENTS_CLICKHOUSE_MAX_OPEN_CONNECTIONS: z.coerce.number().int().default(10), EVENTS_CLICKHOUSE_LOG_LEVEL: z.enum(["log", "error", "warn", "info", "debug"]).default("info"), EVENTS_CLICKHOUSE_COMPRESSION_REQUEST: z.string().default("1"), + // ClickHouse client used by @internal/run-engine's PendingVersionSystem. + // Kept on its own URL + pool so this low-QPS path can't contend with + // the main analytics client (CLICKHOUSE_URL). Falls back to the main + // URL when unset so unconfigured environments still work. + RUN_ENGINE_CLICKHOUSE_URL: z + .string() + .optional() + .transform((v) => v ?? process.env.CLICKHOUSE_URL), + RUN_ENGINE_CLICKHOUSE_KEEP_ALIVE_ENABLED: z.string().default("1"), + RUN_ENGINE_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS: z.coerce.number().int().optional(), + RUN_ENGINE_CLICKHOUSE_MAX_OPEN_CONNECTIONS: z.coerce.number().int().default(5), + RUN_ENGINE_CLICKHOUSE_LOG_LEVEL: z + .enum(["log", "error", "warn", "info", "debug"]) + .default("info"), + RUN_ENGINE_CLICKHOUSE_COMPRESSION_REQUEST: z.string().default("1"), + // Dedicated ClickHouse pool for the native backend's tag/batch id resolution; falls back to CLICKHOUSE_URL. + REALTIME_BACKEND_NATIVE_CLICKHOUSE_URL: z + .string() + .optional() + .transform((v) => v ?? process.env.CLICKHOUSE_URL), + REALTIME_BACKEND_NATIVE_CLICKHOUSE_KEEP_ALIVE_ENABLED: z.string().default("1"), + REALTIME_BACKEND_NATIVE_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS: z.coerce.number().int().optional(), + REALTIME_BACKEND_NATIVE_CLICKHOUSE_MAX_OPEN_CONNECTIONS: z.coerce.number().int().default(10), + REALTIME_BACKEND_NATIVE_CLICKHOUSE_LOG_LEVEL: z + .enum(["log", "error", "warn", "info", "debug"]) + .default("info"), + REALTIME_BACKEND_NATIVE_CLICKHOUSE_COMPRESSION_REQUEST: z.string().default("1"), EVENTS_CLICKHOUSE_BATCH_SIZE: z.coerce.number().int().default(1000), EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS: z.coerce.number().int().default(1000), METRICS_CLICKHOUSE_BATCH_SIZE: z.coerce.number().int().default(10000), @@ -1298,9 +1737,26 @@ const EnvironmentSchema = z EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(5_000), EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING: z.coerce.number().int().default(2000), + // Organization data stores registry + ORGANIZATION_DATA_STORES_RELOAD_INTERVAL_MS: z.coerce + .number() + .int() + .default(60 * 1000), // 1 minute + // LLM cost tracking LLM_COST_TRACKING_ENABLED: BoolEnv.default(true), - LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes + LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce + .number() + .int() + .default(5 * 60 * 1000), // 5 minutes + LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"), + LLM_PRICING_RELOAD_DEBOUNCE_MS: z.coerce.number().int().default(1000), + // Whether to subscribe this process to the LLM_PRICING_RELOAD_CHANNEL. + // Default off — only OTel-ingesting services need real-time pricing + // freshness; dashboard/worker processes are fine on the existing + // 5-minute periodic reload. In multi-service deployments, set this to + // true on the span-ingesting services. + LLM_PRICING_RELOAD_PUBSUB_ENABLED: BoolEnv.default(false), LLM_PRICING_SEED_ON_STARTUP: BoolEnv.default(false), LLM_PRICING_READY_TIMEOUT_MS: z.coerce.number().int().default(500), LLM_METRICS_BATCH_SIZE: z.coerce.number().int().default(5000), @@ -1392,15 +1848,40 @@ const EnvironmentSchema = z REALTIME_STREAMS_S2_FLUSH_INTERVAL_MS: z.coerce.number().int().default(100), REALTIME_STREAMS_S2_MAX_RETRIES: z.coerce.number().int().default(10), REALTIME_STREAMS_S2_WAIT_SECONDS: z.coerce.number().int().default(60), + // When "true", provision a dedicated S2 basin per org and stamp + // `streamBasinName` on new rows. Off keeps everything on the single + // basin defined by `REALTIME_STREAMS_S2_BASIN`. + REALTIME_STREAMS_PER_ORG_BASINS_ENABLED: z.enum(["true", "false"]).default("false"), + // Per-org basin name = `{prefix}-{env}-org-{orgId}`. + REALTIME_STREAMS_BASIN_NAME_PREFIX: z.string().default("triggerdotdev"), + REALTIME_STREAMS_BASIN_NAME_ENV: z.string().default("dev"), + REALTIME_STREAMS_BASIN_DEFAULT_RETENTION: durationString().default("30d"), + REALTIME_STREAMS_BASIN_STORAGE_CLASS: z.enum(["express", "standard"]).default("express"), + REALTIME_STREAMS_BASIN_DELETE_ON_EMPTY_MIN_AGE: durationString().default("1h"), REALTIME_STREAMS_DEFAULT_VERSION: z.enum(["v1", "v2"]).default("v1"), WAIT_UNTIL_TIMEOUT_MS: z.coerce.number().int().default(600_000), // Private connections PRIVATE_CONNECTIONS_ENABLED: z.string().optional(), PRIVATE_CONNECTIONS_AWS_ACCOUNT_IDS: z.string().optional(), + + // Force RBAC to not use the plugin + RBAC_FORCE_FALLBACK: BoolEnv.default(false), }) .and(GithubAppEnvSchema) - .and(S2EnvSchema); + .and(S2EnvSchema) + .superRefine((env, ctx) => { + const presets = new Set(env.COMPUTE_TEMPLATE_MACHINE_PRESETS); + for (const required of env.COMPUTE_TEMPLATE_MACHINE_PRESETS_REQUIRED) { + if (!presets.has(required)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ["COMPUTE_TEMPLATE_MACHINE_PRESETS_REQUIRED"], + message: `"${required}" is not in COMPUTE_TEMPLATE_MACHINE_PRESETS`, + }); + } + } + }); export type Environment = z.infer; export const env = EnvironmentSchema.parse(process.env); diff --git a/apps/webapp/app/eventLoopMonitor.server.ts b/apps/webapp/app/eventLoopMonitor.server.ts index 0a1b33690bb..2c536e7bb35 100644 --- a/apps/webapp/app/eventLoopMonitor.server.ts +++ b/apps/webapp/app/eventLoopMonitor.server.ts @@ -124,7 +124,7 @@ function startEventLoopUtilizationMonitoring() { const utilization = Number.isFinite(diff.utilization) ? diff.utilization : 0; if (Math.random() < env.EVENT_LOOP_MONITOR_UTILIZATION_SAMPLE_RATE) { - logger.info("nodejs.event_loop.utilization", { utilization }); + logger.debug("nodejs.event_loop.utilization", { utilization }); } lastEventLoopUtilization = currentEventLoopUtilization; diff --git a/apps/webapp/app/hooks/useAutoScrollToBottom.ts b/apps/webapp/app/hooks/useAutoScrollToBottom.ts new file mode 100644 index 00000000000..b8e59687ed6 --- /dev/null +++ b/apps/webapp/app/hooks/useAutoScrollToBottom.ts @@ -0,0 +1,104 @@ +import { useEffect, useLayoutEffect, useRef } from "react"; + +const AT_BOTTOM_TOLERANCE_PX = 16; + +/** + * Chat-style sticky-bottom auto-scroll behavior. + * + * Behavior: + * - On mount, finds the closest scrollable ancestor of the returned ref + * (the inspector content panel, the playground messages panel, etc.). + * - Tracks whether the user is currently "at the bottom" of that scroll + * container via a passive scroll listener. Default is `true` so the very + * first render of an existing conversation lands at the bottom, and the + * "content fits without scrolling" case stays in auto-scroll mode. + * - Whenever the dependency array changes (typically the messages array), + * if the user was at the bottom, programmatically scrolls to the new + * bottom. Uses `useLayoutEffect` so the scroll happens before paint and + * there's no one-frame flicker showing new content above the viewport. + * - Scrolling away from the bottom flips the ref to `false` → auto-scroll + * pauses. Scrolling back into the bottom band (within + * `AT_BOTTOM_TOLERANCE_PX`) flips it back to `true` → auto-scroll + * resumes. + * + * The programmatic scroll fires its own scroll event, which immediately + * re-runs the stickiness check and confirms we're still at the bottom + * (distance ≈ 0 ≤ tolerance), so the ref stays `true`. No special + * "ignore programmatic scroll" flag needed. + * + * @param deps Pass the rendered list (or any dependency that should + * trigger a re-scroll). Typically `[messages]`. + * @returns A ref to attach to the component's root element. The hook + * walks up from this element's parent to locate the scroll + * container, so the root must be mounted *inside* the + * scrollable region. + * + * @example + * ```tsx + * function ChatPanel({ messages }) { + * const rootRef = useAutoScrollToBottom([messages]); + * return ( + *
+ *
+ * {messages.map((m) => )} + *
+ *
+ * ); + * } + * ``` + */ +export function useAutoScrollToBottom(deps: ReadonlyArray) { + const rootRef = useRef(null); + const containerRef = useRef(null); + // Default true so initial mount + replay land at the bottom, and the + // no-overflow case stays sticky once content starts to grow. + const stickToBottomRef = useRef(true); + + // Locate the scroll container on mount and attach a passive scroll + // listener that updates `stickToBottomRef`. + useEffect(() => { + const findScrollContainer = (start: HTMLElement | null): HTMLElement | null => { + let current: HTMLElement | null = start; + while (current) { + const style = getComputedStyle(current); + const overflowY = style.overflowY; + if (overflowY === "auto" || overflowY === "scroll") return current; + current = current.parentElement; + } + return null; + }; + + const container = findScrollContainer(rootRef.current?.parentElement ?? null); + if (!container) return; + containerRef.current = container; + + const updateStickiness = () => { + const distanceFromBottom = + container.scrollHeight - container.scrollTop - container.clientHeight; + stickToBottomRef.current = distanceFromBottom <= AT_BOTTOM_TOLERANCE_PX; + }; + + // Seed from current position so the first messages-effect uses an + // accurate value rather than the default `true` if the user happened + // to mount the view already scrolled. + updateStickiness(); + + container.addEventListener("scroll", updateStickiness, { passive: true }); + return () => { + container.removeEventListener("scroll", updateStickiness); + containerRef.current = null; + }; + }, []); + + // After each commit that changes the deps (typically the messages + // array), if we were at the bottom, scroll to the new bottom. + useLayoutEffect(() => { + if (!stickToBottomRef.current) return; + const container = containerRef.current; + if (!container) return; + container.scrollTop = container.scrollHeight; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, deps); + + return rootRef; +} diff --git a/apps/webapp/app/hooks/useFuzzyFilter.ts b/apps/webapp/app/hooks/useFuzzyFilter.ts index 3f0797179f2..ff4504ce8c2 100644 --- a/apps/webapp/app/hooks/useFuzzyFilter.ts +++ b/apps/webapp/app/hooks/useFuzzyFilter.ts @@ -10,8 +10,9 @@ import { matchSorter } from "match-sorter"; * @param params.items - Array of objects to filter * @param params.keys - Array of object keys to perform the fuzzy search on (supports dot-notation for nested properties) * @returns An object containing: - * - filterText: The current filter text - * - setFilterText: Function to update the filter text + * - filterText: The current filter text (the controlled value if provided, otherwise the internal state) + * - setFilterText: Updates the internal filter text. No-op when `filterText` is provided + * (controlled mode) — the parent owns the value in that case. * - filteredItems: The filtered array of items based on the current filter text * * @example @@ -26,11 +27,15 @@ import { matchSorter } from "match-sorter"; export function useFuzzyFilter({ items, keys, + filterText: controlledFilterText, }: { items: T[]; keys: (Extract | (string & {}))[]; + /** Optional controlled filter text. If provided, internal state is ignored. */ + filterText?: string; }) { - const [filterText, setFilterText] = useState(""); + const [internalFilterText, setInternalFilterText] = useState(""); + const filterText = controlledFilterText ?? internalFilterText; const filteredItems = useMemo(() => { const filterTerms = filterText @@ -43,7 +48,6 @@ export function useFuzzyFilter({ return items; } - // sort by the score of the first term return filterTerms.reduceRight( (results, term) => matchSorter(results, term, { @@ -55,7 +59,7 @@ export function useFuzzyFilter({ return { filterText, - setFilterText, + setFilterText: setInternalFilterText, filteredItems, }; } diff --git a/apps/webapp/app/hooks/useInterval.ts b/apps/webapp/app/hooks/useInterval.ts index 4d5413e977e..b59884d455a 100644 --- a/apps/webapp/app/hooks/useInterval.ts +++ b/apps/webapp/app/hooks/useInterval.ts @@ -6,6 +6,8 @@ type UseIntervalOptions = { onLoad?: boolean; onFocus?: boolean; disabled?: boolean; + /** Skip interval ticks while the document tab is hidden */ + pauseWhenHidden?: boolean; callback: () => void; }; @@ -14,6 +16,7 @@ export function useInterval({ onLoad = true, onFocus = true, disabled = false, + pauseWhenHidden = false, callback, }: UseIntervalOptions) { // Always keep the latest callback in a ref so the effects below @@ -28,11 +31,14 @@ export function useInterval({ if (!interval || interval <= 0 || disabled) return; const intervalId = setInterval(() => { + if (pauseWhenHidden && document.visibilityState !== "visible") { + return; + } latestCallback.current(); }, interval); return () => clearInterval(intervalId); - }, [interval, disabled]); + }, [interval, disabled, pauseWhenHidden]); // On focus useEffect(() => { diff --git a/apps/webapp/app/hooks/useRegions.tsx b/apps/webapp/app/hooks/useRegions.tsx new file mode 100644 index 00000000000..fe696440473 --- /dev/null +++ b/apps/webapp/app/hooks/useRegions.tsx @@ -0,0 +1,16 @@ +import { type UIMatch } from "@remix-run/react"; +import { type UseDataFunctionReturn } from "remix-typedjson"; +import type { loader as orgLoader } from "~/routes/_app.orgs.$organizationSlug/route"; +import { organizationMatchId } from "./useOrganizations"; +import { useTypedMatchesData } from "./useTypedMatchData"; + +export type MatchedRegion = UseDataFunctionReturn["regions"][number]; + +export function useRegions(matches?: UIMatch[]): MatchedRegion[] { + const routeMatch = useTypedMatchesData({ + id: organizationMatchId, + matches, + }); + + return routeMatch?.regions ?? []; +} diff --git a/apps/webapp/app/models/admin.server.ts b/apps/webapp/app/models/admin.server.ts index bd3adc8cbf2..8620ea6a244 100644 --- a/apps/webapp/app/models/admin.server.ts +++ b/apps/webapp/app/models/admin.server.ts @@ -210,8 +210,13 @@ export async function adminGetOrganizations(userId: string, { page, search }: Se }; } -export async function redirectWithImpersonation(request: Request, userId: string, path: string) { - const user = await requireUser(request); +export async function redirectWithImpersonation( + request: Request, + userId: string, + path: string, + currentUser?: { id: string; admin: boolean } +) { + const user = currentUser ?? (await requireUser(request)); if (!user.admin) { throw new Error("Unauthorized"); } diff --git a/apps/webapp/app/models/api-key.server.ts b/apps/webapp/app/models/api-key.server.ts index 86609cc01d7..b5f2bd0f7d9 100644 --- a/apps/webapp/app/models/api-key.server.ts +++ b/apps/webapp/app/models/api-key.server.ts @@ -8,6 +8,8 @@ const apiKeyId = customAlphabet( 12 ); +const REVOKED_API_KEY_GRACE_PERIOD_MS = 24 * 60 * 60 * 1000; + type RegenerateAPIKeyInput = { userId: string; environmentId: string; @@ -63,14 +65,26 @@ export async function regenerateApiKey({ userId, environmentId }: RegenerateAPIK const newApiKey = createApiKeyForEnv(environment.type); const newPkApiKey = createPkApiKeyForEnv(environment.type); - const updatedEnviroment = await prisma.runtimeEnvironment.update({ - data: { - apiKey: newApiKey, - pkApiKey: newPkApiKey, - }, - where: { - id: environmentId, - }, + const revokedApiKeyExpiresAt = new Date(Date.now() + REVOKED_API_KEY_GRACE_PERIOD_MS); + + const updatedEnviroment = await prisma.$transaction(async (tx) => { + await tx.revokedApiKey.create({ + data: { + apiKey: environment.apiKey, + runtimeEnvironmentId: environment.id, + expiresAt: revokedApiKeyExpiresAt, + }, + }); + + return tx.runtimeEnvironment.update({ + data: { + apiKey: newApiKey, + pkApiKey: newPkApiKey, + }, + where: { + id: environmentId, + }, + }); }); return updatedEnviroment; diff --git a/apps/webapp/app/models/member.server.ts b/apps/webapp/app/models/member.server.ts index 04c1df1b41f..b88fc7e11c0 100644 --- a/apps/webapp/app/models/member.server.ts +++ b/apps/webapp/app/models/member.server.ts @@ -1,6 +1,8 @@ import { type Prisma, prisma } from "~/db.server"; import { createEnvironment } from "./organization.server"; import { customAlphabet } from "nanoid"; +import { logger } from "~/services/logger.server"; +import { rbac } from "~/services/rbac.server"; const tokenValueLength = 40; const tokenGenerator = customAlphabet("123456789abcdefghijkmnopqrstuvwxyz", tokenValueLength); @@ -86,10 +88,19 @@ export async function inviteMembers({ slug, emails, userId, + rbacRoleId, }: { slug: string; emails: string[]; userId: string; + /** + * Optional RBAC role to attach to the invite. When set, accepted + * invites trigger `rbac.setUserRole(rbacRoleId)` after the OrgMember + * is created. + * + * `OrgMemberInvite.role` is still set if the plugin isn't installed. + */ + rbacRoleId?: string | null; }) { const org = await prisma.organization.findFirst({ where: { slug, members: { some: { userId } } }, @@ -107,6 +118,7 @@ export async function inviteMembers({ organizationId: org.id, inviterId: userId, role: "MEMBER", + rbacRoleId: rbacRoleId ?? null, } satisfies Prisma.OrgMemberInviteCreateManyInput) ); @@ -163,7 +175,7 @@ export async function acceptInvite({ user: { id: string; email: string }; inviteId: string; }) { - return await prisma.$transaction(async (tx) => { + const result = await prisma.$transaction(async (tx) => { // 1. Delete the invite and get the invite details const invite = await tx.orgMemberInvite.delete({ where: { @@ -207,8 +219,32 @@ export async function acceptInvite({ }, }); - return { remainingInvites, organization: invite.organization }; + return { + remainingInvites, + organization: invite.organization, + inviteRole: invite.role, + rbacRoleId: invite.rbacRoleId, + }; }); + + // If the invite carried an explicit RBAC role. Errors are logged, not fatal. + if (result.rbacRoleId) { + const roleResult = await rbac.setUserRole({ + userId: user.id, + organizationId: result.organization.id, + roleId: result.rbacRoleId, + }); + if (!roleResult.ok) { + logger.error("acceptInvite: skipped RBAC role assignment", { + organizationId: result.organization.id, + userId: user.id, + rbacRoleId: result.rbacRoleId, + reason: roleResult.error, + }); + } + } + + return { remainingInvites: result.remainingInvites, organization: result.organization }; } export async function declineInvite({ diff --git a/apps/webapp/app/models/project.server.ts b/apps/webapp/app/models/project.server.ts index 0dc634b5ab7..d084bec8add 100644 --- a/apps/webapp/app/models/project.server.ts +++ b/apps/webapp/app/models/project.server.ts @@ -4,7 +4,7 @@ import { $replica, prisma } from "~/db.server"; import type { Prisma, Project } from "@trigger.dev/database"; import { type Organization, createEnvironment } from "./organization.server"; import { env } from "~/env.server"; -import { projectCreated } from "~/services/platform.v3.server"; +import { projectCreated } from "~/services/projectCreated.server"; export type { Project } from "@trigger.dev/database"; const externalRefGenerator = customAlphabet("abcdefghijklmnopqrstuvwxyz", 20); diff --git a/apps/webapp/app/models/runtimeEnvironment.server.ts b/apps/webapp/app/models/runtimeEnvironment.server.ts index f65112b71fc..be05adaa8a7 100644 --- a/apps/webapp/app/models/runtimeEnvironment.server.ts +++ b/apps/webapp/app/models/runtimeEnvironment.server.ts @@ -3,35 +3,138 @@ import type { Prisma, PrismaClientOrTransaction, RuntimeEnvironment } from "@tri import { $replica, prisma } from "~/db.server"; import { logger } from "~/services/logger.server"; import { getUsername } from "~/utils/username"; -import { sanitizeBranchName } from "~/v3/gitBranch"; +import { sanitizeBranchName } from "@trigger.dev/core/v3/utils/gitBranch"; export type { RuntimeEnvironment }; +// Prisma include shape that maps cleanly to the slim AuthenticatedEnvironment. +// Use this everywhere we fetch an env that flows to handlers — keeps the +// returned shape consistent (and the Decimal coercion in toAuthenticated() +// strips Prisma's Decimal class from the public surface). +export const authIncludeBase = { + project: true, + organization: true, + orgMember: { + select: { + userId: true, + user: { select: { id: true, displayName: true, name: true } }, + }, + }, +} satisfies Prisma.RuntimeEnvironmentInclude; + +export const authIncludeWithParent = { + ...authIncludeBase, + parentEnvironment: { select: { id: true, apiKey: true } }, +} satisfies Prisma.RuntimeEnvironmentInclude; + +type PrismaEnvWithAuth = Prisma.RuntimeEnvironmentGetPayload<{ include: typeof authIncludeBase }>; +type PrismaEnvWithAuthAndParent = Prisma.RuntimeEnvironmentGetPayload<{ + include: typeof authIncludeWithParent; +}>; + +// Coerce a Prisma RuntimeEnvironment payload to the slim +// AuthenticatedEnvironment shape. Drops the columns handlers don't read +// and converts `concurrencyLimitBurstFactor` from Prisma's Decimal to a +// plain number (lossless at this scale). The optional union accepts both +// query shapes — with parentEnvironment loaded, or without it. +export function toAuthenticated( + env: PrismaEnvWithAuth | PrismaEnvWithAuthAndParent, +): AuthenticatedEnvironment { + return { + id: env.id, + slug: env.slug, + type: env.type, + apiKey: env.apiKey, + organizationId: env.organizationId, + projectId: env.projectId, + orgMemberId: env.orgMemberId, + parentEnvironmentId: env.parentEnvironmentId, + branchName: env.branchName, + archivedAt: env.archivedAt, + paused: env.paused, + shortcode: env.shortcode, + maximumConcurrencyLimit: env.maximumConcurrencyLimit, + // Coerce Prisma's Decimal to a plain number — the slim type accepts + // both, but downstream consumers shouldn't have to narrow before + // doing arithmetic. Lossless at this scale (Decimal(4,2)). + concurrencyLimitBurstFactor: env.concurrencyLimitBurstFactor.toNumber(), + builtInEnvironmentVariableOverrides: env.builtInEnvironmentVariableOverrides, + createdAt: env.createdAt, + updatedAt: env.updatedAt, + project: { + id: env.project.id, + slug: env.project.slug, + name: env.project.name, + externalRef: env.project.externalRef, + engine: env.project.engine, + deletedAt: env.project.deletedAt, + defaultWorkerGroupId: env.project.defaultWorkerGroupId, + organizationId: env.project.organizationId, + builderProjectId: env.project.builderProjectId, + }, + organization: { + id: env.organization.id, + slug: env.organization.slug, + title: env.organization.title, + streamBasinName: env.organization.streamBasinName, + maximumConcurrencyLimit: env.organization.maximumConcurrencyLimit, + runsEnabled: env.organization.runsEnabled, + maximumDevQueueSize: env.organization.maximumDevQueueSize, + maximumDeployedQueueSize: env.organization.maximumDeployedQueueSize, + featureFlags: env.organization.featureFlags, + apiRateLimiterConfig: env.organization.apiRateLimiterConfig, + batchRateLimitConfig: env.organization.batchRateLimitConfig, + batchQueueConcurrencyConfig: env.organization.batchQueueConcurrencyConfig, + }, + orgMember: env.orgMember, + parentEnvironment: "parentEnvironment" in env ? env.parentEnvironment : null, + }; +} + export async function findEnvironmentByApiKey( apiKey: string, branchName: string | undefined ): Promise { - const environment = await $replica.runtimeEnvironment.findFirst({ + const include = { + ...authIncludeBase, + childEnvironments: branchName + ? { + where: { + branchName: sanitizeBranchName(branchName), + archivedAt: null, + }, + } + : undefined, + } satisfies Prisma.RuntimeEnvironmentInclude; + + let environment = await $replica.runtimeEnvironment.findFirst({ where: { apiKey, }, - include: { - project: true, - organization: true, - orgMember: true, - childEnvironments: branchName - ? { - where: { - branchName: sanitizeBranchName(branchName), - archivedAt: null, - }, - } - : undefined, - }, + include, }); + // Fall back to keys that were revoked within the grace window + if (!environment) { + const revokedApiKey = await $replica.revokedApiKey.findFirst({ + where: { + apiKey, + expiresAt: { gt: new Date() }, + }, + include: { + runtimeEnvironment: { include }, + }, + }); + + environment = revokedApiKey?.runtimeEnvironment ?? null; + } + + if (!environment) { + return null; + } + //don't return deleted projects - if (environment?.project.deletedAt !== null) { + if (environment.project.deletedAt !== null) { return null; } @@ -43,26 +146,36 @@ export async function findEnvironmentByApiKey( return null; } - const childEnvironment = environment?.childEnvironments.at(0); + const childEnvironment = environment.childEnvironments.at(0); if (childEnvironment) { - return { + return toAuthenticated({ ...childEnvironment, apiKey: environment.apiKey, orgMember: environment.orgMember, organization: environment.organization, project: environment.project, - }; + }); } //A branch was specified but no child environment was found return null; } - return environment; + return toAuthenticated(environment); } -/** @deprecated We don't use public api keys anymore */ +/** + * @deprecated We don't use public API keys (`pk_*` tokens) anymore — public + * access goes through public JWTs (see `isPublicJWT` / `validatePublicJwtKey`). + * + * Still exported because a handful of pre-RBAC routes that haven't been + * migrated to the apiBuilder still wire this lookup into their + * `authenticateApiKey` / `authenticateApiKeyWithFailure` flow. The new RBAC + * fallback (`internal-packages/rbac/src/fallback.ts`) intentionally does NOT + * call this — any pk_*-authenticated request that hits an apiBuilder route + * returns 401. That's a deliberate cutover, not an oversight. + */ export async function findEnvironmentByPublicApiKey( apiKey: string, branchName: string | undefined @@ -71,46 +184,29 @@ export async function findEnvironmentByPublicApiKey( where: { pkApiKey: apiKey, }, - include: { - project: true, - organization: true, - orgMember: true, - }, + include: authIncludeBase, }); - //don't return deleted projects - if (environment?.project.deletedAt !== null) { + if (!environment || environment.project.deletedAt !== null) { return null; } - return environment; + return toAuthenticated(environment); } -export async function findEnvironmentById( - id: string -): Promise<(AuthenticatedEnvironment & { parentEnvironment: { apiKey: string } | null }) | null> { +export async function findEnvironmentById(id: string): Promise { const environment = await $replica.runtimeEnvironment.findFirst({ where: { id, }, - include: { - project: true, - organization: true, - orgMember: true, - parentEnvironment: { - select: { - apiKey: true, - }, - }, - }, + include: authIncludeWithParent, }); - //don't return deleted projects - if (environment?.project.deletedAt !== null) { + if (!environment || environment.project.deletedAt !== null) { return null; } - return environment; + return toAuthenticated(environment); } export async function findEnvironmentBySlug( @@ -118,7 +214,7 @@ export async function findEnvironmentBySlug( envSlug: string, userId: string ): Promise { - return $replica.runtimeEnvironment.findFirst({ + const environment = await $replica.runtimeEnvironment.findFirst({ where: { projectId: projectId, slug: envSlug, @@ -136,41 +232,47 @@ export async function findEnvironmentBySlug( }, ], }, - include: { - project: true, - organization: true, - orgMember: true, - }, + include: authIncludeBase, }); + return environment ? toAuthenticated(environment) : null; } +// The authenticated environment plus the run scalars the realtime publish needs. +// Both come from one taskRun read — see findEnvironmentFromRun. +export type EnvironmentFromRun = { + environment: AuthenticatedEnvironment; + runTags: string[]; + batchId: string | null; +}; + export async function findEnvironmentFromRun( runId: string, tx?: PrismaClientOrTransaction -): Promise { +): Promise { + // The include (no select) already pulls every taskRun scalar, so runTags/batchId + // ride along for free — no extra query for the realtime publish to send a full record. const taskRun = await (tx ?? $replica).taskRun.findFirst({ where: { id: runId, }, include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, - orgMember: true, - }, - }, + runtimeEnvironment: { include: authIncludeBase }, }, }); - - if (!taskRun) { + if (!taskRun?.runtimeEnvironment) { return null; } - - return taskRun?.runtimeEnvironment; + return { + environment: toAuthenticated(taskRun.runtimeEnvironment), + runTags: taskRun.runTags, + batchId: taskRun.batchId, + }; } -export async function createNewSession(environment: RuntimeEnvironment, ipAddress: string) { +export async function createNewSession( + environment: Pick, + ipAddress: string +) { const session = await prisma.runtimeEnvironmentSession.create({ data: { environmentId: environment.id, diff --git a/apps/webapp/app/models/task.server.ts b/apps/webapp/app/models/task.server.ts index b696bac6039..aab3b3bcfc1 100644 --- a/apps/webapp/app/models/task.server.ts +++ b/apps/webapp/app/models/task.server.ts @@ -1,6 +1,9 @@ import type { TaskTriggerSource } from "@trigger.dev/database"; import { PrismaClientOrTransaction, sqlDatabaseSchema } from "~/db.server"; +export { getTaskIdentifiers } from "~/services/taskIdentifierRegistry.server"; +export type { TaskIdentifierEntry } from "~/services/taskIdentifierCache.server"; + /** * * @param prisma An efficient query to get all task identifiers for a project. diff --git a/apps/webapp/app/models/user.server.ts b/apps/webapp/app/models/user.server.ts index 68550f6e98c..c48221c4b61 100644 --- a/apps/webapp/app/models/user.server.ts +++ b/apps/webapp/app/models/user.server.ts @@ -233,7 +233,7 @@ export async function findOrCreateGoogleUser({ // Check if email user and auth user are the same if (existingEmailUser.id !== existingUser.id) { // Different users: email is taken by one user, Google auth belongs to another - logger.error( + logger.warn( `Google auth conflict: Google ID ${authenticationProfile.id} belongs to user ${existingUser.id} but email ${email} is taken by user ${existingEmailUser.id}`, { email, diff --git a/apps/webapp/app/models/vercelIntegration.server.ts b/apps/webapp/app/models/vercelIntegration.server.ts index 82bedc6430f..5acaea50afe 100644 --- a/apps/webapp/app/models/vercelIntegration.server.ts +++ b/apps/webapp/app/models/vercelIntegration.server.ts @@ -197,6 +197,26 @@ export type VercelEnvironmentVariableValue = { isSecret: boolean; }; +/** Minimal shape of a shared (team-level) env var record from `GET /v1/env`. */ +const RawSharedEnvVarSchema = z + .object({ + id: z.string().optional(), + key: z.string().optional(), + type: z.string().optional(), + target: z.union([z.array(z.string()), z.string()]).optional(), + value: z.string().optional(), + applyToAllCustomEnvironments: z.boolean().optional(), + }) + .passthrough(); + +type RawSharedEnvVar = z.infer; + +/** Page shape of `GET /v1/env` (shared env vars), validated at the boundary. */ +const SharedEnvPageSchema = z.object({ + data: z.array(RawSharedEnvVarSchema).default([]), + pagination: z.object({ next: z.number().nullish() }).nullish(), +}); + /** Narrowed Vercel project type – only id and name. */ export type VercelProject = Pick; @@ -298,6 +318,17 @@ export class VercelIntegrationRepository { static getVercelClient( integration: OrganizationIntegration & { tokenReference: SecretReference } ): ResultAsync { + return this.getVercelClientAndToken(integration).map(({ client }) => client); + } + + /** + * Resolve both the Vercel SDK client and the raw bearer token. The raw token + * is needed to paginate shared env vars via `fetch`, since the SDK's + * `listSharedEnvVariable` exposes no `until` cursor param. + */ + static getVercelClientAndToken( + integration: OrganizationIntegration & { tokenReference: SecretReference } + ): ResultAsync<{ client: Vercel; accessToken: string }, VercelApiError> { return ResultAsync.fromPromise( (async () => { const secretStore = getSecretStore(integration.tokenReference.provider); @@ -308,7 +339,7 @@ export class VercelIntegrationRepository { if (!secret) { throw new Error("Failed to get Vercel access token"); } - return new Vercel({ bearerToken: secret.accessToken }); + return { client: new Vercel({ bearerToken: secret.accessToken }), accessToken: secret.accessToken }; })(), (error) => toVercelApiError(error) ); @@ -558,8 +589,68 @@ export class VercelIntegrationRepository { }; } + /** + * Fetch ALL shared (team-level) env var records, following pagination. + * + * Unlike the project env endpoint, the shared endpoint (`/v1/env`) DOES + * paginate (≈25/page) and the SDK's `listSharedEnvVariable` exposes no cursor + * param — so we walk pages via a raw fetch using `pagination.next` (a + * millisecond-timestamp cursor) until it is null. Shared vars are an edge + * case, so we load every page up front and return the full set. + */ + static #fetchAllSharedEnvsRaw(params: { + accessToken: string; + teamId: string; + projectId?: string; + }): ResultAsync { + const { accessToken, teamId, projectId } = params; + return ResultAsync.fromPromise( + (async () => { + const all: RawSharedEnvVar[] = []; + let until: number | undefined = undefined; + const MAX_PAGES = 200; // safety cap (1000-var ceiling / ~25 per page) + + for (let page = 0; page < MAX_PAGES; page++) { + const url = new URL("https://api.vercel.com/v1/env"); + url.searchParams.set("teamId", teamId); + if (projectId) url.searchParams.set("projectId", projectId); + if (until !== undefined) url.searchParams.set("until", String(until)); + + const response = await fetch(url.toString(), { + method: "GET", + headers: { Authorization: `Bearer ${accessToken}` }, + }); + + if (!response.ok) { + const body = await response.text().catch(() => ""); + const error = new Error( + `Failed to fetch Vercel shared environment variables: ${response.status} ${response.statusText} — ${body}` + ) as Error & { status?: number }; + error.status = response.status; + throw error; + } + + const json = SharedEnvPageSchema.parse(await response.json()); + all.push(...json.data); + + // `next` is a millisecond-timestamp cursor; treat 0/null/undefined as "done". + const next = json.pagination?.next; + if (!next) break; + until = next; + + if (page === MAX_PAGES - 1) { + logger.warn("Vercel shared env var pagination hit max page cap", { teamId, projectId }); + } + } + + return all; + })(), + (error) => toVercelApiError(error) + ); + } + static getVercelSharedEnvironmentVariables( - client: Vercel, + accessToken: string, teamId: string, projectId?: string // Optional: filter by project ): ResultAsync, VercelApiError> { - return wrapVercelCallWithRecovery( - client.environment.listSharedEnvVariable({ - teamId, - ...(projectId && { projectId }), - }), - VercelSchemas.listSharedEnvVariable, - "Failed to fetch Vercel shared environment variables", - { teamId, projectId }, - toVercelApiError - ).map((response) => { - const envVars = response.data || []; + return this.#fetchAllSharedEnvsRaw({ accessToken, teamId, projectId }).map((envVars) => { return envVars - .filter((env): env is typeof env & { id: string; key: string } => + .filter((env): env is RawSharedEnvVar & { id: string; key: string } => typeof env.id === "string" && typeof env.key === "string" ) .map((env) => { @@ -599,6 +680,7 @@ export class VercelIntegrationRepository { static getVercelSharedEnvironmentVariableValues( client: Vercel, + accessToken: string, teamId: string, projectId?: string // Optional: filter by project ): ResultAsync< @@ -612,17 +694,7 @@ export class VercelIntegrationRepository { }>, VercelApiError > { - return wrapVercelCallWithRecovery( - client.environment.listSharedEnvVariable({ - teamId, - ...(projectId && { projectId }), - }), - VercelSchemas.listSharedEnvVariable, - "Failed to fetch Vercel shared environment variable values", - { teamId, projectId }, - toVercelApiError - ).andThen((listResponse) => { - const envVars = listResponse.data || []; + return this.#fetchAllSharedEnvsRaw({ accessToken, teamId, projectId }).andThen((envVars) => { if (envVars.length === 0) { return okAsync([]); } @@ -641,8 +713,8 @@ export class VercelIntegrationRepository { if (isSecret) return null; - const listValue = (env as any).value as string | undefined; - const applyToAllCustomEnvs = (env as any).applyToAllCustomEnvironments as boolean | undefined; + const listValue = env.value; + const applyToAllCustomEnvs = env.applyToAllCustomEnvironments; if (listValue) { return { @@ -960,7 +1032,7 @@ export class VercelIntegrationRepository { key: "TRIGGER_SECRET_KEY", value: runtimeEnv.apiKey, target: vercelTarget, - type: "encrypted", + type: "sensitive", environmentType: runtimeEnv.type, }); } @@ -1061,7 +1133,7 @@ export class VercelIntegrationRepository { key: "TRIGGER_SECRET_KEY", value: params.apiKey, target: vercelTarget, - type: "encrypted", + type: "sensitive", }); logger.info("Synced regenerated API key to Vercel", { @@ -1115,28 +1187,26 @@ export class VercelIntegrationRepository { return (env as any).customEnvironmentIds?.includes(customEnvironmentId); }); + // Always delete-then-create rather than editProjectEnv, because Vercel rejects + // in-place type changes (e.g. encrypted -> sensitive). if (existingEnv && existingEnv.id) { - await client.projects.editProjectEnv({ - idOrName: vercelProjectId, - id: existingEnv.id, - ...(teamId && { teamId }), - requestBody: { - value, - type, - }, - }); - } else { - await client.projects.createProjectEnv({ + await client.projects.batchRemoveProjectEnv({ idOrName: vercelProjectId, ...(teamId && { teamId }), - requestBody: { - key, - value, - type, - customEnvironmentIds: [customEnvironmentId], - } as any, + requestBody: { ids: [existingEnv.id] }, }); } + + await client.projects.createProjectEnv({ + idOrName: vercelProjectId, + ...(teamId && { teamId }), + requestBody: { + key, + value, + type, + customEnvironmentIds: [customEnvironmentId], + } as any, + }); })(), (error) => toVercelApiError(error) ) @@ -1203,7 +1273,7 @@ export class VercelIntegrationRepository { syncEnvVarsMappingKeys: Object.keys(params.syncEnvVarsMapping), }); - return this.getVercelClient(params.orgIntegration).andThen((client) => + return this.getVercelClientAndToken(params.orgIntegration).andThen(({ client, accessToken }) => ResultAsync.fromPromise( (async () => { const errors: string[] = []; @@ -1269,6 +1339,7 @@ export class VercelIntegrationRepository { if (params.teamId) { const sharedResult = await this.getVercelSharedEnvironmentVariableValues( client, + accessToken, params.teamId, params.vercelProjectId ); @@ -1709,29 +1780,27 @@ export class VercelIntegrationRepository { return target.length === envTargets.length && target.every((t) => envTargets.includes(t)); }); + // Always delete-then-create rather than editProjectEnv, because Vercel rejects + // in-place type changes (e.g. encrypted -> sensitive). Same approach used by + // syncApiKeysToVercel via removeAllVercelEnvVarsByKey. if (existingEnv && existingEnv.id) { - await client.projects.editProjectEnv({ + await client.projects.batchRemoveProjectEnv({ idOrName: vercelProjectId, - id: existingEnv.id, ...(teamId && { teamId }), - requestBody: { - value, - target: target as any, - type, - }, - }); - } else { - await client.projects.createProjectEnv({ - idOrName: vercelProjectId, - ...(teamId && { teamId }), - requestBody: { - key, - value, - target: target as any, - type, - }, + requestBody: { ids: [existingEnv.id] }, }); } + + await client.projects.createProjectEnv({ + idOrName: vercelProjectId, + ...(teamId && { teamId }), + requestBody: { + key, + value, + target: target as any, + type, + }, + }); } static getAutoAssignCustomDomains( diff --git a/apps/webapp/app/presenters/RunFilters.server.ts b/apps/webapp/app/presenters/RunFilters.server.ts index ff9f53429eb..4254dc83e61 100644 --- a/apps/webapp/app/presenters/RunFilters.server.ts +++ b/apps/webapp/app/presenters/RunFilters.server.ts @@ -34,8 +34,10 @@ export async function getRunFiltersFromRequest(request: Request): Promise m.user.id), + organizationId + ), + ]); + + const memberRoles = result.members.map((m) => ({ + userId: m.user.id, + role: memberRoleMap.get(m.user.id) ?? null, + })); const canPurchaseSeats = currentPlan?.v3Subscription?.plan?.limits.teamMembers.canExceed === true; @@ -38,6 +58,9 @@ export class TeamPresenter extends BasePresenter { seatPricing, maxSeatQuota, planSeatLimit, + roles, + assignableRoleIds, + memberRoles, }; } } diff --git a/apps/webapp/app/presenters/v3/AgentListPresenter.server.ts b/apps/webapp/app/presenters/v3/AgentListPresenter.server.ts new file mode 100644 index 00000000000..fe813ca08b4 --- /dev/null +++ b/apps/webapp/app/presenters/v3/AgentListPresenter.server.ts @@ -0,0 +1,294 @@ +import { + type PrismaClientOrTransaction, + type RuntimeEnvironmentType, + type TaskTriggerSource, +} from "@trigger.dev/database"; +import { type ClickHouse } from "@internal/clickhouse"; +import { z } from "zod"; +import { $replica } from "~/db.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; +import { singleton } from "~/utils/singleton"; +import { findCurrentWorkerFromEnvironment } from "~/v3/models/workerDeployment.server"; + +export type AgentListItem = { + slug: string; + filePath: string; + createdAt: Date; + triggerSource: TaskTriggerSource; + config: unknown; +}; + +export type AgentActiveState = { + running: number; + suspended: number; +}; + +export class AgentListPresenter { + constructor(private readonly _replica: PrismaClientOrTransaction) {} + + public async call({ + organizationId, + projectId, + environmentId, + environmentType, + }: { + organizationId: string; + projectId: string; + environmentId: string; + environmentType: RuntimeEnvironmentType; + }) { + const clickhouse = await clickhouseFactory.getClickhouseForOrganization( + organizationId, + "standard" + ); + + const currentWorker = await findCurrentWorkerFromEnvironment( + { + id: environmentId, + type: environmentType, + }, + this._replica + ); + + if (!currentWorker) { + return { + agents: [], + activeStates: Promise.resolve({} as Record), + conversationSparklines: Promise.resolve({} as Record), + costSparklines: Promise.resolve({} as Record), + tokenSparklines: Promise.resolve({} as Record), + }; + } + + const agents = await this._replica.backgroundWorkerTask.findMany({ + where: { + workerId: currentWorker.id, + triggerSource: "AGENT", + }, + select: { + id: true, + slug: true, + filePath: true, + triggerSource: true, + config: true, + createdAt: true, + }, + orderBy: { + slug: "asc", + }, + }); + + const slugs = agents.map((a) => a.slug); + + if (slugs.length === 0) { + return { + agents, + activeStates: Promise.resolve({} as Record), + conversationSparklines: Promise.resolve({} as Record), + costSparklines: Promise.resolve({} as Record), + tokenSparklines: Promise.resolve({} as Record), + }; + } + + // All queries are deferred for streaming + const activeStates = this.#getActiveStates(clickhouse, environmentId, slugs); + const conversationSparklines = this.#getConversationSparklines(clickhouse, environmentId, slugs); + const costSparklines = this.#getCostSparklines(clickhouse, environmentId, slugs); + const tokenSparklines = this.#getTokenSparklines(clickhouse, environmentId, slugs); + + return { agents, activeStates, conversationSparklines, costSparklines, tokenSparklines }; + } + + /** Count runs currently executing vs suspended per agent */ + async #getActiveStates( + clickhouse: ClickHouse, + environmentId: string, + slugs: string[] + ): Promise> { + const queryFn = clickhouse.reader.query({ + name: "agentActiveStates", + query: `SELECT + task_identifier, + countIf(status = 'EXECUTING') AS running, + countIf(status IN ('WAITING_TO_RESUME', 'QUEUED_EXECUTING')) AS suspended + FROM trigger_dev.task_runs_v2 + WHERE environment_id = {environmentId: String} + AND task_identifier IN {slugs: Array(String)} + AND task_kind = 'AGENT' + AND status IN ('EXECUTING', 'WAITING_TO_RESUME', 'QUEUED_EXECUTING') + GROUP BY task_identifier`, + params: z.object({ + environmentId: z.string(), + slugs: z.array(z.string()), + }), + schema: z.object({ + task_identifier: z.string(), + running: z.coerce.number(), + suspended: z.coerce.number(), + }), + }); + + const [error, rows] = await queryFn({ environmentId, slugs }); + if (error) { + console.error("Agent active states query failed:", error); + return {}; + } + + const result: Record = {}; + for (const row of rows) { + result[row.task_identifier] = { running: row.running, suspended: row.suspended }; + } + return result; + } + + /** 24h hourly sparkline of conversation (run) count per agent */ + async #getConversationSparklines( + clickhouse: ClickHouse, + environmentId: string, + slugs: string[] + ): Promise> { + const queryFn = clickhouse.reader.query({ + name: "agentConversationSparklines", + query: `SELECT + task_identifier, + toStartOfHour(created_at) AS bucket, + count() AS val + FROM trigger_dev.task_runs_v2 + WHERE environment_id = {environmentId: String} + AND task_identifier IN {slugs: Array(String)} + AND task_kind = 'AGENT' + AND created_at >= now() - INTERVAL 24 HOUR + GROUP BY task_identifier, bucket + ORDER BY task_identifier, bucket`, + params: z.object({ + environmentId: z.string(), + slugs: z.array(z.string()), + }), + schema: z.object({ + task_identifier: z.string(), + bucket: z.string(), + val: z.coerce.number(), + }), + }); + + return this.#buildSparklineMap(await queryFn({ environmentId, slugs }), slugs); + } + + /** 24h hourly sparkline of LLM cost per agent */ + async #getCostSparklines( + clickhouse: ClickHouse, + environmentId: string, + slugs: string[] + ): Promise> { + const queryFn = clickhouse.reader.query({ + name: "agentCostSparklines", + query: `SELECT + task_identifier, + toStartOfHour(start_time) AS bucket, + sum(total_cost) AS val + FROM trigger_dev.llm_metrics_v1 + WHERE environment_id = {environmentId: String} + AND task_identifier IN {slugs: Array(String)} + AND start_time >= now() - INTERVAL 24 HOUR + GROUP BY task_identifier, bucket + ORDER BY task_identifier, bucket`, + params: z.object({ + environmentId: z.string(), + slugs: z.array(z.string()), + }), + schema: z.object({ + task_identifier: z.string(), + bucket: z.string(), + val: z.coerce.number(), + }), + }); + + return this.#buildSparklineMap(await queryFn({ environmentId, slugs }), slugs); + } + + /** 24h hourly sparkline of total tokens per agent */ + async #getTokenSparklines( + clickhouse: ClickHouse, + environmentId: string, + slugs: string[] + ): Promise> { + const queryFn = clickhouse.reader.query({ + name: "agentTokenSparklines", + query: `SELECT + task_identifier, + toStartOfHour(start_time) AS bucket, + sum(total_tokens) AS val + FROM trigger_dev.llm_metrics_v1 + WHERE environment_id = {environmentId: String} + AND task_identifier IN {slugs: Array(String)} + AND start_time >= now() - INTERVAL 24 HOUR + GROUP BY task_identifier, bucket + ORDER BY task_identifier, bucket`, + params: z.object({ + environmentId: z.string(), + slugs: z.array(z.string()), + }), + schema: z.object({ + task_identifier: z.string(), + bucket: z.string(), + val: z.coerce.number(), + }), + }); + + return this.#buildSparklineMap(await queryFn({ environmentId, slugs }), slugs); + } + + /** Convert ClickHouse query result to sparkline map with zero-filled 24 hourly buckets */ + #buildSparklineMap( + queryResult: [Error, null] | [null, { task_identifier: string; bucket: string; val: number }[]], + slugs: string[] + ): Record { + const [error, rows] = queryResult; + if (error) { + console.error("Agent sparkline query failed:", error); + return {}; + } + return this.#buildSparklineFromRows(rows, slugs); + } + + #buildSparklineFromRows( + rows: { task_identifier: string; bucket: string; val: number }[], + slugs: string[] + ): Record { + const now = new Date(); + const startHour = new Date( + Date.UTC( + now.getUTCFullYear(), + now.getUTCMonth(), + now.getUTCDate(), + now.getUTCHours() - 23, + 0, + 0, + 0 + ) + ); + + const bucketKeys: string[] = []; + for (let i = 0; i < 24; i++) { + const h = new Date(startHour.getTime() + i * 3600_000); + bucketKeys.push(h.toISOString().slice(0, 13).replace("T", " ") + ":00:00"); + } + + const rowMap = new Map(); + for (const row of rows) { + rowMap.set(`${row.task_identifier}|${row.bucket}`, row.val); + } + + const result: Record = {}; + for (const slug of slugs) { + result[slug] = bucketKeys.map((key) => rowMap.get(`${slug}|${key}`) ?? 0); + } + return result; + } +} + +export const agentListPresenter = singleton("agentListPresenter", setupAgentListPresenter); + +function setupAgentListPresenter() { + return new AgentListPresenter($replica); +} diff --git a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts index ebac8e089f5..00290b36203 100644 --- a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts @@ -9,12 +9,18 @@ import { logger, } from "@trigger.dev/core/v3"; import { parsePacketAsJson } from "@trigger.dev/core/v3/utils/ioSerialization"; +import { BatchId } from "@trigger.dev/core/v3/isomorphic"; import { getUserProvidedIdempotencyKey } from "@trigger.dev/core/v3/serverOnly"; import { Prisma, TaskRunAttemptStatus, TaskRunStatus } from "@trigger.dev/database"; import assertNever from "assert-never"; import { API_VERSIONS, CURRENT_API_VERSION, RunStatusUnspecifiedApiVersion } from "~/api/versions"; import { $replica, prisma } from "~/db.server"; +import { baseWorkerQueue } from "~/runEngine/concerns/workerQueueSplit.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { + findRunByIdWithMollifierFallback, + type SyntheticRun, +} from "~/v3/mollifier/readFallback.server"; import { generatePresignedUrl } from "~/v3/objectStore.server"; import { tracer } from "~/v3/tracer.server"; import { startSpanWithEnv } from "~/v3/tracing.server"; @@ -42,6 +48,7 @@ const commonRunSelect = { isTest: true, depth: true, scheduleId: true, + workerQueue: true, lockedToVersion: { select: { version: true, @@ -63,13 +70,46 @@ type CommonRelatedRun = Prisma.Result< "findFirstOrThrow" >; -type FoundRun = NonNullable>>; +// Full shape returned by findRun() — the commonRunSelect fields plus the +// extras the route handler reads. Declared explicitly (not inferred via +// ReturnType) so findRun can return a synthesised buffered +// run without the type becoming self-referential. +// Exported so the buffer-synthesis helper below can be unit-tested +// against a stable shape without re-deriving it (FoundRun's exact field +// list is what the buffered run must match for `call()` not to surprise). +export type FoundRun = CommonRelatedRun & { + traceId: string; + payload: string; + payloadType: string; + output: string | null; + outputType: string; + error: Prisma.JsonValue; + attempts: { id: string }[]; + attemptNumber: number | null; + engine: "V1" | "V2"; + taskEventStore: string; + parentTaskRun: CommonRelatedRun | null; + rootTaskRun: CommonRelatedRun | null; + childRuns: CommonRelatedRun[]; + // True when this run was synthesised from the mollifier buffer rather + // than read from Postgres. Callers that would otherwise query backing + // stores keyed on PG identifiers (e.g. ClickHouse event lookups by + // traceId) can short-circuit to an empty response — buffered runs + // haven't executed and have no events to fetch. Devin's analysis on + // PR #3755 (events endpoint) flagged the pre-fix code as making a + // wasted ClickHouse round-trip when this is set; gate on this flag + // instead. + isBuffered: boolean; +}; export class ApiRetrieveRunPresenter { constructor(private readonly apiVersion: API_VERSIONS) {} - public static async findRun(friendlyId: string, env: AuthenticatedEnvironment) { - return $replica.taskRun.findFirst({ + public static async findRun( + friendlyId: string, + env: AuthenticatedEnvironment, + ): Promise { + const pgRow = await $replica.taskRun.findFirst({ where: { friendlyId, runtimeEnvironmentId: env.id, @@ -101,6 +141,23 @@ export class ApiRetrieveRunPresenter { }, }, }); + + if (pgRow) return { ...pgRow, isBuffered: false }; + + // Postgres miss → fall back to the mollifier buffer. When the gate + // diverted a trigger, the run lives in Redis until the drainer replays + // it through engine.trigger. Synthesise the FoundRun shape so call() + // returns a `QUEUED` (or `FAILED`) response with empty output, no + // attempts, no relations. + const buffered = await findRunByIdWithMollifierFallback({ + runId: friendlyId, + environmentId: env.id, + organizationId: env.organizationId, + }); + + if (!buffered) return null; + + return synthesiseFoundRunFromBuffer(buffered); } public async call(taskRun: FoundRun, env: AuthenticatedEnvironment) { @@ -463,6 +520,7 @@ async function createCommonRunStructure(run: CommonRelatedRun, apiVersion: API_V triggerFunction: resolveTriggerFunction(run), batchId: run.batch?.friendlyId, metadata, + region: run.workerQueue ? baseWorkerQueue(run.workerQueue) : undefined, }; } @@ -473,3 +531,162 @@ function resolveTriggerFunction(run: CommonRelatedRun): TriggerFunction { return run.resumeParentOnCompletion ? "triggerAndWait" : "trigger"; } } + +// Build a FoundRun-shaped object from a buffered (mollified) run. The run +// is in the Redis buffer; engine.trigger hasn't created the Postgres row +// yet, so every field that comes from execution state (output, attempts, +// completedAt, cost, relations) takes a default. The presenter's call() +// handles QUEUED-state runs without surprise. +function bufferedStatusToTaskRunStatus(status: SyntheticRun["status"]): TaskRunStatus { + switch (status) { + case "FAILED": + return "SYSTEM_FAILURE"; + case "CANCELED": + return "CANCELED"; + default: + return "PENDING"; + } +} + +// The PG path stores `TaskRun.payload` as `String?`, so in production +// the buffered snapshot's `payload` is always a string. We defensively +// coerce other types instead of silently dropping them: an object gets +// JSON-stringified (matches how the trigger path would serialise it), +// anything truly unrenderable falls back to an empty string. The log +// line surfaces format drift to ops without crashing the read path. +function synthesisePayload(buffered: SyntheticRun): string { + const payload = buffered.payload; + if (typeof payload === "string") return payload; + if (payload === undefined || payload === null) return ""; + try { + const serialised = JSON.stringify(payload); + logger.warn("ApiRetrieveRunPresenter: buffered snapshot.payload non-string coerced", { + runFriendlyId: buffered.friendlyId, + payloadType: typeof payload, + }); + return typeof serialised === "string" ? serialised : ""; + } catch { + logger.error("ApiRetrieveRunPresenter: buffered snapshot.payload unserialisable", { + runFriendlyId: buffered.friendlyId, + payloadType: typeof payload, + }); + return ""; + } +} + +// Mirror synthesisePayload for metadata. The PG path stores +// `TaskRun.metadata` as `String?`, and the snapshot writes it from +// `metadataPacket.data` (also a string), so in production it is always a +// string or absent. We coerce defensively — an object gets JSON-stringified +// (matching how the trigger path serialises it) rather than silently +// dropped to null, and the log line surfaces format drift to ops. +function synthesiseMetadata(buffered: SyntheticRun): string | null { + const metadata = buffered.metadata; + if (typeof metadata === "string") return metadata; + if (metadata === undefined || metadata === null) return null; + try { + const serialised = JSON.stringify(metadata); + logger.warn("ApiRetrieveRunPresenter: buffered snapshot.metadata non-string coerced", { + runFriendlyId: buffered.friendlyId, + metadataType: typeof metadata, + }); + return typeof serialised === "string" ? serialised : null; + } catch { + logger.error("ApiRetrieveRunPresenter: buffered snapshot.metadata unserialisable", { + runFriendlyId: buffered.friendlyId, + metadataType: typeof metadata, + }); + return null; + } +} + +// Exported for unit testing. Used by `findRun()` above when the +// Postgres lookup misses and the buffer carries the run — keep the shape +// in lockstep with `FoundRun`'s field list so `call()` treats a synthesised +// buffered run identically to a freshly-triggered PG row. +export function synthesiseFoundRunFromBuffer(buffered: SyntheticRun): FoundRun { + const status: TaskRunStatus = bufferedStatusToTaskRunStatus(buffered.status); + + const errorJson: Prisma.JsonValue = buffered.error + ? { + type: "STRING_ERROR", + raw: `${buffered.error.code}: ${buffered.error.message}`, + } + : null; + + const metadata: string | null = synthesiseMetadata(buffered); + + return { + // `id` is the internal cuid (Prisma TaskRun.id column), `friendlyId` + // is the user-facing `run_xxx` token. Downstream logging keyed off + // `taskRun.id` correlates with other systems via the cuid — using + // the friendlyId here breaks log correlation. `SyntheticRun` carries + // the cuid alongside the friendlyId for exactly this reason + // (RunId.fromFriendlyId in readFallback.server.ts). + id: buffered.id, + friendlyId: buffered.friendlyId, + status, + taskIdentifier: buffered.taskIdentifier ?? "", + createdAt: buffered.createdAt, + startedAt: null, + updatedAt: buffered.cancelledAt ?? buffered.createdAt, + // PG-resident SYSTEM_FAILURE rows always have `completedAt` set by + // the engine; the buffer-synth path must match so SDK consumers + // that poll on `isCompleted` and then read `finishedAt` see a real + // timestamp instead of `undefined`. CANCELED already had this via + // `buffered.cancelledAt`; fall back to `buffered.createdAt` for + // FAILED (the buffer entry has no separate "failedAt" — the + // best-available approximation of when the terminal state landed + // is the entry's creation time). + completedAt: + buffered.cancelledAt ?? (status === "SYSTEM_FAILURE" ? buffered.createdAt : null), + expiredAt: null, + delayUntil: buffered.delayUntil ?? null, + metadata, + metadataType: buffered.metadataType ?? "application/json", + ttl: buffered.ttl ?? null, + costInCents: 0, + baseCostInCents: 0, + usageDurationMs: 0, + idempotencyKey: buffered.idempotencyKey ?? null, + idempotencyKeyOptions: buffered.idempotencyKeyOptions ?? null, + isTest: buffered.isTest, + depth: buffered.depth, + // Scheduled triggers go through the same TriggerTaskService path as + // API triggers and aren't bypassed by the mollifier gate, so a + // scheduled run can land in the buffer with its scheduleId set on the + // snapshot. Forward it so resolveSchedule() can hydrate the `schedule` + // field in the API response instead of silently dropping it until the + // drainer materialises. + scheduleId: buffered.scheduleId ?? null, + lockedToVersion: buffered.lockedToVersion ? { version: buffered.lockedToVersion } : null, + resumeParentOnCompletion: buffered.resumeParentOnCompletion, + // Reconstruct the batch from the snapshot's internal id so a buffered + // run reports the same `batchId` / triggerFunction as it will once + // materialised, and so batch-scoped JWTs authorise against it (the + // route authorization callbacks read `run.batch?.friendlyId`). + batch: buffered.batchId + ? { id: buffered.batchId, friendlyId: BatchId.toFriendlyId(buffered.batchId) } + : null, + runTags: buffered.tags, + traceId: buffered.traceId ?? "", + payload: synthesisePayload(buffered), + payloadType: buffered.payloadType ?? "application/json", + output: null, + outputType: "application/json", + error: errorJson, + attempts: [], + attemptNumber: null, + engine: "V2", + taskEventStore: "taskEvent", + // Empty string when absent (matches syntheticSpanRun.server.ts and lets + // `createCommonRunStructure`'s `run.workerQueue || undefined` coerce the + // API response's `region` to undefined instead of advertising a + // misleading "main" region for a not-yet-assigned buffered run). + workerQueue: buffered.workerQueue ?? "", + parentTaskRun: null, + rootTaskRun: null, + childRuns: [], + isBuffered: true, + }; +} diff --git a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts index 254ec18d1c0..0e7077b3dfc 100644 --- a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts @@ -9,7 +9,7 @@ import { type Project, type RuntimeEnvironment, type TaskRunStatus } from "@trig import assertNever from "assert-never"; import { z } from "zod"; import { API_VERSIONS, RunStatusUnspecifiedApiVersion } from "~/api/versions"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { logger } from "~/services/logger.server"; import { CoercedDate } from "~/utils/zod"; import { ServiceValidationError } from "~/v3/services/baseService.server"; @@ -116,6 +116,12 @@ export const ApiRunListSearchParams = z.object({ .transform((value) => { return value ? value.split(",") : undefined; }), + "filter[region]": z + .string() + .optional() + .transform((value) => { + return value ? value.split(",") : undefined; + }), "filter[machine]": z .string() .optional() @@ -149,10 +155,10 @@ type ApiRunListSearchParams = z.infer; export class ApiRunListPresenter extends BasePresenter { public async call( - project: Project, + project: Pick, searchParams: ApiRunListSearchParams, apiVersion: API_VERSIONS, - environment?: RuntimeEnvironment + environment?: Pick ) { return this.trace("call", async (span) => { const options: RunListOptions = { @@ -255,11 +261,16 @@ export class ApiRunListPresenter extends BasePresenter { options.queues = searchParams["filter[queue]"]; } + if (searchParams["filter[region]"]) { + options.regions = searchParams["filter[region]"]; + } + if (searchParams["filter[machine]"]) { options.machines = searchParams["filter[machine]"]; } - const presenter = new NextRunListPresenter(this._replica, clickhouseClient); + const clickhouse = await clickhouseFactory.getClickhouseForOrganization(organizationId, "standard"); + const presenter = new NextRunListPresenter(this._replica, clickhouse); logger.debug("Calling RunListPresenter", { options }); @@ -304,6 +315,11 @@ export class ApiRunListPresenter extends BasePresenter { durationMs: run.usageDurationMs, depth: run.depth, metadata, + // ClickHouse defaults `task_kind` to "" for pre-migration rows. + // Match `NextRunListPresenter`'s "STANDARD" fallback so API + // consumers and the dashboard see the same value. + taskKind: run.taskKind || "STANDARD", + region: run.region ?? undefined, ...ApiRetrieveRunPresenter.apiBooleanHelpersFromRunStatus( ApiRetrieveRunPresenter.apiStatusFromRunStatus(run.status, apiVersion) ), diff --git a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts index 2ec34614533..a6374c60be2 100644 --- a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts +++ b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts @@ -216,7 +216,7 @@ const overviewDashboard: BuiltInDashboard = { const llmDashboard: BuiltInDashboard = { key: "llm", - title: "AI Metrics", + title: "AI metrics", filters: ["tasks", "models", "prompts", "operations", "providers"], layout: { version: "1", diff --git a/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts b/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts index acf511f0f5e..c3c62cd5d95 100644 --- a/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts @@ -1,6 +1,6 @@ import { type PrismaClient } from "@trigger.dev/database"; import { CreateBulkActionSearchParams } from "~/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.bulkaction"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; import { getRunFiltersFromRequest } from "../RunFilters.server"; import { BasePresenter } from "./basePresenter.server"; @@ -24,8 +24,9 @@ export class CreateBulkActionPresenter extends BasePresenter { Object.fromEntries(new URL(request.url).searchParams) ); + const clickhouse = await clickhouseFactory.getClickhouseForOrganization(organizationId, "standard"); const runsRepository = new RunsRepository({ - clickhouse: clickhouseClient, + clickhouse, prisma: this._replica as PrismaClient, }); diff --git a/apps/webapp/app/presenters/v3/DevPresence.server.ts b/apps/webapp/app/presenters/v3/DevPresence.server.ts index fa606cf9f1b..d751b6d7114 100644 --- a/apps/webapp/app/presenters/v3/DevPresence.server.ts +++ b/apps/webapp/app/presenters/v3/DevPresence.server.ts @@ -1,4 +1,5 @@ import Redis, { type RedisOptions } from "ioredis"; +import { defaultReconnectOnError } from "@internal/redis"; import { env } from "~/env.server"; const PRESENCE_KEY_PREFIX = "dev-presence:connection:"; @@ -7,7 +8,7 @@ export class DevPresence { private redis: Redis; constructor(options: RedisOptions) { - this.redis = new Redis(options); + this.redis = new Redis({ reconnectOnError: defaultReconnectOnError, ...options }); } async isConnected(environmentId: string) { diff --git a/apps/webapp/app/presenters/v3/EnvironmentQueuePresenter.server.ts b/apps/webapp/app/presenters/v3/EnvironmentQueuePresenter.server.ts index 10201094376..5bcdee6b0a9 100644 --- a/apps/webapp/app/presenters/v3/EnvironmentQueuePresenter.server.ts +++ b/apps/webapp/app/presenters/v3/EnvironmentQueuePresenter.server.ts @@ -47,7 +47,10 @@ export class EnvironmentQueuePresenter extends BasePresenter { running, queued, concurrencyLimit: environment.maximumConcurrencyLimit, - burstFactor: environment.concurrencyLimitBurstFactor.toNumber(), + burstFactor: + typeof environment.concurrencyLimitBurstFactor === "number" + ? environment.concurrencyLimitBurstFactor + : environment.concurrencyLimitBurstFactor.toNumber(), runsEnabled: environment.type === "DEVELOPMENT" || organization.runsEnabled, queueSizeLimit, }; diff --git a/apps/webapp/app/presenters/v3/EnvironmentVariablesPresenter.server.ts b/apps/webapp/app/presenters/v3/EnvironmentVariablesPresenter.server.ts index aff55263fec..720bfda8db7 100644 --- a/apps/webapp/app/presenters/v3/EnvironmentVariablesPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/EnvironmentVariablesPresenter.server.ts @@ -1,7 +1,6 @@ -import { PrismaClient, prisma } from "~/db.server"; +import { $replica, PrismaClient, PrismaReplicaClient, prisma } from "~/db.server"; import { Project } from "~/models/project.server"; import { User } from "~/models/user.server"; -import { filterOrphanedEnvironments, sortEnvironments } from "~/utils/environmentSort"; import { EnvironmentVariablesRepository } from "~/v3/environmentVariables/environmentVariablesRepository.server"; import type { EnvironmentVariableUpdater } from "~/v3/environmentVariables/repository"; import { @@ -9,19 +8,22 @@ import { EnvSlug, } from "~/v3/vercel/vercelProjectIntegrationSchema"; import { VercelIntegrationService } from "~/services/vercelIntegration.server"; +import { loadEnvironmentVariablesEnvironments } from "./environmentVariablesEnvironments.server"; type Result = Awaited>; export type EnvironmentVariableWithSetValues = Result["environmentVariables"][number]; export class EnvironmentVariablesPresenter { #prismaClient: PrismaClient; + #replicaClient: PrismaReplicaClient; - constructor(prismaClient: PrismaClient = prisma) { + constructor(prismaClient: PrismaClient = prisma, replicaClient: PrismaReplicaClient = $replica) { this.#prismaClient = prismaClient; + this.#replicaClient = replicaClient; } public async call({ userId, projectSlug }: { userId: User["id"]; projectSlug: Project["slug"] }) { - const project = await this.#prismaClient.project.findFirst({ + const project = await this.#replicaClient.project.findFirst({ select: { id: true, }, @@ -41,7 +43,18 @@ export class EnvironmentVariablesPresenter { throw new Error("Project not found"); } - const environmentVariables = await this.#prismaClient.environmentVariable.findMany({ + const { environments: sortedEnvironments, hasStaging } = + await loadEnvironmentVariablesEnvironments( + this.#replicaClient, + { userId, projectId: project.id }, + { skipProjectAccessCheck: true } + ); + + // Only load values for the environments we display. Projects can accumulate + // values in archived branch environments, which would otherwise all be loaded here. + const environmentIds = sortedEnvironments.map((env) => env.id); + + const environmentVariables = await this.#replicaClient.environmentVariable.findMany({ select: { id: true, key: true, @@ -59,20 +72,16 @@ export class EnvironmentVariablesPresenter { }, isSecret: true, }, - }, - }, - where: { - project: { - slug: projectSlug, - organization: { - members: { - some: { - userId, - }, + where: { + environmentId: { + in: environmentIds, }, }, }, }, + where: { + projectId: project.id, + }, }); const userIds = new Set( @@ -93,7 +102,7 @@ export class EnvironmentVariablesPresenter { const users = userIds.size > 0 - ? await this.#prismaClient.user.findMany({ + ? await this.#replicaClient.user.findMany({ where: { id: { in: Array.from(userIds), @@ -111,32 +120,22 @@ export class EnvironmentVariablesPresenter { const usersRecord: Record = Object.fromEntries(users.map((u) => [u.id, u])); - const environments = await this.#prismaClient.runtimeEnvironment.findMany({ - select: { - id: true, - type: true, - isBranchableEnvironment: true, - branchName: true, - orgMember: { - select: { - userId: true, - }, - }, - }, - where: { - project: { - slug: projectSlug, - }, - archivedAt: null, - }, - }); + const repository = new EnvironmentVariablesRepository(this.#prismaClient, this.#replicaClient); - const sortedEnvironments = sortEnvironments(filterOrphanedEnvironments(environments)).filter( - (e) => e.orgMember?.userId === userId || e.orgMember === null - ); + const nonSecretItems: Array<{ environmentId: string; key: string }> = []; + for (const environmentVariable of environmentVariables) { + for (const env of sortedEnvironments) { + const valueRecord = environmentVariable.values.find((v) => v.environmentId === env.id); + if (valueRecord && !valueRecord.isSecret) { + nonSecretItems.push({ environmentId: env.id, key: environmentVariable.key }); + } + } + } - const repository = new EnvironmentVariablesRepository(this.#prismaClient); - const variables = await repository.getProject(project.id); + const variableValuesByEnvAndKey = await repository.getVariableValuesForKeys( + project.id, + nonSecretItems + ); // Get Vercel integration data if it exists const vercelService = new VercelIntegrationService(this.#prismaClient); @@ -153,14 +152,19 @@ export class EnvironmentVariablesPresenter { return { environmentVariables: environmentVariables .flatMap((environmentVariable) => { - const variable = variables.find((v) => v.key === environmentVariable.key); - return sortedEnvironments.flatMap((env) => { - const val = variable?.values.find((v) => v.environment.id === env.id); const valueRecord = environmentVariable.values.find((v) => v.environmentId === env.id); const isSecret = valueRecord?.isSecret ?? false; - if (!val || !valueRecord) { + if (!valueRecord) { + return []; + } + + const val = isSecret + ? undefined + : variableValuesByEnvAndKey.get(`${env.id}:${environmentVariable.key}`); + + if (!isSecret && val === undefined) { return []; } @@ -185,7 +189,7 @@ export class EnvironmentVariablesPresenter { id: environmentVariable.id, key: environmentVariable.key, environment: { type: env.type, id: env.id, branchName: env.branchName }, - value: isSecret ? "" : val.value, + value: isSecret ? "" : val!, isSecret, version: valueRecord.version, lastUpdatedBy, @@ -196,13 +200,8 @@ export class EnvironmentVariablesPresenter { }); }) .sort((a, b) => a.key.localeCompare(b.key)), - environments: sortedEnvironments.map((environment) => ({ - id: environment.id, - type: environment.type, - isBranchableEnvironment: environment.isBranchableEnvironment, - branchName: environment.branchName, - })), - hasStaging: environments.some((environment) => environment.type === "STAGING"), + environments: sortedEnvironments, + hasStaging, // Vercel integration data vercelIntegration: vercelIntegration ? { diff --git a/apps/webapp/app/presenters/v3/ErrorGroupPresenter.server.ts b/apps/webapp/app/presenters/v3/ErrorGroupPresenter.server.ts index 5e9df362e4c..d2f6bbfcbe3 100644 --- a/apps/webapp/app/presenters/v3/ErrorGroupPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ErrorGroupPresenter.server.ts @@ -242,10 +242,15 @@ export class ErrorGroupPresenter extends BasePresenter { const sortedVersions = sortVersionsDescending([...versionSet]); + // Build the data for the graph + // For each time bucket, if a value exists for a version set the value (don't add zeros) const data = buckets.map((epoch) => { const point: Record = { date: new Date(epoch * 1000) }; for (const version of sortedVersions) { - point[version] = byBucketVersion.get(`${epoch}:${version}`) ?? 0; + const versionValue = byBucketVersion.get(`${epoch}:${version}`); + if (versionValue) { + point[version] = versionValue; + } } return point; }); diff --git a/apps/webapp/app/presenters/v3/ErrorsListPresenter.server.ts b/apps/webapp/app/presenters/v3/ErrorsListPresenter.server.ts index 13da4ff91f8..ea6e522dbd5 100644 --- a/apps/webapp/app/presenters/v3/ErrorsListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ErrorsListPresenter.server.ts @@ -13,7 +13,7 @@ import { type ErrorGroupStatus, type PrismaClientOrTransaction } from "@trigger. import { type Direction } from "~/components/ListPagination"; import { timeFilterFromTo } from "~/components/runs/v3/SharedFilters"; import { findDisplayableEnvironment } from "~/models/runtimeEnvironment.server"; -import { getAllTaskIdentifiers } from "~/models/task.server"; +import { getTaskIdentifiers } from "~/models/task.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import { BasePresenter } from "~/presenters/v3/basePresenter.server"; @@ -170,7 +170,7 @@ export class ErrorsListPresenter extends BasePresenter { (search !== undefined && search !== "") || (statuses !== undefined && statuses.length > 0); - const possibleTasksAsync = getAllTaskIdentifiers(this.replica, environmentId); + const possibleTasksAsync = getTaskIdentifiers(environmentId); // Pre-filter by status: since status lives in Postgres (ErrorGroupState) and the error // list comes from ClickHouse, we resolve inclusion/exclusion sets upfront so that diff --git a/apps/webapp/app/presenters/v3/LimitsPresenter.server.ts b/apps/webapp/app/presenters/v3/LimitsPresenter.server.ts index cca3522dc4e..192fbc87f4f 100644 --- a/apps/webapp/app/presenters/v3/LimitsPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/LimitsPresenter.server.ts @@ -375,7 +375,7 @@ export class LimitsPresenter extends BasePresenter { name: "Support level", description: "Type of support available for your plan", enabled: true, - value: supportLevel === "slack" ? "Slack" : "Community", + value: supportLevel === "slack" ? "Priority" : "Community", }, includedUsage: { name: "Included compute", diff --git a/apps/webapp/app/presenters/v3/LogsListPresenter.server.ts b/apps/webapp/app/presenters/v3/LogsListPresenter.server.ts index 8a3bf692b5b..517c586e4e7 100644 --- a/apps/webapp/app/presenters/v3/LogsListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/LogsListPresenter.server.ts @@ -7,7 +7,7 @@ import parseDuration from "parse-duration"; import { type Direction } from "~/components/ListPagination"; import { timeFilterFromTo, timeFilters } from "~/components/runs/v3/SharedFilters"; import { findDisplayableEnvironment } from "~/models/runtimeEnvironment.server"; -import { getAllTaskIdentifiers } from "~/models/task.server"; +import { getTaskIdentifiers } from "~/models/task.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import { kindToLevel, type LogLevel, LogLevelSchema } from "~/utils/logUtils"; import { BasePresenter } from "~/presenters/v3/basePresenter.server"; @@ -176,7 +176,7 @@ export class LogsListPresenter extends BasePresenter { (search !== undefined && search !== "") || !time.isDefault; - const possibleTasksAsync = getAllTaskIdentifiers(this.replica, environmentId); + const possibleTasksAsync = getTaskIdentifiers(environmentId); const bulkActionsAsync = this.replica.bulkActionGroup.findMany({ select: { @@ -386,12 +386,7 @@ export class LogsListPresenter extends BasePresenter { next: nextCursor, previous: undefined, // For now, only support forward pagination }, - possibleTasks: possibleTasks - .map((task) => ({ - slug: task.slug, - triggerSource: task.triggerSource, - })) - .sort((a, b) => a.slug.localeCompare(b.slug)), + possibleTasks, bulkActions: bulkActions.map((bulkAction) => ({ id: bulkAction.friendlyId, type: bulkAction.type, diff --git a/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts b/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts index f22c7ccf340..8d1b8c13883 100644 --- a/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts @@ -1,5 +1,6 @@ import { type ClickHouse } from "@internal/clickhouse"; import { MachinePresetName } from "@trigger.dev/core/v3"; +import { RunAnnotations } from "@trigger.dev/core/v3/schemas"; import { type PrismaClient, type PrismaClientOrTransaction, @@ -8,8 +9,9 @@ import { import { type Direction } from "~/components/ListPagination"; import { timeFilters } from "~/components/runs/v3/SharedFilters"; import { findDisplayableEnvironment } from "~/models/runtimeEnvironment.server"; -import { getAllTaskIdentifiers } from "~/models/task.server"; +import { getTaskIdentifiers } from "~/models/task.server"; import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; +import { baseWorkerQueue } from "~/runEngine/concerns/workerQueueSplit.server"; import { machinePresetFromRun } from "~/v3/machinePresets.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import { isCancellableRunStatus, isFinalRunStatus, isPendingRunStatus } from "~/v3/taskStatus"; @@ -32,8 +34,10 @@ export type RunListOptions = { batchId?: string; runId?: string[]; queues?: string[]; + regions?: string[]; machines?: MachinePresetName[]; errorId?: string; + sources?: string[]; //pagination direction?: Direction; cursor?: string; @@ -70,8 +74,10 @@ export class NextRunListPresenter { batchId, runId, queues, + regions, machines, errorId, + sources, from, to, direction = "forward", @@ -89,6 +95,7 @@ export class NextRunListPresenter { const hasStatusFilters = statuses && statuses.length > 0; const hasFilters = + (sources !== undefined && sources.length > 0) || (tasks !== undefined && tasks.length > 0) || (versions !== undefined && versions.length > 0) || hasStatusFilters || @@ -98,6 +105,7 @@ export class NextRunListPresenter { batchId !== undefined || (runId !== undefined && runId.length > 0) || (queues !== undefined && queues.length > 0) || + (regions !== undefined && regions.length > 0) || (machines !== undefined && machines.length > 0) || (errorId !== undefined && errorId !== "") || typeof isTest === "boolean" || @@ -105,7 +113,7 @@ export class NextRunListPresenter { !time.isDefault; //get all possible tasks - const possibleTasksAsync = getAllTaskIdentifiers(this.replica, environmentId); + const possibleTasksAsync = getTaskIdentifiers(environmentId); //get possible bulk actions const bulkActionsAsync = this.replica.bulkActionGroup.findMany({ @@ -184,8 +192,10 @@ export class NextRunListPresenter { runId, bulkId, queues, + regions, machines, errorId, + taskKinds: sources, page: { size: pageSize, cursor, @@ -250,17 +260,15 @@ export class NextRunListPresenter { name: run.queue.replace("task/", ""), type: run.queue.startsWith("task/") ? "task" : "custom", }, + region: run.workerQueue ? baseWorkerQueue(run.workerQueue) : undefined, + taskKind: RunAnnotations.safeParse(run.annotations).data?.taskKind ?? "STANDARD", }; }), pagination: { next: pagination.nextCursor ?? undefined, previous: pagination.previousCursor ?? undefined, }, - possibleTasks: possibleTasks - .map((task) => ({ slug: task.slug, triggerSource: task.triggerSource })) - .sort((a, b) => { - return a.slug.localeCompare(b.slug); - }), + possibleTasks, bulkActions: bulkActions.map((bulkAction) => ({ id: bulkAction.friendlyId, type: bulkAction.type, diff --git a/apps/webapp/app/presenters/v3/PlaygroundPresenter.server.ts b/apps/webapp/app/presenters/v3/PlaygroundPresenter.server.ts new file mode 100644 index 00000000000..656bc425cdf --- /dev/null +++ b/apps/webapp/app/presenters/v3/PlaygroundPresenter.server.ts @@ -0,0 +1,147 @@ +import type { RuntimeEnvironmentType, TaskRunStatus, TaskTriggerSource } from "@trigger.dev/database"; +import { $replica } from "~/db.server"; +import { findCurrentWorkerFromEnvironment } from "~/v3/models/workerDeployment.server"; +import { isFinalRunStatus } from "~/v3/taskStatus"; + +export type PlaygroundAgent = { + slug: string; + filePath: string; + triggerSource: TaskTriggerSource; + config: unknown; + payloadSchema: unknown; +}; + +export type PlaygroundConversation = { + id: string; + chatId: string; + title: string; + agentSlug: string; + runFriendlyId: string | null; + runStatus: TaskRunStatus | null; + clientData: unknown; + messages: unknown; + lastEventId: string | null; + isActive: boolean; + createdAt: Date; + updatedAt: Date; +}; + +export class PlaygroundPresenter { + async listAgents({ + environmentId, + environmentType, + }: { + environmentId: string; + environmentType: RuntimeEnvironmentType; + }): Promise { + const currentWorker = await findCurrentWorkerFromEnvironment( + { id: environmentId, type: environmentType }, + $replica + ); + + if (!currentWorker) return []; + + return $replica.backgroundWorkerTask.findMany({ + where: { + workerId: currentWorker.id, + triggerSource: "AGENT", + }, + select: { + slug: true, + filePath: true, + triggerSource: true, + config: true, + payloadSchema: true, + }, + orderBy: { slug: "asc" }, + }); + } + + async getAgent({ + environmentId, + environmentType, + agentSlug, + }: { + environmentId: string; + environmentType: RuntimeEnvironmentType; + agentSlug: string; + }): Promise { + const currentWorker = await findCurrentWorkerFromEnvironment( + { id: environmentId, type: environmentType }, + $replica + ); + + if (!currentWorker) return null; + + return $replica.backgroundWorkerTask.findFirst({ + where: { + workerId: currentWorker.id, + triggerSource: "AGENT", + slug: agentSlug, + }, + select: { + slug: true, + filePath: true, + triggerSource: true, + config: true, + payloadSchema: true, + }, + }); + } + + async getRecentConversations({ + environmentId, + agentSlug, + userId, + limit = 10, + }: { + environmentId: string; + agentSlug: string; + userId: string; + limit?: number; + }): Promise { + const conversations = await $replica.playgroundConversation.findMany({ + where: { + runtimeEnvironmentId: environmentId, + agentSlug, + userId, + }, + select: { + id: true, + chatId: true, + title: true, + agentSlug: true, + clientData: true, + messages: true, + lastEventId: true, + createdAt: true, + updatedAt: true, + run: { + select: { + friendlyId: true, + status: true, + }, + }, + }, + orderBy: { updatedAt: "desc" }, + take: limit, + }); + + return conversations.map((c) => ({ + id: c.id, + chatId: c.chatId, + title: c.title, + agentSlug: c.agentSlug, + runFriendlyId: c.run?.friendlyId ?? null, + runStatus: c.run?.status ?? null, + clientData: c.clientData, + messages: c.messages, + lastEventId: c.lastEventId, + isActive: c.run?.status ? !isFinalRunStatus(c.run.status) : false, + createdAt: c.createdAt, + updatedAt: c.updatedAt, + })); + } +} + +export const playgroundPresenter = new PlaygroundPresenter(); diff --git a/apps/webapp/app/presenters/v3/RegionsPresenter.server.ts b/apps/webapp/app/presenters/v3/RegionsPresenter.server.ts index 55bd30e33be..2dd5a448cb4 100644 --- a/apps/webapp/app/presenters/v3/RegionsPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RegionsPresenter.server.ts @@ -1,3 +1,4 @@ +import { type WorkloadType } from "@trigger.dev/database"; import { type Project } from "~/models/project.server"; import { type User } from "~/models/user.server"; import { FEATURE_FLAG } from "~/v3/featureFlags"; @@ -9,12 +10,14 @@ import { getCurrentPlan } from "~/services/platform.v3.server"; export type Region = { id: string; name: string; + masterQueue: string; description?: string; cloudProvider?: string; location?: string; staticIPs?: string | null; isDefault: boolean; isHidden: boolean; + workloadType: WorkloadType; }; export class RegionsPresenter extends BasePresenter { @@ -71,11 +74,13 @@ export class RegionsPresenter extends BasePresenter { select: { id: true, name: true, + masterQueue: true, description: true, cloudProvider: true, location: true, staticIPs: true, hidden: true, + workloadType: true, }, where: isAdmin ? undefined @@ -93,12 +98,14 @@ export class RegionsPresenter extends BasePresenter { const regions: Region[] = visibleRegions.map((region) => ({ id: region.id, name: region.name, + masterQueue: region.masterQueue, description: region.description ?? undefined, cloudProvider: region.cloudProvider ?? undefined, location: region.location ?? undefined, staticIPs: region.staticIPs ?? undefined, isDefault: region.id === defaultWorkerInstanceGroupId, isHidden: region.hidden, + workloadType: region.workloadType, })); if (project.defaultWorkerGroupId) { @@ -106,11 +113,13 @@ export class RegionsPresenter extends BasePresenter { select: { id: true, name: true, + masterQueue: true, description: true, cloudProvider: true, location: true, staticIPs: true, hidden: true, + workloadType: true, }, where: { id: project.defaultWorkerGroupId }, }); @@ -125,12 +134,14 @@ export class RegionsPresenter extends BasePresenter { regions.push({ id: defaultWorkerGroup.id, name: defaultWorkerGroup.name, + masterQueue: defaultWorkerGroup.masterQueue, description: defaultWorkerGroup.description ?? undefined, cloudProvider: defaultWorkerGroup.cloudProvider ?? undefined, location: defaultWorkerGroup.location ?? undefined, staticIPs: defaultWorkerGroup.staticIPs ?? undefined, isDefault: true, isHidden: defaultWorkerGroup.hidden, + workloadType: defaultWorkerGroup.workloadType, }); } } diff --git a/apps/webapp/app/presenters/v3/RunPresenter.server.ts b/apps/webapp/app/presenters/v3/RunPresenter.server.ts index 5e8dab2d0b6..365206ee75d 100644 --- a/apps/webapp/app/presenters/v3/RunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunPresenter.server.ts @@ -3,11 +3,11 @@ import { createTreeFromFlatItems, flattenTree } from "~/components/primitives/Tr import { prisma, type PrismaClient } from "~/db.server"; import { createTimelineSpanEventsFromSpanEvents } from "~/utils/timelineSpanEvents"; import { getUsername } from "~/utils/username"; -import { resolveEventRepositoryForStore } from "~/v3/eventRepository/index.server"; import { SpanSummary } from "~/v3/eventRepository/eventRepository.types"; import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; import { isFinalRunStatus } from "~/v3/taskStatus"; import { env } from "~/env.server"; +import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; type Result = Awaited>; export type Run = Result["run"]; @@ -20,6 +20,20 @@ export class RunEnvironmentMismatchError extends Error { } } +// Thrown by `call()` when the run isn't in PG. The route loader catches +// this and falls back to the mollifier buffer via `tryMollifiedRunFallback`. +// Using a typed error (rather than Prisma's `findFirstOrThrow` exception) +// keeps the buffered case off the PrismaClient error path — that path +// emits a `PrismaClient error` log every time it fires, which on the +// run-detail page polls becomes per-tick log spam and Sentry noise for +// any run that legitimately lives in the buffer. +export class RunNotInPgError extends Error { + constructor(public readonly runFriendlyId: string) { + super(`Run ${runFriendlyId} not in PG`); + this.name = "RunNotInPgError"; + } +} + export class RunPresenter { #prismaClient: PrismaClient; @@ -42,7 +56,13 @@ export class RunPresenter { showDeletedLogs: boolean; showDebug: boolean; }) { - const run = await this.#prismaClient.taskRun.findFirstOrThrow({ + // `findFirst` + explicit null check (not `findFirstOrThrow`) because + // a missing PG row is the *expected* path for buffered runs — the + // route catches `RunNotInPgError` and falls back to the synthesised + // buffer view. `findFirstOrThrow` would log a `PrismaClient error` + // every tick of the page poll, masking real DB issues with synthetic + // not-found noise. + const run = await this.#prismaClient.taskRun.findFirst({ select: { id: true, createdAt: true, @@ -106,6 +126,10 @@ export class RunPresenter { }, }); + if (!run) { + throw new RunNotInPgError(runFriendlyId); + } + if (environmentSlug !== run.runtimeEnvironment.slug) { throw new RunEnvironmentMismatchError( `Run ${runFriendlyId} is not in environment ${environmentSlug}` @@ -145,10 +169,13 @@ export class RunPresenter { }; } - const eventRepository = resolveEventRepositoryForStore(run.taskEventStore); + const repository = await getEventRepositoryForStore( + run.taskEventStore, + run.runtimeEnvironment.organizationId + ); // get the events - let traceSummary = await eventRepository.getTraceSummary( + let traceSummary = await repository.getTraceSummary( getTaskEventStoreTableForRun(run), run.runtimeEnvironment.id, run.traceId, @@ -228,12 +255,16 @@ export class RunPresenter { linkedRunIdBySpanId[n.id] = n.runId; } + // Raw span events are only needed server-side (to derive timelineEvents); + // keep them out of the serialized loader payload. + const { events: spanEvents, ...data } = n.data; + return { ...n, data: { - ...n.data, + ...data, timelineEvents: createTimelineSpanEventsFromSpanEvents( - n.data.events, + spanEvents, user?.admin ?? false, treeRootStartTimeMs ), @@ -272,7 +303,7 @@ export class RunPresenter { overridesBySpanId: traceSummary.overridesBySpanId, linkedRunIdBySpanId, }, - maximumLiveReloadingSetting: eventRepository.maximumLiveReloadingSetting, + maximumLiveReloadingSetting: repository.maximumLiveReloadingSetting, }; } } diff --git a/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts b/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts index 1dd4edc6233..e0e88e4dd02 100644 --- a/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts @@ -1,8 +1,11 @@ import { type PrismaClient, prisma } from "~/db.server"; import { logger } from "~/services/logger.server"; +import { requireUserId } from "~/services/session.server"; import { singleton } from "~/utils/singleton"; -import { createSSELoader, SendFunction } from "~/utils/sse"; +import { ABORT_REASON_SEND_ERROR, createSSELoader, SendFunction } from "~/utils/sse"; import { throttle } from "~/utils/throttle"; +import { getMollifierBuffer } from "~/v3/mollifier/mollifierBuffer.server"; +import { deserialiseMollifierSnapshot } from "~/v3/mollifier/mollifierSnapshot.server"; import { tracePubSub } from "~/v3/services/tracePubSub.server"; const PING_INTERVAL = 5_000; @@ -28,26 +31,79 @@ export class RunStreamPresenter { throw new Response("Missing runParam", { status: 400 }); } + const userId = await requireUserId(context.request); + + // Scope the lookup to organizations the requesting user is a member + // of, matching RunPresenter's run lookup. Unauthorized and missing + // runs are indistinguishable (both 404). const run = await prismaClient.taskRun.findFirst({ where: { friendlyId: runFriendlyId, + project: { + organization: { + members: { + some: { + userId, + }, + }, + }, + }, }, select: { traceId: true, }, }); - if (!run) { + // Fall back to the mollifier buffer when the run isn't in PG yet. + // The buffered run has no execution events to stream, but we still + // attach a trace-pubsub subscription using the snapshot's traceId + // so that the moment the drainer materialises the row and execution + // begins, those events flow to this open SSE connection. Closing + // with 404 would force the dashboard to keep retrying. + let traceId: string | null = run?.traceId ?? null; + if (!traceId) { + const buffer = getMollifierBuffer(); + if (buffer) { + try { + const entry = await buffer.getEntry(runFriendlyId); + // Same membership scoping as the PG lookup above — the buffer + // entry carries the owning org's id. + const isMember = entry + ? (await prismaClient.orgMember.findFirst({ + where: { organizationId: entry.orgId, userId }, + select: { id: true }, + })) !== null + : false; + if (entry && isMember) { + // Go through the webapp wrapper so this read-side module + // shares a single deserialisation path with readFallback — + // see the contract comment in syntheticRedirectInfo.server.ts. + const snapshot = deserialiseMollifierSnapshot(entry.payload); + if (typeof snapshot.traceId === "string") { + traceId = snapshot.traceId; + } + } + } catch (err) { + logger.warn("RunStreamPresenter buffer fallback failed", { + runFriendlyId, + err: err instanceof Error ? err.message : String(err), + }); + } + } + } + + if (!traceId) { throw new Response("Not found", { status: 404 }); } + const resolvedRun = { traceId }; logger.info("RunStreamPresenter.start", { runFriendlyId, - traceId: run.traceId, + traceId: resolvedRun.traceId, }); // Subscribe to trace updates - const { unsubscribe, eventEmitter } = await tracePubSub.subscribeToTrace(run.traceId); + const { unsubscribe, eventEmitter } = await tracePubSub.subscribeToTrace(resolvedRun.traceId); // Only send max every 1 second const throttledSend = throttle( @@ -66,8 +122,10 @@ export class RunStreamPresenter { }); } } - // Abort the stream on send error - context.controller.abort("Send error"); + // Abort the stream on send error. Uses a stackless string sentinel + // from sse.ts — a no-arg abort() would create a DOMException with a + // stack trace, which is unnecessary retention on the signal.reason. + context.controller.abort(ABORT_REASON_SEND_ERROR); } }, 1000 @@ -103,7 +161,7 @@ export class RunStreamPresenter { cleanup: () => { logger.info("RunStreamPresenter.cleanup", { runFriendlyId, - traceId: run.traceId, + traceId: resolvedRun.traceId, }); // Remove message listener @@ -117,13 +175,13 @@ export class RunStreamPresenter { .then(() => { logger.info("RunStreamPresenter.cleanup.unsubscribe succeeded", { runFriendlyId, - traceId: run.traceId, + traceId: resolvedRun.traceId, }); }) .catch((error) => { logger.error("RunStreamPresenter.cleanup.unsubscribe failed", { runFriendlyId, - traceId: run.traceId, + traceId: resolvedRun.traceId, error: { name: error.name, message: error.message, diff --git a/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts b/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts index e9de368eceb..c4b524ec329 100644 --- a/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts @@ -1,6 +1,6 @@ import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; import { BasePresenter } from "./basePresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { type PrismaClient } from "@trigger.dev/database"; import { timeFilters } from "~/components/runs/v3/SharedFilters"; @@ -37,8 +37,9 @@ export class RunTagListPresenter extends BasePresenter { }: TagListOptions) { const hasFilters = Boolean(name?.trim()); + const clickhouse = await clickhouseFactory.getClickhouseForOrganization(organizationId, "standard"); const runsRepository = new RunsRepository({ - clickhouse: clickhouseClient, + clickhouse, prisma: this._replica as PrismaClient, }); diff --git a/apps/webapp/app/presenters/v3/ScheduleListPresenter.server.ts b/apps/webapp/app/presenters/v3/ScheduleListPresenter.server.ts index 053414dcfc7..19812b0a548 100644 --- a/apps/webapp/app/presenters/v3/ScheduleListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ScheduleListPresenter.server.ts @@ -1,11 +1,15 @@ import { type RuntimeEnvironmentType, type ScheduleType } from "@trigger.dev/database"; import { type ScheduleListFilters } from "~/components/runs/v3/ScheduleFilters"; import { displayableEnvironment } from "~/models/runtimeEnvironment.server"; +import { getTaskIdentifiers } from "~/models/task.server"; import { getLimit } from "~/services/platform.v3.server"; import { findCurrentWorkerFromEnvironment } from "~/v3/models/workerDeployment.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import { CheckScheduleService } from "~/v3/services/checkSchedule.server"; -import { calculateNextScheduledTimestampFromNow } from "~/v3/utils/calculateNextSchedule.server"; +import { + calculateNextScheduledTimestampFromNow, + previousScheduledTimestamp, +} from "~/v3/utils/calculateNextSchedule.server"; import { BasePresenter } from "./basePresenter.server"; type ScheduleListOptions = { @@ -123,14 +127,10 @@ export class ScheduleListPresenter extends BasePresenter { } //get all possible scheduled tasks - const possibleTasks = await this._replica.backgroundWorkerTask.findMany({ - where: { - workerId: latestWorker.id, - projectId: project.id, - runtimeEnvironmentId: environmentId, - triggerSource: "SCHEDULED", - }, - }); + const allIdentifiers = await getTaskIdentifiers(environmentId); + const possibleTasks = allIdentifiers + .filter((t) => t.triggerSource === "SCHEDULED" && t.isInLatestDeployment) + .map((t) => ({ slug: t.slug })); //do this here to protect against SQL injection search = search && search !== "" ? `%${search}%` : undefined; @@ -196,8 +196,8 @@ export class ScheduleListPresenter extends BasePresenter { }, }, active: true, - lastRunTriggeredAt: true, createdAt: true, + updatedAt: true, }, where: { projectId: project.id, @@ -247,6 +247,29 @@ export class ScheduleListPresenter extends BasePresenter { }); const schedules: ScheduleListItem[] = rawSchedules.map((schedule) => { + // Approximate "last run" from the cron's previous slot. Skip inactive + // schedules — the cron's previous slot reflects what *would* have + // fired, but a deactivated schedule didn't actually fire there. Skip + // when the cron's previous slot predates `updatedAt`: any config + // change (cron edited, timezone changed, deactivate/reactivate) + // bumps updatedAt, and a slot from before the most recent change + // didn't fire under the current configuration. cron-parser throws + // on malformed expressions, so degrade to undefined per-row rather + // than failing the whole list. UI is best-effort; the runs page is + // the source of truth. + let lastRun: Date | undefined; + if (schedule.active) { + try { + const cronPrev = previousScheduledTimestamp( + schedule.generatorExpression, + schedule.timezone + ); + lastRun = cronPrev.getTime() > schedule.updatedAt.getTime() ? cronPrev : undefined; + } catch { + lastRun = undefined; + } + } + return { id: schedule.id, type: schedule.type, @@ -259,7 +282,7 @@ export class ScheduleListPresenter extends BasePresenter { timezone: schedule.timezone, active: schedule.active, externalId: schedule.externalId, - lastRun: schedule.lastRunTriggeredAt ?? undefined, + lastRun, nextRun: calculateNextScheduledTimestampFromNow( schedule.generatorExpression, schedule.timezone @@ -285,7 +308,7 @@ export class ScheduleListPresenter extends BasePresenter { totalPages: Math.ceil(totalCount / pageSize), totalCount: totalCount, schedules, - possibleTasks: possibleTasks.map((task) => task.slug).sort((a, b) => a.localeCompare(b)), + possibleTasks: possibleTasks.map((task) => task.slug), hasFilters, limits: { used: schedulesCount, diff --git a/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts b/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts new file mode 100644 index 00000000000..df68569c85d --- /dev/null +++ b/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts @@ -0,0 +1,227 @@ +import { type Span } from "@opentelemetry/api"; +import { type ClickHouse } from "@internal/clickhouse"; +import { type PrismaClient, type PrismaClientOrTransaction } from "@trigger.dev/database"; +import { type Direction } from "~/components/ListPagination"; +import { timeFilters } from "~/components/runs/v3/SharedFilters"; +import { findDisplayableEnvironment } from "~/models/runtimeEnvironment.server"; +import { + type SessionStatus, + SessionsRepository, +} from "~/services/sessionsRepository/sessionsRepository.server"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; +import { startActiveSpan } from "~/v3/tracer.server"; + +export type SessionListOptions = { + userId?: string; + projectId: string; + // filters + types?: string[]; + taskIdentifiers?: string[]; + externalId?: string; + tags?: string[]; + statuses?: SessionStatus[]; + period?: string; + from?: number; + to?: number; + // pagination + direction?: Direction; + cursor?: string; + pageSize?: number; +}; + +const DEFAULT_PAGE_SIZE = 25; + +export type SessionList = Awaited>; +export type SessionListItem = SessionList["sessions"][0]; +export type SessionListAppliedFilters = SessionList["filters"]; + +export class SessionListPresenter { + constructor( + private readonly replica: PrismaClientOrTransaction, + private readonly clickhouse: ClickHouse + ) {} + + public async call( + organizationId: string, + environmentId: string, + options: SessionListOptions + ) { + return startActiveSpan( + "SessionListPresenter.call", + (span) => this.#call(organizationId, environmentId, options, span), + { + attributes: { + organizationId, + environmentId, + projectId: options.projectId, + }, + } + ); + } + + async #call( + organizationId: string, + environmentId: string, + { + userId, + projectId, + types, + taskIdentifiers, + externalId, + tags, + statuses, + period, + from, + to, + direction = "forward", + cursor, + pageSize = DEFAULT_PAGE_SIZE, + }: SessionListOptions, + rootSpan: Span + ) { + const time = timeFilters({ period, from, to }); + + const hasFilters = + (types !== undefined && types.length > 0) || + (taskIdentifiers !== undefined && taskIdentifiers.length > 0) || + (externalId !== undefined && externalId !== "") || + (tags !== undefined && tags.length > 0) || + (statuses !== undefined && statuses.length > 0) || + !time.isDefault; + + rootSpan.setAttribute("filters.hasFilters", hasFilters); + rootSpan.setAttribute("page.size", pageSize); + if (cursor) rootSpan.setAttribute("page.cursor", cursor); + + const displayableEnvironment = await startActiveSpan( + "SessionListPresenter.findDisplayableEnvironment", + () => findDisplayableEnvironment(environmentId, userId) + ); + if (!displayableEnvironment) { + throw new ServiceValidationError("No environment found"); + } + + const sessionsRepository = new SessionsRepository({ + clickhouse: this.clickhouse, + prisma: this.replica as PrismaClient, + }); + + function clampToNow(date: Date): Date { + const now = new Date(); + return date > now ? now : date; + } + + const { sessions, pagination } = await sessionsRepository.listSessions({ + organizationId, + projectId, + environmentId, + types, + taskIdentifiers, + externalId, + tags, + statuses, + period, + from: time.from ? time.from.getTime() : undefined, + to: time.to ? clampToNow(time.to).getTime() : undefined, + page: { + size: pageSize, + cursor, + direction, + }, + }); + + rootSpan.setAttribute("page.count", sessions.length); + + let hasAnySessions = sessions.length > 0; + if (!hasAnySessions) { + const firstSession = await startActiveSpan( + "SessionListPresenter.hasAnySessions", + () => + this.replica.session.findFirst({ + where: { runtimeEnvironmentId: environmentId }, + select: { id: true }, + }) + ); + if (firstSession) { + hasAnySessions = true; + } + } + + // Resolve current-run friendlyIds in one query so each row can link to + // its live run. Status is intentionally not joined yet — that lives in + // ClickHouse and would mean a second query per page; the link itself + // is the value most viewers want first. + const currentRunIds = sessions + .map((s) => s.currentRunId) + .filter((id): id is string => Boolean(id)); + + const currentRuns = await startActiveSpan( + "SessionListPresenter.findCurrentRuns", + async (span) => { + span.setAttribute("currentRunIds.count", currentRunIds.length); + // Scope by projectId + runtimeEnvironmentId — Session.currentRunId + // is a plain string column without an FK, so a stale or corrupted + // pointer could surface another tenant's run. The list query above + // is already env-scoped; the run lookup needs the same fence. + return currentRunIds.length > 0 + ? this.replica.taskRun.findMany({ + where: { + id: { in: currentRunIds }, + projectId, + runtimeEnvironmentId: environmentId, + }, + select: { id: true, friendlyId: true }, + }) + : []; + } + ); + const runById = new Map(currentRuns.map((r) => [r.id, r] as const)); + + const now = Date.now(); + + return { + sessions: sessions.map((session) => { + const status: SessionStatus = + session.closedAt != null + ? "CLOSED" + : session.expiresAt != null && session.expiresAt.getTime() < now + ? "EXPIRED" + : "ACTIVE"; + + const currentRun = session.currentRunId ? runById.get(session.currentRunId) : undefined; + + return { + id: session.id, + friendlyId: session.friendlyId, + externalId: session.externalId, + type: session.type, + taskIdentifier: session.taskIdentifier, + tags: session.tags ? [...session.tags].sort((a, b) => a.localeCompare(b)) : [], + status, + closedAt: session.closedAt ? session.closedAt.toISOString() : undefined, + closedReason: session.closedReason ?? undefined, + expiresAt: session.expiresAt ? session.expiresAt.toISOString() : undefined, + createdAt: session.createdAt.toISOString(), + updatedAt: session.updatedAt.toISOString(), + environment: displayableEnvironment, + currentRunFriendlyId: currentRun?.friendlyId, + }; + }), + pagination: { + next: pagination.nextCursor ?? undefined, + previous: pagination.previousCursor ?? undefined, + }, + filters: { + types: types ?? [], + taskIdentifiers: taskIdentifiers ?? [], + externalId, + tags: tags ?? [], + statuses: statuses ?? [], + from: time.from, + to: time.to, + }, + hasFilters, + hasAnySessions, + }; + } +} diff --git a/apps/webapp/app/presenters/v3/SessionPresenter.server.ts b/apps/webapp/app/presenters/v3/SessionPresenter.server.ts new file mode 100644 index 00000000000..c63f9e39a2a --- /dev/null +++ b/apps/webapp/app/presenters/v3/SessionPresenter.server.ts @@ -0,0 +1,206 @@ +import { type Span } from "@opentelemetry/api"; +import { type PrismaClientOrTransaction } from "@trigger.dev/database"; +import { env } from "~/env.server"; +import { findDisplayableEnvironment } from "~/models/runtimeEnvironment.server"; +import { chatSnapshotStorageKey } from "~/services/realtime/chatSnapshot.server"; +import { resolveSessionByIdOrExternalId } from "~/services/realtime/sessions.server"; +import { logger } from "~/services/logger.server"; +import { generatePresignedUrl } from "~/v3/objectStore.server"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; +import { startActiveSpan } from "~/v3/tracer.server"; + +export type SessionDetail = NonNullable>>; + +export class SessionPresenter { + constructor(private readonly replica: PrismaClientOrTransaction) {} + + public async call(args: { + userId: string; + environmentId: string; + sessionParam: string; + projectExternalRef: string; + environmentSlug: string; + }) { + return startActiveSpan( + "SessionPresenter.call", + (span) => this.#call(args, span), + { + attributes: { + environmentId: args.environmentId, + sessionParam: args.sessionParam, + }, + } + ); + } + + async #call( + { + userId, + environmentId, + sessionParam, + projectExternalRef, + environmentSlug, + }: { + userId: string; + environmentId: string; + sessionParam: string; + projectExternalRef: string; + environmentSlug: string; + }, + rootSpan: Span + ) { + const session = await startActiveSpan( + "SessionPresenter.resolveSession", + () => resolveSessionByIdOrExternalId(this.replica, environmentId, sessionParam) + ); + if (!session) { + rootSpan.setAttribute("session.found", false); + return null; + } + rootSpan.setAttribute("session.found", true); + rootSpan.setAttribute("session.id", session.id); + + const displayableEnvironment = await startActiveSpan( + "SessionPresenter.findDisplayableEnvironment", + () => findDisplayableEnvironment(environmentId, userId) + ); + if (!displayableEnvironment) { + throw new ServiceValidationError("No environment found"); + } + + // Run history is append-only; latest first matches the runs list. + // 50 covers the vast majority of sessions; longer histories link out + // to the runs page via tag filter. + const sessionRuns = await startActiveSpan( + "SessionPresenter.findSessionRuns", + async (span) => { + const rows = await this.replica.sessionRun.findMany({ + where: { sessionId: session.id }, + orderBy: { triggeredAt: "desc" }, + take: 50, + select: { + id: true, + runId: true, + reason: true, + triggeredAt: true, + }, + }); + span.setAttribute("sessionRuns.count", rows.length); + return rows; + } + ); + + const runIds = sessionRuns.map((r) => r.runId); + const runs = await startActiveSpan( + "SessionPresenter.findRuns", + async (span) => { + span.setAttribute("runIds.count", runIds.length); + return runIds.length > 0 + ? this.replica.taskRun.findMany({ + where: { id: { in: runIds } }, + select: { id: true, friendlyId: true, status: true }, + }) + : []; + } + ); + const runsById = new Map(runs.map((r) => [r.id, r] as const)); + + const currentRun = session.currentRunId + ? runsById.get(session.currentRunId) ?? + (await startActiveSpan( + "SessionPresenter.findCurrentRunFallback", + () => + this.replica.taskRun.findFirst({ + where: { id: session.currentRunId! }, + select: { id: true, friendlyId: true, status: true }, + }) + )) + : null; + + // The dashboard SSE route is cookie-authed, so `publicAccessToken` is + // unused — kept here to match the existing `AgentViewAuth` shape. + const addressingKey = session.externalId ?? session.friendlyId; + + // Presign a GET URL for the agent's S3 snapshot blob. The browser + // fetches it directly, parses + validates, and seeds the + // TriggerChatTransport with the full history + lastEventId before + // opening the SSE. Presign succeeds regardless of whether the blob + // exists; the frontend handles 404 gracefully. + // + // Snapshots are only written when no `hydrateMessages` hook is + // registered — sessions that use `hydrateMessages` will 404 here + // and the dashboard falls back to seq=0 SSE (which, post-trim, + // shows only the most recent turn — accepted, those customers + // have their own DB-backed dashboards). + // Resolve the snapshot key via the SAME helper the SDK write + boot read + // use (`chatSnapshotStorageKey`), so the dashboard GET hits the exact + // object (and object store) the snapshot was written to. Recomputing a + // bare key here was the bug: an unqualified key reads the base store while + // the write applied OBJECT_STORE_DEFAULT_PROTOCOL, so they could diverge. + let snapshotPresignedUrl: string | undefined; + try { + const signed = await startActiveSpan( + "SessionPresenter.presignSnapshot", + async () => + generatePresignedUrl( + projectExternalRef, + environmentSlug, + chatSnapshotStorageKey(session), + "GET" + ) + ); + if (signed.success) { + snapshotPresignedUrl = signed.url; + } else { + logger.warn("SessionPresenter: snapshot presign failed", { + sessionId: session.id, + error: signed.error, + }); + } + } catch (error) { + logger.warn("SessionPresenter: snapshot presign threw", { + sessionId: session.id, + error: error instanceof Error ? error.message : String(error), + }); + } + + return { + id: session.id, + friendlyId: session.friendlyId, + externalId: session.externalId, + type: session.type, + taskIdentifier: session.taskIdentifier, + tags: session.tags ? [...session.tags].sort((a, b) => a.localeCompare(b)) : [], + metadata: session.metadata, + triggerConfig: session.triggerConfig, + streamBasinName: session.streamBasinName, + closedAt: session.closedAt ? session.closedAt.toISOString() : undefined, + closedReason: session.closedReason ?? undefined, + expiresAt: session.expiresAt ? session.expiresAt.toISOString() : undefined, + createdAt: session.createdAt.toISOString(), + updatedAt: session.updatedAt.toISOString(), + environment: displayableEnvironment, + currentRun: currentRun + ? { friendlyId: currentRun.friendlyId, status: currentRun.status } + : null, + runs: sessionRuns.map((r) => { + const run = runsById.get(r.runId); + return { + id: r.id, + reason: r.reason, + triggeredAt: r.triggeredAt.toISOString(), + run: run + ? { friendlyId: run.friendlyId, status: run.status } + : null, + }; + }), + agentView: { + publicAccessToken: "", + apiOrigin: env.API_ORIGIN || env.LOGIN_ORIGIN, + sessionId: addressingKey, + initialMessages: [], + snapshotPresignedUrl, + }, + }; + } +} diff --git a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts index 0ea9b37ab7f..bd0ac5c540a 100644 --- a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts @@ -1,18 +1,21 @@ import { type MachinePreset, prettyPrintPacket, + RunAnnotations, SemanticInternalAttributes, type TaskRunContext, TaskRunError, TriggerTraceContext, type V3TaskRunContext, } from "@trigger.dev/core/v3"; + import { AttemptId, getMaxDuration, parseTraceparent } from "@trigger.dev/core/v3/isomorphic"; import { extractIdempotencyKeyScope, getUserProvidedIdempotencyKey, } from "@trigger.dev/core/v3/serverOnly"; import { RUNNING_STATUSES } from "~/components/runs/v3/TaskRunStatus"; +import { baseWorkerQueue } from "~/runEngine/concerns/workerQueueSplit.server"; import { logger } from "~/services/logger.server"; import { rehydrateAttribute } from "~/v3/eventRepository/eventRepository.server"; import { machinePresetFromRun } from "~/v3/machinePresets.server"; @@ -21,7 +24,6 @@ import { isFailedRunStatus, isFinalRunStatus } from "~/v3/taskStatus"; import { BasePresenter } from "./basePresenter.server"; import { WaitpointPresenter } from "./WaitpointPresenter.server"; import { engine } from "~/v3/runEngine.server"; -import { resolveEventRepositoryForStore } from "~/v3/eventRepository/index.server"; import { IEventRepository, SpanDetail } from "~/v3/eventRepository/eventRepository.types"; import { safeJsonParse } from "~/utils/json"; import { @@ -30,6 +32,9 @@ import { extractAIToolCallData, extractAIEmbedData, } from "~/components/runs/v3/ai"; +import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; +import { findRunByIdWithMollifierFallback } from "~/v3/mollifier/readFallback.server"; +import { buildSyntheticSpanRun } from "~/v3/mollifier/syntheticSpanRun.server"; export type PromptSpanData = { slug: string; @@ -42,9 +47,7 @@ export type PromptSpanData = { config?: string; }; -function extractPromptSpanData( - properties: Record -): PromptSpanData | undefined { +function extractPromptSpanData(properties: Record): PromptSpanData | undefined { // Properties come as an unflattened nested object from ClickHouse, // e.g. { prompt: { slug: "...", version: 3, ... } } const prompt = properties.prompt; @@ -72,9 +75,21 @@ function extractPromptSpanData( }; } +// SpanRun is grounded in the PG-path `getRun` method rather than +// inferred from `call`'s return type. The buffered branch of `call` +// routes through `buildSyntheticSpanRun`, and that helper is annotated +// `Promise` — if SpanRun were derived from `call` it would +// close a loop TS no longer tolerates ("Type alias 'Result' circularly +// references itself"). `getRun` is the canonical source for the shape +// (the synthetic helper just rebuilds the same shape from a buffer +// snapshot), and it doesn't recurse, so grounding here breaks the +// cycle while keeping Span available off `call` (Span's path through +// `#getSpan` has no synthetic indirection). +export type SpanRun = NonNullable< + Awaited["getRun"]>> +>; type Result = Awaited>; export type Span = NonNullable["span"]>; -export type SpanRun = NonNullable["run"]>; type FindRunResult = NonNullable< Awaited["findRun"]>> >; @@ -84,12 +99,18 @@ export class SpanPresenter extends BasePresenter { public async call({ userId, projectSlug, + envSlug, spanId, runFriendlyId, linkedRunId, }: { userId: string; projectSlug: string; + // Optional for backwards compatibility, required for the mollifier + // buffer fallback when the parent run isn't yet in PG — we need to + // resolve the env id to satisfy `findRunByIdWithMollifierFallback`'s + // auth check. + envSlug?: string; spanId: string; runFriendlyId: string; linkedRunId?: string; @@ -127,19 +148,47 @@ export class SpanPresenter extends BasePresenter { }); if (!parentRun) { - return; + // PG miss → fall back to the mollifier buffer. Without this the + // right-side span detail panel on the run-detail page never + // resolves for buffered runs: `call()` returns undefined, the + // resource route redirects with an "Event not found" toast, the + // run-detail page reloads, the toast fires again — a perpetual + // spin until the drainer materialises the row. Synthesise a + // SpanRun straight from the buffer snapshot, reusing + // `buildSyntheticSpanRun` (the same helper the run-detail + // loader's header fallback already uses). + if (!envSlug) return; + const envRow = await this._replica.runtimeEnvironment.findFirst({ + where: { project: { id: project.id }, slug: envSlug }, + select: { id: true, slug: true, type: true, organizationId: true }, + }); + if (!envRow) return; + const buffered = await findRunByIdWithMollifierFallback({ + runId: runFriendlyId, + environmentId: envRow.id, + organizationId: envRow.organizationId, + }); + if (!buffered) return; + const synth = await buildSyntheticSpanRun({ + run: buffered, + environment: { id: envRow.id, slug: envRow.slug, type: envRow.type }, + }); + return { type: "run" as const, run: synth }; } const { traceId } = parentRun; - const eventRepository = resolveEventRepositoryForStore(parentRun.taskEventStore); + const repository = await getEventRepositoryForStore( + parentRun.taskEventStore, + project.organizationId + ); const eventStore = getTaskEventStoreTableForRun(parentRun); const run = await this.getRun({ eventStore, traceId, - eventRepository, + eventRepository: repository, spanId, linkedRunId, createdAt: parentRun.createdAt, @@ -161,7 +210,7 @@ export class SpanPresenter extends BasePresenter { projectId: parentRun.projectId, createdAt: parentRun.createdAt, completedAt: parentRun.completedAt, - eventRepository, + eventRepository: repository, }); if (!span) { @@ -242,6 +291,9 @@ export class SpanPresenter extends BasePresenter { const externalTraceId = this.#getExternalTraceId(run.traceContext); + const taskKind = RunAnnotations.safeParse(run.annotations).data?.taskKind; + const isAgentRun = taskKind === "AGENT"; + let region: { name: string; location: string | null } | null = null; if (run.runtimeEnvironment.type !== "DEVELOPMENT" && run.engine !== "V1") { @@ -251,13 +303,55 @@ export class SpanPresenter extends BasePresenter { location: true, }, where: { - masterQueue: run.workerQueue, + masterQueue: baseWorkerQueue(run.workerQueue), }, }); region = workerGroup ?? null; } + // Only AGENT-tagged runs (chat.agent and friends) can be session-bound, + // so skip the SessionRun lookup for the much larger set of standard runs. + // Lookup is by the unique `runId` index, but the cheapest query is the + // one we don't run. + const sessionRun = isAgentRun + ? await this._replica.sessionRun.findFirst({ + where: { runId: run.id }, + select: { + reason: true, + triggeredAt: true, + session: { + select: { + friendlyId: true, + externalId: true, + type: true, + taskIdentifier: true, + closedAt: true, + expiresAt: true, + }, + }, + }, + }) + : null; + + const session = sessionRun + ? { + friendlyId: sessionRun.session.friendlyId, + externalId: sessionRun.session.externalId, + type: sessionRun.session.type, + taskIdentifier: sessionRun.session.taskIdentifier, + status: + sessionRun.session.closedAt != null + ? ("CLOSED" as const) + : sessionRun.session.expiresAt != null && + sessionRun.session.expiresAt.getTime() < Date.now() + ? ("EXPIRED" as const) + : ("ACTIVE" as const), + reason: sessionRun.reason, + triggeredAt: sessionRun.triggeredAt, + } + : undefined; + return { id: run.id, friendlyId: run.friendlyId, @@ -299,6 +393,7 @@ export class SpanPresenter extends BasePresenter { isFinished, isRunning: RUNNING_STATUSES.includes(run.status), isError: isFailedRunStatus(run.status), + isAgentRun, payload, payloadType: run.payloadType, output, @@ -317,12 +412,14 @@ export class SpanPresenter extends BasePresenter { metadata, maxDurationInSeconds: getMaxDuration(run.maxDurationInSeconds), batch: run.batch ? { friendlyId: run.batch.friendlyId } : undefined, + session, engine: run.engine, region, workerQueue: run.workerQueue, traceId: run.traceId, spanId: run.spanId, isCached: !!linkedRunId, + isBuffered: false, machinePreset: machine?.name, taskEventStore: run.taskEventStore, externalTraceId, @@ -457,6 +554,7 @@ export class SpanPresenter extends BasePresenter { payloadType: true, metadata: true, metadataType: true, + annotations: true, maxAttempts: true, project: { include: { @@ -592,10 +690,7 @@ export class SpanPresenter extends BasePresenter { triggeredRuns, aiData: span.properties && typeof span.properties === "object" - ? extractAISpanData( - span.properties as Record, - span.duration / 1_000_000 - ) + ? extractAISpanData(span.properties as Record, span.duration / 1_000_000) : undefined, }; @@ -739,10 +834,7 @@ export class SpanPresenter extends BasePresenter { "ai.streamObject", ]; - if ( - typeof span.message === "string" && - AI_SUMMARY_MESSAGES.includes(span.message) - ) { + if (typeof span.message === "string" && AI_SUMMARY_MESSAGES.includes(span.message)) { const aiSummaryData = extractAISummarySpanData( span.properties as Record, span.duration / 1_000_000 @@ -899,6 +991,7 @@ export class SpanPresenter extends BasePresenter { createdAt: run.createdAt, tags: run.runTags, isTest: run.isTest, + isReplay: !!run.replayedFromTaskRunFriendlyId, idempotencyKey: getUserProvidedIdempotencyKey(run) ?? undefined, startedAt: run.startedAt ?? run.createdAt, durationMs: run.usageDurationMs, diff --git a/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts b/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts index f1635f23375..5d1d4c45d45 100644 --- a/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts @@ -4,7 +4,7 @@ import { type TaskTriggerSource, } from "@trigger.dev/database"; import { $replica } from "~/db.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { type AverageDurations, ClickHouseEnvironmentMetricsRepository, @@ -25,10 +25,7 @@ export type TaskListItem = { export type TaskActivity = DailyTaskActivity[string]; export class TaskListPresenter { - constructor( - private readonly environmentMetricsRepository: EnvironmentMetricsRepository, - private readonly _replica: PrismaClientOrTransaction - ) {} + constructor(private readonly _replica: PrismaClientOrTransaction) {} public async call({ organizationId, @@ -61,6 +58,7 @@ export class TaskListPresenter { const tasks = await this._replica.backgroundWorkerTask.findMany({ where: { workerId: currentWorker.id, + triggerSource: { not: "AGENT" }, }, select: { id: true, @@ -76,9 +74,15 @@ export class TaskListPresenter { const slugs = tasks.map((t) => t.slug); + // Create org-specific environment metrics repository + const clickhouse = await clickhouseFactory.getClickhouseForOrganization(organizationId, "standard"); + const environmentMetricsRepository = new ClickHouseEnvironmentMetricsRepository({ + clickhouse, + }); + // IMPORTANT: Don't await these, we want to return the promises // so we can defer the loading of the data - const activity = this.environmentMetricsRepository.getDailyTaskActivity({ + const activity = environmentMetricsRepository.getDailyTaskActivity({ organizationId, projectId, environmentId, @@ -86,7 +90,7 @@ export class TaskListPresenter { tasks: slugs, }); - const runningStats = this.environmentMetricsRepository.getCurrentRunningStats({ + const runningStats = environmentMetricsRepository.getCurrentRunningStats({ organizationId, projectId, environmentId, @@ -94,7 +98,7 @@ export class TaskListPresenter { tasks: slugs, }); - const durations = this.environmentMetricsRepository.getAverageDurations({ + const durations = environmentMetricsRepository.getAverageDurations({ organizationId, projectId, environmentId, @@ -109,9 +113,5 @@ export class TaskListPresenter { export const taskListPresenter = singleton("taskListPresenter", setupTaskListPresenter); function setupTaskListPresenter() { - const environmentMetricsRepository = new ClickHouseEnvironmentMetricsRepository({ - clickhouse: clickhouseClient, - }); - - return new TaskListPresenter(environmentMetricsRepository, $replica); + return new TaskListPresenter($replica); } diff --git a/apps/webapp/app/presenters/v3/TasksStreamPresenter.server.ts b/apps/webapp/app/presenters/v3/TasksStreamPresenter.server.ts index d690b3d083f..17a5bda620a 100644 --- a/apps/webapp/app/presenters/v3/TasksStreamPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TasksStreamPresenter.server.ts @@ -1,6 +1,7 @@ import { type TaskRunAttempt } from "@trigger.dev/database"; import { eventStream } from "remix-utils/sse/server"; import { type PrismaClient, prisma } from "~/db.server"; +import { getRequestAbortSignal } from "~/services/httpAsyncStorage.server"; import { logger } from "~/services/logger.server"; import { projectPubSub } from "~/v3/services/projectPubSub.server"; @@ -63,7 +64,9 @@ export class TasksStreamPresenter { const subscriber = await projectPubSub.subscribe(`project:${project.id}:*`); - return eventStream(request.signal, (send, close) => { + const signal = getRequestAbortSignal(); + + return eventStream(signal, (send, close) => { const safeSend = (args: { event?: string; data: string }) => { try { send(args); @@ -95,7 +98,7 @@ export class TasksStreamPresenter { }); pinger = setInterval(() => { - if (request.signal.aborted) { + if (signal.aborted) { return close(); } diff --git a/apps/webapp/app/presenters/v3/TestPresenter.server.ts b/apps/webapp/app/presenters/v3/TestPresenter.server.ts index af5bb93a7e7..b817bbf155e 100644 --- a/apps/webapp/app/presenters/v3/TestPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TestPresenter.server.ts @@ -19,15 +19,13 @@ export class TestPresenter extends BasePresenter { const tasks = await this.#getTasks(environmentId, isDev); return { - tasks: tasks.map((task) => { - return { - id: task.id, - taskIdentifier: task.slug, - filePath: task.filePath, - friendlyId: task.friendlyId, - triggerSource: task.triggerSource, - }; - }), + tasks: tasks.map((task) => ({ + id: task.id, + taskIdentifier: task.slug, + filePath: task.filePath, + friendlyId: task.friendlyId, + triggerSource: task.triggerSource, + })), }; } @@ -54,10 +52,13 @@ export class TestPresenter extends BasePresenter { SELECT bwt.id, version, slug, "filePath", bwt."friendlyId", bwt."triggerSource" FROM latest_workers JOIN ${sqlDatabaseSchema}."BackgroundWorkerTask" bwt ON bwt."workerId" = latest_workers.id + WHERE bwt."triggerSource" != 'AGENT' ORDER BY slug ASC;`; } else { const currentDeployment = await findCurrentWorkerDeployment({ environmentId: envId }); - return currentDeployment?.worker?.tasks ?? []; + return (currentDeployment?.worker?.tasks ?? []).filter( + (t) => t.triggerSource !== "AGENT" + ); } } } diff --git a/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts b/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts index 09abb22639e..a9381ab60d2 100644 --- a/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts @@ -203,7 +203,7 @@ export class TestTaskPresenter { prisma: this.replica as PrismaClient, }); - const runIds = await runsRepository.listRunIds({ + const { runIds } = await runsRepository.listRunIds({ organizationId: environment.organizationId, environmentId: environment.id, projectId: environment.projectId, @@ -373,6 +373,10 @@ export class TestTaskPresenter { ), }; } + case "AGENT": { + // AGENT tasks are filtered out by TestPresenter and shouldn't reach here + return { foundTask: false }; + } default: { return task.triggerSource satisfies never; } diff --git a/apps/webapp/app/presenters/v3/UsagePresenter.server.ts b/apps/webapp/app/presenters/v3/UsagePresenter.server.ts index 2fac95617a6..f04e53496a2 100644 --- a/apps/webapp/app/presenters/v3/UsagePresenter.server.ts +++ b/apps/webapp/app/presenters/v3/UsagePresenter.server.ts @@ -4,7 +4,7 @@ import { getUsage, getUsageSeries } from "~/services/platform.v3.server"; import { createTimeSeriesData } from "~/utils/graphs"; import { BasePresenter } from "./basePresenter.server"; import { DataPoint, linear } from "regression"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; type Options = { organizationId: string; @@ -124,7 +124,8 @@ async function getTaskUsageByOrganization( endOfMonth: Date, replica: PrismaClientOrTransaction ) { - const [queryError, tasks] = await clickhouseClient.taskRuns.getTaskUsageByOrganization({ + const clickhouse = await clickhouseFactory.getClickhouseForOrganization(organizationId, "standard"); + const [queryError, tasks] = await clickhouse.taskRuns.getTaskUsageByOrganization({ startTime: startOfMonth.getTime(), endTime: endOfMonth.getTime(), organizationId, diff --git a/apps/webapp/app/presenters/v3/VercelSettingsPresenter.server.ts b/apps/webapp/app/presenters/v3/VercelSettingsPresenter.server.ts index 4a57e3ec0ef..ea2462816e5 100644 --- a/apps/webapp/app/presenters/v3/VercelSettingsPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/VercelSettingsPresenter.server.ts @@ -42,6 +42,13 @@ export type VercelSettingsResult = { autoAssignCustomDomains?: boolean | null; /** URL to manage Vercel integration access (project sharing) on vercel.com */ vercelManageAccessUrl?: string; + /** The currently pinned TRIGGER_VERSION on Vercel production, if set. Used to surface + * the pin in the UI and prompt the user to clear it when atomic deployments are disabled. */ + currentTriggerVersion?: string | null; + /** True when the Vercel lookup for TRIGGER_VERSION failed (network/auth/etc). Distinct + * from "no pin set" — the UI uses this to warn the user and still prompt them on disable + * so they can manually verify that production isn't pinned. */ + currentTriggerVersionFetchFailed?: boolean; }; export type VercelAvailableProject = { @@ -248,13 +255,17 @@ export class VercelSettingsPresenter extends BasePresenter { customEnvironments: VercelCustomEnvironment[]; autoAssignCustomDomains: boolean | null; vercelManageAccessUrl?: string; + currentTriggerVersion: string | null; + currentTriggerVersionFetchFailed: boolean; }> => { if (!orgIntegration) { - return { customEnvironments: [], autoAssignCustomDomains: null }; + return { customEnvironments: [], autoAssignCustomDomains: null, currentTriggerVersion: null, currentTriggerVersionFetchFailed: false }; } const clientResult = await VercelIntegrationRepository.getVercelClient(orgIntegration); if (clientResult.isErr()) { - return { customEnvironments: [], autoAssignCustomDomains: null }; + // We couldn't even build a Vercel client — treat as fetch failure so the UI + // still prompts the user when they disable atomic deployments. + return { customEnvironments: [], autoAssignCustomDomains: null, currentTriggerVersion: null, currentTriggerVersionFetchFailed: true }; } const client = clientResult.value; const teamId = await VercelIntegrationRepository.getTeamIdFromIntegration(orgIntegration); @@ -275,10 +286,10 @@ export class VercelSettingsPresenter extends BasePresenter { } if (!connectedProject) { - return { customEnvironments: [], autoAssignCustomDomains: null, vercelManageAccessUrl }; + return { customEnvironments: [], autoAssignCustomDomains: null, vercelManageAccessUrl, currentTriggerVersion: null, currentTriggerVersionFetchFailed: false }; } - const [customEnvsResult, autoAssignResult] = await Promise.all([ + const [customEnvsResult, autoAssignResult, triggerVersionResult] = await Promise.all([ VercelIntegrationRepository.getVercelCustomEnvironments( client, connectedProject.vercelProjectId, @@ -289,18 +300,44 @@ export class VercelSettingsPresenter extends BasePresenter { connectedProject.vercelProjectId, teamId ), + VercelIntegrationRepository.getVercelEnvironmentVariableValues( + client, + connectedProject.vercelProjectId, + teamId, + "production", + (key) => key === "TRIGGER_VERSION" + ), ]); + + let currentTriggerVersion: string | null = null; + let currentTriggerVersionFetchFailed = false; + if (triggerVersionResult.isOk()) { + const match = triggerVersionResult.value.find( + (envVar) => envVar.key === "TRIGGER_VERSION" && envVar.target.includes("production") + ); + currentTriggerVersion = match?.value ?? null; + } else { + currentTriggerVersionFetchFailed = true; + logger.warn("Failed to fetch current TRIGGER_VERSION from Vercel — surfacing as unknown", { + projectId, + vercelProjectId: connectedProject.vercelProjectId, + error: triggerVersionResult.error.message, + }); + } + return { customEnvironments: customEnvsResult.isOk() ? customEnvsResult.value : [], autoAssignCustomDomains: autoAssignResult.isOk() ? autoAssignResult.value : null, vercelManageAccessUrl, + currentTriggerVersion, + currentTriggerVersionFetchFailed, }; }; return fromPromise( fetchVercelData(), (error) => ({ type: "other" as const, cause: error }) - ).map(({ customEnvironments, autoAssignCustomDomains, vercelManageAccessUrl }) => ({ + ).map(({ customEnvironments, autoAssignCustomDomains, vercelManageAccessUrl, currentTriggerVersion, currentTriggerVersionFetchFailed }) => ({ enabled: true, hasOrgIntegration, authInvalid: false, @@ -311,6 +348,8 @@ export class VercelSettingsPresenter extends BasePresenter { customEnvironments, autoAssignCustomDomains, vercelManageAccessUrl, + currentTriggerVersion, + currentTriggerVersionFetchFailed, } as VercelSettingsResult)); }).mapErr((error) => { // Log the error and return a safe fallback @@ -419,7 +458,7 @@ export class VercelSettingsPresenter extends BasePresenter { }; } - const clientResult = await VercelIntegrationRepository.getVercelClient(orgIntegration); + const clientResult = await VercelIntegrationRepository.getVercelClientAndToken(orgIntegration); if (clientResult.isErr()) { return { customEnvironments: [], @@ -434,7 +473,7 @@ export class VercelSettingsPresenter extends BasePresenter { isOnboardingComplete: false, }; } - const client = clientResult.value; + const { client, accessToken } = clientResult.value; const teamId = await VercelIntegrationRepository.getTeamIdFromIntegration(orgIntegration); const projectIntegration = await (this._replica as PrismaClient).organizationProjectIntegration.findFirst({ @@ -492,7 +531,7 @@ export class VercelSettingsPresenter extends BasePresenter { // Only fetch shared env vars if teamId is available teamId ? VercelIntegrationRepository.getVercelSharedEnvironmentVariables( - client, + accessToken, teamId, projectIntegration.externalEntityId ) diff --git a/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts b/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts index f0e955fd04d..dbb1123c488 100644 --- a/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts @@ -1,7 +1,7 @@ import { ScheduleObject } from "@trigger.dev/core/v3"; import { PrismaClient, prisma } from "~/db.server"; import { displayableEnvironment } from "~/models/runtimeEnvironment.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { nextScheduledTimestamps } from "~/v3/utils/calculateNextSchedule.server"; import { NextRunListPresenter } from "./NextRunListPresenter.server"; import { scheduleWhereClause } from "~/models/schedules.server"; @@ -75,7 +75,8 @@ export class ViewSchedulePresenter { ? nextScheduledTimestamps(schedule.generatorExpression, schedule.timezone, new Date(), 5) : []; - const runPresenter = new NextRunListPresenter(this.#prismaClient, clickhouseClient); + const clickhouse = await clickhouseFactory.getClickhouseForOrganization(schedule.project.organizationId, "standard"); + const runPresenter = new NextRunListPresenter(this.#prismaClient, clickhouse); const { runs } = await runPresenter.call(schedule.project.organizationId, environmentId, { projectId: schedule.project.id, scheduleId: schedule.id, diff --git a/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts b/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts index 9abcdf32215..7877c2cc0c8 100644 --- a/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts @@ -1,5 +1,5 @@ import { isWaitpointOutputTimeout, prettyPrintPacket } from "@trigger.dev/core/v3"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { generateHttpCallbackUrl } from "~/services/httpCallback.server"; import { logger } from "~/services/logger.server"; import { BasePresenter } from "./basePresenter.server"; @@ -79,7 +79,8 @@ export class WaitpointPresenter extends BasePresenter { const connectedRuns: NextRunListItem[] = []; if (connectedRunIds.length > 0) { - const runPresenter = new NextRunListPresenter(this._prisma, clickhouseClient); + const clickhouse = await clickhouseFactory.getClickhouseForOrganization(waitpoint.environment.organizationId, "standard"); + const runPresenter = new NextRunListPresenter(this._prisma, clickhouse); const { runs } = await runPresenter.call( waitpoint.environment.organizationId, environmentId, diff --git a/apps/webapp/app/presenters/v3/environmentVariablesEnvironments.server.ts b/apps/webapp/app/presenters/v3/environmentVariablesEnvironments.server.ts new file mode 100644 index 00000000000..9473127df05 --- /dev/null +++ b/apps/webapp/app/presenters/v3/environmentVariablesEnvironments.server.ts @@ -0,0 +1,75 @@ +import type { RuntimeEnvironmentType } from "@trigger.dev/database"; +import { type PrismaReplicaClient } from "~/db.server"; +import { filterOrphanedEnvironments, sortEnvironments } from "~/utils/environmentSort"; + +export type EnvironmentVariablesEnvironment = { + id: string; + type: RuntimeEnvironmentType; + isBranchableEnvironment: boolean; + branchName: string | null; +}; + +export type EnvironmentVariablesEnvironmentsResult = { + environments: EnvironmentVariablesEnvironment[]; + hasStaging: boolean; +}; + +export async function loadEnvironmentVariablesEnvironments( + prismaClient: PrismaReplicaClient, + { userId, projectId }: { userId: string; projectId: string }, + options?: { skipProjectAccessCheck?: boolean } +): Promise { + if (!options?.skipProjectAccessCheck) { + const project = await prismaClient.project.findFirst({ + select: { + id: true, + }, + where: { + id: projectId, + organization: { + members: { + some: { + userId, + }, + }, + }, + }, + }); + + if (!project) { + throw new Error("Project not found"); + } + } + + const environments = await prismaClient.runtimeEnvironment.findMany({ + select: { + id: true, + type: true, + isBranchableEnvironment: true, + branchName: true, + orgMember: { + select: { + userId: true, + }, + }, + }, + where: { + projectId, + archivedAt: null, + }, + }); + + const sortedEnvironments = sortEnvironments(filterOrphanedEnvironments(environments)).filter( + (environment) => environment.orgMember?.userId === userId || environment.orgMember === null + ); + + return { + environments: sortedEnvironments.map((environment) => ({ + id: environment.id, + type: environment.type, + isBranchableEnvironment: environment.isBranchableEnvironment, + branchName: environment.branchName, + })), + hasStaging: environments.some((environment) => environment.type === "STAGING"), + }; +} diff --git a/apps/webapp/app/presenters/v3/mapRunToLiveFields.server.ts b/apps/webapp/app/presenters/v3/mapRunToLiveFields.server.ts new file mode 100644 index 00000000000..abbf1db7fd3 --- /dev/null +++ b/apps/webapp/app/presenters/v3/mapRunToLiveFields.server.ts @@ -0,0 +1,21 @@ +import { isCancellableRunStatus, isFinalRunStatus, isPendingRunStatus } from "~/v3/taskStatus"; +import type { ListedRun } from "~/services/runsRepository/runsRepository.server"; + +export function mapRunToLiveFields(run: ListedRun) { + const hasFinished = isFinalRunStatus(run.status); + const startedAt = run.startedAt ?? run.lockedAt; + + return { + friendlyId: run.friendlyId, + status: run.status, + updatedAt: run.updatedAt.toISOString(), + startedAt: startedAt?.toISOString(), + finishedAt: hasFinished ? run.completedAt?.toISOString() ?? run.updatedAt.toISOString() : undefined, + hasFinished, + isCancellable: isCancellableRunStatus(run.status), + isPending: isPendingRunStatus(run.status), + usageDurationMs: Number(run.usageDurationMs), + costInCents: run.costInCents, + baseCostInCents: run.baseCostInCents, + }; +} diff --git a/apps/webapp/app/redis.server.ts b/apps/webapp/app/redis.server.ts index 55d490821e3..01efa0d3e68 100644 --- a/apps/webapp/app/redis.server.ts +++ b/apps/webapp/app/redis.server.ts @@ -1,4 +1,5 @@ import { Cluster, Redis, type ClusterNode, type ClusterOptions } from "ioredis"; +import { defaultReconnectOnError } from "@internal/redis"; import { logger } from "./services/logger.server"; export type RedisWithClusterOptions = { @@ -42,6 +43,7 @@ export function createRedisClient( username: options.username, password: options.password, enableAutoPipelining: true, + reconnectOnError: defaultReconnectOnError, ...(options.tlsDisabled ? { checkServerIdentity: () => { @@ -69,6 +71,7 @@ export function createRedisClient( password: options.password, enableAutoPipelining: true, keyPrefix: options.keyPrefix, + reconnectOnError: defaultReconnectOnError, ...(options.tlsDisabled ? {} : { tls: {} }), }); } diff --git a/apps/webapp/app/root.tsx b/apps/webapp/app/root.tsx index c6027b1a6d3..db2d3db22b8 100644 --- a/apps/webapp/app/root.tsx +++ b/apps/webapp/app/root.tsx @@ -58,9 +58,14 @@ export const loader = async ({ request }: LoaderFunctionArgs) => { websiteId: env.KAPA_AI_WEBSITE_ID, }; + const user = await getUser(request); + + const headers = new Headers(); + headers.append("Set-Cookie", await commitSession(session)); + return typedjson( { - user: await getUser(request), + user, toastMessage, posthogProjectKey, features, @@ -70,7 +75,7 @@ export const loader = async ({ request }: LoaderFunctionArgs) => { kapa, timezone, }, - { headers: { "Set-Cookie": await commitSession(session) } } + { headers } ); }; diff --git a/apps/webapp/app/routes/@.runs.$runParam.ts b/apps/webapp/app/routes/@.runs.$runParam.ts index a52600628d8..a709191271e 100644 --- a/apps/webapp/app/routes/@.runs.$runParam.ts +++ b/apps/webapp/app/routes/@.runs.$runParam.ts @@ -3,7 +3,8 @@ import { z } from "zod"; import { prisma } from "~/db.server"; import { redirectWithErrorMessage } from "~/models/message.server"; import { requireUser } from "~/services/session.server"; -import { impersonate, rootPath, v3RunPath } from "~/utils/pathBuilder"; +import { impersonate, rootPath, v3RunPath, v3RunSpanPath } from "~/utils/pathBuilder"; +import { findBufferedRunRedirectInfo } from "~/v3/mollifier/syntheticRedirectInfo.server"; const ParamsSchema = z.object({ runParam: z.string(), @@ -32,6 +33,7 @@ export async function loader({ params, request }: LoaderFunctionArgs) { friendlyId: runParam, }, select: { + spanId: true, runtimeEnvironment: { select: { slug: true, @@ -51,16 +53,45 @@ export async function loader({ params, request }: LoaderFunctionArgs) { }); if (!run) { + // Admin impersonation route — bypass org membership so admins can + // open any buffered run by friendlyId, mirroring the existing PG + // behaviour above (no membership filter on the find). + const buffered = await findBufferedRunRedirectInfo({ + runFriendlyId: runParam, + userId: user.id, + skipOrgMembershipCheck: true, + }); + if (buffered) { + // Preselect the root span so the run-detail trace tree opens with + // the buffered run's span highlighted, matching the sibling + // redirect routes (runs.$runParam.ts, projects.v3.$projectRef…). + const path = buffered.spanId + ? v3RunSpanPath( + { slug: buffered.organizationSlug }, + { slug: buffered.projectSlug }, + { slug: buffered.environmentSlug }, + { friendlyId: runParam }, + { spanId: buffered.spanId } + ) + : v3RunPath( + { slug: buffered.organizationSlug }, + { slug: buffered.projectSlug }, + { slug: buffered.environmentSlug }, + { friendlyId: runParam } + ); + return redirect(impersonate(path)); + } return redirectWithErrorMessage(rootPath(), request, "Run doesn't exist", { ephemeral: false, }); } - const path = v3RunPath( + const path = v3RunSpanPath( { slug: run.project.organization.slug }, { slug: run.project.slug }, { slug: run.runtimeEnvironment.slug }, - { friendlyId: runParam } + { friendlyId: runParam }, + { spanId: run.spanId } ); return redirect(impersonate(path)); diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.invite/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.invite/route.tsx index 44990abaa6e..f77c19ffbdd 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.invite/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.invite/route.tsx @@ -25,13 +25,15 @@ import { Input } from "~/components/primitives/Input"; import { InputGroup } from "~/components/primitives/InputGroup"; import { Label } from "~/components/primitives/Label"; import { Paragraph } from "~/components/primitives/Paragraph"; +import { Select, SelectItem } from "~/components/primitives/Select"; import { $replica } from "~/db.server"; import { env } from "~/env.server"; import { useOrganization } from "~/hooks/useOrganizations"; import { inviteMembers } from "~/models/member.server"; import { redirectWithSuccessMessage } from "~/models/message.server"; import { TeamPresenter } from "~/presenters/TeamPresenter.server"; -import { scheduleEmail } from "~/services/email.server"; +import { scheduleEmail } from "~/services/scheduleEmail.server"; +import { rbac } from "~/services/rbac.server"; import { requireUserId } from "~/services/session.server"; import { acceptInvitePath, organizationTeamPath, v3BillingPath } from "~/utils/pathBuilder"; import { PurchaseSeatsModal } from "../_app.orgs.$organizationSlug.settings.team/route"; @@ -63,9 +65,77 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { throw new Response("Not Found", { status: 404 }); } - return typedjson(result); + // Inviter's own role drives the "below their level" filter on the + // dropdown. Plus assignable role IDs already encode the org's plan + // tier — the intersection is what we offer. + const [inviterRole, assignableRoleIds, systemRoles] = await Promise.all([ + rbac.getUserRole({ userId, organizationId: organization.id }), + rbac.getAssignableRoleIds(organization.id), + rbac.systemRoles(organization.id), + ]); + + // Build the dropdown's offerable set server-side: roles that are + // (a) assignable on the current plan AND (b) at or below the + // inviter's own level. The client just renders these — it doesn't + // need to know about the system-role catalogue or the ladder. + const assignableSet = new Set(assignableRoleIds); + const offerableRoleIds = systemRoles + ? result.roles + .filter( + (r) => + assignableSet.has(r.id) && + isAtOrBelow(systemRoles, inviterRole?.id ?? null, r.id) + ) + .map((r) => r.id) + : []; + + return typedjson({ ...result, offerableRoleIds }); }; +// Sentinel for "no RBAC role attached to invite" — the runtime +// fallback will derive a role from the legacy OrgMember.role write at +// accept time. Used when the org has no RBAC plugin installed (the +// dropdown is hidden) or as a defensive default. +const NO_RBAC_ROLE = "__no_rbac_role__"; + +// An inviter can only assign a role at or below their own. The +// plugin's systemRoles array is in canonical order (highest authority +// first), so array index drives the ladder — earlier index = higher +// rank. Plan-tier filtering happens separately via assignableRoleIds; +// the ladder is the absolute hierarchy. Custom roles aren't in the +// table and are refused (TRI-8747's follow-up will handle them). +type LadderRole = { id: string }; + +function buildRoleLevel(roles: ReadonlyArray): Record { + const level: Record = {}; + roles.forEach((r, i) => { + // Top of the array = highest level. Subtract from length so larger + // numbers always mean "more authority" — no off-by-one when a role + // is added or removed. + level[r.id] = roles.length - i; + }); + return level; +} + +function isAtOrBelow( + roles: ReadonlyArray, + inviterRoleId: string | null, + invitedRoleId: string +): boolean { + // No RBAC role on inviter (e.g. the runtime fallback couldn't derive + // one) → fall back to the legacy OrgMember.role check the calling + // code already enforces. Allow the invite to proceed; the action + // would have already failed earlier if the inviter wasn't allowed + // to invite at all. + if (!inviterRoleId) return true; + const level = buildRoleLevel(roles); + const inviter = level[inviterRoleId]; + const invited = level[invitedRoleId]; + // Custom roles aren't in the level table — refuse. + if (inviter === undefined || invited === undefined) return false; + return invited <= inviter; +} + const schema = z.object({ emails: z.preprocess((i) => { if (typeof i === "string") return [i]; @@ -80,6 +150,7 @@ const schema = z.object({ return [""]; }, z.string().email().array().nonempty("At least one email is required")), + rbacRoleId: z.string().optional(), }); export const action: ActionFunction = async ({ request, params }) => { @@ -94,11 +165,62 @@ export const action: ActionFunction = async ({ request, params }) => { return json(submission); } + // Resolve the RBAC role choice. NO_RBAC_ROLE / undefined / unknown + // role → don't pass one through; the runtime fallback handles it. + // Validation: the chosen role must be in the org's assignable set + // (plan-tier) and at or below the inviter's own level. + let resolvedRbacRoleId: string | null = null; + const submittedRbacRoleId = submission.value.rbacRoleId; + if ( + submittedRbacRoleId && + submittedRbacRoleId !== NO_RBAC_ROLE + ) { + const org = await $replica.organization.findFirst({ + where: { slug: organizationSlug }, + select: { id: true }, + }); + if (!org) { + return json({ errors: { body: "Organization not found" } }, { status: 404 }); + } + const [inviterRole, assignableRoleIds, systemRoles] = await Promise.all([ + rbac.getUserRole({ userId, organizationId: org.id }), + rbac.getAssignableRoleIds(org.id), + rbac.systemRoles(org.id), + ]); + if (!systemRoles) { + // No plugin installed but the form somehow submitted a role id — + // ignore it (fall through to legacy behaviour rather than 400). + resolvedRbacRoleId = null; + } else { + const assignable = new Set(assignableRoleIds); + if (!assignable.has(submittedRbacRoleId)) { + return json( + { errors: { body: "You can't invite someone with this role on your current plan" } }, + { status: 400 } + ); + } + if ( + !isAtOrBelow( + systemRoles, + inviterRole?.id ?? null, + submittedRbacRoleId + ) + ) { + return json( + { errors: { body: "You can only invite members at or below your own role" } }, + { status: 403 } + ); + } + resolvedRbacRoleId = submittedRbacRoleId; + } + } + try { const invites = await inviteMembers({ slug: organizationSlug, emails: submission.value.emails, userId, + rbacRoleId: resolvedRbacRoleId, }); for (const invite of invites) { @@ -128,12 +250,35 @@ export const action: ActionFunction = async ({ request, params }) => { }; export default function Page() { - const { limits, canPurchaseSeats, seatPricing, extraSeats, maxSeatQuota, planSeatLimit } = - useTypedLoaderData(); + const { + limits, + canPurchaseSeats, + seatPricing, + extraSeats, + maxSeatQuota, + planSeatLimit, + roles, + offerableRoleIds, + } = useTypedLoaderData(); const [total, setTotal] = useState(limits.used); const organization = useOrganization(); const lastSubmission = useActionData(); + // The loader filtered the catalogue to roles this inviter can + // actually assign (plan tier × strict-below-my-level). With no plugin + // installed, offerableRoleIds is [] and the picker hides entirely. + const offerableSet = new Set(offerableRoleIds); + const offerable = roles.filter((r) => offerableSet.has(r.id)); + const showRolePicker = offerable.length > 0; + + // Default to the lowest-tier offered role (the loader returns roles + // in its allRoles order, which the plugin emits Owner→Member; the + // last entry is the most restrictive). + const defaultRoleId = showRolePicker + ? offerable[offerable.length - 1].id + : NO_RBAC_ROLE; + const [selectedRoleId, setSelectedRoleId] = useState(defaultRoleId); + const [form, { emails }] = useForm({ id: "invite-members", // TODO: type this @@ -232,6 +377,36 @@ export default function Page() { ))} + {showRolePicker ? ( + + + + + defaultValue={defaultRoleId} + items={offerable} + variant="tertiary/medium" + dropdownIcon + text={(v) => + offerable.find((r) => r.id === v)?.name ?? "Pick a role" + } + setValue={(next) => { + if (typeof next === "string") setSelectedRoleId(next); + }} + > + {(items) => + items.map((role) => ( + + {role.name} + + )) + } + + + Invitees join with this role. They can be promoted later + from the Team page. + + + ) : null} limits.limit}> diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam._index/route.tsx index 2cf8b844a9e..f8e28fc6888 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam._index/route.tsx @@ -5,8 +5,6 @@ import { ChevronUpIcon, ExclamationTriangleIcon, LightBulbIcon, - MagnifyingGlassIcon, - XMarkIcon, UserPlusIcon, VideoCameraIcon, } from "@heroicons/react/20/solid"; @@ -17,7 +15,7 @@ import { DiscordIcon } from "@trigger.dev/companyicons"; import { formatDurationMilliseconds } from "@trigger.dev/core/v3"; import type { TaskRunStatus } from "@trigger.dev/database"; import { Fragment, Suspense, useCallback, useEffect, useRef, useState } from "react"; -import type { PanelHandle } from "react-window-splitter"; +import type { PanelHandle } from "@window-splitter/react"; import { Bar, BarChart, ResponsiveContainer, Tooltip, type TooltipProps } from "recharts"; import { TypedAwait, typeddefer, useTypedLoaderData } from "remix-typedjson"; import { ExitIcon } from "~/assets/icons/ExitIcon"; @@ -38,7 +36,6 @@ import { Callout } from "~/components/primitives/Callout"; import { formatDateTime } from "~/components/primitives/DateTime"; import { Dialog, DialogContent, DialogHeader, DialogTitle } from "~/components/primitives/Dialog"; import { Header2, Header3 } from "~/components/primitives/Headers"; -import { Input } from "~/components/primitives/Input"; import { NavBar, PageAccessories, PageTitle } from "~/components/primitives/PageHeader"; import { Paragraph } from "~/components/primitives/Paragraph"; import { PopoverMenuItem } from "~/components/primitives/Popover"; @@ -50,6 +47,7 @@ import { ResizablePanelGroup, collapsibleHandleClassName, } from "~/components/primitives/Resizable"; +import { SearchInput } from "~/components/primitives/SearchInput"; import { Spinner } from "~/components/primitives/Spinner"; import { StepNumber } from "~/components/primitives/StepNumber"; import { @@ -75,6 +73,7 @@ import { useEventSource } from "~/hooks/useEventSource"; import { useFuzzyFilter } from "~/hooks/useFuzzyFilter"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; +import { useSearchParams } from "~/hooks/useSearchParam"; import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { @@ -88,7 +87,6 @@ import { uiPreferencesStorage, } from "~/services/preferences/uiPreferences.server"; import { requireUserId } from "~/services/session.server"; -import { motion } from "framer-motion"; import { cn } from "~/utils/cn"; import { docsPath, @@ -176,9 +174,11 @@ export default function Page() { const environment = useEnvironment(); const { tasks, activity, runningStats, durations, usefulLinksPreference } = useTypedLoaderData(); - const { filterText, setFilterText, filteredItems } = useFuzzyFilter({ + const { value } = useSearchParams(); + const { filteredItems } = useFuzzyFilter({ items: tasks, keys: ["slug", "filePath", "triggerSource"], + filterText: value("search") ?? "", }); const hasTasks = tasks.length > 0; @@ -244,16 +244,12 @@ export default function Page() { {tasks.length === 0 ? : null}
- - {!showUsefulLinks && ( + + {!showUsefulLinks && ( - ) : undefined - } - /> - - ); -} diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.agents/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.agents/route.tsx new file mode 100644 index 00000000000..deedafd9879 --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.agents/route.tsx @@ -0,0 +1,360 @@ +import { BeakerIcon, CpuChipIcon, MagnifyingGlassIcon } from "@heroicons/react/20/solid"; +import { type MetaFunction } from "@remix-run/node"; +import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { Suspense } from "react"; +import { TypedAwait, typeddefer, useTypedLoaderData } from "remix-typedjson"; +import { RunsIcon } from "~/assets/icons/RunsIcon"; +import { MainCenteredContainer, PageBody, PageContainer } from "~/components/layout/AppLayout"; +import { Badge } from "~/components/primitives/Badge"; +import { Header2 } from "~/components/primitives/Headers"; +import { Input } from "~/components/primitives/Input"; +import { LinkButton } from "~/components/primitives/Buttons"; +import { NavBar, PageTitle } from "~/components/primitives/PageHeader"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { Spinner } from "~/components/primitives/Spinner"; +import { + Table, + TableBlankRow, + TableBody, + TableCell, + TableCellMenu, + TableHeader, + TableHeaderCell, + TableRow, +} from "~/components/primitives/Table"; +import { SimpleTooltip } from "~/components/primitives/Tooltip"; +import { PopoverMenuItem } from "~/components/primitives/Popover"; +import { TaskFileName } from "~/components/runs/v3/TaskPath"; +import { useFuzzyFilter } from "~/hooks/useFuzzyFilter"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { + type AgentListItem, + type AgentActiveState, + agentListPresenter, +} from "~/presenters/v3/AgentListPresenter.server"; +import { requireUserId } from "~/services/session.server"; +import { EnvironmentParamSchema, v3RunsPath, v3PlaygroundAgentPath } from "~/utils/pathBuilder"; +import { cn } from "~/utils/cn"; + +export const meta: MetaFunction = () => { + return [{ title: "Agents | Trigger.dev" }]; +}; + +export const loader = async ({ request, params }: LoaderFunctionArgs) => { + const userId = await requireUserId(request); + const { organizationSlug, projectParam, envParam } = EnvironmentParamSchema.parse(params); + + const project = await findProjectBySlug(organizationSlug, projectParam, userId); + if (!project) { + throw new Response(undefined, { status: 404, statusText: "Project not found" }); + } + + const environment = await findEnvironmentBySlug(project.id, envParam, userId); + if (!environment) { + throw new Response(undefined, { status: 404, statusText: "Environment not found" }); + } + + const result = await agentListPresenter.call({ + organizationId: project.organizationId, + projectId: project.id, + environmentId: environment.id, + environmentType: environment.type, + }); + + return typeddefer(result); +}; + +export default function AgentsPage() { + const { agents, activeStates, conversationSparklines, costSparklines, tokenSparklines } = + useTypedLoaderData(); + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + + const { filterText, setFilterText, filteredItems } = useFuzzyFilter({ + items: agents, + keys: ["slug", "filePath"], + }); + + if (agents.length === 0) { + return ( + + + + + + +
+ + No agents deployed + + Create a chat agent using chat.agent() from{" "} + @trigger.dev/sdk/ai and deploy it to see it here. + +
+
+
+
+ ); + } + + return ( + + + + + +
+
+
+
+ setFilterText(e.target.value)} + autoFocus + /> +
+ + + + ID + Type + File + Active + Conversations (24h) + Cost (24h) + Tokens (24h) + Go to page + + + + {filteredItems.length > 0 ? ( + filteredItems.map((agent) => { + const path = v3RunsPath(organization, project, environment, { + tasks: [agent.slug], + }); + const agentType = + (agent.config as { type?: string } | null)?.type ?? "unknown"; + + return ( + + +
+ + } + content="Agent" + /> + {agent.slug} +
+
+ + {formatAgentType(agentType)} + + + + + + }> + –}> + {(data) => { + const state = data[agent.slug]; + if (!state || (state.running === 0 && state.suspended === 0)) { + return ( + + ); + } + return ( + + {state.running > 0 && ( + + + {state.running} + + )} + {state.running > 0 && state.suspended > 0 && ( + · + )} + {state.suspended > 0 && ( + + + {state.suspended} + + )} + + ); + }} + + + + + }> + –}> + {(data) => ( + + )} + + + + + }> + –}> + {(data) => ( + + )} + + + + + }> + –}> + {(data) => ( + + )} + + + + + + + + } + hiddenButtons={ + + Playground + + } + /> +
+ ); + }) + ) : ( + + + No agents match your filters + + + )} +
+
+
+
+
+
+
+ ); +} + +function formatAgentType(type: string): string { + switch (type) { + case "ai-sdk-chat": + return "AI SDK Chat"; + default: + return type; + } +} + +function formatCount(total: number): string { + if (total === 0) return "0"; + if (total >= 1000) return `${(total / 1000).toFixed(1)}k`; + return total.toString(); +} + +function formatCost(total: number): string { + if (total === 0) return "$0"; + if (total < 0.01) return `$${total.toFixed(4)}`; + if (total < 1) return `$${total.toFixed(2)}`; + return `$${total.toFixed(2)}`; +} + +function formatTokens(total: number): string { + if (total === 0) return "0"; + if (total >= 1_000_000) return `${(total / 1_000_000).toFixed(1)}M`; + if (total >= 1000) return `${(total / 1000).toFixed(1)}k`; + return total.toString(); +} + +function SparklinePlaceholder() { + return
; +} + +function SparklineWithTotal({ + data, + formatTotal, + color = "text-text-bright", + barColor = "#3B82F6", +}: { + data?: number[]; + formatTotal: (total: number) => string; + color?: string; + barColor?: string; +}) { + if (!data || data.every((v) => v === 0)) { + return ; + } + + const total = data.reduce((sum, v) => sum + v, 0); + const max = Math.max(...data); + + return ( +
+
+ {data.map((value, i) => { + const height = max > 0 ? Math.max((value / max) * 100, value > 0 ? 8 : 0) : 0; + return ( +
0 ? barColor : "transparent", + opacity: value > 0 ? 0.8 : 0, + }} + /> + ); + })} +
+ {formatTotal(total)} +
+ ); +} diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.batches/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.batches/route.tsx index eaa040c4081..47318edd355 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.batches/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.batches/route.tsx @@ -30,6 +30,7 @@ import { TableHeader, TableHeaderCell, TableRow, + CopyableTableCell, } from "~/components/primitives/Table"; import { SimpleTooltip } from "~/components/primitives/Tooltip"; import { BatchFilters, BatchListFilters } from "~/components/runs/v3/BatchFilters"; @@ -53,6 +54,7 @@ import { v3BatchPath, v3BatchRunsPath, } from "~/utils/pathBuilder"; +import { throwNotFound } from "~/utils/httpErrors"; export const meta: MetaFunction = () => { return [ @@ -73,7 +75,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const environment = await findEnvironmentBySlug(project.id, envParam, userId); if (!environment) { - throw new Error("Environment not found"); + throwNotFound("Environment not found"); } const url = new URL(request.url); @@ -234,9 +236,9 @@ function BatchesTable({ batches, hasFilters, filters }: BatchList) { return ( - + {batch.friendlyId} - + {batch.batchVersion === "v1" ? ( @@ -286,7 +288,7 @@ function BatchesTable({ batches, hasFilters, filters }: BatchList) { {isLoading && ( Loading… diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.branches/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.branches/route.tsx index c7d54d1842e..39db1d96f3a 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.branches/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.branches/route.tsx @@ -132,10 +132,7 @@ const PurchaseSchema = z.discriminatedUnion("action", [ }), z.object({ action: z.literal("quota-increase"), - amount: z.coerce - .number() - .int("Must be a whole number") - .min(1, "Amount must be greater than 0"), + amount: z.coerce.number().int("Must be a whole number").min(1, "Amount must be greater than 0"), }), ]); @@ -329,23 +326,16 @@ export default function Page() { ) : ( <> -
+
-
- -
+
-
1 ? "grid-rows-[1fr_auto]" : "grid-rows-[1fr]" - )} - > +
@@ -438,14 +428,6 @@ export default function Page() { )}
-
1 && "justify-end border-t border-grid-dimmed px-2 py-3" - )} - > - -
@@ -545,12 +527,12 @@ export function BranchFilters() { return (
- +
); @@ -673,7 +655,13 @@ function PurchaseBranchesModal({ const [open, setOpen] = useState(false); useEffect(() => { const data = fetcher.data; - if (fetcher.state === "idle" && data !== null && typeof data === "object" && "ok" in data && data.ok) { + if ( + fetcher.state === "idle" && + data !== null && + typeof data === "object" && + "ok" in data && + data.ok + ) { setOpen(false); } }, [fetcher.state, fetcher.data]); diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx index 57b6b71db6f..3f922351bfb 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx @@ -1,6 +1,8 @@ +import { XMarkIcon } from "@heroicons/react/20/solid"; import { type LoaderFunctionArgs } from "@remix-run/node"; +import { Form } from "@remix-run/react"; import type { TaskTriggerSource } from "@trigger.dev/database"; -import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { type ReactNode, useCallback, useEffect, useMemo, useRef, useState } from "react"; import ReactGridLayout from "react-grid-layout"; import { typedjson, useTypedLoaderData } from "remix-typedjson"; import { z } from "zod"; @@ -15,16 +17,16 @@ import { QueuesFilter } from "~/components/metrics/QueuesFilter"; import { ScopeFilter } from "~/components/metrics/ScopeFilter"; import { TitleWidget } from "~/components/metrics/TitleWidget"; import { CreateDashboardPageButton } from "~/components/navigation/DashboardDialogs"; -import { NavBar, PageAccessories, PageTitle } from "~/components/primitives/PageHeader"; +import { Button } from "~/components/primitives/Buttons"; +import { NavBar, PageTitle } from "~/components/primitives/PageHeader"; import { TimeFilter } from "~/components/runs/v3/SharedFilters"; -import { $replica } from "~/db.server"; import { useEnvironment } from "~/hooks/useEnvironment"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; import { useSearchParams } from "~/hooks/useSearchParam"; import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; -import { getAllTaskIdentifiers } from "~/models/task.server"; +import { getTaskIdentifiers } from "~/models/task.server"; import { type BuiltInDashboardFilter, type LayoutItem, @@ -32,7 +34,7 @@ import { MetricDashboardPresenter, } from "~/presenters/v3/MetricDashboardPresenter.server"; import { PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { requireUser } from "~/services/session.server"; import { cn } from "~/utils/cn"; import { EnvironmentParamSchema } from "~/utils/pathBuilder"; @@ -70,15 +72,17 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { organizationId: project.organizationId, key: dashboardKey, }), - getAllTaskIdentifiers($replica, environment.id), + getTaskIdentifiers(environment.id), ]); const filters = dashboard.filters ?? ["tasks", "queues"]; + const clickhouse = await clickhouseFactory.getClickhouseForOrganization(project.organizationId, "standard"); + // Load distinct models from ClickHouse if the dashboard has a models filter let possibleModels: { model: string; system: string }[] = []; if (filters.includes("models")) { - const queryFn = clickhouseClient.reader.query({ + const queryFn = clickhouse.reader.query({ name: "getDistinctModels", query: `SELECT response_model, any(gen_ai_system) AS gen_ai_system FROM trigger_dev.llm_metrics_v1 WHERE organization_id = {organizationId: String} AND project_id = {projectId: String} AND environment_id = {environmentId: String} AND response_model != '' GROUP BY response_model ORDER BY response_model`, params: z.object({ @@ -98,7 +102,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { } } - const promptPresenter = new PromptPresenter(clickhouseClient); + const promptPresenter = new PromptPresenter(clickhouse); const [possiblePrompts, possibleOperations, possibleProviders] = await Promise.all([ filters.includes("prompts") ? promptPresenter.getDistinctPromptSlugs(project.organizationId, project.id, environment.id) @@ -114,9 +118,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { return typedjson({ ...dashboard, filters, - possibleTasks: possibleTasks - .map((task) => ({ slug: task.slug, triggerSource: task.triggerSource })) - .sort((a, b) => a.slug.localeCompare(b.slug)), + possibleTasks, possibleModels, possiblePrompts, possibleOperations, @@ -146,13 +148,6 @@ export default function Page() { - - -
@@ -168,6 +163,14 @@ export default function Page() { possiblePrompts={possiblePrompts} possibleOperations={possibleOperations} possibleProviders={possibleProviders} + filterAccessories={ + + } />
@@ -191,6 +194,7 @@ export function MetricDashboard({ onRenameWidget, onDeleteWidget, onDuplicateWidget, + filterAccessories, }: { /** The layout items (positions/sizes) - fully controlled from parent */ layout: LayoutItem[]; @@ -201,7 +205,7 @@ export function MetricDashboard({ /** Which filters to show. Defaults to ["tasks", "queues"]. */ filters?: BuiltInDashboardFilter[]; /** Possible tasks for filtering */ - possibleTasks?: { slug: string; triggerSource: TaskTriggerSource }[]; + possibleTasks?: { slug: string; triggerSource: TaskTriggerSource; isInLatestDeployment: boolean }[]; /** Possible models for filtering */ possibleModels?: ModelOption[]; /** Possible prompt slugs for filtering */ @@ -215,6 +219,7 @@ export function MetricDashboard({ onRenameWidget?: (widgetId: string, newTitle: string) => void; onDeleteWidget?: (widgetId: string) => void; onDuplicateWidget?: (widgetId: string, widget: WidgetData) => void; + filterAccessories?: ReactNode; }) { const { value, values } = useSearchParams(); const { width, containerRef, mounted } = useContainerWidth(); @@ -242,6 +247,13 @@ export function MetricDashboard({ const providers = values("providers").filter((v) => v !== ""); const activeFilters = filterConfig ?? ["tasks", "queues"]; + const hasAppliedFilters = + tasks.length > 0 || + queues.length > 0 || + models.length > 0 || + prompts.length > 0 || + operations.length > 0 || + providers.length > 0; const handleLayoutChange = useCallback( (newLayout: readonly LayoutItem[]) => { @@ -266,31 +278,48 @@ export function MetricDashboard({ return (
-
- - {activeFilters.includes("tasks") && ( - - )} - {activeFilters.includes("queues") && } - {activeFilters.includes("models") && ( - - )} - {activeFilters.includes("prompts") && ( - - )} - {activeFilters.includes("operations") && ( - - )} - {activeFilters.includes("providers") && ( - +
+
+ + {activeFilters.includes("tasks") && ( + + )} + {activeFilters.includes("queues") && } + {activeFilters.includes("models") && ( + + )} + {activeFilters.includes("prompts") && ( + + )} + {activeFilters.includes("operations") && ( + + )} + {activeFilters.includes("providers") && ( + + )} + + {hasAppliedFilters && ( +
+
+ {filterAccessories && ( +
{filterAccessories}
)} -
{ queryPresenter.call({ organizationId: project.organizationId, }), - getAllTaskIdentifiers($replica, environment.id), + getTaskIdentifiers(environment.id), ]); // Admins and impersonating users can use EXPLAIN @@ -109,9 +109,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { queryHistory: history, isAdmin, maxRows: env.QUERY_CLICKHOUSE_MAX_RETURNED_ROWS, - possibleTasks: possibleTasks - .map((task) => ({ slug: task.slug, triggerSource: task.triggerSource })) - .sort((a, b) => a.slug.localeCompare(b.slug)), + possibleTasks, widgetCount, }); }; @@ -207,19 +205,22 @@ export default function Page() { const toast = useToast(); - const handleSyncError = useCallback((error: Error, action: string) => { - const actionMessages: Record = { - add: "Failed to add widget", - update: "Failed to update widget", - delete: "Failed to delete widget", - duplicate: "Failed to duplicate widget", - layout: "Failed to save layout", - }; + const handleSyncError = useCallback( + (error: Error, action: string) => { + const actionMessages: Record = { + add: "Failed to add widget", + update: "Failed to update widget", + delete: "Failed to delete widget", + duplicate: "Failed to duplicate widget", + layout: "Failed to save layout", + }; - const message = actionMessages[action] || "Failed to save changes"; + const message = actionMessages[action] || "Failed to save changes"; - toast.error(`${message}. Your changes may not be saved.`, { title: "Sync Error" }); - }, [toast]); + toast.error(`${message}. Your changes may not be saved.`, { title: "Sync Error" }); + }, + [toast] + ); // Add title dialog state const [showAddTitleDialog, setShowAddTitleDialog] = useState(false); @@ -351,88 +352,99 @@ export default function Page() { })() : null; + const dashboardMenu = ( + + + +
+ + +
+
+
+ ); + + const filterAccessories = ( +
+ {widgetIsAtLimit ? ( + <> + + + + + + You've exceeded your widget limit + + You've used {widgetLimits.used}/{widgetLimits.limit} widgets on this dashboard. + + + {widgetCanUpgrade ? ( + + Upgrade + + ) : ( + Request more} + defaultValue="help" + /> + )} + + + + + + ) : ( + <> + + + + )} + {dashboardMenu} +
+ ); + return ( - - {totalWidgetCount > 0 && - (widgetIsAtLimit ? ( - <> - - - - - - You've exceeded your widget limit - - You've used {widgetLimits.used}/{widgetLimits.limit} widgets on this - dashboard. - - - {widgetCanUpgrade ? ( - - Upgrade - - ) : ( - Request more} - defaultValue="help" - /> - )} - - - - - - ) : ( - <> - - - - ))} - - - -
- - -
-
-
-
+ {totalWidgetCount === 0 && dashboardMenu}
@@ -473,6 +485,7 @@ export default function Page() { onRenameWidget={actions.renameWidget} onDeleteWidget={actions.deleteWidget} onDuplicateWidget={actions.duplicateWidget} + filterAccessories={filterAccessories} /> )}
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.environment-variables.new/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.environment-variables.new/route.tsx index 86bd5bbc95d..4c318323b58 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.environment-variables.new/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.environment-variables.new/route.tsx @@ -9,10 +9,11 @@ import { import { parse } from "@conform-to/zod"; import { LockClosedIcon, LockOpenIcon, PlusIcon, XMarkIcon } from "@heroicons/react/20/solid"; import { Form, useActionData, useNavigate, useNavigation } from "@remix-run/react"; -import { type ActionFunctionArgs, type LoaderFunctionArgs, json } from "@remix-run/server-runtime"; +import { type ActionFunctionArgs, json } from "@remix-run/server-runtime"; import dotenv from "dotenv"; -import { type RefObject, useCallback, useEffect, useRef, useState } from "react"; -import { redirect, typedjson, useTypedLoaderData } from "remix-typedjson"; +import { type RefObject, useCallback, useRef, useState } from "react"; +import { redirect } from "remix-typedjson"; +import invariant from "tiny-invariant"; import { z } from "zod"; import { EnvironmentLabel } from "~/components/environments/EnvironmentLabel"; import { Button, LinkButton } from "~/components/primitives/Buttons"; @@ -39,13 +40,15 @@ import { useEnvironment } from "~/hooks/useEnvironment"; import { useList } from "~/hooks/useList"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; -import { EnvironmentVariablesPresenter } from "~/presenters/v3/EnvironmentVariablesPresenter.server"; -import { logger } from "~/services/logger.server"; +import { useTypedMatchesData } from "~/hooks/useTypedMatchData"; import { requireUserId } from "~/services/session.server"; import { cn } from "~/utils/cn"; +import { + environmentVariablesRouteId, + type loader as environmentVariablesLoader, +} from "~/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.environment-variables/route"; import { EnvironmentParamSchema, - ProjectParamSchema, v3BillingPath, v3EnvironmentVariablesPath, } from "~/utils/pathBuilder"; @@ -53,29 +56,6 @@ import { EnvironmentVariablesRepository } from "~/v3/environmentVariables/enviro import { EnvironmentVariableKey } from "~/v3/environmentVariables/repository"; import { Select, SelectItem } from "~/components/primitives/Select"; -export const loader = async ({ request, params }: LoaderFunctionArgs) => { - const userId = await requireUserId(request); - const { projectParam } = ProjectParamSchema.parse(params); - - try { - const presenter = new EnvironmentVariablesPresenter(); - const { environments, hasStaging } = await presenter.call({ - userId, - projectSlug: projectParam, - }); - - return typedjson({ - environments, - hasStaging, - }); - } catch (error) { - throw new Response(undefined, { - status: 400, - statusText: "Something went wrong, if this problem persists please contact support.", - }); - } -}; - const Variable = z.object({ key: EnvironmentVariableKey, value: z.string().nonempty("Value is required"), @@ -185,8 +165,15 @@ export const action = async ({ request, params }: ActionFunctionArgs) => { }; export default function Page() { - const [isOpen, setIsOpen] = useState(false); - const { environments, hasStaging } = useTypedLoaderData(); + const [isOpen, setIsOpen] = useState(true); + const parentData = useTypedMatchesData({ + id: environmentVariablesRouteId, + }); + invariant( + parentData, + "Environment variables page loader data must be defined when rendering the create dialog" + ); + const { environments, hasStaging } = parentData; const lastSubmission = useActionData(); const navigation = useNavigation(); const navigate = useNavigate(); @@ -259,10 +246,6 @@ export default function Page() { const [revealAll, setRevealAll] = useState(true); - useEffect(() => { - setIsOpen(true); - }, []); - return ( { return [ @@ -86,16 +103,31 @@ export const meta: MetaFunction = () => { ]; }; +type PageVercelIntegration = NonNullable< + Awaited>["vercelIntegration"] +>; + +export type EnvironmentVariablesPageLoaderData = { + environmentVariables: EnvironmentVariableWithSetValues[]; + environments: EnvironmentVariablesEnvironment[]; + hasStaging: boolean; + vercelIntegration: PageVercelIntegration | null; +}; + +export const environmentVariablesRouteId = + "routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.environment-variables"; + export const loader = async ({ request, params }: LoaderFunctionArgs) => { const userId = await requireUserId(request); const { projectParam } = ProjectParamSchema.parse(params); try { const presenter = new EnvironmentVariablesPresenter(); - const { environmentVariables, environments, hasStaging, vercelIntegration } = await presenter.call({ - userId, - projectSlug: projectParam, - }); + const { environmentVariables, environments, hasStaging, vercelIntegration } = + await presenter.call({ + userId, + projectSlug: projectParam, + }); return typedjson({ environmentVariables, @@ -123,7 +155,9 @@ const schema = z.discriminatedUnion("action", [ action: z.literal("update-vercel-sync"), key: z.string(), environmentType: z.enum(["PRODUCTION", "STAGING", "PREVIEW", "DEVELOPMENT"]), - syncEnabled: z.union([z.literal("true"), z.literal("false")]).transform((val) => val === "true"), + syncEnabled: z + .union([z.literal("true"), z.literal("false")]) + .transform((val) => val === "true"), }), ]); @@ -247,21 +281,45 @@ export const action = async ({ request, params }: ActionFunctionArgs) => { } }; +const SSR_ROW_WINDOW = 50; +const ROW_ESTIMATE_HEIGHT = 44; +const VIRTUAL_OVERSCAN = 10; + +type GroupedEnvironmentVariable = EnvironmentVariableWithSetValues & { + isFirstTime: boolean; + isLastTime: boolean; + occurences: number; +}; + export default function Page() { + const loaderData = useTypedLoaderData(); + + return ; +} + +function EnvironmentVariablesListPage({ + loaderData, +}: { + loaderData: EnvironmentVariablesPageLoaderData; +}) { const [revealAll, setRevealAll] = useState(false); - const { environmentVariables, environments, vercelIntegration } = useTypedLoaderData(); + const { environmentVariables, vercelIntegration } = loaderData; const organization = useOrganization(); const project = useProject(); const environment = useEnvironment(); - const { filterText, setFilterText, filteredItems } = - useFuzzyFilter({ - items: environmentVariables, - keys: ["key", "value", "environment.type", "environment.branchName"], - }); + const { value } = useSearchParams(); + const urlSearch = value("search") ?? ""; + const { filteredItems } = useFuzzyFilter({ + items: environmentVariables, + keys: ["key", "value", "environment.type", "environment.branchName"], + filterText: urlSearch, + }); + + const tableScrollRef = useRef(null); // Add isFirst and isLast to each environment variable // They're set based on if they're the first or last time that `key` has been seen in the list - const groupedEnvironmentVariables = useMemo(() => { + const groupedEnvironmentVariables = useMemo((): GroupedEnvironmentVariable[] => { // Create a map to track occurrences of each key const keyOccurrences = new Map(); @@ -296,6 +354,22 @@ export default function Page() { }); }, [filteredItems]); + const shouldVirtualize = groupedEnvironmentVariables.length > SSR_ROW_WINDOW; + const [isVirtualized, setIsVirtualized] = useState(false); + + useLayoutEffect(() => { + setIsVirtualized(shouldVirtualize); + }, [shouldVirtualize]); + + const staticRows = useMemo(() => { + if (shouldVirtualize) { + return groupedEnvironmentVariables.slice(0, SSR_ROW_WINDOW); + } + return groupedEnvironmentVariables; + }, [groupedEnvironmentVariables, shouldVirtualize]); + + const vercelColumnCount = vercelIntegration?.enabled ? 6 : 5; + return ( @@ -311,21 +385,13 @@ export default function Page() { -
+
{environmentVariables.length > 0 && (
- setFilterText(e.target.value)} - autoFocus - /> -
+ +
setRevealAll(e.valueOf())} @@ -341,193 +407,285 @@ export default function Page() {
)} - - - - - Key - - - Value - - - Environment - - {vercelIntegration?.enabled && ( - +
+
+ + + + Key + + + Value + + - Sync + Environment } - content="When enabled, this variable will be pulled from Vercel during builds. Requires 'Pull env vars before build' to be enabled in settings." + content="Dev environment variables specified here will be overridden by ones in your .env file when running locally." + className="max-w-60" /> - )} - - Updated - - - Actions - - - - - {groupedEnvironmentVariables.length > 0 ? ( - groupedEnvironmentVariables.map((variable) => { - const cellClassName = "py-2"; - let borderedCellClassName = ""; - - if (variable.occurences > 1) { - borderedCellClassName = - "relative after:absolute after:bottom-0 after:left-0 after:right-0 after:h-px after:bg-grid-bright group-hover/table-row:after:bg-grid-bright group-hover/table-row:before:bg-grid-bright"; - if (variable.isLastTime) { - borderedCellClassName = ""; - } else if (variable.isFirstTime) { - } - } else { - } - - return ( - - - {variable.isFirstTime ? ( - - ) : null} - - - {variable.isSecret ? ( - - - Secret - - } - content="This variable is secret and cannot be revealed." - /> - ) : ( - - )} - - - - - - {vercelIntegration?.enabled && ( - - {variable.environment.type !== "DEVELOPMENT" && ( - - )} - - )} - -
- {variable.updatedByUser ? ( -
- - {variable.updatedByUser.name} -
- ) : (variable.lastUpdatedBy?.type === "integration" && variable.lastUpdatedBy?.integration === 'vercel' ) ? ( -
- - - {variable.lastUpdatedBy.integration} - -
- ) : null} - {variable.updatedAt ? ( - - - - ) : null} -
-
- - - - + {vercelIntegration?.enabled && ( + + + Sync + + } + content="When enabled, this variable will be pulled from Vercel during builds. Requires 'Pull env vars before build' to be enabled in settings." /> -
- ); - }) - ) : ( - - - {environmentVariables.length === 0 ? ( -
- You haven't set any environment variables yet. - - Add new - -
- ) : ( -
- No variables match your search. -
- )} -
+ + )} + + Updated + + + Actions +
+ + {groupedEnvironmentVariables.length > 0 ? ( + isVirtualized && shouldVirtualize ? ( + + ) : ( + + {staticRows.map((variable) => ( + + ))} + + ) + ) : ( + + + + {environmentVariables.length === 0 ? ( +
+ You haven't set any environment variables yet. + + Add new + +
+ ) : ( +
+ No variables match your search. +
+ )} +
+
+
)} -
-
- -
- - Dev environment variables specified here will be overridden by ones in your .env file - when running locally. - +
- + ); } +function getBorderedCellClassName(variable: GroupedEnvironmentVariable) { + if (variable.occurences <= 1) { + return ""; + } + + if (variable.isLastTime) { + return ""; + } + + return "relative after:absolute after:bottom-0 after:left-0 after:right-0 after:h-px after:bg-grid-bright group-hover/table-row:after:bg-grid-bright group-hover/table-row:before:bg-grid-bright"; +} + +function EnvironmentVariableTableRow({ + variable, + revealAll, + vercelIntegration, +}: { + variable: GroupedEnvironmentVariable; + revealAll: boolean; + vercelIntegration: PageVercelIntegration | null; +}) { + const cellClassName = "py-2"; + const borderedCellClassName = getBorderedCellClassName(variable); + + return ( + + + {variable.isFirstTime ? ( + + ) : null} + + + {variable.isSecret ? ( + + + Secret +
+ } + content="This variable is secret and cannot be revealed." + /> + ) : ( + + )} + + + + + + {vercelIntegration?.enabled && ( + + {variable.environment.type !== "DEVELOPMENT" && ( + + )} + + )} + +
+ {variable.updatedAt ? ( + + + + ) : null} + {variable.updatedByUser ? ( +
+ + {variable.updatedByUser.name} +
+ ) : variable.lastUpdatedBy?.type === "integration" && + variable.lastUpdatedBy?.integration === "vercel" ? ( +
+ + + {variable.lastUpdatedBy.integration} + +
+ ) : null} +
+
+ + + + + } + /> + + ); +} + +function EnvironmentVariablesVirtualTableBody({ + groupedEnvironmentVariables, + scrollRef, + revealAll, + vercelIntegration, + columnCount, +}: { + groupedEnvironmentVariables: GroupedEnvironmentVariable[]; + scrollRef: RefObject; + revealAll: boolean; + vercelIntegration: PageVercelIntegration | null; + columnCount: number; +}) { + const rowVirtualizer = useVirtualizer({ + count: groupedEnvironmentVariables.length, + getScrollElement: () => scrollRef.current, + estimateSize: () => ROW_ESTIMATE_HEIGHT, + overscan: VIRTUAL_OVERSCAN, + }); + + const virtualItems = rowVirtualizer.getVirtualItems(); + const topSpacerHeight = virtualItems[0]?.start ?? 0; + const bottomSpacerHeight = + rowVirtualizer.getTotalSize() - (virtualItems.at(-1)?.end ?? 0); + + return ( + + {topSpacerHeight > 0 && ( + + + + )} + {virtualItems.map((virtualRow) => { + const variable = groupedEnvironmentVariables[virtualRow.index]; + if (!variable) { + return null; + } + + return ( + + ); + })} + {bottomSpacerHeight > 0 && ( + + + + )} + + ); +} + function EditEnvironmentVariablePanel({ variable, revealAll, @@ -561,9 +719,12 @@ function EditEnvironmentVariablePanel({ return ( - + Edit environment variable @@ -715,14 +876,7 @@ function VercelSyncCheckbox({ if (!pullEnvVarsEnabledForEnv) { return ( {}} - /> - } + button={ {}} />} content="Enable 'Pull env vars before build' for this environment in Vercel settings." /> ); diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx index f42c73b5ea3..f65de7bc8c1 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx @@ -1,52 +1,16 @@ -import { type LoaderFunctionArgs, type ActionFunctionArgs, json } from "@remix-run/server-runtime"; -import { type MetaFunction, useFetcher, useRevalidator } from "@remix-run/react"; -import { BellAlertIcon } from "@heroicons/react/20/solid"; -import { IconAlarmSnooze as IconAlarmSnoozeBase, IconCircleDotted } from "@tabler/icons-react"; import { parse } from "@conform-to/zod"; -import { z } from "zod"; -import { ErrorStatusBadge } from "~/components/errors/ErrorStatusBadge"; -import { ServiceValidationError } from "~/v3/services/baseService.server"; -import { TypedAwait, typeddefer, useTypedLoaderData } from "remix-typedjson"; -import { requireUser, requireUserId } from "~/services/session.server"; -import { - EnvironmentParamSchema, - v3CreateBulkActionPath, - v3ErrorsPath, - v3RunsPath, -} from "~/utils/pathBuilder"; -import { findProjectBySlug } from "~/models/project.server"; -import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; -import { - ErrorGroupPresenter, - type ErrorGroupActivity, - type ErrorGroupActivityVersions, - type ErrorGroupOccurrences, - type ErrorGroupSummary, - type ErrorGroupState, -} from "~/presenters/v3/ErrorGroupPresenter.server"; -import { type NextRunList } from "~/presenters/v3/NextRunListPresenter.server"; -import { $replica } from "~/db.server"; -import { logsClickhouseClient, clickhouseClient } from "~/services/clickhouseInstance.server"; -import { NavBar, PageTitle } from "~/components/primitives/PageHeader"; -import { PageBody } from "~/components/layout/AppLayout"; +import { BellAlertIcon } from "@heroicons/react/20/solid"; +import { type MetaFunction, useFetcher, useRevalidator } from "@remix-run/react"; +import { type ActionFunctionArgs, json, type LoaderFunctionArgs } from "@remix-run/server-runtime"; import { - ResizableHandle, - ResizablePanel, - ResizablePanelGroup, -} from "~/components/primitives/Resizable"; + IconAlarmSnooze as IconAlarmSnoozeBase, + IconBugFilled, + IconCircleDotted, +} from "@tabler/icons-react"; +import { ErrorId } from "@trigger.dev/core/v3/isomorphic"; +import { isPast } from "date-fns"; import { AnimatePresence, motion } from "framer-motion"; import { Suspense, useEffect, useMemo, useRef, useState } from "react"; -import { Spinner } from "~/components/primitives/Spinner"; -import { Paragraph } from "~/components/primitives/Paragraph"; -import { Callout } from "~/components/primitives/Callout"; -import { Header2, Header3 } from "~/components/primitives/Headers"; - -import { formatDistanceToNow, isPast } from "date-fns"; - -import * as Property from "~/components/primitives/PropertyTable"; -import { TaskRunsTable } from "~/components/runs/v3/TaskRunsTable"; -import { DateTime, RelativeDateTime } from "~/components/primitives/DateTime"; -import { ErrorId } from "@trigger.dev/core/v3/isomorphic"; import { Bar, BarChart, @@ -57,31 +21,68 @@ import { XAxis, YAxis, } from "recharts"; -import TooltipPortal from "~/components/primitives/TooltipPortal"; -import { TimeFilter, timeFilterFromTo } from "~/components/runs/v3/SharedFilters"; -import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; -import { DirectionSchema, ListPagination } from "~/components/ListPagination"; -import { Button, LinkButton } from "~/components/primitives/Buttons"; +import { TypedAwait, typeddefer, useTypedLoaderData } from "remix-typedjson"; +import { z } from "zod"; import { ListCheckedIcon } from "~/assets/icons/ListCheckedIcon"; -import { useOrganization } from "~/hooks/useOrganizations"; -import { useProject } from "~/hooks/useProject"; -import { useEnvironment } from "~/hooks/useEnvironment"; import { RunsIcon } from "~/assets/icons/RunsIcon"; -import type { TaskRunListSearchFilters } from "~/components/runs/v3/RunFilters"; -import { useSearchParams } from "~/hooks/useSearchParam"; -import { CopyableText } from "~/components/primitives/CopyableText"; -import { cn } from "~/utils/cn"; -import { LogsVersionFilter } from "~/components/logs/LogsVersionFilter"; import { CodeBlock } from "~/components/code/CodeBlock"; - -import { Popover, PopoverArrowTrigger, PopoverContent } from "~/components/primitives/Popover"; -import { ErrorGroupActions } from "~/v3/services/errorGroupActions.server"; +import { ErrorStatusBadge } from "~/components/errors/ErrorStatusBadge"; import { - ErrorStatusMenuItems, CustomIgnoreDialog, + ErrorStatusMenuItems, statusActionToastMessage, } from "~/components/errors/ErrorStatusMenu"; +import { PageBody } from "~/components/layout/AppLayout"; +import { DirectionSchema, ListPagination } from "~/components/ListPagination"; +import { LogsVersionFilter } from "~/components/logs/LogsVersionFilter"; +import { LinkButton } from "~/components/primitives/Buttons"; +import { Callout } from "~/components/primitives/Callout"; +import { CopyableText } from "~/components/primitives/CopyableText"; +import { DateTime, RelativeDateTime } from "~/components/primitives/DateTime"; +import { Header2, Header3 } from "~/components/primitives/Headers"; +import { NavBar, PageAccessories, PageTitle } from "~/components/primitives/PageHeader"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { Popover, PopoverArrowTrigger, PopoverContent } from "~/components/primitives/Popover"; +import * as Property from "~/components/primitives/PropertyTable"; +import { + ResizableHandle, + ResizablePanel, + ResizablePanelGroup, +} from "~/components/primitives/Resizable"; +import { Spinner } from "~/components/primitives/Spinner"; import { useToast } from "~/components/primitives/Toast"; +import TooltipPortal from "~/components/primitives/TooltipPortal"; +import type { TaskRunListSearchFilters } from "~/components/runs/v3/RunFilters"; +import { TimeFilter, timeFilterFromTo } from "~/components/runs/v3/SharedFilters"; +import { TaskRunsTable } from "~/components/runs/v3/TaskRunsTable"; +import { $replica } from "~/db.server"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { useSearchParams } from "~/hooks/useSearchParam"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { + type ErrorGroupActivity, + type ErrorGroupActivityVersions, + type ErrorGroupOccurrences, + ErrorGroupPresenter, + type ErrorGroupState, + type ErrorGroupSummary, +} from "~/presenters/v3/ErrorGroupPresenter.server"; +import { type NextRunList } from "~/presenters/v3/NextRunListPresenter.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; +import { requireUser, requireUserId } from "~/services/session.server"; +import { cn } from "~/utils/cn"; +import { + EnvironmentParamSchema, + v3CreateBulkActionPath, + v3ErrorsPath, + v3RunsPath, +} from "~/utils/pathBuilder"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; +import { ErrorGroupActions } from "~/v3/services/errorGroupActions.server"; export const meta: MetaFunction = ({ data }) => { return [ @@ -163,6 +164,11 @@ export const action = async ({ request, params }: ActionFunctionArgs) => { let occurrenceCountAtIgnoreTime: number | undefined; if (submission.value.totalOccurrences) { + const clickhouseClient = await clickhouseFactory.getClickhouseForOrganization( + environment.organizationId, + "query" + ); + const qb = clickhouseClient.errors.listQueryBuilder(); qb.where("organization_id = {organizationId: String}", { organizationId: project.organizationId, @@ -236,6 +242,11 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const directionRaw = url.searchParams.get("direction") ?? undefined; const direction = directionRaw ? DirectionSchema.parse(directionRaw) : undefined; + const [logsClickhouseClient, clickhouseClient] = await Promise.all([ + clickhouseFactory.getClickhouseForOrganization(environment.organizationId, "logs"), + clickhouseFactory.getClickhouseForOrganization(environment.organizationId, "standard"), + ]); + const presenter = new ErrorGroupPresenter($replica, logsClickhouseClient, clickhouseClient); const detailPromise = presenter @@ -324,13 +335,23 @@ export default function Page() { }} title={{ErrorId.toFriendlyId(fingerprint)}} /> + + + Configure alerts… + + -
+
+
Loading error details…
@@ -366,7 +387,6 @@ export default function Page() { projectParam={projectParam} envParam={envParam} fingerprint={fingerprint} - alertsHref={alertsHref} /> ); }} @@ -385,7 +405,6 @@ function ErrorGroupDetail({ projectParam, envParam, fingerprint, - alertsHref, }: { errorGroup: ErrorGroupSummary | undefined; runList: NextRunList | undefined; @@ -394,7 +413,6 @@ function ErrorGroupDetail({ projectParam: string; envParam: string; fingerprint: string; - alertsHref: string; }) { const { value, values } = useSearchParams(); const organization = useOrganization(); @@ -499,9 +517,12 @@ function ErrorGroupDetail({ additionalTableState={{ errorId: ErrorId.toFriendlyId(fingerprint) }} /> ) : ( - - No runs found for this error. - +
+ + + No runs found for this error. + +
)}
@@ -510,11 +531,7 @@ function ErrorGroupDetail({ {/* Right-hand detail sidebar */} - + ); @@ -523,24 +540,14 @@ function ErrorGroupDetail({ function ErrorDetailSidebar({ errorGroup, fingerprint, - alertsHref, }: { errorGroup: ErrorGroupSummary; fingerprint: string; - alertsHref: string; }) { return (
-
+
Details - - Configure alerts -
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx index e92b5b34644..385f0cd8b19 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx @@ -55,6 +55,7 @@ import { statusActionToastMessage, } from "~/components/errors/ErrorStatusMenu"; import { useToast } from "~/components/primitives/Toast"; +import { SimpleTooltip } from "~/components/primitives/Tooltip"; import TooltipPortal from "~/components/primitives/TooltipPortal"; import { appliedSummary, FilterMenuProvider, TimeFilter } from "~/components/runs/v3/SharedFilters"; import { $replica } from "~/db.server"; @@ -70,7 +71,7 @@ import { type ErrorOccurrences, type ErrorsList as ErrorsListData, } from "~/presenters/v3/ErrorsListPresenter.server"; -import { logsClickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { getCurrentPlan } from "~/services/platform.v3.server"; import { requireUser } from "~/services/session.server"; import { formatNumberCompact } from "~/utils/numberFormatter"; @@ -123,6 +124,10 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const plan = await getCurrentPlan(project.organizationId); const retentionLimitDays = plan?.v3Subscription?.plan?.limits.logRetentionDays.number ?? 30; + const logsClickhouseClient = await clickhouseFactory.getClickhouseForOrganization( + project.organizationId, + "logs" + ); const presenter = new ErrorsListPresenter($replica, logsClickhouseClient); const listPromise = presenter @@ -289,6 +294,8 @@ const errorStatusOptions = [ const statusIcon = ; const statusShortcut = { key: "s" }; +const timeShortcut = { key: "d" }; +const alertsShortcut = { key: "c" }; function StatusFilter() { const { values, del } = useSearchParams(); @@ -305,8 +312,9 @@ function StatusFilter() { variant="secondary/small" shortcut={statusShortcut} tooltipTitle="Filter by status" + className="pl-1.5" > - Status + Status } searchValue={search} @@ -415,9 +423,10 @@ function FiltersBar({ return (
-
+
{list ? ( <> + @@ -425,45 +434,55 @@ function FiltersBar({ defaultPeriod={defaultPeriod} maxPeriodDays={retentionLimitDays} labelName="Occurred" + shortcut={timeShortcut} /> - {hasFilters && ( -
+
-
+
- Configure alerts + Configure alerts… {list && }
@@ -706,9 +725,15 @@ function ErrorActivityGraph({ activity }: { activity: ErrorOccurrenceActivity })
- - {formatNumberCompact(maxCount)} - + + {formatNumberCompact(maxCount)} + + } + content="Peak occurrences in a single time bucket" + />
); } diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx index af3cc30a246..c913623ebab 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx @@ -16,7 +16,7 @@ import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { LogsListPresenter, LogEntry } from "~/presenters/v3/LogsListPresenter.server"; import type { LogLevel } from "~/utils/logUtils"; import { $replica, prisma } from "~/db.server"; -import { logsClickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { NavBar, PageTitle } from "~/components/primitives/PageHeader"; import { PageBody, PageContainer } from "~/components/layout/AppLayout"; import { Suspense, useCallback, useEffect, useMemo, useRef, useState, useTransition } from "react"; @@ -137,7 +137,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const plan = await getCurrentPlan(project.organizationId); const retentionLimitDays = plan?.v3Subscription?.plan?.limits.logRetentionDays.number ?? 30; - const presenter = new LogsListPresenter($replica, logsClickhouseClient); + const logsClickhouse = await clickhouseFactory.getClickhouseForOrganization(project.organizationId, "logs"); + const presenter = new LogsListPresenter($replica, logsClickhouse); const listPromise = presenter .call(project.organizationId, environment.id, { @@ -260,20 +261,22 @@ function FiltersBar({ return (
-
+
{list ? ( <> + - {hasFilters && ( -
+
-
+
- + + }> + + + +
+ Toggle all details + +
+
+
); @@ -585,13 +691,13 @@ function CompareDialog({ return ( - - + + Compare models {rows.length > 0 ? (
- +
Metric @@ -1116,7 +1222,7 @@ export default function ModelsPage() { -
+
{ return typedjson({ comparison: [] as ModelComparisonItem[], models: responseModels }); } - const presenter = new ModelRegistryPresenter(clickhouseClient); + const clickhouse = await clickhouseFactory.getClickhouseForOrganization(project.organizationId, "standard"); + const presenter = new ModelRegistryPresenter(clickhouse); const now = new Date(); const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000); diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.playground.$agentParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.playground.$agentParam/route.tsx new file mode 100644 index 00000000000..7c37089bf63 --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.playground.$agentParam/route.tsx @@ -0,0 +1,1238 @@ +import { + ArrowUpIcon, + BoltIcon, + CpuChipIcon, + StopIcon, + ArrowPathIcon, + TrashIcon, +} from "@heroicons/react/20/solid"; +import { type MetaFunction } from "@remix-run/node"; +import { Link, useFetcher, useNavigate, useRouteLoaderData } from "@remix-run/react"; +import { typedjson, useTypedLoaderData } from "remix-typedjson"; +import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { useCallback, useEffect, useRef, useState } from "react"; +import { useChat } from "@ai-sdk/react"; +import { TriggerChatTransport } from "@trigger.dev/sdk/chat"; +import { MainCenteredContainer } from "~/components/layout/AppLayout"; +import { Badge } from "~/components/primitives/Badge"; +import { Button, LinkButton } from "~/components/primitives/Buttons"; +import { CopyButton } from "~/components/primitives/CopyButton"; +import { DurationPicker } from "~/components/primitives/DurationPicker"; +import { Header3 } from "~/components/primitives/Headers"; +import { Hint } from "~/components/primitives/Hint"; +import { Input } from "~/components/primitives/Input"; +import { InputGroup } from "~/components/primitives/InputGroup"; +import { Label } from "~/components/primitives/Label"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { Spinner } from "~/components/primitives/Spinner"; +import { Popover, PopoverContent, PopoverTrigger } from "~/components/primitives/Popover"; +import { ClockRotateLeftIcon } from "~/assets/icons/ClockRotateLeftIcon"; +import type { PlaygroundConversation } from "~/presenters/v3/PlaygroundPresenter.server"; +import { DateTime } from "~/components/primitives/DateTime"; +import { cn } from "~/utils/cn"; +import { JSONEditor } from "~/components/code/JSONEditor"; +import { ToolUseRow, AssistantResponse, ChatBubble } from "~/components/runs/v3/ai/AIChatMessages"; +import { MessageBubble } from "~/components/runs/v3/agent/AgentMessageView"; +import { useAutoScrollToBottom } from "~/hooks/useAutoScrollToBottom"; +import { + ResizableHandle, + ResizablePanel, + ResizablePanelGroup, +} from "~/components/primitives/Resizable"; +import { + ClientTabs, + ClientTabsContent, + ClientTabsList, + ClientTabsTrigger, +} from "~/components/primitives/ClientTabs"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { playgroundPresenter } from "~/presenters/v3/PlaygroundPresenter.server"; +import { requireUserId } from "~/services/session.server"; +import { RunTagInput } from "~/components/runs/v3/RunTagInput"; +import { Select, SelectItem } from "~/components/primitives/Select"; +import { EnvironmentParamSchema, v3PlaygroundAgentPath } from "~/utils/pathBuilder"; +import { env as serverEnv } from "~/env.server"; +import { generateJWT as internal_generateJWT, MachinePresetName } from "@trigger.dev/core/v3"; +import { extractJwtSigningSecretKey } from "~/services/realtime/jwtAuth.server"; +import { SchemaTabContent } from "~/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/SchemaTabContent"; +import { AIPayloadTabContent } from "~/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/AIPayloadTabContent"; +import type { UIMessage } from "@ai-sdk/react"; + +export const meta: MetaFunction = () => { + return [{ title: "Playground | Trigger.dev" }]; +}; + +export const loader = async ({ request, params }: LoaderFunctionArgs) => { + const userId = await requireUserId(request); + const { organizationSlug, projectParam, envParam } = EnvironmentParamSchema.parse(params); + const agentSlug = params.agentParam; + + if (!agentSlug) { + throw new Response(undefined, { status: 404, statusText: "Agent not specified" }); + } + + const project = await findProjectBySlug(organizationSlug, projectParam, userId); + if (!project) { + throw new Response(undefined, { status: 404, statusText: "Project not found" }); + } + + const environment = await findEnvironmentBySlug(project.id, envParam, userId); + if (!environment) { + throw new Response(undefined, { status: 404, statusText: "Environment not found" }); + } + + const agent = await playgroundPresenter.getAgent({ + environmentId: environment.id, + environmentType: environment.type, + agentSlug, + }); + + if (!agent) { + throw new Response(undefined, { status: 404, statusText: "Agent not found" }); + } + + const agentConfig = agent.config as { type?: string } | null; + const apiOrigin = serverEnv.API_ORIGIN || serverEnv.LOGIN_ORIGIN || "http://localhost:3030"; + + const recentConversations = await playgroundPresenter.getRecentConversations({ + environmentId: environment.id, + agentSlug, + userId, + }); + + // Check for ?conversation= param to resume an existing conversation + const url = new URL(request.url); + const conversationId = url.searchParams.get("conversation"); + + let activeConversation: { + chatId: string; + runFriendlyId: string | null; + publicAccessToken: string | null; + clientData: unknown; + messages: unknown; + lastEventId: string | null; + } | null = null; + + if (conversationId) { + const conv = recentConversations.find((c) => c.id === conversationId); + if (conv) { + let jwt: string | null = null; + if (conv.isActive && conv.runFriendlyId) { + jwt = await internal_generateJWT({ + secretKey: extractJwtSigningSecretKey(environment), + payload: { + sub: environment.id, + pub: true, + scopes: [`read:runs:${conv.runFriendlyId}`, `write:inputStreams:${conv.runFriendlyId}`], + }, + expirationTime: "1h", + }); + } + + activeConversation = { + chatId: conv.chatId, + runFriendlyId: conv.runFriendlyId, + publicAccessToken: jwt, + clientData: conv.clientData, + messages: conv.messages, + lastEventId: conv.lastEventId, + }; + } + } + + return typedjson({ + agent: { + slug: agent.slug, + filePath: agent.filePath, + type: agentConfig?.type ?? "unknown", + clientDataSchema: agent.payloadSchema ?? null, + }, + apiOrigin, + recentConversations, + activeConversation, + }); +}; + +export default function PlaygroundAgentPage() { + const { agent, activeConversation } = useTypedLoaderData(); + // Key on agent slug + conversation chatId so React remounts all stateful + // children when switching agents or navigating between conversations. + // Without the agent slug, switching agents keeps key="new" and React + // reuses the component — useState initializers don't re-run. + const conversationKey = `${agent.slug}:${activeConversation?.chatId ?? "new"}`; + return ; +} + +const PARENT_ROUTE_ID = + "routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.playground"; + +function PlaygroundChat() { + const { agent, apiOrigin, recentConversations, activeConversation } = + useTypedLoaderData(); + const parentData = useRouteLoaderData(PARENT_ROUTE_ID) as + | { + agents: Array<{ slug: string }>; + versions: string[]; + regions: Array<{ + id: string; + name: string; + description?: string; + isDefault: boolean; + }>; + isDev: boolean; + } + | undefined; + const agents = parentData?.agents ?? []; + const versions = parentData?.versions ?? []; + const regions = parentData?.regions ?? []; + const isDev = parentData?.isDev ?? false; + const defaultRegion = regions.find((r) => r.isDefault); + const navigate = useNavigate(); + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + + const [conversationId, setConversationId] = useState(() => + activeConversation + ? recentConversations.find((c) => c.chatId === activeConversation.chatId)?.id ?? null + : null + ); + const [chatId, setChatId] = useState(() => activeConversation?.chatId ?? crypto.randomUUID()); + const [clientDataJson, setClientDataJson] = useState(() => + activeConversation?.clientData ? JSON.stringify(activeConversation.clientData, null, 2) : "{}" + ); + const clientDataJsonRef = useRef(clientDataJson); + clientDataJsonRef.current = clientDataJson; + const [machine, setMachine] = useState(undefined); + const [tags, setTags] = useState([]); + const [maxAttempts, setMaxAttempts] = useState(undefined); + const [maxDuration, setMaxDuration] = useState(undefined); + const [version, setVersion] = useState(undefined); + const [region, setRegion] = useState(() => + isDev ? undefined : defaultRegion?.name + ); + + const actionPath = `/resources/orgs/${organization.slug}/projects/${project.slug}/env/${environment.slug}/playground/action`; + + // Server-side `start` via Remix action — atomically creates the + // backing Session for `chatId` and triggers the first run, returns + // the session-scoped PAT. Idempotent: called on initial use AND on + // 401, so the same code path serves both first-run and PAT renewal. + const startSession = useCallback( + async (): Promise => { + const formData = new FormData(); + formData.set("intent", "start"); + formData.set("agentSlug", agent.slug); + formData.set("chatId", chatId); + formData.set("clientData", clientDataJsonRef.current); + if (tags.length > 0) formData.set("tags", tags.join(",")); + if (machine) formData.set("machine", machine); + if (maxAttempts) formData.set("maxAttempts", String(maxAttempts)); + if (maxDuration) formData.set("maxDuration", String(maxDuration)); + if (version) formData.set("version", version); + if (region) formData.set("region", region); + + const response = await fetch(actionPath, { method: "POST", body: formData }); + const data = (await response.json()) as { + runId?: string; + publicAccessToken?: string; + conversationId?: string; + error?: string; + }; + + if (!response.ok || !data.publicAccessToken) { + throw new Error(data.error ?? "Failed to start chat session"); + } + + if (data.conversationId) { + setConversationId(data.conversationId); + } + + return data.publicAccessToken; + }, + [actionPath, agent.slug, chatId, tags, machine, maxAttempts, maxDuration, version, region] + ); + + // Resource route prefix — all realtime traffic goes through session-authed routes + const playgroundBaseURL = `${apiOrigin}/resources/orgs/${organization.slug}/projects/${project.slug}/env/${environment.slug}/playground`; + + // The transport is constructed once (guarded ref below); reading + // `startSession` directly there would freeze its closure to the + // first render's sidebar values, so subsequent edits to tags / + // machine / maxAttempts / maxDuration / version / region would be + // silently ignored on the first send. Mirror the `clientDataJsonRef` + // pattern so the transport always calls the latest `startSession`. + const startSessionRef = useRef(startSession); + startSessionRef.current = startSession; + + // Create TriggerChatTransport directly (not via useTriggerChatTransport hook + // to avoid React version mismatch between SDK and webapp) + const transportRef = useRef(null); + if (transportRef.current === null) { + transportRef.current = new TriggerChatTransport({ + task: agent.slug, + // The Remix action is idempotent on `(env, externalId)` and + // returns a fresh session PAT every time, so it serves both + // first-run create and PAT renewal. `startSession` runs on + // `transport.preload(chatId)` and lazily on the first + // `sendMessage`; `accessToken` runs on a 401/403 from any + // session-PAT-authed request. Wiring the same call to both + // keeps the Preload button working without a separate refresh + // route. + startSession: async () => ({ publicAccessToken: await startSessionRef.current() }), + accessToken: () => startSessionRef.current(), + baseURL: playgroundBaseURL, + // Use safeParseJson so a mid-edit invalid JSON state in the editor + // doesn't throw and crash the component during transport construction. + clientData: safeParseJson(clientDataJson), + ...(activeConversation?.publicAccessToken + ? { + sessions: { + [activeConversation.chatId]: { + publicAccessToken: activeConversation.publicAccessToken, + lastEventId: activeConversation.lastEventId ?? undefined, + }, + }, + } + : {}), + }); + } + const transport = transportRef.current; + + // Keep the transport's `defaultMetadata` in sync with the JSON editor. + // Without this the transport uses the value captured at construction for + // every per-turn metadata merge, even after the user edits the JSON. + // `startSession` reads from `clientDataJsonRef.current` directly so session + // creation is unaffected — this only fixes the per-turn metadata path. + useEffect(() => { + // JSONEditor fires onChange on every keystroke — intermediate values + // like `{"key":` are syntactically invalid. `safeParseJson` returns + // `{}` on parse failure so the next valid keystroke lands the update + // without crashing the component mid-edit. + transport.setClientData(safeParseJson(clientDataJson)); + }, [clientDataJson, transport]); + + // Initial messages from persisted conversation (for resume) + const initialMessages = activeConversation?.messages + ? (activeConversation.messages as UIMessage[]) + : []; + + // Track the initial message count so we only save after genuinely new turns + // (not during resume replay which re-fires onFinish for replayed turns) + const initialMessageCountRef = useRef(initialMessages?.length ?? 0); + + // Save messages after each turn completes + const saveMessages = useCallback( + (allMessages: UIMessage[]) => { + // Skip saves during resume replay — only save when we have more messages than we started with + if (allMessages.length <= initialMessageCountRef.current) return; + + const currentSession = transport.getSession(chatId); + const lastEventId = currentSession?.lastEventId; + + const formData = new FormData(); + formData.set("intent", "save"); + formData.set("agentSlug", agent.slug); + formData.set("chatId", chatId); + formData.set("messages", JSON.stringify(allMessages)); + if (lastEventId) formData.set("lastEventId", lastEventId); + + // Fire and forget + fetch(actionPath, { method: "POST", body: formData }).catch(() => {}); + + // Update the baseline so subsequent saves work correctly + initialMessageCountRef.current = allMessages.length; + }, + [chatId, agent.slug, actionPath, transport] + ); + + // useChat from AI SDK — handles message accumulation, streaming, stop + const { messages, sendMessage, stop, status, error } = useChat({ + id: chatId, + messages: initialMessages, + transport, + onFinish: ({ messages: allMessages }) => { + saveMessages(allMessages); + }, + }); + + const isStreaming = status === "streaming"; + const isSubmitted = status === "submitted"; + + // Sticky-bottom auto-scroll for the messages list. The hook walks up to + // the surrounding `overflow-y-auto` panel and follows the conversation + // as new chunks stream in — pauses if you scroll up to read history, + // resumes when you scroll back into the bottom band. Same behavior as + // the run-inspector Agent tab. + const messagesRootRef = useAutoScrollToBottom([messages, isSubmitted]); + + // Pending messages — steering during streaming + const pending = usePlaygroundPendingMessages({ + transport, + chatId, + status, + messages, + sendMessage, + metadata: safeParseJson(clientDataJson), + }); + + const [input, setInput] = useState(""); + const [preloading, setPreloading] = useState(false); + const [preloaded, setPreloaded] = useState(false); + const inputRef = useRef(null); + + const session = transport.getSession(chatId); + + const handlePreload = useCallback(async () => { + setPreloading(true); + try { + await transport.preload(chatId); + setPreloaded(true); + inputRef.current?.focus(); + } finally { + setPreloading(false); + } + }, [transport, chatId]); + + const handleNewConversation = useCallback(() => { + // Navigate without ?conversation= so the loader returns activeConversation=null + // and the key changes to "new", causing a full remount with fresh state. + navigate(window.location.pathname); + }, [navigate]); + + const handleDeleteConversation = useCallback(async () => { + if (!conversationId) return; + + const formData = new FormData(); + formData.set("intent", "delete"); + formData.set("agentSlug", agent.slug); + formData.set("deleteConversationId", conversationId); + + await fetch(actionPath, { method: "POST", body: formData }); + handleNewConversation(); + }, [conversationId, agent.slug, actionPath, handleNewConversation]); + + const handleSend = useCallback(() => { + const trimmed = input.trim(); + if (!trimmed) return; + + setInput(""); + // steer() handles both cases: sends via input stream during streaming, + // or sends as a normal message when ready + pending.steer(trimmed); + }, [input, pending]); + + const handleKeyDown = useCallback( + (e: React.KeyboardEvent) => { + if (e.key === "Enter" && !e.shiftKey) { + e.preventDefault(); + handleSend(); + } + }, + [handleSend] + ); + + return ( + + +
+ {/* Header */} +
+
+ + {formatAgentType(agent.type)} +
+
+ {activeConversation?.runFriendlyId && ( + + View run + + )} + {messages.length > 0 && ( + + Copy raw + + )} + + {conversationId && ( + +
+
+ + {/* Messages */} +
+ {/* Always-mounted scroll-target wrapper so useAutoScrollToBottom + can find its container from `rootRef.current.parentElement` + on mount, even before any messages exist. */} +
+ {messages.length === 0 ? ( + +
+ {preloaded ? ( + <> + + Preloaded + + Agent is warmed up and waiting. Type a message below to start. + + + ) : ( + <> + + Start a conversation + + Type a message below to start testing{" "} + {agent.slug} + + {!session && ( + + )} + + )} +
+
+ ) : ( +
+ {messages.map((msg) => ( + + ))} + {isSubmitted && ( +
+
+ + Thinking... +
+
+ )} +
+ )} +
+
+ + {/* Error */} + {error && ( +
+ {error.message} +
+ )} + + {/* Input */} +
+
+