diff --git a/.changeset/agent-skills.md b/.changeset/agent-skills.md new file mode 100644 index 00000000000..5ed3b11fc2f --- /dev/null +++ b/.changeset/agent-skills.md @@ -0,0 +1,16 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +"@trigger.dev/build": patch +"trigger.dev": patch +--- + +Add Agent Skills for `chat.agent`. Drop a folder with a `SKILL.md` and any helper scripts/references next to your task code, register it with `skills.define({ id, path })`, and the CLI bundles it into the deploy image automatically — no `trigger.config.ts` changes. The agent gets a one-line summary in its system prompt and discovers full instructions on demand via `loadSkill`, with `bash` and `readFile` tools scoped per-skill (path-traversal guards, output caps, abort-signal propagation). + +```ts +const pdfSkill = skills.define({ id: "pdf-extract", path: "./skills/pdf-extract" }); + +chat.skills.set([await pdfSkill.local()]); +``` + +Built on the [AI SDK cookbook pattern](https://ai-sdk.dev/cookbook/guides/agent-skills) — portable across providers. SDK + CLI only for now; dashboard-editable `SKILL.md` text is on the roadmap. diff --git a/.changeset/ai-prompts.md b/.changeset/ai-prompts.md new file mode 100644 index 00000000000..511aa303097 --- /dev/null +++ b/.changeset/ai-prompts.md @@ -0,0 +1,52 @@ +--- +"@trigger.dev/sdk": minor +--- + +**AI Prompts** — define prompt templates as code alongside your tasks, version them on deploy, and override the text or model from the dashboard without redeploying. Prompts integrate with the Vercel AI SDK via `toAISDKTelemetry()` (links every generation span back to the prompt) and with `chat.agent` via `chat.prompt.set()` + `chat.toStreamTextOptions()`. 
+ +```ts +import { prompts } from "@trigger.dev/sdk"; +import { generateText } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; + +export const supportPrompt = prompts.define({ + id: "customer-support", + model: "gpt-4o", + config: { temperature: 0.7 }, + variables: z.object({ + customerName: z.string(), + plan: z.string(), + issue: z.string(), + }), + content: `You are a support agent for Acme. + +Customer: {{customerName}} ({{plan}} plan) +Issue: {{issue}}`, +}); + +const resolved = await supportPrompt.resolve({ + customerName: "Alice", + plan: "Pro", + issue: "Can't access billing", +}); + +const result = await generateText({ + model: openai(resolved.model ?? "gpt-4o"), + system: resolved.text, + prompt: "Can't access billing", + ...resolved.toAISDKTelemetry(), +}); +``` + +**What you get:** + +- **Code-defined, deploy-versioned templates** — define with `prompts.define({ id, model, config, variables, content })`. Every deploy creates a new version visible in the dashboard. Mustache-style placeholders (`{{var}}`, `{{#cond}}...{{/cond}}`) with Zod / ArkType / Valibot-typed variables. +- **Dashboard overrides** — change a prompt's text or model from the dashboard without redeploying. Overrides take priority over the deployed "current" version and are environment-scoped (dev / staging / production independent). +- **Resolve API** — `prompt.resolve(vars, { version?, label? })` returns the compiled `text`, resolved `model`, `version`, and labels. Standalone `prompts.resolve(slug, vars)` for cross-file resolution with full type inference on slug and variable shape. +- **AI SDK integration** — spread `resolved.toAISDKTelemetry({ ...extra })` into any `generateText` / `streamText` call and every generation span links to the prompt in the dashboard alongside its input variables, model, tokens, and cost. 
+- **`chat.agent` integration** — `chat.prompt.set(resolved)` stores the resolved prompt run-scoped; `chat.toStreamTextOptions({ registry })` pulls `system`, `model` (resolved via the AI SDK provider registry), `temperature` / `maxTokens` / etc., and telemetry into a single spread for `streamText`. +- **Management SDK** — `prompts.list()`, `prompts.versions(slug)`, `prompts.promote(slug, version)`, `prompts.createOverride(slug, body)`, `prompts.updateOverride(slug, body)`, `prompts.removeOverride(slug)`, `prompts.reactivateOverride(slug, version)`. +- **Dashboard** — prompts list with per-prompt usage sparklines; per-prompt detail with Template / Details / Versions / Generations / Metrics tabs. AI generation spans get a custom inspector showing the linked prompt's metadata, input variables, and template content alongside model, tokens, cost, and the message thread. + +See [/docs/ai/prompts](https://trigger.dev/docs/ai/prompts) for the full reference — template syntax, version resolution order, override workflow, and type utilities (`PromptHandle`, `PromptIdentifier`, `PromptVariables`). diff --git a/.changeset/ai-sdk-7-support.md b/.changeset/ai-sdk-7-support.md new file mode 100644 index 00000000000..9f81c50973f --- /dev/null +++ b/.changeset/ai-sdk-7-support.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Adds AI SDK 7 support. The `ai` peer range now includes v7, and the `chat.agent` / chat surfaces work against v7's ESM-only build. On v7, install `@ai-sdk/otel` alongside `ai` and the SDK registers it for you so `experimental_telemetry` spans keep flowing into your run traces (v7 stopped emitting them from `ai` core). v5 and v6 keep working unchanged. 
diff --git a/.changeset/ai-tool-helpers.md b/.changeset/ai-tool-helpers.md new file mode 100644 index 00000000000..09e3b612ada --- /dev/null +++ b/.changeset/ai-tool-helpers.md @@ -0,0 +1,15 @@ +--- +"@trigger.dev/sdk": patch +--- + +Add `ai.toolExecute(task)` so you can wire a Trigger subtask in as the `execute` handler of an AI SDK `tool()` while defining `description` and `inputSchema` yourself — useful when you want full control over the tool surface and just need Trigger's subtask machinery for the body. + +```ts +const myTool = tool({ + description: "...", + inputSchema: z.object({ ... }), + execute: ai.toolExecute(mySubtask), +}); +``` + +`ai.tool(task)` (`toolFromTask`) keeps doing the all-in-one wrap and now aligns its return type with AI SDK's `ToolSet`. Minimum `ai` peer raised to `^6.0.116` to avoid cross-version `ToolSet` mismatches in monorepos. diff --git a/.changeset/backpressure-scale-up-freeze.md b/.changeset/backpressure-scale-up-freeze.md new file mode 100644 index 00000000000..b69fad0f262 --- /dev/null +++ b/.changeset/backpressure-scale-up-freeze.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Add optional `shouldPauseScaling` to the supervisor consumer pool scaling options to freeze scale-up while it returns true (scale-down stays allowed). diff --git a/.changeset/bundle-skills-single-pass.md b/.changeset/bundle-skills-single-pass.md new file mode 100644 index 00000000000..30b2c428b22 --- /dev/null +++ b/.changeset/bundle-skills-single-pass.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +Fix `chat.agent` skills silently missing in `trigger dev` for projects whose task files read `process.env` at module top level (e.g. a third-party SDK client initialized at import). Skill folders now bundle into `.trigger/skills/` reliably regardless of which env vars are set when the CLI launches. 
diff --git a/.changeset/cap-idempotency-key-length.md b/.changeset/cap-idempotency-key-length.md new file mode 100644 index 00000000000..d1360369148 --- /dev/null +++ b/.changeset/cap-idempotency-key-length.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Reject overlong `idempotencyKey` values at the API boundary so they no longer trip an internal size limit on the underlying unique index and surface as a generic 500. Inputs are capped at 2048 characters — well above what `idempotencyKeys.create()` produces (a 64-character hash) and above any realistic raw key. Applies to `tasks.trigger`, `tasks.batchTrigger`, `batch.create` (Phase 1 streaming batches), `wait.createToken`, `wait.forDuration`, and the input/session stream waitpoint endpoints. Over-limit requests now return a structured 400 instead. diff --git a/.changeset/chat-agent-hardening.md b/.changeset/chat-agent-hardening.md new file mode 100644 index 00000000000..0ea82c0617e --- /dev/null +++ b/.changeset/chat-agent-hardening.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Reliability fixes for `chat.agent`. A user message sent while the agent is streaming is no longer delivered twice (which could run a duplicate turn), input appends now carry an idempotency key so a retried send can't duplicate a message, stopping a generation clears the streaming state so a page reload doesn't replay the stopped turn, and runs can now carry the full set of dashboard tags instead of being silently truncated. `onTurnComplete` now fires on errored turns (with the thrown error attached) and the failed turn's user message is persisted so it isn't lost on the next run. Custom agents and manual `chat.writeTurnComplete` callers now trim the output stream, sending a custom action no longer leaves a second stream reader running, and a long-lived `watch` subscription no longer grows its dedupe set without bound. 
diff --git a/.changeset/chat-agent-on-boot-hook.md b/.changeset/chat-agent-on-boot-hook.md new file mode 100644 index 00000000000..5eaa078e65e --- /dev/null +++ b/.changeset/chat-agent-on-boot-hook.md @@ -0,0 +1,21 @@ +--- +"@trigger.dev/sdk": minor +--- + +Adds `onBoot` to `chat.agent` — a lifecycle hook that fires once per worker process picking up the chat. Runs for the initial run, preloaded runs, AND reactive continuation runs (post-cancel, crash, `endRun`, `requestUpgrade`, OOM retry), before any other hook. Use it to initialize `chat.local`, open per-process resources, or re-hydrate state from your DB on continuation — anywhere the SAME run picking up after suspend/resume isn't enough. + +```ts +const userContext = chat.local<{ name: string; plan: string }>({ id: "userContext" }); + +export const myChat = chat.agent({ + id: "my-chat", + onBoot: async ({ clientData, continuation }) => { + const user = await db.user.findUnique({ where: { id: clientData.userId } }); + userContext.init({ name: user.name, plan: user.plan }); + }, + run: async ({ messages, signal }) => + streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }), +}); +``` + +Use `onBoot` (not `onChatStart`) for state setup that must run every time a worker picks up the chat — `onChatStart` fires once per chat and won't run on continuation, leaving `chat.local` uninitialized when `run()` tries to use it. diff --git a/.changeset/chat-agent-tools.md b/.changeset/chat-agent-tools.md new file mode 100644 index 00000000000..1d44ea2a659 --- /dev/null +++ b/.changeset/chat-agent-tools.md @@ -0,0 +1,15 @@ +--- +"@trigger.dev/sdk": patch +--- + +Add a `tools` option to `chat.agent`. Declaring your tools here threads them into the SDK's internal `convertToModelMessages`, so each tool's `toModelOutput` is re-applied when prior-turn history is re-converted. 
+ +```ts +chat.agent({ + tools: { readFile, search }, + run: async ({ messages, tools, signal }) => + streamText({ model, messages, tools, abortSignal: signal }), +}); +``` + +Also exports `InferChatUIMessageFromTools` to derive the chat `UIMessage` type (typed tool parts) directly from a tool set. diff --git a/.changeset/chat-agent.md b/.changeset/chat-agent.md new file mode 100644 index 00000000000..733a8ab22e4 --- /dev/null +++ b/.changeset/chat-agent.md @@ -0,0 +1,44 @@ +--- +"@trigger.dev/sdk": minor +"@trigger.dev/core": patch +--- + +**AI Agents** — run AI SDK chat completions as durable Trigger.dev agents instead of fragile API routes. Define an agent in one function, point `useChat` at it from React, and the conversation survives page refreshes, network blips, and process restarts. + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; + +export const myChat = chat.agent({ + id: "my-chat", + run: async ({ messages, signal }) => + streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }), +}); +``` + +```tsx +import { useChat } from "@ai-sdk/react"; +import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; + +const transport = useTriggerChatTransport({ task: "my-chat", accessToken, startSession }); +const { messages, sendMessage } = useChat({ transport }); +``` + +**What you get:** + +- **AI SDK `useChat` integration** — a custom [`ChatTransport`](https://sdk.vercel.ai/docs/ai-sdk-ui/transport) (`useTriggerChatTransport`) plugs straight into Vercel AI SDK's `useChat` hook. Text streaming, tool calls, reasoning, and `data-*` parts all work natively over Trigger.dev's realtime streams. No custom API routes needed. 
+- **First-turn fast path (`chat.headStart`)** — opt-in handler that runs the first turn's `streamText` step in your warm server process while the agent run boots in parallel, cutting cold-start TTFC by roughly half (measured 2801ms → 1218ms on `claude-sonnet-4-6`). The agent owns step 2+ (tool execution, persistence, hooks) so heavy deps stay where they belong. Web Fetch handler works natively in Next.js, Hono, SvelteKit, Remix, Workers, etc.; bridge to Express/Fastify/Koa via `chat.toNodeListener`. New `@trigger.dev/sdk/chat-server` subpath. +- **Multi-turn durability via Sessions** — every chat is backed by a durable Session that outlives any individual run. Conversations resume across page refreshes, idle timeout, crashes, and deploys; `resume: true` reconnects via `lastEventId` so clients only see new chunks. `sessions.list` enumerates chats for inbox-style UIs. +- **Auto-accumulated history, delta-only wire** — the backend accumulates the full conversation across turns; clients only ship the new message each turn. Long chats never hit the 512 KiB body cap. Register `hydrateMessages` to be the source of truth yourself. +- **Lifecycle hooks** — `onPreload`, `onChatStart`, `onValidateMessages`, `hydrateMessages`, `onTurnStart`, `onBeforeTurnComplete`, `onTurnComplete`, `onChatSuspend`, `onChatResume` — for persistence, validation, and post-turn work. +- **Stop generation** — client-driven `transport.stopGeneration(chatId)` aborts mid-stream; the run stays alive for the next message, partial response is captured, and aborted parts (stuck `partial-call` tools, in-progress reasoning) are auto-cleaned. +- **Tool approvals (HITL)** — tools with `needsApproval: true` pause until the user approves or denies via `addToolApprovalResponse`. The runtime reconciles the updated assistant message by ID and continues `streamText`. 
+- **Steering and background injection** — `pendingMessages` injects user messages between tool-call steps so users can steer the agent mid-execution; `chat.inject()` + `chat.defer()` adds context from background work (self-review, RAG, safety checks) between turns. +- **Actions** — non-turn frontend commands (undo, rollback, regenerate, edit) sent via `transport.sendAction`. Fire `hydrateMessages` + `onAction` only — no turn hooks, no `run()`. `onAction` can return a `StreamTextResult` for a model response, or `void` for side-effect-only. +- **Typed state primitives** — `chat.local` for per-run state accessible from hooks, `run()`, tools, and subtasks (auto-serialized through `ai.toolExecute`); `chat.store` for typed shared data between agent and client; `chat.history` for reading and mutating the message chain; `clientDataSchema` for typed `clientData` in every hook. +- **`chat.toStreamTextOptions()`** — one spread into `streamText` wires up versioned system [Prompts](https://trigger.dev/docs/ai/prompts), model resolution, telemetry metadata, compaction, steering, and background injection. +- **Multi-tab coordination** — `multiTab: true` + `useMultiTabChat` prevents duplicate sends and syncs state across browser tabs via `BroadcastChannel`. Non-active tabs go read-only with live updates. +- **Network resilience** — built-in indefinite retry with bounded backoff, reconnect on `online` / tab refocus / bfcache restore, `Last-Event-ID` mid-stream resume. No app code needed. + +See [/docs/ai-chat](https://trigger.dev/docs/ai-chat/overview) for the full surface — quick start, three backend approaches (`chat.agent`, `chat.createSession`, raw task), persistence and code-sandbox patterns, type-level guides, and API reference. 
diff --git a/.changeset/chat-boot-cursor.md b/.changeset/chat-boot-cursor.md new file mode 100644 index 00000000000..eb1b7a41c98 --- /dev/null +++ b/.changeset/chat-boot-cursor.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Continuation chat boots no longer stall for around 10 seconds before the first turn. The `session.in` resume cursor is now found with a non-blocking records read instead of draining an SSE long-poll (which always waited out its full 5 second inactivity window, twice per boot), the boot reads run concurrently, and chat snapshots carry the cursor so subsequent boots skip the scan entirely. diff --git a/.changeset/chat-headstart-hydrate.md b/.changeset/chat-headstart-hydrate.md new file mode 100644 index 00000000000..49e9bb926ee --- /dev/null +++ b/.changeset/chat-headstart-hydrate.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Fix `chat.headStart` when `hydrateMessages` is registered. The warm route's step-1 partial now reaches the agent's accumulator on the hydrate path, so `onTurnComplete` carries the full first turn (the head-start user message included), tool-call handovers resume from step 2 instead of re-running step 1, and the assistant `messageId` stays stable across the handover. diff --git a/.changeset/chat-headstart-reasoning.md b/.changeset/chat-headstart-reasoning.md new file mode 100644 index 00000000000..a53d388c561 --- /dev/null +++ b/.changeset/chat-headstart-reasoning.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Preserve reasoning parts across the `chat.headStart` handover. Extended-thinking models' step-1 reasoning now lands in the durable session history (and `onTurnComplete`) under the same assistant `messageId`, with provider metadata intact so Anthropic thinking signatures survive replays. 
diff --git a/.changeset/chat-history-read-primitives.md b/.changeset/chat-history-read-primitives.md new file mode 100644 index 00000000000..fd26ad8548b --- /dev/null +++ b/.changeset/chat-history-read-primitives.md @@ -0,0 +1,21 @@ +--- +"@trigger.dev/sdk": minor +--- + +Add read primitives to `chat.history` for HITL flows: `getPendingToolCalls()`, `getResolvedToolCalls()`, `extractNewToolResults(message)`, `getChain()`, and `findMessage(messageId)`. These lift the accumulator-walking logic that customers building human-in-the-loop tools were re-implementing into the SDK. + +Use `getPendingToolCalls()` to gate fresh user turns while a tool call is awaiting an answer. Use `extractNewToolResults(message)` to dedup tool results when persisting to your own store — the helper returns only the parts whose `toolCallId` is not already resolved on the chain. + +```ts +const pending = chat.history.getPendingToolCalls(); +if (pending.length > 0) { + // an addToolOutput is expected before a new user message +} + +onTurnComplete: async ({ responseMessage }) => { + const newResults = chat.history.extractNewToolResults(responseMessage); + for (const r of newResults) { + await db.toolResults.upsert({ id: r.toolCallId, output: r.output, errorText: r.errorText }); + } +}; +``` diff --git a/.changeset/chat-session-attributes.md b/.changeset/chat-session-attributes.md new file mode 100644 index 00000000000..ec4c6a54076 --- /dev/null +++ b/.changeset/chat-session-attributes.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Stamp `gen_ai.conversation.id` (the chat id) on every span and metric emitted from inside a `chat.task` or `chat.agent` run. Lets you filter dashboard spans, runs, and metrics by the chat conversation that produced them — independent of the run boundary, so multi-run chats correlate cleanly. No code changes required on the user side. 
diff --git a/.changeset/chat-slim-wire-merge.md b/.changeset/chat-slim-wire-merge.md new file mode 100644 index 00000000000..19ea48a8cdd --- /dev/null +++ b/.changeset/chat-slim-wire-merge.md @@ -0,0 +1,31 @@ +--- +"@trigger.dev/sdk": patch +--- + +Fix `chat.agent` HITL continuations on reasoning-heavy turns. Two changes that work together: + +- The per-turn merge now overlays the wire copy's tool-part state advancement onto the agent's existing chain — `state` + the matching resolution field (`output` / `errorText` / `approval`) come from the wire, everything else (text, reasoning, tool `input`, provider metadata) stays whatever the snapshot or `hydrateMessages` returned. Previously a full-message replace overwrote those fields with whatever the client shipped, so a slimmed wire copy landed a tool call with no `arguments` on the next LLM call. Covers `output-available` / `output-error` (HITL `addToolOutput`) and `approval-responded` / `output-denied` (approval flow). +- `TriggerChatTransport.sendMessages` and `AgentChat.sendRaw` now slim assistant messages that carry advanced tool parts. The wire payload is just `{ id, role, parts: [] }` for `submit-message` continuations; everything else passes through. Reasoning blobs and full tool inputs no longer ride the wire on every `addToolOutput` / `addToolApprovalResponse`, so continuation payloads stay well under the `.in/append` cap on long agent loops. + +Note: `onValidateMessages` receives the slim wire on HITL turns. If you call `validateUIMessages` from `ai` against the full `messages` array it will reject the slim assistant; filter to user messages (or skip on HITL turns) — see the updated docstring on `onValidateMessages` for the recommended pattern. 
+ +For `hydrateMessages` hooks that persist the chain, this release also adds a small helper to the `@trigger.dev/sdk/ai` surface: + +```ts +import { chat, upsertIncomingMessage } from "@trigger.dev/sdk/ai"; + +chat.agent({ + hydrateMessages: async ({ chatId, trigger, incomingMessages }) => { + const record = await db.chat.findUnique({ where: { id: chatId } }); + const stored = record?.messages ?? []; + if (upsertIncomingMessage(stored, { trigger, incomingMessages })) { + await db.chat.update({ where: { id: chatId }, data: { messages: stored } }); + } + return stored; + }, +}); +``` + +It pushes fresh user messages by id, no-ops on HITL continuations (the incoming shares an id with the existing assistant — the runtime overlays the new tool-state advance), and skips on non-`submit-message` triggers. Returns `true` if it mutated `stored` so the caller knows whether to persist. + +Net effect: `chat.addToolOutput(...)` / `chat.addToolApprovalResponse(...)` on multi-step reasoning agents (OpenAI Responses with `store: false`, Anthropic extended thinking, etc.) no longer blows the cap and no longer corrupts the LLM input. diff --git a/.changeset/chat-start-session-action-typed-client-data.md b/.changeset/chat-start-session-action-typed-client-data.md new file mode 100644 index 00000000000..acd75037caf --- /dev/null +++ b/.changeset/chat-start-session-action-typed-client-data.md @@ -0,0 +1,22 @@ +--- +"@trigger.dev/sdk": patch +--- + +Type `chat.createStartSessionAction` against your chat agent so `clientData` is typed end-to-end on the first turn: + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import type { myChat } from "@/trigger/chat"; + +export const startChatSession = chat.createStartSessionAction("my-chat"); + +// In the browser, threaded from the transport's typed startSession callback: +const transport = useTriggerChatTransport({ + task: "my-chat", + startSession: ({ chatId, clientData }) => + startChatSession({ chatId, clientData }), + // ... 
+}); +``` + +`ChatStartSessionParams` gains a typed `clientData` field — folded into the first run's `payload.metadata` so `onPreload` / `onChatStart` see the same shape per-turn `metadata` carries via the transport. The opaque session-level `metadata` field is unchanged. diff --git a/.changeset/chat-transport-recreate-missing-session.md b/.changeset/chat-transport-recreate-missing-session.md new file mode 100644 index 00000000000..dba916e9442 --- /dev/null +++ b/.changeset/chat-transport-recreate-missing-session.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +`useTriggerChatTransport` now recovers when restored session state points at a session that no longer exists in the current environment. diff --git a/.changeset/cli-deploy-skip-rewrite-timestamp.md b/.changeset/cli-deploy-skip-rewrite-timestamp.md new file mode 100644 index 00000000000..60e82732dce --- /dev/null +++ b/.changeset/cli-deploy-skip-rewrite-timestamp.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +Add `TRIGGER_BUILD_SKIP_REWRITE_TIMESTAMP=1` escape hatch for local self-hosted builds whose buildx driver doesn't support `rewrite-timestamp` alongside push (e.g. orbstack's default `docker` driver). diff --git a/.changeset/cli-init-ai-tooling.md b/.changeset/cli-init-ai-tooling.md new file mode 100644 index 00000000000..a7a4c8be14a --- /dev/null +++ b/.changeset/cli-init-ai-tooling.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +`trigger init` now sets up your AI coding assistant as part of project setup: pick the MCP server, the agent skills, or both, then scaffold with the CLI or hand off to your assistant. Adds a new `getting-started` agent skill that teaches assistants how to bootstrap Trigger.dev (install the SDK, write `trigger.config.ts`, create a first task, run `trigger dev`), so the AI-driven setup path works end to end. It ships in the CLI alongside the existing skills, version-matched to your SDK. 
diff --git a/.changeset/coerce-concurrency-key-to-string.md b/.changeset/coerce-concurrency-key-to-string.md new file mode 100644 index 00000000000..faccf7a48bf --- /dev/null +++ b/.changeset/coerce-concurrency-key-to-string.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Coerce numeric `concurrencyKey` values to string at the API boundary across `tasks.trigger`, `tasks.batchTrigger`, and the Phase-2 streaming batch endpoint. diff --git a/.changeset/config.json b/.changeset/config.json index 5c191e9bfbd..115f54fefee 100644 --- a/.changeset/config.json +++ b/.changeset/config.json @@ -1,15 +1,25 @@ { "$schema": "https://unpkg.com/@changesets/config@2.2.0/schema.json", - "changelog": "@changesets/cli/changelog", + "changelog": [ + "@remix-run/changelog-github", + { + "repo": "triggerdotdev/trigger.dev" + } + ], "commit": false, - "fixed": [], + "fixed": [["@trigger.dev/*", "trigger.dev"]], "linked": [], "access": "public", "baseBranch": "main", "updateInternalDependencies": "patch", "ignore": [ - "wss", "webapp", - "docs" - ] -} \ No newline at end of file + "coordinator", + "docker-provider", + "kubernetes-provider", + "supervisor" + ], + "___experimentalUnsafeOptions_WILL_CHANGE_IN_PATCH": { + "onlyUpdatePeerDependentsWhenOutOfRange": true + } +} diff --git a/.changeset/dequeue-latency-histogram.md b/.changeset/dequeue-latency-histogram.md new file mode 100644 index 00000000000..0b69b8c98a9 --- /dev/null +++ b/.changeset/dequeue-latency-histogram.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Record client-side dequeue API latency in the supervisor consumer pool as a Prometheus histogram (`queue_consumer_pool_dequeue_duration_seconds`, labelled by `outcome`: success/empty/error). 
diff --git a/.changeset/duplicate-task-ids.md b/.changeset/duplicate-task-ids.md new file mode 100644 index 00000000000..c68724bfedd --- /dev/null +++ b/.changeset/duplicate-task-ids.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/core": patch +"trigger.dev": patch +--- + +`dev` and `deploy` now fail with a clear error when two tasks are defined with the same id, including across different task types (e.g. a scheduled task and a regular task sharing an id). Previously the second definition silently overwrote the first, so one of the tasks would vanish with no warning. Task ids are detected as duplicates during indexing (naming each offending id and the files it was found in), and the same rule is enforced server-side when the background worker is registered. diff --git a/.changeset/env-vars-tracing-forceflush-typecheck.md b/.changeset/env-vars-tracing-forceflush-typecheck.md new file mode 100644 index 00000000000..9d90c0383d7 --- /dev/null +++ b/.changeset/env-vars-tracing-forceflush-typecheck.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Fix `@trigger.dev/core` build: cast the underlying log record exporter when calling `forceFlush` so it typechecks against the updated OpenTelemetry `LogRecordExporter` type (which no longer declares `forceFlush`). diff --git a/.changeset/envvars-import-is-secret.md b/.changeset/envvars-import-is-secret.md new file mode 100644 index 00000000000..5fbe70f43ae --- /dev/null +++ b/.changeset/envvars-import-is-secret.md @@ -0,0 +1,12 @@ +--- +"@trigger.dev/core": patch +--- + +`envvars.upload` now accepts an optional `isSecret` flag, letting you create the imported variables as secret (redacted) environment variables. When omitted, variables default to non-secret. + +```ts +await envvars.upload("proj_1234", "prod", { + variables: { STRIPE_SECRET_KEY: "sk_live_..." 
}, + isSecret: true, +}); +``` diff --git a/.changeset/large-trigger-payload-offload.md b/.changeset/large-trigger-payload-offload.md new file mode 100644 index 00000000000..e8d87947166 --- /dev/null +++ b/.changeset/large-trigger-payload-offload.md @@ -0,0 +1,8 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +--- + +Offload large trigger payloads to object storage before sending the trigger API request. The SDK uploads packets at or above the existing 128KB limit and sends an `application/store` pointer instead of embedding large JSON in the request body. `TriggerTaskRequestBody` now validates that `application/store` payloads are non-empty storage paths. + +Payload uploads use the same resolved `ApiClient` as the trigger call (including `requestOptions.clientConfig`), not only the global `apiClientManager.client` — so custom `baseURL`, access token, and preview branch apply to both presign and trigger. diff --git a/.changeset/locals-key-dual-package-fix.md b/.changeset/locals-key-dual-package-fix.md new file mode 100644 index 00000000000..38d42e19dfb --- /dev/null +++ b/.changeset/locals-key-dual-package-fix.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Fix `LocalsKey` type incompatibility across dual-package builds. The phantom value-type brand no longer uses a module-level `unique symbol`, so a single TypeScript compilation that resolves the type from both the ESM and CJS outputs (which can happen under certain pnpm hoisting layouts) no longer sees two structurally-incompatible variants of the same type. 
diff --git a/.changeset/mcp-agent-chat-sessions.md b/.changeset/mcp-agent-chat-sessions.md new file mode 100644 index 00000000000..c3f01aebf28 --- /dev/null +++ b/.changeset/mcp-agent-chat-sessions.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +The CLI MCP server's agent-chat tools (`start_agent_chat`, `send_agent_message`, `close_agent_chat`) now run on the new Sessions primitive, so AI assistants driving a `chat.agent` get the same idempotent-by-`chatId`, durable-across-runs behavior the browser transport gets. Required PAT scopes go from `write:inputStreams` to `read:sessions` + `write:sessions`. diff --git a/.changeset/mcp-list-runs-region.md b/.changeset/mcp-list-runs-region.md new file mode 100644 index 00000000000..b72cfb23c97 --- /dev/null +++ b/.changeset/mcp-list-runs-region.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +MCP `list_runs` tool: add a `region` filter input and surface each run's executing region in the formatted summary. diff --git a/.changeset/mcp-trigger-task-no-default-wait.md b/.changeset/mcp-trigger-task-no-default-wait.md new file mode 100644 index 00000000000..396c68bd005 --- /dev/null +++ b/.changeset/mcp-trigger-task-no-default-wait.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +The MCP server no longer tells the AI agent to wait for a run to complete after every `trigger_task` call. Waiting is now opt-in: the agent only waits when you ask it to (for example "trigger and then wait for it to finish"). This avoids burning tokens polling runs you didn't need to block on and keeps responses clearer. diff --git a/.changeset/mock-chat-agent-test-harness.md b/.changeset/mock-chat-agent-test-harness.md new file mode 100644 index 00000000000..9876e56a9f7 --- /dev/null +++ b/.changeset/mock-chat-agent-test-harness.md @@ -0,0 +1,8 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Unit-test `chat.agent` definitions offline with `mockChatAgent` from `@trigger.dev/sdk/ai/test`. 
Drives a real agent's turn loop in-process — no network, no task runtime — so you can send messages, actions, and stop signals via driver methods, inspect captured output chunks, and verify hooks fire. Pairs with `MockLanguageModelV3` from `ai/test` for model mocking. `setupLocals` lets you pre-seed `locals` (DB clients, service stubs) before `run()` starts. + +The broader `runInMockTaskContext` harness it's built on lives at `@trigger.dev/core/v3/test` — useful for unit-testing any task code, not just chat. diff --git a/.changeset/mollifier-buffer-pipeline-list-entries.md b/.changeset/mollifier-buffer-pipeline-list-entries.md new file mode 100644 index 00000000000..2c55d9b18a8 --- /dev/null +++ b/.changeset/mollifier-buffer-pipeline-list-entries.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/redis-worker": patch +--- + +Pipeline the per-entry `HGETALL` fetches in `MollifierBuffer.listEntriesForEnv`. The previous serial implementation issued one Redis round-trip per runId returned by `LRANGE`, which dominated stale-sweep wall-time at any meaningful backlog (at the sweep's default maxCount=1000, this is ~1000 RTTs per env per pass). Behaviour is unchanged — entries are still skipped when the entry hash has been torn down by a concurrent drainer ack/fail between the LRANGE and the HGETALL. diff --git a/.changeset/mollifier-configurable-constants.md b/.changeset/mollifier-configurable-constants.md new file mode 100644 index 00000000000..e9943e9b190 --- /dev/null +++ b/.changeset/mollifier-configurable-constants.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/redis-worker": patch +--- + +Make mollifier buffer and drainer internals configurable. `MollifierBuffer` now accepts `ackGraceTtlSeconds`, `maxRetriesPerRequest`, `reconnectStepMs`, and `reconnectMaxMs` options, and `MollifierDrainer` accepts `maxBackoffMs` and `backoffFloorMs`. All default to their previous hardcoded values, so existing behaviour is unchanged. 
diff --git a/.changeset/mollifier-drain-batch-size.md b/.changeset/mollifier-drain-batch-size.md new file mode 100644 index 00000000000..9e848b5011d --- /dev/null +++ b/.changeset/mollifier-drain-batch-size.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/redis-worker": patch +--- + +`MollifierDrainer` accepts a `drainBatchSize` option (default 1) that controls how many entries are popped per env per tick — in-flight handlers remain capped by the global `concurrency`. `MollifierBuffer` also gains `getDrainingCount()` / `listStaleDraining()`, backed by a new `mollifier:draining` ZSET maintained atomically with pop/ack/fail/requeue (observability-only). diff --git a/.changeset/mollifier-redis-worker-primitives.md b/.changeset/mollifier-redis-worker-primitives.md new file mode 100644 index 00000000000..a209e530c24 --- /dev/null +++ b/.changeset/mollifier-redis-worker-primitives.md @@ -0,0 +1,9 @@ +--- +"@trigger.dev/redis-worker": patch +--- + +Add MollifierBuffer and MollifierDrainer primitives for trigger burst smoothing. + +MollifierBuffer (`accept`, `pop`, `ack`, `requeue`, `fail`, `evaluateTrip`) is a per-env FIFO over Redis with atomic Lua transitions for status tracking. `evaluateTrip` is a sliding-window trip evaluator the webapp gate uses to detect per-env trigger bursts. + +MollifierDrainer pops entries through a polling loop with a user-supplied handler. The loop survives transient Redis errors via capped exponential backoff (up to 5s), and per-env pop failures don't poison the rest of the batch — one env's blip is logged and counted as failed for that tick. Rotation is two-level: orgs at the top, envs within each org. The buffer maintains `mollifier:orgs` and `mollifier:org-envs:${orgId}` atomically with per-env queues, so the drainer walks orgs → envs directly without an in-memory cache. The `maxOrgsPerTick` option (default 500) caps how many orgs are scheduled per tick; for each picked org, one env is popped (rotating round-robin within the org). 
An org with N envs gets the same per-tick scheduling slot as an org with 1 env, so tenant-level drainage throughput is determined by org count rather than env count. diff --git a/.changeset/mollifier-tag-cap.md b/.changeset/mollifier-tag-cap.md new file mode 100644 index 00000000000..b9057664fa7 --- /dev/null +++ b/.changeset/mollifier-tag-cap.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/redis-worker": patch +--- + +Mollifier `mutateSnapshot` now enforces a tag cap: an `append_tags` patch carrying `maxTags` returns `"limit_exceeded"` (writing nothing) when the deduped tag count would exceed the limit, so a buffered run can't accumulate more tags via the tags API than the trigger validator allows at creation. diff --git a/.changeset/otel-suite-0218.md b/.changeset/otel-suite-0218.md new file mode 100644 index 00000000000..38b71ceeec1 --- /dev/null +++ b/.changeset/otel-suite-0218.md @@ -0,0 +1,7 @@ +--- +"@trigger.dev/core": patch +"trigger.dev": patch +"@trigger.dev/sdk": patch +--- + +Update the bundled OpenTelemetry packages to their latest releases (`@opentelemetry/sdk-node` 0.218.0, `@opentelemetry/core` 2.7.1, `@opentelemetry/host-metrics` 0.38.3). diff --git a/.changeset/plugin-auth-path.md b/.changeset/plugin-auth-path.md new file mode 100644 index 00000000000..7ce08b71a33 --- /dev/null +++ b/.changeset/plugin-auth-path.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/plugins": patch +--- + +Add the public interfaces for a plugin system. Initially these are the consolidated authentication and authorization interfaces.
diff --git a/.changeset/pre.json b/.changeset/pre.json new file mode 100644 index 00000000000..818658e8972 --- /dev/null +++ b/.changeset/pre.json @@ -0,0 +1,64 @@ +{ + "mode": "pre", + "tag": "rc", + "initialVersions": { + "coordinator": "0.0.1", + "docker-provider": "0.0.1", + "kubernetes-provider": "0.0.1", + "supervisor": "0.0.1", + "webapp": "1.0.0", + "@trigger.dev/build": "4.4.6", + "trigger.dev": "4.4.6", + "@trigger.dev/core": "4.4.6", + "@trigger.dev/plugins": "4.4.6", + "@trigger.dev/python": "4.4.6", + "@trigger.dev/react-hooks": "4.4.6", + "@trigger.dev/redis-worker": "4.4.6", + "@trigger.dev/rsc": "4.4.6", + "@trigger.dev/schema-to-json": "4.4.6", + "@trigger.dev/sdk": "4.4.6" + }, + "changesets": [ + "agent-skills", + "ai-prompts", + "ai-sdk-7-support", + "ai-tool-helpers", + "backpressure-scale-up-freeze", + "bundle-skills-single-pass", + "cap-idempotency-key-length", + "chat-agent-on-boot-hook", + "chat-agent-tools", + "chat-agent", + "chat-history-read-primitives", + "chat-session-attributes", + "chat-slim-wire-merge", + "chat-start-session-action-typed-client-data", + "chat-transport-recreate-missing-session", + "cli-deploy-skip-rewrite-timestamp", + "coerce-concurrency-key-to-string", + "env-vars-tracing-forceflush-typecheck", + "envvars-import-is-secret", + "large-trigger-payload-offload", + "locals-key-dual-package-fix", + "mcp-agent-chat-sessions", + "mcp-list-runs-region", + "mcp-trigger-task-no-default-wait", + "mock-chat-agent-test-harness", + "mollifier-buffer-pipeline-list-entries", + "mollifier-configurable-constants", + "mollifier-drain-batch-size", + "mollifier-redis-worker-primitives", + "mollifier-tag-cap", + "otel-suite-0218", + "plugin-auth-path", + "resource-catalog-runtime-registration", + "retry-middleware-errors", + "retry-sigsegv", + "runs-list-region-filter", + "s2-batch-transform-linger-fix", + "sessions-primitive", + "trigger-client", + "unflatten-attributes-conflict", + "warm-start-external-trace-context-leak" + ] +} diff 
--git a/.changeset/project-environments-endpoint.md b/.changeset/project-environments-endpoint.md new file mode 100644 index 00000000000..9059c548245 --- /dev/null +++ b/.changeset/project-environments-endpoint.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Add `GetProjectEnvironmentsResponseBody` and `ProjectEnvironment` schemas for the new `GET /api/v1/projects/{projectRef}/environments` endpoint, which lists the parent environments (dev, staging, preview, prod) a personal access token can access for a project. Dev is scoped to the token owner, and branch (preview child) environments are excluded. diff --git a/.changeset/resource-catalog-runtime-registration.md b/.changeset/resource-catalog-runtime-registration.md new file mode 100644 index 00000000000..5046f09e1f1 --- /dev/null +++ b/.changeset/resource-catalog-runtime-registration.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/core": patch +"trigger.dev": patch +--- + +Fix `COULD_NOT_FIND_EXECUTOR` when a task's definition is loaded via `await import(...)` from inside another task's `run()`. The runtime workers now register such tasks with a sentinel file context, and the catalog logs a one-time warning per task id. diff --git a/.changeset/retry-middleware-errors.md b/.changeset/retry-middleware-errors.md new file mode 100644 index 00000000000..2267b4d724c --- /dev/null +++ b/.changeset/retry-middleware-errors.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Retry `TASK_MIDDLEWARE_ERROR` under the task's retry policy instead of failing the run on the first attempt. The error was already classified as retryable by `shouldRetryError`, but `shouldLookupRetrySettings` did not include it, so the retry flow fell through to `fail_run`. Fixes #3231.
diff --git a/.changeset/retry-sigsegv.md b/.changeset/retry-sigsegv.md new file mode 100644 index 00000000000..5a53c351efe --- /dev/null +++ b/.changeset/retry-sigsegv.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Retry `TASK_PROCESS_SIGSEGV` task crashes under the user's retry policy instead of failing the run on the first segfault. SIGSEGV in Node tasks is frequently non-deterministic (native addon races, JIT/GC interaction, near-OOM in native code, host issues), so retrying on a fresh process often succeeds. The retry is gated by the task's existing `retry` config + `maxAttempts` — same path `TASK_PROCESS_SIGTERM` and uncaught exceptions already use — so tasks without a retry policy still fail fast. diff --git a/.changeset/runs-list-region-filter.md b/.changeset/runs-list-region-filter.md new file mode 100644 index 00000000000..c487e2d632c --- /dev/null +++ b/.changeset/runs-list-region-filter.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +--- + +Add `region` to the runs list / retrieve API: filter runs by region (`runs.list({ region: "..." })` / `filter[region]=`) and read each run's executing region from the new `region` field on the response. diff --git a/.changeset/s2-batch-transform-linger-fix.md b/.changeset/s2-batch-transform-linger-fix.md new file mode 100644 index 00000000000..f1e9bab34aa --- /dev/null +++ b/.changeset/s2-batch-transform-linger-fix.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/core": patch +"trigger.dev": patch +--- + +Bump `@s2-dev/streamstore` to `0.22.10` to fix a `TASK_RUN_UNCAUGHT_EXCEPTION` ("Invalid state: Unable to enqueue") when a `chat.agent` turn is aborted mid-stream. 
diff --git a/.changeset/sessions-primitive.md b/.changeset/sessions-primitive.md new file mode 100644 index 00000000000..79a6ca48f65 --- /dev/null +++ b/.changeset/sessions-primitive.md @@ -0,0 +1,26 @@ +--- +"@trigger.dev/sdk": minor +"@trigger.dev/core": patch +--- + +**Sessions** — a durable, run-aware stream channel keyed on a stable `externalId`. A Session is the unit of state that owns a multi-run conversation: messages flow through `.in`, responses through `.out`, both survive run boundaries. Sessions back the new `chat.agent` runtime, and you can build on them directly for any pattern that needs durable bi-directional streaming across runs. + +```ts +import { sessions, tasks } from "@trigger.dev/sdk"; + +// Trigger a task and subscribe to its session output in one call +const { runId, stream } = await tasks.triggerAndSubscribe("my-task", payload, { + externalId: "user-456", +}); + +for await (const chunk of stream) { + // ... +} + +// Enumerate existing sessions (powers inbox-style UIs without a separate index) +for await (const s of sessions.list({ type: "chat.agent", tag: "user:user-456" })) { + console.log(s.id, s.externalId, s.createdAt, s.closedAt); +} +``` + +See [/docs/ai-chat/overview](https://trigger.dev/docs/ai-chat/overview) for the full surface — Sessions powers the durable, resumable chat runtime described there. diff --git a/.changeset/trigger-client.md b/.changeset/trigger-client.md new file mode 100644 index 00000000000..75699471ba2 --- /dev/null +++ b/.changeset/trigger-client.md @@ -0,0 +1,18 @@ +--- +"@trigger.dev/sdk": patch +--- + +Add `TriggerClient` for running multiple SDK clients side-by-side, each with its own auth, preview branch, and baseURL. Useful when a single process needs to trigger tasks or read runs across multiple projects, environments, or preview branches without mutating shared global state. 
+ +```ts +import { TriggerClient } from "@trigger.dev/sdk"; + +const prod = new TriggerClient({ accessToken: process.env.TRIGGER_PROD_KEY }); +const preview = new TriggerClient({ + accessToken: process.env.TRIGGER_PREVIEW_KEY, + previewBranch: "signup-flow", +}); + +await prod.tasks.trigger("send-email", payload); +await preview.runs.list({ status: ["COMPLETED"] }); +``` diff --git a/.changeset/trigger-skills-installer.md b/.changeset/trigger-skills-installer.md new file mode 100644 index 00000000000..4e05b125919 --- /dev/null +++ b/.changeset/trigger-skills-installer.md @@ -0,0 +1,11 @@ +--- +"trigger.dev": patch +--- + +`trigger skills` installs Trigger.dev agent skills into your coding agent so it knows how to write tasks, schedules, realtime, and chat.agent code. The skills ship with the CLI and are copied into each tool's native skills directory (Claude Code, Cursor, GitHub Copilot, and Codex / AGENTS.md), and `trigger dev` offers to install them on first run. + +```bash +trigger skills --target claude-code +``` + +Replaces the previous `install-rules` command, which stays as an alias. diff --git a/.changeset/unflatten-attributes-conflict.md b/.changeset/unflatten-attributes-conflict.md new file mode 100644 index 00000000000..9df627f2630 --- /dev/null +++ b/.changeset/unflatten-attributes-conflict.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Fix `TypeError` in `unflattenAttributes` when the input attribute map contains conflicting dotted key paths (e.g. both `a.b` set to a scalar and `a.b.c` set to a value). The path-walk loop now applies last-write-wins when a prior key wrote a primitive, null, or array at an intermediate slot, matching the existing precedent in `AttributeFlattener.addAttribute`. Callers no longer crash when handed malformed external attribute inputs. 
diff --git a/.changeset/warm-start-external-trace-context-leak.md b/.changeset/warm-start-external-trace-context-leak.md new file mode 100644 index 00000000000..84f91de7689 --- /dev/null +++ b/.changeset/warm-start-external-trace-context-leak.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Fix external trace context leaking across runs on warm-started workers with `processKeepAlive` enabled. Every subsequent run's attempt span was being exported with the first run's `traceId` and `parentSpanId`, breaking causal-chain navigation in external APM tools. Runs without an external trace context are unaffected. diff --git a/.claude/REVIEW.md b/.claude/REVIEW.md new file mode 100644 index 00000000000..19edf00a52e --- /dev/null +++ b/.claude/REVIEW.md @@ -0,0 +1,50 @@ +# REVIEW.md — Trigger.dev OSS + +Repo-specific signal for anyone (human or agent) reviewing a PR in this codebase. Calibrates what counts as critical, what to always check, and what to skip. + +## What makes a 🔴 Important finding here + +Reserve 🔴 for things that would page someone or block a rollback. In this codebase, that means: + +- **Rolling-deploy breakage.** Old and new versions of the webapp/supervisor run side-by-side during deploys. A change is broken if: + - A Lua script's behavior changes for a given key set without versioning (rename the script with a behavior-descriptive suffix like `Tracked` rather than `V2` — both versions must coexist safely). + - A Redis data shape used by both versions changes in place. New shapes need a new key namespace. + - A migration is not backward-compatible with the prior image. +- **Schema / migration safety.** Prisma migrations must be backward-compatible with the prior deploy. Adding NOT NULL without a default, dropping a column an old image still reads, renaming a column — all 🔴. 
+- **ClickHouse migration ordering + idempotency.** Goose runs in strict mode in the deploy pipeline and refuses to apply a missing version below the current version — slotting a new file in below the latest already-applied version blocks the deploy. New ClickHouse migration files MUST use the next available number (`max(files in internal-packages/clickhouse/schema/) + 1`); if main has added migrations while you've been on a branch, renumber yours. DDL must also be idempotent (`ADD COLUMN IF NOT EXISTS`, `DROP COLUMN IF EXISTS`, `CREATE TABLE IF NOT EXISTS`, `ADD INDEX IF NOT EXISTS`) so a partial / `--allow-missing` apply elsewhere doesn't fail on retry. Either fault is 🔴 — both break test/prod deploys. Rules live in `internal-packages/clickhouse/CLAUDE.md`. +- **Queue / concurrency correctness.** RunQueue, MarQS (V1, legacy), redis-worker — any change to enqueue / dequeue / locking semantics. Re-derive the invariant on paper before flagging or accepting. +- **Missing index on a hot table.** New Prisma queries against `TaskRun`, `TaskRunExecutionSnapshot`, `JobRun`, `Project`, etc. must use an existing index. Check `internal-packages/database/prisma/schema.prisma` for the relevant `@@index` lines — don't guess and don't propose `EXPLAIN`. +- **Recovery-path queries.** Any `TaskRun.findFirst` / `findMany` added to a schedule, run-recovery, or restart loop. Recovery fan-outs (Redis crash, restart storms) turn "rare indexed query" into a DB incident. 🔴 even if indexed. +- **Aggregations on hot tables.** No `COUNT` / `GROUP BY` on `TaskRun` or other multi-million-row tables. Use Redis or ClickHouse for counts. +- **Prod Redis blast-radius.** New code paths that `SCAN` with broad patterns (`*foo*`) on prod-shaped Redis, or `EVAL` Lua with `SCAN` loops inside. Both are 🔴. +- **`@trigger.dev/core` direct import** from anywhere outside the SDK package. Always import from `@trigger.dev/sdk`. Core direct imports are 🔴 — they break the public API contract. 
+- **Heavy execute-deps imported into request-handler bundles.** Specifically `chat.handover` and similar split-bundle entry points must not transitively import the agent task's execute path. Watch for new imports added at module top-level of route files. +- **V1 engine code modified in a "V2 only" PR.** The `apps/webapp/app/v3/` directory contains both. If the PR description says V2-only but it touches `triggerTaskV1`, `cancelTaskRunV1`, `MarQS`, etc. — 🔴. + +## Always check + +- **Tests use testcontainers, not mocks.** Vitest with `redisTest` / `postgresTest` / `containerTest` from `@internal/testcontainers`. Any new `vi.mock(...)` on Redis, Postgres, BullMQ, or other infra is wrong here — 🔴 if added in production-path tests, 🟡 if isolated unit test. +- **Public-package changes have a changeset.** `pnpm run changeset:add` produces `.changeset/*.md`. Required for any edit under `packages/*`. Missing → 🟡; missing on a breaking change → 🔴. +- **Server-only changes have `.server-changes/*.md`.** Required for `apps/webapp/`, `apps/supervisor/` edits with no public-package change. Body should be 1-2 sentences (it has to fit as one bullet in a future changelog). Missing → 🟡. +- **Lua script naming.** Coexisting scripts use behavior-descriptive suffixes (`Tracked`), never `V2`. Old name must keep working until the next deploy clears it. +- **RunQueue payload shape.** V2 run-queue payload's `projectId` is consumed by `workerQueueResolver` for override matching. If a PR drops it from the payload, 🔴. +- **`safeSend` scope.** Defensive IPC wrappers belong on loop / interval / handler contexts, not one-shot terminal sends. If the PR adds `safeSend` to a single terminal call for consistency, 🟡 with a "remove this" suggestion. +- **Zod version.** Pinned to `3.25.76` monorepo-wide. New package adding zod with a different version or range — 🔴. + +## Skip (do NOT flag) + +- Anything Prettier / ESLint catches. CI runs both. 
+- TypeScript style preferences (`type` vs `interface`) — already covered by repo standards. +- Test coverage exhortations as a generic suggestion. Only flag missing tests when a specific code path is genuinely untested and the path has prior incidents. +- `agentcrumbs` markers (`// @crumbs`, `// #region @crumbs`) and `agentcrumbs` imports — these are temporary debug instrumentation stripped before merge. +- `// removed comments for removed code`, renamed `_unused` vars, re-exported types as "backwards compatibility shims" — also covered by repo standards. +- Suggestions to "add error handling" without naming a specific scenario that breaks. +- Documentation prose nitpicks in `docs/*` MDX files unless factually wrong. + +## Things V1/legacy that should NOT block a PR + +The `apps/webapp/app/v3/` directory name is misleading — most code there is V2. Only specific files are V1-only legacy: `MarQS` queue, `triggerTaskV1`, `cancelTaskRunV1`, and a handful of others (see `apps/webapp/CLAUDE.md` for the exact list). Don't flag "you should refactor this to use V2" on those — they're frozen. + +## Confidence calibration for this repo + +The most common false-positive pattern: speculating about race conditions in code paths the agent doesn't have runtime visibility into. If the only evidence is "this *could* race", drop it. If you can point to a specific interleaving with file:line for each step, surface it. diff --git a/.claude/review-guides/chat-agent-sessions-row-agnostic.md b/.claude/review-guides/chat-agent-sessions-row-agnostic.md new file mode 100644 index 00000000000..7fb9851f308 --- /dev/null +++ b/.claude/review-guides/chat-agent-sessions-row-agnostic.md @@ -0,0 +1,287 @@ +# Review guide — chat.agent on Sessions, row-agnostic addressing + +Scope: the 12 uncommitted files. **No new behaviour beyond the public surface +already on this branch** — this is plumbing cleanup that: + +1. Eliminates the transport's session-creation step +2. 
Makes `chatId` the universal addressing string everywhere +3. Makes the server-side stream/append/wait routes row-agnostic + +## The two design moves + +**Move 1 — agent owns session lifecycle.** `chat.agent` and +`chat.customAgent` upsert the backing `Session` row at bind, fire-and-forget, +keyed on `externalId = payload.chatId`. The transport, server-side +`AgentChat`, and `chat.createTriggerAction` no longer create sessions at all. +Browsers cannot mint sessions either (`POST /api/v1/sessions` is now +secret-key-only). One owner, one path. + +**Move 2 — `chatId` is the only address.** The transport, server-side +`AgentChat`, JWT scopes, and S2 stream paths all use `chatId` directly. The +Session's friendlyId is informational. To make this safe, the three stream +routes (`.in/.out` PUT, GET, POST append, plus the run-engine `wait` +endpoint) became "row-optional" and derive a *canonical addressing key* +(`row.externalId ?? row.friendlyId`, fallback to the URL param when the row +hasn't been upserted yet). Same canonical key is used to build the S2 stream +path, the waitpoint cache key, and the JWT resource set — so any caller +addressing by either form converges on the same physical stream. + +Together these remove an entire class of "did the row land yet?" races. The +transport can subscribe to `/sessions/{chatId}/out` before the agent boots, +the agent's `void sessions.create({externalId: chatId})` lands a moment +later, and any earlier reads/writes are already on the right S2 key. + +--- + +## Read in this order + +### 1. `apps/webapp/app/services/realtime/sessions.server.ts` (+34 lines) + +The new primitive. Two helpers: + +- `isSessionFriendlyIdForm(value)` — `value.startsWith("session_")`. Used to + decide whether a missing row is a hard 404 (opaque friendlyId) or a soft + "row will land later" (externalId form). +- `canonicalSessionAddressingKey(row, paramSession)` — `row.externalId ?? + row.friendlyId` if the row exists, else `paramSession`. 
**This is the + load-bearing function.** Read its docstring. + +**Question to ask:** can two callers addressing the "same" session ever get +different canonical keys? Only if the row exists for one and not the other, +*and* the URL forms differ — but in that case the row-less caller used the +externalId form (friendlyId-form would have 404'd earlier), and the row-ful +caller computes `row.externalId ?? row.friendlyId`. If the row's externalId +matches the URL, they converge. If it doesn't, there's no row to find by +that string anyway. The interesting edge is "row exists with no externalId", +addressed via friendlyId — both sides read `row.friendlyId`. ✓ + +### 2. `apps/webapp/app/routes/realtime.v1.sessions.$session.$io.ts` (+47/-12) + +PUT initialize + GET subscribe (SSE). Both use the helper. The interesting +part is the loader's `findResource` + `authorization.resource`: + +```ts +findResource: async (params, auth) => { + const row = await resolveSessionByIdOrExternalId(...); + if (!row && isSessionFriendlyIdForm(params.session)) return undefined; // 404 + return { row, addressingKey: canonicalSessionAddressingKey(row, params.session) }; +}, +authorization: { + resource: ({ row, addressingKey }) => { + const ids = new Set([addressingKey]); + if (row) { + ids.add(row.friendlyId); + if (row.externalId) ids.add(row.externalId); + } + return { sessions: [...ids] }; + }, + superScopes: ["read:sessions", "read:all", "admin"], +}, +``` + +**Why three IDs in the resource set?** `checkAuthorization` is "any-match" +across the resource values. We want a JWT scoped to *either* form to +authorize *either* URL form. Smoke test verified the 4-cell matrix passes. + +**The PUT path** (action handler) is simpler — it just resolves the row, +builds an addressing key, and hands it to `initializeSessionStream`. Worth +noting the `closedAt` check is now `maybeSession?.closedAt` — no row means +no closedAt to enforce. + +### 3.
`apps/webapp/app/routes/realtime.v1.sessions.$session.$io.append.ts` (+22/-13) + +POST append (browser writes a record to `.in` or server writes to `.out`). +Same row-optional pattern. Both the S2 append and the waitpoint drain use +`addressingKey`. + +**Question to ask:** what fires the waitpoint? An agent's +`session.in.wait()` registers a waitpoint keyed on `(addressingKey, io)` via +the wait endpoint (file 4). The append handler drains by the *same* key — +even if the agent registered with externalId form and the transport +appended via friendlyId form, both compute the same canonical key, so they +converge. ✓ + +### 4. `apps/webapp/app/routes/api.v1.runs.$runFriendlyId.session-streams.wait.ts` (+18/-13) + +The agent's `.in.wait()` endpoint. Run-engine creates the waitpoint, then +registers it in Redis under `(addressingKey, io)`. The race-check that runs +right after creation reads from S2 by the same key. Three call sites — +`addSessionStreamWaitpoint`, `readSessionStreamRecords`, +`removeSessionStreamWaitpoint` — all consistent. + +### 5. `apps/webapp/app/routes/api.v1.sessions.ts` (+4/-2) + +**Security tightening.** Removed `allowJWT: true` and `corsStrategy: "all"` +from the `POST /api/v1/sessions` action — secret-key only now. + +**Question to ask:** was the JWT path actually used? Until this branch, the +transport called it via `ensureSession` (now deleted). After this branch, +nobody reaches it from the browser. `chat.createTriggerAction` (server +secret key) is the only browser-adjacent path. + +### 6. `packages/trigger-sdk/src/v3/ai.ts` (+62/-39) + +Two near-identical edits — one in `chatAgent`, one in `chatCustomAgent`. +Both bind on `payload.chatId` and fire-and-forget the upsert: + +```ts +locals.set(chatSessionHandleKey, sessions.open(payload.chatId)); +void sessions + .create({ type: "chat.agent", externalId: payload.chatId }) + .catch(() => { /* best effort */ }); +``` + +**Question to ask:** why `void`-and-`catch`? 
Awaiting the upsert would gate +the agent's bind on a network round-trip that doesn't unblock anything +user-visible — `.in/.out` routes are row-agnostic and the waitpoint cache +is keyed on the addressing string, not the row id. If the upsert genuinely +fails, the next bind retries the same idempotent call (`sessions.create` +upserts on `externalId`, so concurrent triggers on one chatId converge to +one row). The row matters for downstream metadata + listing, not for live +addressing. + +The PAT scope minting in `chatAgent` (two call sites — preload and +sendMessage) now uses `payload.chatId` for the `sessions:` resource. That +matches what the transport/AgentChat use as the JWT resource and what the +JWT's resource set in the loader includes. Cross-form addressing works +either way (smoke-tested), but using `chatId` keeps the chain tight. + +`createChatTriggerAction` is the most visibly trimmed: no pre-create, no +threading `sessionId` into payload, scope mint uses `chatId`. Return type +no longer carries `sessionId` — note `TriggerChatTaskResult.sessionId` was +already declared optional, so this isn't a public-API break. + +**Stale docstring to flag:** `chat.ts:59` and `chat.ts:112` still describe +PAT scopes as `read:sessions:{sessionId}` and +`write:sessions:{sessionId}`. Functionally either ID works (row lookup +canonicalises), but the doc text is now out of date — it should say +`{chatId}`. Worth a tidy-up before merge but not blocking. + +### 7. `packages/trigger-sdk/src/v3/chat.ts` (+63/-117) + +**The biggest mechanical edit.** Net -54 lines from deleting `ensureSession` +and untangling its callers. 
+ +What disappeared: +- `private async ensureSession(chatId)` — gone +- The "lazy upsert from the browser if no triggerTask callback" branch in + `sendMessages` and `preload` — gone +- The "throw if neither path surfaced a sessionId" guard — gone +- All `state.sessionId` URL params replaced with `chatId` +- `subscribeToSessionStream`'s `chatId?` (optional) is now `chatId` (required) + +What stayed: +- `state.sessionId` in `ChatSessionState` — optional, informational +- The `restore from external storage` branch in the constructor still + hydrates `sessionId` if persisted, just doesn't *require* it +- `notifySessionChange` still surfaces `sessionId` if known + +**Question to ask:** does the transport ever still need the friendlyId? The +only place is the `onSessionChange` callback's payload (so consumers +persisting state can save it for later display). The transport itself never +puts it in a URL or a waitpoint key. + +The `sendMessages` path is worth re-reading: when state.runId is set, it +appends to `.in/append` and subscribes to `.out`. If the append fails with +a non-auth error, it falls through to triggering a new run (legacy "run is +dead" detection — unchanged from pre-Sessions, doesn't depend on +addressing). + +### 8. `packages/trigger-sdk/src/v3/chat-client.ts` (+34/-33) + +Server-side `AgentChat`. Mirrors the transport changes — every URL uses +`this.chatId`. `triggerNewRun` no longer pre-creates a session. `ChatSession` +and internal `SessionState` types now have optional `sessionId`. + +The shape of the diff is identical to the transport: delete the upsert, +swap addressing identifiers, optionalise the friendlyId. If you've read +`chat.ts` carefully, this one is mostly mechanical confirmation that both +client surfaces (browser transport + server-side AgentChat) speak the same +addressing protocol. + +### 9. 
Test infrastructure — `sessions.ts` (+18) + `mock-chat-agent.ts` (+25) + +`__setSessionCreateImplForTests` mirrors the existing +`__setSessionOpenImplForTests`. `mockChatAgent` installs a no-op create stub +returning a synthetic `CreatedSessionResponseBody` so the agent's bind-time +`void sessions.create(...)` doesn't try to hit a real API. Cleanup runs in +the same `.finally` as the open override. + +**Question to ask:** is the synthetic response shape correct? It mirrors +`CreatedSessionResponseBody` — `id`, `externalId`, `type`, `tags`, +`metadata`, `closedAt`, `closedReason`, `expiresAt`, `createdAt`, +`updatedAt`, `isCached`. Tests don't currently assert on this object, so +the bar is "doesn't crash + matches the type". Met. + +### 10. `packages/trigger-sdk/src/v3/chat.test.ts` (+13/-12) + +Four classes of test edits, all consequences: + +- Stream URL assertion: `chat-1` (the chatId) instead of + `session_streamurl` (the friendlyId) +- `renewRunAccessToken` callback: `sessionId: undefined` (was + `DEFAULT_SESSION_ID` because the mocked trigger doesn't surface it) +- Token resolve count: `1` (was `2` — second resolve was for `ensureSession`) +- One `onSessionChange` matchObject loses `sessionId` + +### 11. `apps/webapp/app/routes/_app.../playground/.../route.tsx` (1 line) + +`sessionId: string` → `sessionId?: string` in the playground sidebar prop +to track the transport type change. + +--- + +## Edge cases I checked, so you don't have to + +- **Cross-form JWT auth (curl matrix).** JWT scoped to externalId can call + externalId URL ✓ and friendlyId URL ✓. JWT scoped to friendlyId can call + externalId URL ✓ and friendlyId URL ✓. Smoke-tested. +- **Row materialises after subscribe.** Transport opens + `GET /sessions/{chatId}/out` before agent's bind upsert lands → 200 OK, + `addressingKey = chatId` (paramSession fallback). Once the row lands + with `externalId = chatId`, addressingKey resolves to the same value via + `row.externalId`. Same S2 key throughout.
+- **Concurrent triggers on one chatId.** Two browser tabs trigger two runs + → two binds → two `sessions.create({externalId: chatId})` calls. Upsert + semantics: both return the same row. +- **Closed session enforcement.** Still enforced when a row exists. + `maybeSession?.closedAt` is null-safe; no row = no close-state to honour. +- **Agent run cancellation.** Frontend doesn't auto-detect — unchanged from + pre-Sessions; messages sit in S2 until the next trigger (the existing + run-PAT auth-error path is the only reaper). Out of scope for this branch. +- **Idle timeout in dev.** Runs stay `EXECUTING_WITH_WAITPOINTS` past the + configured idle because dev runs don't snapshot/restore; the in-process + idle clock advances locally without touching the row. Expected, not a + regression. + +## Things explicitly **not** in this branch + +- Run-state subscription on the transport side (the "run died, re-trigger + silently" UX gap) +- Session auto-close on agent exit (still client-driven by design) +- Any change to `Session` schema, `sessions.create` semantics, or + `chatAccessTokenTTL` +- Docstring updates for `read:sessions:{sessionId}` / `write:sessions:{sessionId}` + in `chat.ts:59` and `chat.ts:112` (functional but textually stale — + follow-up nit) + +--- + +## What I'd be ready to answer cold + +- Why fire-and-forget upsert (vs. 
`await`) in the agent's bind step +- Why the route's authorization resource set has three IDs (cross-form JWT + auth) +- Why `POST /api/v1/sessions` lost `allowJWT` (security tightening — no + caller needs it after the transport's `ensureSession` is gone) +- What converges two callers using different URL forms onto the same S2 + stream (`canonicalSessionAddressingKey`, identical computation on both + sides for any given row) +- What makes `sessions.create` race-safe under concurrent triggers + (`externalId` upsert) +- Why `state.sessionId` stayed on `ChatSessionState` at all (pure + informational, surfaced via `onSessionChange` for consumer persistence; + zero addressing role) +- Why the chat-client (server-side AgentChat) and chat (transport) edits + look near-identical (they implement the same client protocol against the + same row-agnostic routes) diff --git a/.claude/rules/database-safety.md b/.claude/rules/database-safety.md new file mode 100644 index 00000000000..14a6523595b --- /dev/null +++ b/.claude/rules/database-safety.md @@ -0,0 +1,13 @@ +--- +paths: + - "internal-packages/database/**" +--- + +# Database Migration Safety + +- When adding indexes to **existing tables**, use `CREATE INDEX CONCURRENTLY IF NOT EXISTS` to avoid table locks. These must be in their own separate migration file (one index per file). +- Indexes on **newly created tables** (same migration as `CREATE TABLE`) do not need CONCURRENTLY. +- When indexing a **new column on an existing table**, split into two migrations: first `ADD COLUMN IF NOT EXISTS`, then `CREATE INDEX CONCURRENTLY IF NOT EXISTS` in a separate file. +- After generating a migration with Prisma, remove extraneous lines for: `_BackgroundWorkerToBackgroundWorkerFile`, `_BackgroundWorkerToTaskQueue`, `_TaskRunToTaskRunTag`, `_WaitpointRunConnections`, `_completedWaitpoints`, `SecretStore_key_idx`, and unrelated TaskRun indexes. +- Never drop columns or tables without explicit approval. 
+- New code should target `RunEngineVersion.V2` only. diff --git a/.claude/rules/docs-writing.md b/.claude/rules/docs-writing.md new file mode 100644 index 00000000000..bbfb471368e --- /dev/null +++ b/.claude/rules/docs-writing.md @@ -0,0 +1,14 @@ +--- +paths: + - "docs/**" +--- + +# Documentation Writing Rules + +- Use Mintlify MDX format. Frontmatter: `title`, `description`, `sidebarTitle` (optional). +- After creating a new page, add it to `docs.json` navigation under the correct group. +- Use Mintlify components: `<Note>`, `<Warning>`, `<Info>`, `<Tip>`, `<CodeGroup>`, `<Expandable>`, `<Steps>`/`<Step>`. +- Code examples should be complete and runnable where possible. +- Always import from `@trigger.dev/sdk`, never `@trigger.dev/sdk/v3`. +- Keep paragraphs short. Use headers to break up content. +- Link to related pages using relative paths (e.g., `[Tasks](/tasks/overview)`). diff --git a/.claude/rules/legacy-v3-code.md b/.claude/rules/legacy-v3-code.md new file mode 100644 index 00000000000..6fd8d9402c2 --- /dev/null +++ b/.claude/rules/legacy-v3-code.md @@ -0,0 +1,33 @@ +--- +paths: + - "apps/webapp/app/v3/**" +--- + +# Legacy V1 Engine Code in `app/v3/` + +The `v3/` directory name is misleading - most code here is actively used by the current V2 engine. Only the specific files below are legacy V1-only code. 
+ +## V1-Only Files - Never Modify + +- `marqs/` directory (entire MarQS queue system: sharedQueueConsumer, devQueueConsumer, fairDequeuingStrategy, devPubSub) +- `legacyRunEngineWorker.server.ts` (V1 background job worker) +- `services/triggerTaskV1.server.ts` (deprecated V1 task triggering) +- `services/cancelTaskRunV1.server.ts` (deprecated V1 cancellation) +- `authenticatedSocketConnection.server.ts` (V1 dev WebSocket using DevQueueConsumer) +- `sharedSocketConnection.ts` (V1 shared queue socket using SharedQueueConsumer) + +## V1/V2 Branching Pattern + +Some services act as routers that branch on `RunEngineVersion`: +- `services/cancelTaskRun.server.ts` - calls V1 service or `engine.cancelRun()` for V2 +- `services/batchTriggerV3.server.ts` - uses marqs for V1 path, run-engine for V2 + +When editing these shared services, only modify V2 code paths. + +## V2 Modern Stack + +- **Run lifecycle**: `@internal/run-engine` (internal-packages/run-engine) +- **Background jobs**: `@trigger.dev/redis-worker` (not graphile-worker/zodworker) +- **Queue operations**: RunQueue inside run-engine (not MarQS) +- **V2 engine singleton**: `runEngine.server.ts`, `runEngineHandlers.server.ts` +- **V2 workers**: `commonWorker.server.ts`, `alertsWorker.server.ts`, `batchTriggerWorker.server.ts` diff --git a/.claude/rules/package-installation.md b/.claude/rules/package-installation.md new file mode 100644 index 00000000000..310074823c5 --- /dev/null +++ b/.claude/rules/package-installation.md @@ -0,0 +1,22 @@ +--- +paths: + - "**/package.json" +--- + +# Installing Packages + +When adding a new dependency to any package.json in the monorepo: + +1. **Look up the latest version** on npm before adding: + ```bash + pnpm view <package-name> version + ``` + If unsure which version to use (e.g. major version compatibility), confirm with the user. + +2. **Edit the package.json directly** — do NOT use `pnpm add` as it can cause issues in the monorepo. 
Add the dependency with the correct version range (typically `^x.y.z`). + +3. **Run `pnpm i` from the repo root** after editing to install and update the lockfile: + ```bash + pnpm i + ``` + Always run from the repo root, not from the package directory. diff --git a/.claude/rules/sdk-packages.md b/.claude/rules/sdk-packages.md new file mode 100644 index 00000000000..343be2045f8 --- /dev/null +++ b/.claude/rules/sdk-packages.md @@ -0,0 +1,12 @@ +--- +paths: + - "packages/**" +--- + +# Public Package Rules + +- Changes to `packages/` are **customer-facing**. Always add a changeset: `pnpm run changeset:add` +- Default to **patch**. Get maintainer approval for minor. Never select major without explicit approval. +- `@trigger.dev/core`: **Never import the root**. Always use subpath imports (e.g., `@trigger.dev/core/v3`). +- Do NOT update `rules/` or `.claude/skills/trigger-dev-tasks/` unless explicitly asked. These are maintained in separate dedicated passes. +- Test changes using the `hello-world` project in the [`triggerdotdev/references`](https://github.com/triggerdotdev/references) repo. diff --git a/.claude/rules/server-apps.md b/.claude/rules/server-apps.md new file mode 100644 index 00000000000..4d46789701c --- /dev/null +++ b/.claude/rules/server-apps.md @@ -0,0 +1,23 @@ +--- +paths: + - "apps/**" +--- + +# Server App Changes + +When modifying server apps (webapp, supervisor, coordinator, etc.) with **no package changes**, add a `.server-changes/` file instead of a changeset: + +```bash +cat > .server-changes/descriptive-name.md << 'EOF' +--- +area: webapp +type: fix +--- + +Brief description of what changed and why. +EOF +``` + +- **area**: `webapp` | `supervisor` | `coordinator` | `kubernetes-provider` | `docker-provider` +- **type**: `feature` | `fix` | `improvement` | `breaking` +- If the PR also touches `packages/`, just the changeset is sufficient (no `.server-changes/` needed). 
diff --git a/.claude/skills/span-timeline-events/SKILL.md b/.claude/skills/span-timeline-events/SKILL.md new file mode 100644 index 00000000000..122f49912d7 --- /dev/null +++ b/.claude/skills/span-timeline-events/SKILL.md @@ -0,0 +1,78 @@ +--- +name: span-timeline-events +description: Use when adding, modifying, or debugging OTel span timeline events in the trace view. Covers event structure, ClickHouse storage constraints, rendering in SpanTimeline component, admin visibility, and the step-by-step process for adding new events. +allowed-tools: Read, Write, Edit, Glob, Grep, Bash +--- + +# Span Timeline Events + +The trace view's right panel shows a timeline of events for the selected span. These are OTel span events rendered by `app/utils/timelineSpanEvents.ts` and the `SpanTimeline` component. + +## How They Work + +1. **Span events** in OTel are attached to a parent span. In ClickHouse, they're stored as separate rows with `kind: "SPAN_EVENT"` sharing the parent span's `span_id`. The `#mergeRecordsIntoSpanDetail` method reassembles them into the span's `events` array at query time. +2. The timeline only renders events whose `name` starts with `trigger.dev/` - all others are silently filtered out. +3. The **display name** comes from `properties.event` (not the span event name), mapped through `getFriendlyNameForEvent()`. +4. Events are shown on the **span they belong to** - events on one span don't appear in another span's timeline. + +## ClickHouse Storage Constraint + +When events are written to ClickHouse, `spanEventsToTaskEventV1Input()` filters out events whose `start_time` is not greater than the parent span's `startTime`. Events at or before the span start are silently dropped. This means span events must have timestamps strictly after the span's own `startTimeUnixNano`. + +## Timeline Rendering (SpanTimeline component) + +The `SpanTimeline` component in `app/components/run/RunTimeline.tsx` renders: + +1. 
**Events** (thin 1px line with hollow dots) - all events from `createTimelineSpanEventsFromSpanEvents()` +2. **"Started"** marker (thick cap) - at the span's `startTime` +3. **Duration bar** (thick 7px line) - from "Started" to "Finished" +4. **"Finished"** marker (thick cap) - at `startTime + duration` + +The thin line before "Started" only appears when there are events with timestamps between the span start and the first child span. For the Attempt span this works well (Dequeued -> Pod scheduled -> Launched -> etc. all happen before execution starts). Events all get `lineVariant: "light"` (thin) while the execution bar gets `variant: "normal"` (thick). + +## Trace View Sort Order + +Sibling spans (same parent) are sorted by `start_time ASC` from the ClickHouse query. The `createTreeFromFlatItems` function preserves this order. Event timestamps don't affect sort order - only the span's own `start_time`. + +## Event Structure + +```typescript +// OTel span event format +{ + name: "trigger.dev/run", // Must start with "trigger.dev/" to render + timeUnixNano: "1711200000000000000", + attributes: [ + { key: "event", value: { stringValue: "dequeue" } }, // The actual event type + { key: "duration", value: { intValue: 150 } }, // Optional: duration in ms + ] +} +``` + +## Admin-Only Events + +`getAdminOnlyForEvent()` controls visibility. Events default to **admin-only** (`true`). + +| Event | Admin-only | Friendly name | +|-------|-----------|---------------| +| `dequeue` | No | Dequeued | +| `fork` | No | Launched | +| `import` | No (if no fork event) | Importing task file | +| `create_attempt` | Yes | Attempt created | +| `lazy_payload` | Yes | Lazy attempt initialized | +| `pod_scheduled` | Yes | Pod scheduled | +| (default) | Yes | (raw event name) | + +## Adding New Timeline Events + +1. Add OTLP span event with `name: "trigger.dev/<scope>"` and `properties.event: "<event-type>"` +2. 
Event timestamp must be strictly after the parent span's `startTimeUnixNano` (ClickHouse drops earlier events) +3. Add friendly name in `getFriendlyNameForEvent()` in `app/utils/timelineSpanEvents.ts` +4. Set admin visibility in `getAdminOnlyForEvent()` +5. Optionally add help text in `getHelpTextForEvent()` + +## Key Files + +- `app/utils/timelineSpanEvents.ts` - filtering, naming, admin logic +- `app/components/run/RunTimeline.tsx` - `SpanTimeline` component (thin line + thick bar rendering) +- `app/presenters/v3/SpanPresenter.server.ts` - loads span data including events +- `app/v3/eventRepository/clickhouseEventRepository.server.ts` - `spanEventsToTaskEventV1Input()` (storage filter), `#mergeRecordsIntoSpanDetail` (reassembly) diff --git a/.claude/skills/trigger-dev-tasks/SKILL.md b/.claude/skills/trigger-dev-tasks/SKILL.md new file mode 100644 index 00000000000..791c22c27ed --- /dev/null +++ b/.claude/skills/trigger-dev-tasks/SKILL.md @@ -0,0 +1,200 @@ +--- +name: trigger-dev-tasks +description: Use this skill when writing, designing, or optimizing Trigger.dev background tasks and workflows. This includes creating reliable async tasks, implementing AI workflows, setting up scheduled jobs, structuring complex task hierarchies with subtasks, configuring build extensions for tools like ffmpeg or Puppeteer/Playwright, and handling task schemas with Zod validation. +allowed-tools: Read, Write, Edit, Glob, Grep, Bash +--- + +# Trigger.dev Task Expert + +You are an expert Trigger.dev developer specializing in building production-grade background job systems. Tasks deployed to Trigger.dev run in Node.js 21+ and use the `@trigger.dev/sdk` package. + +## Critical Rules + +1. **Always use `@trigger.dev/sdk`** - Never use `@trigger.dev/sdk/v3` or deprecated `client.defineJob` pattern +2. **Never use `node-fetch`** - Use the built-in `fetch` function +3. **Export all tasks** - Every task must be exported, including subtasks +4. 
**Never wrap wait/trigger calls in Promise.all** - `triggerAndWait`, `batchTriggerAndWait`, and `wait.*` calls cannot be wrapped in `Promise.all` or `Promise.allSettled` + +## Basic Task Pattern + +```ts +import { task } from "@trigger.dev/sdk"; + +export const processData = task({ + id: "process-data", + retry: { + maxAttempts: 10, + factor: 1.8, + minTimeoutInMs: 500, + maxTimeoutInMs: 30_000, + }, + run: async (payload: { userId: string; data: any[] }) => { + console.log(`Processing ${payload.data.length} items`); + return { processed: payload.data.length }; + }, +}); +``` + +## Schema Task (with validation) + +```ts +import { schemaTask } from "@trigger.dev/sdk"; +import { z } from "zod"; + +export const validatedTask = schemaTask({ + id: "validated-task", + schema: z.object({ + name: z.string(), + email: z.string().email(), + }), + run: async (payload) => { + // Payload is automatically validated and typed + return { message: `Hello ${payload.name}` }; + }, +}); +``` + +## Triggering Tasks + +### From Backend Code (type-only import to prevent dependency leakage) + +```ts +import { tasks } from "@trigger.dev/sdk"; +import type { processData } from "./trigger/tasks"; + +const handle = await tasks.trigger("process-data", { + userId: "123", + data: [{ id: 1 }], +}); +``` + +### From Inside Tasks + +```ts +export const parentTask = task({ + id: "parent-task", + run: async (payload) => { + // Trigger and wait - returns Result object, NOT direct output + const result = await childTask.triggerAndWait({ data: "value" }); + if (result.ok) { + console.log("Output:", result.output); + } else { + console.error("Failed:", result.error); + } + + // Or unwrap directly (throws on error) + const output = await childTask.triggerAndWait({ data: "value" }).unwrap(); + }, +}); +``` + +## Idempotency (Critical for Retries) + +Always use idempotency keys when triggering tasks from inside other tasks: + +```ts +import { idempotencyKeys } from "@trigger.dev/sdk"; + +export const 
paymentTask = task({ + id: "process-payment", + run: async (payload: { orderId: string }) => { + // Scoped to current run - survives retries + const key = await idempotencyKeys.create(`payment-${payload.orderId}`); + + await chargeCustomer.trigger(payload, { + idempotencyKey: key, + idempotencyKeyTTL: "24h", + }); + }, +}); +``` + +## Trigger Options + +```ts +await myTask.trigger(payload, { + delay: "1h", // Delay execution + ttl: "10m", // Cancel if not started within TTL + idempotencyKey: key, + queue: "my-queue", + machine: "large-1x", // micro, small-1x, small-2x, medium-1x, medium-2x, large-1x, large-2x + maxAttempts: 3, + tags: ["user_123"], // Max 10 tags + debounce: { // Consolidate rapid triggers + key: "unique-key", + delay: "5s", + mode: "trailing", // "leading" (default) or "trailing" + }, +}); +``` + +## Debouncing + +Consolidate multiple triggers into a single execution: + +```ts +// Rapid triggers with same key = single execution +await myTask.trigger({ userId: "123" }, { + debounce: { + key: "user-123-update", + delay: "5s", + }, +}); + +// Trailing mode: use payload from LAST trigger +await myTask.trigger({ data: "latest" }, { + debounce: { + key: "my-key", + delay: "10s", + mode: "trailing", + }, +}); +``` + +Use cases: user activity updates, webhook deduplication, search indexing, notification batching. + +## Batch Triggering + +Up to 1,000 items per batch, 3MB per payload: + +```ts +const results = await myTask.batchTriggerAndWait([ + { payload: { userId: "1" } }, + { payload: { userId: "2" } }, +]); + +for (const result of results) { + if (result.ok) console.log(result.output); +} +``` + +## Machine Presets + +| Preset | vCPU | Memory | +|-------------|------|--------| +| micro | 0.25 | 0.25GB | +| small-1x | 0.5 | 0.5GB | +| small-2x | 1 | 1GB | +| medium-1x | 1 | 2GB | +| medium-2x | 2 | 4GB | +| large-1x | 4 | 8GB | +| large-2x | 8 | 16GB | + +## Design Principles + +1. 
**Break complex workflows into subtasks** that can be independently retried and made idempotent +2. **Don't over-complicate** - Sometimes `Promise.allSettled` inside a single task is better than many subtasks (each task has dedicated process and is charged by millisecond) +3. **Always configure retries** - Set appropriate `maxAttempts` based on the operation +4. **Use idempotency keys** - Especially for payment/critical operations +5. **Group related subtasks** - Keep subtasks only used by one parent in the same file, don't export them +6. **Use logger** - Log at key execution points with `logger.info()`, `logger.error()`, etc. + +## Reference Documentation + +For detailed documentation on specific topics, read these files: + +- `basic-tasks.md` - Task basics, triggering, waits +- `advanced-tasks.md` - Tags, queues, concurrency, metadata, error handling +- `scheduled-tasks.md` - Cron schedules, declarative and imperative +- `realtime.md` - Real-time subscriptions, streams, React hooks +- `config.md` - trigger.config.ts, build extensions (Prisma, Playwright, FFmpeg, etc.) 
diff --git a/.claude/skills/trigger-dev-tasks/advanced-tasks.md b/.claude/skills/trigger-dev-tasks/advanced-tasks.md new file mode 100644 index 00000000000..32a00337f89 --- /dev/null +++ b/.claude/skills/trigger-dev-tasks/advanced-tasks.md @@ -0,0 +1,485 @@ +# Trigger.dev Advanced Tasks (v4) + +**Advanced patterns and features for writing tasks** + +## Tags & Organization + +```ts +import { task, tags } from "@trigger.dev/sdk"; + +export const processUser = task({ + id: "process-user", + run: async (payload: { userId: string; orgId: string }, { ctx }) => { + // Add tags during execution + await tags.add(`user_${payload.userId}`); + await tags.add(`org_${payload.orgId}`); + + return { processed: true }; + }, +}); + +// Trigger with tags +await processUser.trigger( + { userId: "123", orgId: "abc" }, + { tags: ["priority", "user_123", "org_abc"] } // Max 10 tags per run +); + +// Subscribe to tagged runs +for await (const run of runs.subscribeToRunsWithTag("user_123")) { + console.log(`User task ${run.id}: ${run.status}`); +} +``` + +**Tag Best Practices:** + +- Use prefixes: `user_123`, `org_abc`, `video:456` +- Max 10 tags per run, 1-64 characters each +- Tags don't propagate to child tasks automatically + +## Batch Triggering v2 + +Enhanced batch triggering with larger payloads and streaming ingestion. 
+ +### Limits + +- **Maximum batch size**: 1,000 items (increased from 500) +- **Payload per item**: 3MB each (increased from 1MB combined) +- Payloads > 512KB automatically offload to object storage + +### Rate Limiting (per environment) + +| Tier | Bucket Size | Refill Rate | +|------|-------------|-------------| +| Free | 1,200 runs | 100 runs/10 sec | +| Hobby | 5,000 runs | 500 runs/5 sec | +| Pro | 5,000 runs | 500 runs/5 sec | + +### Concurrent Batch Processing + +| Tier | Concurrent Batches | +|------|-------------------| +| Free | 1 | +| Hobby | 10 | +| Pro | 10 | + +### Usage + +```ts +import { myTask } from "./trigger/myTask"; + +// Basic batch trigger (up to 1,000 items) +const runs = await myTask.batchTrigger([ + { payload: { userId: "user-1" } }, + { payload: { userId: "user-2" } }, + { payload: { userId: "user-3" } }, +]); + +// Batch trigger with wait +const results = await myTask.batchTriggerAndWait([ + { payload: { userId: "user-1" } }, + { payload: { userId: "user-2" } }, +]); + +for (const result of results) { + if (result.ok) { + console.log("Result:", result.output); + } +} + +// With per-item options +const batchHandle = await myTask.batchTrigger([ + { + payload: { userId: "123" }, + options: { + idempotencyKey: "user-123-batch", + tags: ["priority"], + }, + }, + { + payload: { userId: "456" }, + options: { + idempotencyKey: "user-456-batch", + }, + }, +]); +``` + +## Debouncing + +Consolidate multiple triggers into a single execution by debouncing task runs with a unique key and delay window. 
+ +### Use Cases + +- **User activity updates**: Batch rapid user actions into a single run +- **Webhook deduplication**: Handle webhook bursts without redundant processing +- **Search indexing**: Combine document updates instead of processing individually +- **Notification batching**: Group notifications to prevent user spam + +### Basic Usage + +```ts +await myTask.trigger( + { userId: "123" }, + { + debounce: { + key: "user-123-update", // Unique identifier for debounce group + delay: "5s", // Wait duration ("5s", "1m", or milliseconds) + }, + } +); +``` + +### Execution Modes + +**Leading Mode** (default): Uses payload/options from the first trigger; subsequent triggers only reschedule execution time. + +```ts +// First trigger sets the payload +await myTask.trigger({ action: "first" }, { + debounce: { key: "my-key", delay: "10s" } +}); + +// Second trigger only reschedules - payload remains "first" +await myTask.trigger({ action: "second" }, { + debounce: { key: "my-key", delay: "10s" } +}); +// Task executes with { action: "first" } +``` + +**Trailing Mode**: Uses payload/options from the most recent trigger. 
+ +```ts +await myTask.trigger( + { data: "latest-value" }, + { + debounce: { + key: "trailing-example", + delay: "10s", + mode: "trailing", + }, + } +); +``` + +In trailing mode, these options update with each trigger: +- `payload` — task input data +- `metadata` — run metadata +- `tags` — run tags (replaces existing) +- `maxAttempts` — retry attempts +- `maxDuration` — maximum compute time +- `machine` — machine preset + +### Important Notes + +- Idempotency keys take precedence over debounce settings +- Compatible with `triggerAndWait()` — parent runs block correctly on debounced execution +- Debounce key is scoped to the task + +## Concurrency & Queues + +```ts +import { task, queue } from "@trigger.dev/sdk"; + +// Shared queue for related tasks +const emailQueue = queue({ + name: "email-processing", + concurrencyLimit: 5, // Max 5 emails processing simultaneously +}); + +// Task-level concurrency +export const oneAtATime = task({ + id: "sequential-task", + queue: { concurrencyLimit: 1 }, // Process one at a time + run: async (payload) => { + // Critical section - only one instance runs + }, +}); + +// Per-user concurrency +export const processUserData = task({ + id: "process-user-data", + run: async (payload: { userId: string }) => { + // Override queue with user-specific concurrency + await childTask.trigger(payload, { + queue: { + name: `user-${payload.userId}`, + concurrencyLimit: 2, + }, + }); + }, +}); + +export const emailTask = task({ + id: "send-email", + queue: emailQueue, // Use shared queue + run: async (payload: { to: string }) => { + // Send email logic + }, +}); +``` + +## Error Handling & Retries + +```ts +import { task, retry, AbortTaskRunError } from "@trigger.dev/sdk"; + +export const resilientTask = task({ + id: "resilient-task", + retry: { + maxAttempts: 10, + factor: 1.8, // Exponential backoff multiplier + minTimeoutInMs: 500, + maxTimeoutInMs: 30_000, + randomize: false, + }, + catchError: async ({ error, ctx }) => { + // Custom error 
handling + if (error.code === "FATAL_ERROR") { + throw new AbortTaskRunError("Cannot retry this error"); + } + + // Log error details + console.error(`Task ${ctx.task.id} failed:`, error); + + // Otherwise, schedule the next retry at a specific time + return { retryAt: new Date(Date.now() + 60000) }; // Retry in 1 minute + }, + run: async (payload) => { + // Retry specific operations + const result = await retry.onThrow( + async () => { + return await unstableApiCall(payload); + }, + { maxAttempts: 3 } + ); + + // Conditional HTTP retries + const response = await retry.fetch("https://api.example.com", { + retry: { + maxAttempts: 5, + condition: (response, error) => { + return response?.status === 429 || response?.status >= 500; + }, + }, + }); + + return result; + }, +}); +``` + +## Machines & Performance + +```ts +export const heavyTask = task({ + id: "heavy-computation", + machine: { preset: "large-2x" }, // 8 vCPU, 16 GB RAM + maxDuration: 1800, // 30 minutes timeout + run: async (payload, { ctx }) => { + // Resource-intensive computation + if (ctx.machine.preset === "large-2x") { + // Use all available cores + return await parallelProcessing(payload); + } + + return await standardProcessing(payload); + }, +}); + +// Override machine when triggering +await heavyTask.trigger(payload, { + machine: { preset: "medium-1x" }, // Override for this run +}); +``` + +**Machine Presets:** + +- `micro`: 0.25 vCPU, 0.25 GB RAM +- `small-1x`: 0.5 vCPU, 0.5 GB RAM (default) +- `small-2x`: 1 vCPU, 1 GB RAM +- `medium-1x`: 1 vCPU, 2 GB RAM +- `medium-2x`: 2 vCPU, 4 GB RAM +- `large-1x`: 4 vCPU, 8 GB RAM +- `large-2x`: 8 vCPU, 16 GB RAM + +## Idempotency + +```ts +import { task, idempotencyKeys } from "@trigger.dev/sdk"; + +export const paymentTask = task({ + id: "process-payment", + retry: { + maxAttempts: 3, + }, + run: async (payload: { orderId: string; amount: number }) => { + // Automatically scoped to this task run, so if the task is retried, the idempotency key will be the same + const 
idempotencyKey = await idempotencyKeys.create(`payment-${payload.orderId}`); + + // Ensure payment is processed only once + await chargeCustomer.trigger(payload, { + idempotencyKey, + idempotencyKeyTTL: "24h", // Key expires in 24 hours + }); + }, +}); + +// Payload-based idempotency +import { createHash } from "node:crypto"; + +function createPayloadHash(payload: any): string { + const hash = createHash("sha256"); + hash.update(JSON.stringify(payload)); + return hash.digest("hex"); +} + +export const deduplicatedTask = task({ + id: "deduplicated-task", + run: async (payload) => { + const payloadHash = createPayloadHash(payload); + const idempotencyKey = await idempotencyKeys.create(payloadHash); + + await processData.trigger(payload, { idempotencyKey }); + }, +}); +``` + +## Metadata & Progress Tracking + +```ts +import { task, metadata } from "@trigger.dev/sdk"; + +export const batchProcessor = task({ + id: "batch-processor", + run: async (payload: { items: any[] }, { ctx }) => { + const totalItems = payload.items.length; + + // Initialize progress metadata + metadata + .set("progress", 0) + .set("totalItems", totalItems) + .set("processedItems", 0) + .set("status", "starting"); + + const results = []; + + for (let i = 0; i < payload.items.length; i++) { + const item = payload.items[i]; + + // Process item + const result = await processItem(item); + results.push(result); + + // Update progress + const progress = ((i + 1) / totalItems) * 100; + metadata + .set("progress", progress) + .increment("processedItems", 1) + .append("logs", `Processed item ${i + 1}/${totalItems}`) + .set("currentItem", item.id); + } + + // Final status + metadata.set("status", "completed"); + + return { results, totalProcessed: results.length }; + }, +}); + +// Update parent metadata from child task +export const childTask = task({ + id: "child-task", + run: async (payload, { ctx }) => { + // Update parent task metadata + metadata.parent.set("childStatus", "processing"); + 
metadata.root.increment("childrenCompleted", 1); + + return { processed: true }; + }, +}); +``` + +## Logging & Tracing + +```ts +import { task, logger } from "@trigger.dev/sdk"; + +export const tracedTask = task({ + id: "traced-task", + run: async (payload, { ctx }) => { + logger.info("Task started", { userId: payload.userId }); + + // Custom trace with attributes + const user = await logger.trace( + "fetch-user", + async (span) => { + span.setAttribute("user.id", payload.userId); + span.setAttribute("operation", "database-fetch"); + + const userData = await database.findUser(payload.userId); + span.setAttribute("user.found", !!userData); + + return userData; + }, + { userId: payload.userId } + ); + + logger.debug("User fetched", { user: user.id }); + + try { + const result = await processUser(user); + logger.info("Processing completed", { result }); + return result; + } catch (error) { + logger.error("Processing failed", { + error: error.message, + userId: payload.userId, + }); + throw error; + } + }, +}); +``` + +## Hidden Tasks + +```ts +// Hidden task - not exported, only used internally +const internalProcessor = task({ + id: "internal-processor", + run: async (payload: { data: string }) => { + return { processed: payload.data.toUpperCase() }; + }, +}); + +// Public task that uses hidden task +export const publicWorkflow = task({ + id: "public-workflow", + run: async (payload: { input: string }) => { + // Use hidden task internally + const result = await internalProcessor.triggerAndWait({ + data: payload.input, + }); + + if (result.ok) { + return { output: result.output.processed }; + } + + throw new Error("Internal processing failed"); + }, +}); +``` + +## Best Practices + +- **Concurrency**: Use queues to prevent overwhelming external services +- **Retries**: Configure exponential backoff for transient failures +- **Idempotency**: Always use for payment/critical operations +- **Metadata**: Track progress for long-running tasks +- **Machines**: Match machine 
size to computational requirements +- **Tags**: Use consistent naming patterns for filtering +- **Debouncing**: Use for user activity, webhooks, and notification batching +- **Batch triggering**: Use for bulk operations up to 1,000 items +- **Error Handling**: Distinguish between retryable and fatal errors + +Design tasks to be stateless, idempotent, and resilient to failures. Use metadata for state tracking and queues for resource management. diff --git a/.claude/skills/trigger-dev-tasks/basic-tasks.md b/.claude/skills/trigger-dev-tasks/basic-tasks.md new file mode 100644 index 00000000000..56bff340761 --- /dev/null +++ b/.claude/skills/trigger-dev-tasks/basic-tasks.md @@ -0,0 +1,199 @@ +# Trigger.dev Basic Tasks (v4) + +**MUST use `@trigger.dev/sdk`, NEVER `client.defineJob`** + +## Basic Task + +```ts +import { task } from "@trigger.dev/sdk"; + +export const processData = task({ + id: "process-data", + retry: { + maxAttempts: 10, + factor: 1.8, + minTimeoutInMs: 500, + maxTimeoutInMs: 30_000, + randomize: false, + }, + run: async (payload: { userId: string; data: any[] }) => { + // Task logic - runs for long time, no timeouts + console.log(`Processing ${payload.data.length} items for user ${payload.userId}`); + return { processed: payload.data.length }; + }, +}); +``` + +## Schema Task (with validation) + +```ts +import { schemaTask } from "@trigger.dev/sdk"; +import { z } from "zod"; + +export const validatedTask = schemaTask({ + id: "validated-task", + schema: z.object({ + name: z.string(), + age: z.number(), + email: z.string().email(), + }), + run: async (payload) => { + // Payload is automatically validated and typed + return { message: `Hello ${payload.name}, age ${payload.age}` }; + }, +}); +``` + +## Triggering Tasks + +### From Backend Code + +```ts +import { tasks } from "@trigger.dev/sdk"; +import type { processData } from "./trigger/tasks"; + +// Single trigger +const handle = await tasks.trigger("process-data", { + userId: "123", + data: [{ id: 1 }, 
{ id: 2 }], +}); + +// Batch trigger (up to 1,000 items, 3MB per payload) +const batchHandle = await tasks.batchTrigger("process-data", [ + { payload: { userId: "123", data: [{ id: 1 }] } }, + { payload: { userId: "456", data: [{ id: 2 }] } }, +]); +``` + +### Debounced Triggering + +Consolidate multiple triggers into a single execution: + +```ts +// Multiple rapid triggers with same key = single execution +await myTask.trigger( + { userId: "123" }, + { + debounce: { + key: "user-123-update", // Unique key for debounce group + delay: "5s", // Wait before executing + }, + } +); + +// Trailing mode: use payload from LAST trigger +await myTask.trigger( + { data: "latest-value" }, + { + debounce: { + key: "trailing-example", + delay: "10s", + mode: "trailing", // Default is "leading" (first payload) + }, + } +); +``` + +**Debounce modes:** +- `leading` (default): Uses payload from first trigger, subsequent triggers only reschedule +- `trailing`: Uses payload from most recent trigger + +### From Inside Tasks (with Result handling) + +```ts +export const parentTask = task({ + id: "parent-task", + run: async (payload) => { + // Trigger and continue + const handle = await childTask.trigger({ data: "value" }); + + // Trigger and wait - returns Result object, NOT task output + const result = await childTask.triggerAndWait({ data: "value" }); + if (result.ok) { + console.log("Task output:", result.output); // Actual task return value + } else { + console.error("Task failed:", result.error); + } + + // Quick unwrap (throws on error) + const output = await childTask.triggerAndWait({ data: "value" }).unwrap(); + + // Batch trigger and wait + const results = await childTask.batchTriggerAndWait([ + { payload: { data: "item1" } }, + { payload: { data: "item2" } }, + ]); + + for (const run of results) { + if (run.ok) { + console.log("Success:", run.output); + } else { + console.log("Failed:", run.error); + } + } + }, +}); + +export const childTask = task({ + id: "child-task", + run: 
async (payload: { data: string }) => { + return { processed: payload.data }; + }, +}); +``` + +> Never wrap triggerAndWait or batchTriggerAndWait calls in a Promise.all or Promise.allSettled as this is not supported in Trigger.dev tasks. + +## Waits + +```ts +import { task, wait } from "@trigger.dev/sdk"; + +export const taskWithWaits = task({ + id: "task-with-waits", + run: async (payload) => { + console.log("Starting task"); + + // Wait for specific duration + await wait.for({ seconds: 30 }); + await wait.for({ minutes: 5 }); + await wait.for({ hours: 1 }); + await wait.for({ days: 1 }); + + // Wait until specific date + await wait.until({ date: new Date("2024-12-25") }); + + // Wait for token (from external system) + await wait.forToken({ + token: "user-approval-token", + timeoutInSeconds: 3600, // 1 hour timeout + }); + + console.log("All waits completed"); + return { status: "completed" }; + }, +}); +``` + +> Never wrap wait calls in a Promise.all or Promise.allSettled as this is not supported in Trigger.dev tasks. + +## Key Points + +- **Result vs Output**: `triggerAndWait()` returns a `Result` object with `ok`, `output`, `error` properties - NOT the direct task output +- **Type safety**: Use `import type` for task references when triggering from backend +- **Waits > 5 seconds**: Automatically checkpointed, don't count toward compute usage +- **Debounce + idempotency**: Idempotency keys take precedence over debounce settings + +## NEVER Use (v2 deprecated) + +```ts +// BREAKS APPLICATION +client.defineJob({ + id: "job-id", + run: async (payload, io) => { + /* ... 
*/ + }, +}); +``` + +Use SDK (`@trigger.dev/sdk`), check `result.ok` before accessing `result.output` diff --git a/.claude/skills/trigger-dev-tasks/config.md b/.claude/skills/trigger-dev-tasks/config.md new file mode 100644 index 00000000000..f6a4db1c4b8 --- /dev/null +++ b/.claude/skills/trigger-dev-tasks/config.md @@ -0,0 +1,346 @@ +# Trigger.dev Configuration + +**Complete guide to configuring `trigger.config.ts` with build extensions** + +## Basic Configuration + +```ts +import { defineConfig } from "@trigger.dev/sdk"; + +export default defineConfig({ + project: "", // Required: Your project reference + dirs: ["./trigger"], // Task directories + runtime: "node", // "node", "node-22", or "bun" + logLevel: "info", // "debug", "info", "warn", "error" + + // Default retry settings + retries: { + enabledInDev: false, + default: { + maxAttempts: 3, + minTimeoutInMs: 1000, + maxTimeoutInMs: 10000, + factor: 2, + randomize: true, + }, + }, + + // Build configuration + build: { + autoDetectExternal: true, + keepNames: true, + minify: false, + extensions: [], // Build extensions go here + }, + + // Global lifecycle hooks + onStartAttempt: async ({ payload, ctx }) => { + console.log("Global task start"); + }, + onSuccess: async ({ payload, output, ctx }) => { + console.log("Global task success"); + }, + onFailure: async ({ payload, error, ctx }) => { + console.log("Global task failure"); + }, +}); +``` + +## Build Extensions + +### Database & ORM + +#### Prisma + +```ts +import { prismaExtension } from "@trigger.dev/build/extensions/prisma"; + +extensions: [ + prismaExtension({ + schema: "prisma/schema.prisma", + version: "5.19.0", // Optional: specify version + migrate: true, // Run migrations during build + directUrlEnvVarName: "DIRECT_DATABASE_URL", + typedSql: true, // Enable TypedSQL support + }), +]; +``` + +#### TypeScript Decorators (for TypeORM) + +```ts +import { emitDecoratorMetadata } from "@trigger.dev/build/extensions/typescript"; + +extensions: [ + 
emitDecoratorMetadata(), // Enables decorator metadata +]; +``` + +### Scripting Languages + +#### Python + +```ts +import { pythonExtension } from "@trigger.dev/build/extensions/python"; + +extensions: [ + pythonExtension({ + scripts: ["./python/**/*.py"], // Copy Python files + requirementsFile: "./requirements.txt", // Install packages + devPythonBinaryPath: ".venv/bin/python", // Dev mode binary + }), +]; + +// Usage in tasks +const result = await python.runInline(`print("Hello, world!")`); +const output = await python.runScript("./python/script.py", ["arg1"]); +``` + +### Browser Automation + +#### Playwright + +```ts +import { playwright } from "@trigger.dev/build/extensions/playwright"; + +extensions: [ + playwright({ + browsers: ["chromium", "firefox", "webkit"], // Default: ["chromium"] + headless: true, // Default: true + }), +]; +``` + +#### Puppeteer + +```ts +import { puppeteer } from "@trigger.dev/build/extensions/puppeteer"; + +extensions: [puppeteer()]; + +// Environment variable needed: +// PUPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable" +``` + +#### Lightpanda + +```ts +import { lightpanda } from "@trigger.dev/build/extensions/lightpanda"; + +extensions: [ + lightpanda({ + version: "latest", // or "nightly" + disableTelemetry: false, + }), +]; +``` + +### Media Processing + +#### FFmpeg + +```ts +import { ffmpeg } from "@trigger.dev/build/extensions/core"; + +extensions: [ + ffmpeg({ version: "7" }), // Static build, or omit for Debian version +]; + +// Automatically sets FFMPEG_PATH and FFPROBE_PATH +// Add fluent-ffmpeg to external packages if using +``` + +#### Audio Waveform + +```ts +import { audioWaveform } from "@trigger.dev/build/extensions/audioWaveform"; + +extensions: [ + audioWaveform(), // Installs Audio Waveform 1.1.0 +]; +``` + +### System & Package Management + +#### System Packages (apt-get) + +```ts +import { aptGet } from "@trigger.dev/build/extensions/core"; + +extensions: [ + aptGet({ + packages: ["ffmpeg", 
"imagemagick", "curl=7.68.0-1"], // Can specify versions + }), +]; +``` + +#### Additional NPM Packages + +Only use this for installing CLI tools, NOT packages you import in your code. + +```ts +import { additionalPackages } from "@trigger.dev/build/extensions/core"; + +extensions: [ + additionalPackages({ + packages: ["wrangler"], // CLI tools and specific versions + }), +]; +``` + +#### Additional Files + +```ts +import { additionalFiles } from "@trigger.dev/build/extensions/core"; + +extensions: [ + additionalFiles({ + files: ["wrangler.toml", "./assets/**", "./fonts/**"], // Glob patterns supported + }), +]; +``` + +### Environment & Build Tools + +#### Environment Variable Sync + +```ts +import { syncEnvVars } from "@trigger.dev/build/extensions/core"; + +extensions: [ + syncEnvVars(async (ctx) => { + // ctx contains: environment, projectRef, env + return [ + { name: "SECRET_KEY", value: await getSecret(ctx.environment) }, + { name: "API_URL", value: ctx.environment === "prod" ? "api.prod.com" : "api.dev.com" }, + ]; + }), +]; +``` + +#### ESBuild Plugins + +```ts +import { esbuildPlugin } from "@trigger.dev/build/extensions"; +import { sentryEsbuildPlugin } from "@sentry/esbuild-plugin"; + +extensions: [ + esbuildPlugin( + sentryEsbuildPlugin({ + org: process.env.SENTRY_ORG, + project: process.env.SENTRY_PROJECT, + authToken: process.env.SENTRY_AUTH_TOKEN, + }), + { placement: "last", target: "deploy" } // Optional config + ), +]; +``` + +## Custom Build Extensions + +```ts +import { defineConfig } from "@trigger.dev/sdk"; + +const customExtension = { + name: "my-custom-extension", + + externalsForTarget: (target) => { + return ["some-native-module"]; // Add external dependencies + }, + + onBuildStart: async (context) => { + console.log(`Build starting for ${context.target}`); + // Register esbuild plugins, modify build context + }, + + onBuildComplete: async (context, manifest) => { + console.log("Build complete, adding layers"); + // Add build layers, 
modify deployment + context.addLayer({ + id: "my-layer", + files: [{ source: "./custom-file", destination: "/app/custom" }], + commands: ["chmod +x /app/custom"], + }); + }, +}; + +export default defineConfig({ + project: "my-project", + build: { + extensions: [customExtension], + }, +}); +``` + +## Advanced Configuration + +### Telemetry + +```ts +import { PrismaInstrumentation } from "@prisma/instrumentation"; +import { OpenAIInstrumentation } from "@langfuse/openai"; + +export default defineConfig({ + // ... other config + telemetry: { + instrumentations: [new PrismaInstrumentation(), new OpenAIInstrumentation()], + exporters: [customExporter], // Optional custom exporters + }, +}); +``` + +### Machine & Performance + +```ts +export default defineConfig({ + // ... other config + defaultMachine: "large-1x", // Default machine for all tasks + maxDuration: 300, // Default max duration (seconds) + enableConsoleLogging: true, // Console logging in development +}); +``` + +## Common Extension Combinations + +### Full-Stack Web App + +```ts +extensions: [ + prismaExtension({ schema: "prisma/schema.prisma", migrate: true }), + additionalFiles({ files: ["./public/**", "./assets/**"] }), + syncEnvVars(async (ctx) => [...envVars]), +]; +``` + +### AI/ML Processing + +```ts +extensions: [ + pythonExtension({ + scripts: ["./ai/**/*.py"], + requirementsFile: "./requirements.txt", + }), + ffmpeg({ version: "7" }), + additionalPackages({ packages: ["wrangler"] }), +]; +``` + +### Web Scraping + +```ts +extensions: [ + playwright({ browsers: ["chromium"] }), + puppeteer(), + additionalFiles({ files: ["./selectors.json", "./proxies.txt"] }), +]; +``` + +## Best Practices + +- **Use specific versions**: Pin extension versions for reproducible builds +- **External packages**: Add modules with native addons to the `build.external` array +- **Environment sync**: Use `syncEnvVars` for dynamic secrets +- **File paths**: Use glob patterns for flexible file inclusion +- **Debug builds**: 
Use `--log-level debug --dry-run` for troubleshooting + +Extensions only affect deployment, not local development. Use `external` array for packages that shouldn't be bundled. diff --git a/.claude/skills/trigger-dev-tasks/realtime.md b/.claude/skills/trigger-dev-tasks/realtime.md new file mode 100644 index 00000000000..c1c4c5821a9 --- /dev/null +++ b/.claude/skills/trigger-dev-tasks/realtime.md @@ -0,0 +1,244 @@ +# Trigger.dev Realtime + +**Real-time monitoring and updates for runs** + +## Core Concepts + +Realtime allows you to: + +- Subscribe to run status changes, metadata updates, and streams +- Build real-time dashboards and UI updates +- Monitor task progress from frontend and backend + +## Authentication + +### Public Access Tokens + +```ts +import { auth } from "@trigger.dev/sdk"; + +// Read-only token for specific runs +const publicToken = await auth.createPublicToken({ + scopes: { + read: { + runs: ["run_123", "run_456"], + tasks: ["my-task-1", "my-task-2"], + }, + }, + expirationTime: "1h", // Default: 15 minutes +}); +``` + +### Trigger Tokens (Frontend only) + +```ts +// Single-use token for triggering tasks +const triggerToken = await auth.createTriggerPublicToken("my-task", { + expirationTime: "30m", +}); +``` + +## Backend Usage + +### Subscribe to Runs + +```ts +import { runs, tasks } from "@trigger.dev/sdk"; + +// Trigger and subscribe +const handle = await tasks.trigger("my-task", { data: "value" }); + +// Subscribe to specific run +for await (const run of runs.subscribeToRun(handle.id)) { + console.log(`Status: ${run.status}, Progress: ${run.metadata?.progress}`); + if (run.status === "COMPLETED") break; +} + +// Subscribe to runs with tag +for await (const run of runs.subscribeToRunsWithTag("user-123")) { + console.log(`Tagged run ${run.id}: ${run.status}`); +} + +// Subscribe to batch +for await (const run of runs.subscribeToBatch(batchId)) { + console.log(`Batch run ${run.id}: ${run.status}`); +} +``` + +### Realtime Streams v2 + +```ts 
+import { streams, InferStreamType } from "@trigger.dev/sdk"; + +// 1. Define streams (shared location) +export const aiStream = streams.define({ + id: "ai-output", +}); + +export type AIStreamPart = InferStreamType<typeof aiStream>; + +// 2. Pipe from task +export const streamingTask = task({ + id: "streaming-task", + run: async (payload) => { + const completion = await openai.chat.completions.create({ + model: "gpt-4", + messages: [{ role: "user", content: payload.prompt }], + stream: true, + }); + + const { waitUntilComplete } = aiStream.pipe(completion); + await waitUntilComplete(); + }, +}); + +// 3. Read from backend +const stream = await aiStream.read(runId, { + timeoutInSeconds: 300, + startIndex: 0, // Resume from specific chunk +}); + +for await (const chunk of stream) { + console.log("Chunk:", chunk); // Fully typed +} +``` + +## React Frontend Usage + +### Installation + +```bash +npm add @trigger.dev/react-hooks +``` + +### Triggering Tasks + +```tsx +"use client"; +import { useTaskTrigger, useRealtimeTaskTrigger } from "@trigger.dev/react-hooks"; +import type { myTask } from "../trigger/tasks"; + +function TriggerComponent({ accessToken }: { accessToken: string }) { + // Basic trigger + const { submit, handle, isLoading } = useTaskTrigger<typeof myTask>("my-task", { + accessToken, + }); + + // Trigger with realtime updates + const { + submit: realtimeSubmit, + run, + isLoading: isRealtimeLoading, + } = useRealtimeTaskTrigger<typeof myTask>("my-task", { accessToken }); + + return ( +
+ + + + + {run &&
Status: {run.status}
} +
+ ); +} +``` + +### Subscribing to Runs + +```tsx +"use client"; +import { useRealtimeRun, useRealtimeRunsWithTag } from "@trigger.dev/react-hooks"; +import type { myTask } from "../trigger/tasks"; + +function SubscribeComponent({ runId, accessToken }: { runId: string; accessToken: string }) { + // Subscribe to specific run + const { run, error } = useRealtimeRun(runId, { + accessToken, + onComplete: (run) => { + console.log("Task completed:", run.output); + }, + }); + + // Subscribe to tagged runs + const { runs } = useRealtimeRunsWithTag("user-123", { accessToken }); + + if (error) return
Error: {error.message}
; + if (!run) return
Loading...
; + + return ( +
+
Status: {run.status}
+
Progress: {run.metadata?.progress || 0}%
+ {run.output &&
Result: {JSON.stringify(run.output)}
} + +

Tagged Runs:

+ {runs.map((r) => ( +
+ {r.id}: {r.status} +
+ ))} +
+ ); +} +``` + +### Realtime Streams with React + +```tsx +"use client"; +import { useRealtimeStream } from "@trigger.dev/react-hooks"; +import { aiStream } from "../trigger/streams"; + +function StreamComponent({ runId, accessToken }: { runId: string; accessToken: string }) { + // Pass defined stream directly for type safety + const { parts, error } = useRealtimeStream(aiStream, runId, { + accessToken, + timeoutInSeconds: 300, + throttleInMs: 50, // Control re-render frequency + }); + + if (error) return
Error: {error.message}
; + if (!parts) return
Loading...
; + + const text = parts.join(""); // parts is typed as AIStreamPart[] + + return
Streamed Text: {text}
; +} +``` + +### Wait Tokens + +```tsx +"use client"; +import { useWaitToken } from "@trigger.dev/react-hooks"; + +function WaitTokenComponent({ tokenId, accessToken }: { tokenId: string; accessToken: string }) { + const { complete } = useWaitToken(tokenId, { accessToken }); + + return ; +} +``` + +## Run Object Properties + +Key properties available in run subscriptions: + +- `id`: Unique run identifier +- `status`: `QUEUED`, `EXECUTING`, `COMPLETED`, `FAILED`, `CANCELED`, etc. +- `payload`: Task input data (typed) +- `output`: Task result (typed, when completed) +- `metadata`: Real-time updatable data +- `createdAt`, `updatedAt`: Timestamps +- `costInCents`: Execution cost + +## Best Practices + +- **Use Realtime over SWR**: Recommended for most use cases due to rate limits +- **Scope tokens properly**: Only grant necessary read/trigger permissions +- **Handle errors**: Always check for errors in hooks and subscriptions +- **Type safety**: Use task types for proper payload/output typing +- **Cleanup subscriptions**: Backend subscriptions auto-complete, frontend hooks auto-cleanup diff --git a/.claude/skills/trigger-dev-tasks/scheduled-tasks.md b/.claude/skills/trigger-dev-tasks/scheduled-tasks.md new file mode 100644 index 00000000000..b314753497f --- /dev/null +++ b/.claude/skills/trigger-dev-tasks/scheduled-tasks.md @@ -0,0 +1,113 @@ +# Scheduled Tasks (Cron) + +Recurring tasks using cron. For one-off future runs, use the **delay** option. + +## Define a Scheduled Task + +```ts +import { schedules } from "@trigger.dev/sdk"; + +export const task = schedules.task({ + id: "first-scheduled-task", + run: async (payload) => { + payload.timestamp; // Date (scheduled time, UTC) + payload.lastTimestamp; // Date | undefined + payload.timezone; // IANA, e.g. 
"America/New_York" (default "UTC") + payload.scheduleId; // string + payload.externalId; // string | undefined + payload.upcoming; // Date[] + + payload.timestamp.toLocaleString("en-US", { timeZone: payload.timezone }); + }, +}); +``` + +> Scheduled tasks need at least one schedule attached to run. + +## Attach Schedules + +**Declarative (sync on dev/deploy):** + +```ts +schedules.task({ + id: "every-2h", + cron: "0 */2 * * *", // UTC + run: async () => {}, +}); + +schedules.task({ + id: "tokyo-5am", + cron: { pattern: "0 5 * * *", timezone: "Asia/Tokyo", environments: ["PRODUCTION", "STAGING"] }, + run: async () => {}, +}); +``` + +**Imperative (SDK or dashboard):** + +```ts +await schedules.create({ + task: task.id, + cron: "0 0 * * *", + timezone: "America/New_York", // DST-aware + externalId: "user_123", + deduplicationKey: "user_123-daily", // updates if reused +}); +``` + +### Dynamic / Multi-tenant Example + +```ts +// /trigger/reminder.ts +export const reminderTask = schedules.task({ + id: "todo-reminder", + run: async (p) => { + if (!p.externalId) throw new Error("externalId is required"); + const user = await db.getUser(p.externalId); + await sendReminderEmail(user); + }, +}); +``` + +```ts +// app/reminders/route.ts +export async function POST(req: Request) { + const data = await req.json(); + return Response.json( + await schedules.create({ + task: reminderTask.id, + cron: "0 8 * * *", + timezone: data.timezone, + externalId: data.userId, + deduplicationKey: `${data.userId}-reminder`, + }) + ); +} +``` + +## Cron Syntax (no seconds) + +``` +* * * * * +| | | | └ day of week (0–7 or 1L–7L; 0/7=Sun; L=last) +| | | └── month (1–12) +| | └──── day of month (1–31 or L) +| └────── hour (0–23) +└──────── minute (0–59) +``` + +## When Schedules Won't Trigger + +- **Dev:** only when the dev CLI is running. +- **Staging/Production:** only for tasks in the **latest deployment**. 
+ +## SDK Management + +```ts +await schedules.retrieve(id); +await schedules.list(); +await schedules.update(id, { cron: "0 0 1 * *", externalId: "ext", deduplicationKey: "key" }); +await schedules.deactivate(id); +await schedules.activate(id); +await schedules.del(id); +await schedules.timezones(); // list of IANA timezones +``` diff --git a/.configs/tsconfig.base.json b/.configs/tsconfig.base.json new file mode 100644 index 00000000000..2d560d22d0f --- /dev/null +++ b/.configs/tsconfig.base.json @@ -0,0 +1,36 @@ +{ + "compilerOptions": { + "target": "es2022", + "lib": ["ES2022", "DOM", "DOM.Iterable", "DOM.AsyncIterable"], + "module": "NodeNext", + "moduleResolution": "NodeNext", + "moduleDetection": "force", + "verbatimModuleSyntax": false, + "jsx": "react", + + "strict": true, + "alwaysStrict": true, + "strictPropertyInitialization": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "noUnusedLocals": false, + "noUnusedParameters": false, + "noImplicitAny": true, + "noImplicitReturns": true, + "noImplicitThis": true, + + "noFallthroughCasesInSwitch": true, + "resolveJsonModule": true, + + "removeComments": false, + "esModuleInterop": true, + "emitDecoratorMetadata": false, + "experimentalDecorators": false, + "downlevelIteration": true, + "isolatedModules": true, + "noUncheckedIndexedAccess": true, + + "pretty": true, + "customConditions": ["@triggerdotdev/source"] + } +} diff --git a/.cursor/commands/deslop.md b/.cursor/commands/deslop.md new file mode 100644 index 00000000000..d82835663f7 --- /dev/null +++ b/.cursor/commands/deslop.md @@ -0,0 +1,11 @@ +# Remove AI code slop + +Check the diff against main, and remove all AI generated slop introduced in this branch. 
+ +This includes: +- Extra comments that a human wouldn't add or that are inconsistent with the rest of the file +- Extra defensive checks or try/catch blocks that are abnormal for that area of the codebase (especially if called by trusted / validated codepaths) +- Casts to any to get around type issues +- Any other style that is inconsistent with the file + +Report at the end with only a 1-3 sentence summary of what you changed \ No newline at end of file diff --git a/.cursor/mcp.json b/.cursor/mcp.json new file mode 100644 index 00000000000..c4b06a67630 --- /dev/null +++ b/.cursor/mcp.json @@ -0,0 +1,7 @@ +{ + "mcpServers": { + "linear": { + "url": "https://mcp.linear.app/mcp" + } + } +} diff --git a/.cursor/rules/executing-commands.mdc b/.cursor/rules/executing-commands.mdc new file mode 100644 index 00000000000..0d36b449491 --- /dev/null +++ b/.cursor/rules/executing-commands.mdc @@ -0,0 +1,24 @@ +--- +description: how to run commands in the monorepo +globs: +alwaysApply: true +--- +Almost all commands in the monorepo should be executed with `pnpm run ...` from the root of the monorepo. For example, to run the `dev` script for the webapp: + +``` +pnpm run dev --filter webapp +``` + +But often, when running tests, it's better to `cd` into the directory and then run tests: + +``` +cd apps/webapp +pnpm run test --run +``` + +This way you can easily run the tests for a single file: + +``` +cd internal-packages/run-engine +pnpm run test ./src/engine/tests/ttl.test.ts --run +``` diff --git a/.cursor/rules/migrations.mdc b/.cursor/rules/migrations.mdc new file mode 100644 index 00000000000..370c87c051d --- /dev/null +++ b/.cursor/rules/migrations.mdc @@ -0,0 +1,6 @@ +--- +description: how to create and apply database migrations +alwaysApply: false +--- + +Follow our [migrations.md](mdc:ai/references/migrations.md) guide for how to create and apply database migrations. 
diff --git a/.cursor/rules/otel-metrics.mdc b/.cursor/rules/otel-metrics.mdc new file mode 100644 index 00000000000..218f07c41e2 --- /dev/null +++ b/.cursor/rules/otel-metrics.mdc @@ -0,0 +1,66 @@ +--- +description: Guidelines for creating OpenTelemetry metrics to avoid cardinality issues +globs: + - "**/*.ts" +--- + +# OpenTelemetry Metrics Guidelines + +When creating or editing OTEL metrics (counters, histograms, gauges), always ensure metric attributes have **low cardinality**. + +## What is Cardinality? + +Cardinality refers to the number of unique values an attribute can have. Each unique combination of attribute values creates a new time series, which consumes memory and storage in your metrics backend. + +## Rules + +### DO use low-cardinality attributes: +- **Enums**: `environment_type` (PRODUCTION, STAGING, DEVELOPMENT, PREVIEW) +- **Booleans**: `hasFailures`, `streaming`, `success` +- **Bounded error codes**: A finite, controlled set of error types +- **Shard IDs**: When sharding is bounded (e.g., 0-15) + +### DO NOT use high-cardinality attributes: +- **UUIDs/IDs**: `envId`, `userId`, `runId`, `projectId`, `organizationId` +- **Unbounded integers**: `itemCount`, `batchSize`, `retryCount` +- **Timestamps**: `createdAt`, `startTime` +- **Free-form strings**: `errorMessage`, `taskName`, `queueName` + +## Example + +```typescript +// BAD - High cardinality +this.counter.add(1, { + envId: options.environmentId, // UUID - unbounded + itemCount: options.runCount, // Integer - unbounded +}); + +// GOOD - Low cardinality +this.counter.add(1, { + environment_type: options.environmentType, // Enum - 4 values + streaming: true, // Boolean - 2 values +}); +``` + +## Prometheus Metric Naming + +When metrics are exported via OTLP to Prometheus, the exporter automatically adds unit suffixes to metric names: + +| OTel Metric Name | Unit | Prometheus Name | +|------------------|------|-----------------| +| `my_duration_ms` | `ms` | `my_duration_ms_milliseconds` | +| 
`my_counter` | counter | `my_counter_total` | +| `items_inserted` | counter | `items_inserted_inserts_total` | +| `batch_size` | histogram | `batch_size_items_bucket` | + +Keep this in mind when writing Grafana dashboards or Prometheus queries—the metric names in Prometheus will differ from the names defined in code. + +## Reference + +See the schedule engine (`internal-packages/schedule-engine/src/engine/index.ts`) for a good example of low-cardinality metric attributes. + +High cardinality metrics can cause: +- Memory bloat in metrics backends (Axiom, Prometheus, etc.) +- Slow queries and dashboard timeouts +- Increased costs (many backends charge per time series) +- Potential data loss or crashes at scale diff --git a/.cursor/rules/repo.mdc b/.cursor/rules/repo.mdc new file mode 100644 index 00000000000..460c3893656 --- /dev/null +++ b/.cursor/rules/repo.mdc @@ -0,0 +1,6 @@ +--- +description: understanding the structure of the monorepo +globs: +alwaysApply: true +--- +We've documented the structure of our monorepo here: [repo.md](mdc:ai/references/repo.md) \ No newline at end of file diff --git a/.cursor/rules/webapp.mdc b/.cursor/rules/webapp.mdc new file mode 100644 index 00000000000..f1333febdc0 --- /dev/null +++ b/.cursor/rules/webapp.mdc @@ -0,0 +1,40 @@ +--- +description: Making updates to the main trigger.dev remix webapp +globs: apps/webapp/**/*.tsx,apps/webapp/**/*.ts +alwaysApply: false +--- + +The main trigger.dev webapp, which powers its API and dashboard and makes up the docker image that is produced as an OSS image, is a Remix 2.17.4 app that uses an express server, written in TypeScript. The following subsystems are either included in the webapp or are used by the webapp in another part of the monorepo: + +- `@trigger.dev/database` exports a Prisma 6.14.0 client that is used extensively in the webapp to access a PostgreSQL instance. 
The schema file is [schema.prisma](mdc:internal-packages/database/prisma/schema.prisma) +- `@trigger.dev/core` is a published package and is used to share code between the `@trigger.dev/sdk` and the webapp. It includes functionality but also a load of Zod schemas for data validation. When importing from `@trigger.dev/core` in the webapp, we never import the root `@trigger.dev/core` path, instead we favor one of the subpath exports that you can find in [package.json](mdc:packages/core/package.json) +- `@internal/run-engine` has all the code needed to trigger a run and take it through its lifecycle to completion. +- `@trigger.dev/redis-worker` is a custom redis based background job/worker system that's used in the webapp and also used inside the run engine. + +## Environment variables and testing + +In the webapp, all environment variables are accessed through the `env` export of [env.server.ts](mdc:apps/webapp/app/env.server.ts), instead of directly accessing `process.env`. + +Ideally, the `env.server.ts` file would never be imported into a test file, either directly or indirectly. Tests should only import classes and functions from a file matching `app/**/*.ts` of the webapp, and that file should not use environment variables; everything should be passed through as options instead. This "service/configuration" separation is important, and can be seen in a few places in the code, for example: + +- [realtimeClient.server.ts](mdc:apps/webapp/app/services/realtimeClient.server.ts) is the testable service, and [realtimeClientGlobal.server.ts](mdc:apps/webapp/app/services/realtimeClientGlobal.server.ts) is the configuration + +Also for writing tests in the webapp, check out our [tests.md](mdc:ai/references/tests.md) guide + +## Legacy run engine vs Run Engine 2.0 + +We originally built the Trigger.dev "Run Engine" not as a single system, but just spread out all over the codebase, with no real separation or encapsulation. And we didn't even call it a "Run Engine". 
With Run Engine 2.0, we've completely rewritten big parts of the way the system works, and moved it over to an internal package called `@internal/run-engine`. So we've retroactively named the previous run engine "Legacy run engine". We're focused almost exclusively now on moving to Run Engine 2.0 and will be deprecating and removing the legacy run engine code eventually. + +## Where to look for code + +- The trigger API endpoint is [api.v1.tasks.$taskId.trigger.ts](mdc:apps/webapp/app/routes/api.v1.tasks.$taskId.trigger.ts) +- The batch trigger API endpoint is [api.v1.tasks.batch.ts](mdc:apps/webapp/app/routes/api.v1.tasks.batch.ts) +- Setup code for the prisma client is in [db.server.ts](mdc:apps/webapp/app/db.server.ts) +- The run engine is configured in [runEngine.server.ts](mdc:apps/webapp/app/v3/runEngine.server.ts) +- All the "services" are found in `app/v3/services/**/*.server.ts` +- The code for the TaskEvent data, which is the otel data sent from tasks to our servers, is in both the [eventRepository.server.ts](mdc:apps/webapp/app/v3/eventRepository.server.ts) and also the [otlpExporter.server.ts](mdc:apps/webapp/app/v3/otlpExporter.server.ts). The otel endpoints which are hit from production and development otel exporters are [otel.v1.logs.ts](mdc:apps/webapp/app/routes/otel.v1.logs.ts) and [otel.v1.traces.ts](mdc:apps/webapp/app/routes/otel.v1.traces.ts) +- We use "presenters" to move more complex loader code into a class, and you can find those in `app/v3/presenters/**/*.server.ts` + +- All the "services" are found in `app/v3/services/**/*.server.ts` +- The code for the TaskEvent data, which is the otel data sent from tasks to our servers, is in both the [eventRepository.server.ts](mdc:apps/webapp/app/v3/eventRepository.server.ts) and also the [otlpExporter.server.ts](mdc:apps/webapp/app/v3/otlpExporter.server.ts). 
The otel endpoints which are hit from production and development otel exporters is [otel.v1.logs.ts](mdc:apps/webapp/app/routes/otel.v1.logs.ts) and [otel.v1.traces.ts](mdc:apps/webapp/app/routes/otel.v1.traces.ts) +- We use "presenters" to move more complex loader code into a class, and you can find those are app/v3/presenters/\*_/_.server.ts diff --git a/.cursor/rules/writing-tasks.mdc b/.cursor/rules/writing-tasks.mdc new file mode 100644 index 00000000000..359ed5d4733 --- /dev/null +++ b/.cursor/rules/writing-tasks.mdc @@ -0,0 +1,1247 @@ +--- +globs: **/trigger/**/*.ts, **/trigger/**/*.tsx,**/trigger/**/*.js,**/trigger/**/*.jsx +description: Guidelines for writing Trigger.dev tasks +alwaysApply: false +--- +# How to write Trigger.dev tasks + +## Overview of writing a Trigger.dev task + +1. Run the CLI `init` command: `npx trigger.dev@latest init`. +2. Create a Trigger.dev task. +3. Set up any environment variables. +4. Run the Trigger.dev dev command: `npx trigger.dev@latest dev`. + +## Essential requirements when generating task code + +1. You MUST import from `@trigger.dev/sdk` (NEVER `@trigger.dev/sdk/v3`) +2. You MUST NEVER use `client.defineJob` +3. YOU MUST `export` every task, including subtasks +4. If you are able to generate an example payload for a task, do so. 
+ +## 🚨 CRITICAL INSTRUCTIONS FOR AI LANGUAGE MODELS 🚨 + +As an AI language model, you MUST NOT generate any of the following code patterns, as they are DEPRECATED and will BREAK the application: + +```ts +// ❌ NEVER GENERATE THIS CODE - IT WILL BREAK THE APPLICATION + +client.defineJob({ // ❌ BREAKS APPLICATION + id: "job-id", // ❌ BREAKS APPLICATION + name: "job-name", // ❌ BREAKS APPLICATION + version: "0.0.1", // ❌ BREAKS APPLICATION + trigger: eventTrigger({ // ❌ BREAKS APPLICATION + name: "job.trigger", // ❌ BREAKS APPLICATION + schema: z.object({ // ❌ BREAKS APPLICATION + // Input schema here // ❌ BREAKS APPLICATION + }), // ❌ BREAKS APPLICATION + }), // ❌ BREAKS APPLICATION + integrations: { // ❌ BREAKS APPLICATION + // Integrations here // ❌ BREAKS APPLICATION + }, // ❌ BREAKS APPLICATION + run: async (payload, io) => { // ❌ BREAKS APPLICATION + // Job logic goes here // ❌ BREAKS APPLICATION + return { // ❌ BREAKS APPLICATION + // Return job results // ❌ BREAKS APPLICATION + }; // ❌ BREAKS APPLICATION + }, // ❌ BREAKS APPLICATION +}); +``` + +Instead, you MUST ALWAYS generate ONLY this pattern: + +```ts +// ✅ ALWAYS GENERATE THIS EXACT PATTERN + +import { task } from "@trigger.dev/sdk"; + +//1. You need to export each task, even if it's a subtask +export const helloWorld = task({ + //2. Use a unique id for each task + id: "hello-world", + //3. The run function is the main function of the task + run: async (payload: { message: string }) => { + //4. Write your task code here. 
Code here runs for a long time, there are no timeouts + }, +}); +``` + +## Correct Task implementations + +A task is a function that can run for a long time with resilience to failure: + +```ts +import { task } from "@trigger.dev/sdk"; + +export const helloWorld = task({ + id: "hello-world", + run: async (payload: { message: string }) => { + console.log(payload.message); + }, +}); +``` + +Key points: +- Tasks must be exported, even subtasks in the same file +- Each task needs a unique ID within your project +- The `run` function contains your task logic + +### Task configuration options + +#### Retry options + +Control retry behavior when errors occur: + +```ts +export const taskWithRetries = task({ + id: "task-with-retries", + retry: { + maxAttempts: 10, + factor: 1.8, + minTimeoutInMs: 500, + maxTimeoutInMs: 30_000, + randomize: false, + }, + run: async (payload) => { + // Task logic + }, +}); +``` + +#### Queue options + +Control concurrency: + +```ts +export const oneAtATime = task({ + id: "one-at-a-time", + queue: { + concurrencyLimit: 1, + }, + run: async (payload) => { + // Task logic + }, +}); +``` + +#### Machine options + +Specify CPU/RAM requirements: + +```ts +export const heavyTask = task({ + id: "heavy-task", + machine: { + preset: "large-1x", // 4 vCPU, 8 GB RAM + }, + run: async (payload) => { + // Task logic + }, +}); +``` + +Machine configuration options: + +| Machine name | vCPU | Memory | Disk space | +| ------------------- | ---- | ------ | ---------- | +| micro | 0.25 | 0.25 | 10GB | +| small-1x (default) | 0.5 | 0.5 | 10GB | +| small-2x | 1 | 1 | 10GB | +| medium-1x | 1 | 2 | 10GB | +| medium-2x | 2 | 4 | 10GB | +| large-1x | 4 | 8 | 10GB | +| large-2x | 8 | 16 | 10GB | + +#### Max Duration + +Limit how long a task can run: + +```ts +export const longTask = task({ + id: "long-task", + maxDuration: 300, // 5 minutes + run: async (payload) => { + // Task logic + }, +}); +``` + +### Lifecycle functions + +Tasks support several lifecycle hooks: + 
+#### init + +Runs before each attempt, can return data for other functions: + +```ts +export const taskWithInit = task({ + id: "task-with-init", + init: async (payload, { ctx }) => { + return { someData: "someValue" }; + }, + run: async (payload, { ctx, init }) => { + console.log(init.someData); // "someValue" + }, +}); +``` + +#### cleanup + +Runs after each attempt, regardless of success/failure: + +```ts +export const taskWithCleanup = task({ + id: "task-with-cleanup", + cleanup: async (payload, { ctx }) => { + // Cleanup resources + }, + run: async (payload, { ctx }) => { + // Task logic + }, +}); +``` + +#### onStart + +Runs once when a task starts (not on retries): + +```ts +export const taskWithOnStart = task({ + id: "task-with-on-start", + onStart: async (payload, { ctx }) => { + // Send notification, log, etc. + }, + run: async (payload, { ctx }) => { + // Task logic + }, +}); +``` + +#### onSuccess + +Runs when a task succeeds: + +```ts +export const taskWithOnSuccess = task({ + id: "task-with-on-success", + onSuccess: async (payload, output, { ctx }) => { + // Handle success + }, + run: async (payload, { ctx }) => { + // Task logic + }, +}); +``` + +#### onFailure + +Runs when a task fails after all retries: + +```ts +export const taskWithOnFailure = task({ + id: "task-with-on-failure", + onFailure: async (payload, error, { ctx }) => { + // Handle failure + }, + run: async (payload, { ctx }) => { + // Task logic + }, +}); +``` + +#### handleError + +Controls error handling and retry behavior: + +```ts +export const taskWithErrorHandling = task({ + id: "task-with-error-handling", + handleError: async (error, { ctx }) => { + // Custom error handling + }, + run: async (payload, { ctx }) => { + // Task logic + }, +}); +``` + +Global lifecycle hooks can also be defined in `trigger.config.ts` to apply to all tasks. 
+ +## Correct Schedules task (cron) implementations + +```ts +import { schedules } from "@trigger.dev/sdk"; + +export const firstScheduledTask = schedules.task({ + id: "first-scheduled-task", + run: async (payload) => { + //when the task was scheduled to run + //note this will be slightly different from new Date() because it takes a few ms to run the task + console.log(payload.timestamp); //is a Date object + + //when the task was last run + //this can be undefined if it's never been run + console.log(payload.lastTimestamp); //is a Date object or undefined + + //the timezone the schedule was registered with, defaults to "UTC" + //this is in IANA format, e.g. "America/New_York" + //See the full list here: https://cloud.trigger.dev/timezones + console.log(payload.timezone); //is a string + + //If you want to output the time in the user's timezone do this: + const formatted = payload.timestamp.toLocaleString("en-US", { + timeZone: payload.timezone, + }); + + //the schedule id (you can have many schedules for the same task) + //using this you can remove the schedule, update it, etc + console.log(payload.scheduleId); //is a string + + //you can optionally provide an external id when creating the schedule + //usually you would set this to a userId or some other unique identifier + //this can be undefined if you didn't provide one + console.log(payload.externalId); //is a string or undefined + + //the next 5 dates this task is scheduled to run + console.log(payload.upcoming); //is an array of Date objects + }, +}); +``` + +### Attach a Declarative schedule + +```ts +import { schedules } from "@trigger.dev/sdk"; + +// Specify a cron pattern (UTC) +export const firstScheduledTask = schedules.task({ + id: "first-scheduled-task", + //every two hours (UTC timezone) + cron: "0 */2 * * *", + run: async (payload, { ctx }) => { + //do something + }, +}); +``` + +```ts +import { schedules } from "@trigger.dev/sdk"; + +// Specify a specific timezone like this: +export const 
secondScheduledTask = schedules.task({ + id: "second-scheduled-task", + cron: { + //5am every day Tokyo time + pattern: "0 5 * * *", + timezone: "Asia/Tokyo", + }, + run: async (payload) => {}, +}); +``` + +### Attach an Imperative schedule + +Create schedules explicitly for tasks using the dashboard's "New schedule" button or the SDK. + +#### Benefits +- Dynamic creation (e.g., one schedule per user) +- Manage without code deployment: + - Activate/disable + - Edit + - Delete + +#### Implementation +1. Define a task using `⁠schedules.task()` +2. Attach one or more schedules via: + - Dashboard + - SDK + +#### Attach schedules with the SDK like this + +```ts +const createdSchedule = await schedules.create({ + //The id of the scheduled task you want to attach to. + task: firstScheduledTask.id, + //The schedule in cron format. + cron: "0 0 * * *", + //this is required, it prevents you from creating duplicate schedules. It will update the schedule if it already exists. + deduplicationKey: "my-deduplication-key", +}); +``` + +## Correct Schema task implementations + +Schema tasks validate payloads against a schema before execution: + +```ts +import { schemaTask } from "@trigger.dev/sdk"; +import { z } from "zod"; + +const myTask = schemaTask({ + id: "my-task", + schema: z.object({ + name: z.string(), + age: z.number(), + }), + run: async (payload) => { + // Payload is typed and validated + console.log(payload.name, payload.age); + }, +}); +``` + +## Correct implementations for triggering a task from your backend + +When you trigger a task from your backend code, you need to set the `TRIGGER_SECRET_KEY` environment variable. You can find the value on the API keys page in the Trigger.dev dashboard. + +### tasks.trigger() + +Triggers a single run of a task with specified payload and options without importing the task. Use type-only imports for full type checking. 
+ +```ts +import { tasks } from "@trigger.dev/sdk"; +import type { emailSequence } from "~/trigger/emails"; + +export async function POST(request: Request) { + const data = await request.json(); + const handle = await tasks.trigger("email-sequence", { + to: data.email, + name: data.name, + }); + return Response.json(handle); +} +``` + +### tasks.batchTrigger() + +Triggers multiple runs of a single task with different payloads without importing the task. + +```ts +import { tasks } from "@trigger.dev/sdk"; +import type { emailSequence } from "~/trigger/emails"; + +export async function POST(request: Request) { + const data = await request.json(); + const batchHandle = await tasks.batchTrigger( + "email-sequence", + data.users.map((u) => ({ payload: { to: u.email, name: u.name } })) + ); + return Response.json(batchHandle); +} +``` + +### batch.trigger() + +Triggers multiple runs of different tasks at once, useful when you need to execute multiple tasks simultaneously. + +```ts +import { batch } from "@trigger.dev/sdk"; +import type { myTask1, myTask2 } from "~/trigger/myTasks"; + +export async function POST(request: Request) { + const data = await request.json(); + const result = await batch.trigger([ + { id: "my-task-1", payload: { some: data.some } }, + { id: "my-task-2", payload: { other: data.other } }, + ]); + return Response.json(result); +} +``` + +## Correct implementations for triggering a task from inside another task + +### yourTask.trigger() + +Triggers a single run of a task with specified payload and options. + +```ts +import { myOtherTask, runs } from "~/trigger/my-other-task"; + +export const myTask = task({ + id: "my-task", + run: async (payload: string) => { + const handle = await myOtherTask.trigger({ foo: "some data" }); + + const run = await runs.retrieve(handle); + // Do something with the run + }, +}); +``` + +If you need to call `trigger()` on a task in a loop, use `batchTrigger()` instead which can trigger up to 500 runs in a single call. 
+ +### yourTask.batchTrigger() + +Triggers multiple runs of a single task with different payloads. + +```ts +import { myOtherTask, batch } from "~/trigger/my-other-task"; + +export const myTask = task({ + id: "my-task", + run: async (payload: string) => { + const batchHandle = await myOtherTask.batchTrigger([{ payload: "some data" }]); + + //...do other stuff + const batch = await batch.retrieve(batchHandle.id); + }, +}); +``` + +### yourTask.triggerAndWait() + +Triggers a task and waits for the result, useful when you need to call a different task and use its result. + +```ts +export const parentTask = task({ + id: "parent-task", + run: async (payload: string) => { + const result = await childTask.triggerAndWait("some-data"); + console.log("Result", result); + + //...do stuff with the result + }, +}); +``` + +The result object needs to be checked to see if the child task run was successful. You can also use the `unwrap` method to get the output directly or handle errors with `SubtaskUnwrapError`. This method should only be used inside a task. + +### yourTask.batchTriggerAndWait() + +Batch triggers a task and waits for all results, useful for fan-out patterns. + +```ts +export const batchParentTask = task({ + id: "parent-task", + run: async (payload: string) => { + const results = await childTask.batchTriggerAndWait([ + { payload: "item4" }, + { payload: "item5" }, + { payload: "item6" }, + ]); + console.log("Results", results); + + //...do stuff with the result + }, +}); +``` + +You can handle run failures by inspecting individual run results and implementing custom error handling strategies. This method should only be used inside a task. + +### batch.triggerAndWait() + +Batch triggers multiple different tasks and waits for all results. 
+ +```ts +export const parentTask = task({ + id: "parent-task", + run: async (payload: string) => { + const results = await batch.triggerAndWait([ + { id: "child-task-1", payload: { foo: "World" } }, + { id: "child-task-2", payload: { bar: 42 } }, + ]); + + for (const result of results) { + if (result.ok) { + switch (result.taskIdentifier) { + case "child-task-1": + console.log("Child task 1 output", result.output); + break; + case "child-task-2": + console.log("Child task 2 output", result.output); + break; + } + } + } + }, +}); +``` + +### batch.triggerByTask() + +Batch triggers multiple tasks by passing task instances, useful for static task sets. + +```ts +export const parentTask = task({ + id: "parent-task", + run: async (payload: string) => { + const results = await batch.triggerByTask([ + { task: childTask1, payload: { foo: "World" } }, + { task: childTask2, payload: { bar: 42 } }, + ]); + + const run1 = await runs.retrieve(results.runs[0]); + const run2 = await runs.retrieve(results.runs[1]); + }, +}); +``` + +### batch.triggerByTaskAndWait() + +Batch triggers multiple tasks by passing task instances and waits for all results. + +```ts +export const parentTask = task({ + id: "parent-task", + run: async (payload: string) => { + const { runs } = await batch.triggerByTaskAndWait([ + { task: childTask1, payload: { foo: "World" } }, + { task: childTask2, payload: { bar: 42 } }, + ]); + + if (runs[0].ok) { + console.log("Child task 1 output", runs[0].output); + } + + if (runs[1].ok) { + console.log("Child task 2 output", runs[1].output); + } + }, +}); +``` + +## Correct Metadata implementation + +### Overview + +Metadata allows attaching up to 256KB of structured data to a run, which can be accessed during execution, via API, Realtime, and in the dashboard. Useful for storing user information, tracking progress, or saving intermediate results. 
+ +### Basic Usage + +Add metadata when triggering a task: + +```ts +const handle = await myTask.trigger( + { message: "hello world" }, + { metadata: { user: { name: "Eric", id: "user_1234" } } } +); +``` + +Access metadata inside a run: + +```ts +import { task, metadata } from "@trigger.dev/sdk"; + +export const myTask = task({ + id: "my-task", + run: async (payload: { message: string }) => { + // Get the whole metadata object + const currentMetadata = metadata.current(); + + // Get a specific key + const user = metadata.get("user"); + console.log(user.name); // "Eric" + }, +}); +``` + +### Update methods + +Metadata can be updated as the run progresses: + +- **set**: `metadata.set("progress", 0.5)` +- **del**: `metadata.del("progress")` +- **replace**: `metadata.replace({ user: { name: "Eric" } })` +- **append**: `metadata.append("logs", "Step 1 complete")` +- **remove**: `metadata.remove("logs", "Step 1 complete")` +- **increment**: `metadata.increment("progress", 0.4)` +- **decrement**: `metadata.decrement("progress", 0.4)` +- **stream**: `await metadata.stream("logs", readableStream)` +- **flush**: `await metadata.flush()` + +Updates can be chained with a fluent API: + +```ts +metadata.set("progress", 0.1) + .append("logs", "Step 1 complete") + .increment("progress", 0.4); +``` + +### Parent & root updates + +Child tasks can update parent task metadata: + +```ts +export const childTask = task({ + id: "child-task", + run: async (payload: { message: string }) => { + // Update parent task's metadata + metadata.parent.set("progress", 0.5); + + // Update root task's metadata + metadata.root.set("status", "processing"); + }, +}); +``` + +### Type safety + +Metadata accepts any JSON-serializable object. 
For type safety, consider wrapping with Zod: + +```ts +import { z } from "zod"; + +const Metadata = z.object({ + user: z.object({ + name: z.string(), + id: z.string(), + }), + date: z.coerce.date(), +}); + +function getMetadata() { + return Metadata.parse(metadata.current()); +} +``` + +### Important notes + +- Metadata methods only work inside run functions or task lifecycle hooks +- Metadata is NOT automatically propagated to child tasks +- Maximum size is 256KB (configurable if self-hosting) +- Objects like Dates are serialized to strings and must be deserialized when retrieved + +## Correct Realtime implementation + +### Overview + +Trigger.dev Realtime enables subscribing to runs for real-time updates on run status, useful for monitoring tasks, updating UIs, and building realtime dashboards. It's built on Electric SQL, a PostgreSQL syncing engine. + +### Basic usage + +Subscribe to a run after triggering a task: + +```ts +import { runs, tasks } from "@trigger.dev/sdk"; + +async function myBackend() { + const handle = await tasks.trigger("my-task", { some: "data" }); + + for await (const run of runs.subscribeToRun(handle.id)) { + console.log(run); // Logs the run every time it changes + } +} +``` + +### Subscription methods + +- **subscribeToRun**: Subscribe to changes for a specific run +- **subscribeToRunsWithTag**: Subscribe to changes for all runs with a specific tag +- **subscribeToBatch**: Subscribe to changes for all runs in a batch + +### Type safety + +You can infer types of run's payload and output by passing the task type: + +```ts +import { runs } from "@trigger.dev/sdk"; +import type { myTask } from "./trigger/my-task"; + +for await (const run of runs.subscribeToRun(handle.id)) { + console.log(run.payload.some); // Type-safe access to payload + + if (run.output) { + console.log(run.output.result); // Type-safe access to output + } +} +``` + +### Realtime Streams + +Stream data in realtime from inside your tasks using the metadata system: + +```ts 
+import { task, metadata } from "@trigger.dev/sdk"; +import OpenAI from "openai"; + +export type STREAMS = { + openai: OpenAI.ChatCompletionChunk; +}; + +export const myTask = task({ + id: "my-task", + run: async (payload: { prompt: string }) => { + const completion = await openai.chat.completions.create({ + messages: [{ role: "user", content: payload.prompt }], + model: "gpt-3.5-turbo", + stream: true, + }); + + // Register the stream with the key "openai" + const stream = await metadata.stream("openai", completion); + + let text = ""; + for await (const chunk of stream) { + text += chunk.choices.map((choice) => choice.delta?.content).join(""); + } + + return { text }; + }, +}); +``` + +Subscribe to streams using `withStreams`: + +```ts +for await (const part of runs.subscribeToRun(runId).withStreams()) { + switch (part.type) { + case "run": { + console.log("Received run", part.run); + break; + } + case "openai": { + console.log("Received OpenAI chunk", part.chunk); + break; + } + } +} +``` + +## Realtime hooks + +### Installation + +```bash +npm add @trigger.dev/react-hooks +``` + +### Authentication + +All hooks require a Public Access Token. 
You can provide it directly to each hook: + +```ts +import { useRealtimeRun } from "@trigger.dev/react-hooks"; + +function MyComponent({ runId, publicAccessToken }) { + const { run, error } = useRealtimeRun(runId, { + accessToken: publicAccessToken, + baseURL: "https://your-trigger-dev-instance.com", // Optional for self-hosting + }); +} +``` + +Or use the `TriggerAuthContext` provider: + +```ts +import { TriggerAuthContext } from "@trigger.dev/react-hooks"; + +function SetupTrigger({ publicAccessToken }) { + return ( + <TriggerAuthContext.Provider value={{ accessToken: publicAccessToken }}> + <MyComponent /> + </TriggerAuthContext.Provider> + ); +} +``` + +For Next.js App Router, wrap the provider in a client component: + +```ts +// components/TriggerProvider.tsx +"use client"; + +import { TriggerAuthContext } from "@trigger.dev/react-hooks"; + +export function TriggerProvider({ accessToken, children }) { + return ( + <TriggerAuthContext.Provider value={{ accessToken }}> + {children} + </TriggerAuthContext.Provider> + ); +} +``` + +### Passing tokens to the frontend + +Several approaches for Next.js App Router: + +1. **Using cookies**: +```ts +// Server action +export async function startRun() { + const handle = await tasks.trigger("example", { foo: "bar" }); + cookies().set("publicAccessToken", handle.publicAccessToken); + redirect(`/runs/${handle.id}`); +} + +// Page component +export default function RunPage({ params }) { + const publicAccessToken = cookies().get("publicAccessToken"); + return ( + <TriggerProvider accessToken={publicAccessToken}> + <RunDetails id={params.id} /> + </TriggerProvider> + ); +} +``` + +2. **Using query parameters**: +```ts +// Server action +export async function startRun() { + const handle = await tasks.trigger("example", { foo: "bar" }); + redirect(`/runs/${handle.id}?publicAccessToken=${handle.publicAccessToken}`); +} +``` + +3. 
**Server-side token generation**: +```ts +// Page component +export default async function RunPage({ params }) { + const publicAccessToken = await generatePublicAccessToken(params.id); + return ( + <TriggerProvider accessToken={publicAccessToken}> + <RunDetails id={params.id} /> + </TriggerProvider> + ); +} + +// Token generation function +export async function generatePublicAccessToken(runId: string) { + return auth.createPublicToken({ + scopes: { + read: { + runs: [runId], + }, + }, + expirationTime: "1h", + }); +} +``` + +### Hook types + +#### SWR hooks + +Data fetching hooks that use SWR for caching: + +```ts +"use client"; +import { useRun } from "@trigger.dev/react-hooks"; +import type { myTask } from "@/trigger/myTask"; + +function MyComponent({ runId }) { + const { run, error, isLoading } = useRun<typeof myTask>(runId); + + if (isLoading) return
<div>Loading...</div>
; + if (error) return
<div>Error: {error.message}</div>
; + + return
<div>Run: {run.id}</div>
; +} +``` + +Common options: +- `revalidateOnFocus`: Revalidate when window regains focus +- `revalidateOnReconnect`: Revalidate when network reconnects +- `refreshInterval`: Polling interval in milliseconds + +#### Realtime hooks + +Hooks that use Trigger.dev's realtime API for live updates (recommended over polling). + +For most use cases, Realtime hooks are preferred over SWR hooks with polling due to better performance and lower API usage. + +### Authentication + +For client-side usage, generate a public access token with appropriate scopes: + +```ts +import { auth } from "@trigger.dev/sdk"; + +const publicToken = await auth.createPublicToken({ + scopes: { + read: { + runs: ["run_1234"], + }, + }, +}); +``` + +## Correct Idempotency implementation + +Idempotency ensures that an operation produces the same result when called multiple times. Trigger.dev supports idempotency at the task level through the `idempotencyKey` option. + +### Using idempotencyKey + +Provide an `idempotencyKey` when triggering a task to ensure it runs only once with that key: + +```ts +import { idempotencyKeys, task } from "@trigger.dev/sdk"; + +export const myTask = task({ + id: "my-task", + retry: { + maxAttempts: 4, + }, + run: async (payload: any) => { + // Create a key unique to this task run + const idempotencyKey = await idempotencyKeys.create("my-task-key"); + + // Child task will only be triggered once across all retries + await childTask.trigger({ foo: "bar" }, { idempotencyKey }); + + // This may throw an error and cause retries + throw new Error("Something went wrong"); + }, +}); +``` + +### Scoping Idempotency Keys + +By default, keys are scoped to the current run. 
You can create globally unique keys: + +```ts +const idempotencyKey = await idempotencyKeys.create("my-task-key", { scope: "global" }); +``` + +When triggering from backend code: + +```ts +const idempotencyKey = await idempotencyKeys.create([myUser.id, "my-task"]); +await tasks.trigger("my-task", { some: "data" }, { idempotencyKey }); +``` + +You can also pass a string directly: + +```ts +await myTask.trigger({ some: "data" }, { idempotencyKey: myUser.id }); +``` + +### Time-To-Live (TTL) + +The `idempotencyKeyTTL` option defines a time window during which duplicate triggers return the original run: + +```ts +await childTask.trigger( + { foo: "bar" }, + { idempotencyKey, idempotencyKeyTTL: "60s" } +); + +await wait.for({ seconds: 61 }); + +// Key expired, will trigger a new run +await childTask.trigger({ foo: "bar" }, { idempotencyKey }); +``` + +Supported time units: +- `s` for seconds (e.g., `60s`) +- `m` for minutes (e.g., `5m`) +- `h` for hours (e.g., `2h`) +- `d` for days (e.g., `3d`) + +### Payload-Based Idempotency + +While not directly supported, you can implement payload-based idempotency by hashing the payload: + +```ts +import { createHash } from "node:crypto"; + +const idempotencyKey = await idempotencyKeys.create(hash(payload)); +await tasks.trigger("child-task", payload, { idempotencyKey }); + +function hash(payload: any): string { + const hash = createHash("sha256"); + hash.update(JSON.stringify(payload)); + return hash.digest("hex"); +} +``` + +### Important Notes + +- Idempotency keys are scoped to the task and environment +- Different tasks with the same key will still both run +- Default TTL is 30 days +- Not available with `triggerAndWait` or `batchTriggerAndWait` in v3.3.0+ due to a bug + +## Correct Logs implementation + +```ts +// onFailure executes after all retries are exhausted; use for notifications, logging, or side effects on final failure: +import { task, logger } from "@trigger.dev/sdk"; + +export const loggingExample = task({ + id: 
"logging-example", + run: async (payload: { data: Record<string, unknown> }) => { + //the first parameter is the message, the second parameter must be a key-value object (Record<string, unknown>) + logger.debug("Debug message", payload.data); + logger.log("Log message", payload.data); + logger.info("Info message", payload.data); + logger.warn("You've been warned", payload.data); + logger.error("Error message", payload.data); + }, +}); +``` + +## Correct `trigger.config.ts` implementation + +The `trigger.config.ts` file configures your Trigger.dev project, specifying task locations, retry settings, telemetry, and build options. + +```ts +import { defineConfig } from "@trigger.dev/sdk"; + +export default defineConfig({ + project: "<project ref>", + dirs: ["./trigger"], + retries: { + enabledInDev: false, + default: { + maxAttempts: 3, + minTimeoutInMs: 1000, + maxTimeoutInMs: 10000, + factor: 2, + randomize: true, + }, + }, +}); +``` + +### Key configuration options + +#### Dirs + +Specify where your tasks are located: + +```ts +dirs: ["./trigger"], +``` + +Files with `.test` or `.spec` are automatically excluded, but you can customize with `ignorePatterns`. 
+ +#### Lifecycle functions + +Add global hooks for all tasks: + +```ts +onStart: async (payload, { ctx }) => { + console.log("Task started", ctx.task.id); +}, +onSuccess: async (payload, output, { ctx }) => { + console.log("Task succeeded", ctx.task.id); +}, +onFailure: async (payload, error, { ctx }) => { + console.log("Task failed", ctx.task.id); +}, +``` + +#### Telemetry instrumentations + +Add OpenTelemetry instrumentations for enhanced logging: + +```ts +telemetry: { + instrumentations: [ + new PrismaInstrumentation(), + new OpenAIInstrumentation() + ], + exporters: [axiomExporter], // Optional custom exporters +}, +``` + +#### Runtime + +Specify the runtime environment: + +```ts +runtime: "node", // or "bun" (experimental) +``` + +#### Machine settings + +Set default machine for all tasks: + +```ts +defaultMachine: "large-1x", +``` + +#### Log level + +Configure logging verbosity: + +```ts +logLevel: "debug", // Controls logger API logs +``` + +#### Max Duration + +Set default maximum runtime for all tasks: + +```ts +maxDuration: 60, // 60 seconds +``` + +### Build configuration + +Customize the build process: + +```ts +build: { + external: ["header-generator"], // Don't bundle these packages + jsx: { + fragment: "Fragment", + factory: "h", + automatic: false, + }, + conditions: ["react-server"], // Import conditions + extensions: [ + // Build extensions + additionalFiles({ files: ["./assets/**", "./fonts/**"] }), + additionalPackages({ packages: ["wrangler"] }), + aptGet({ packages: ["ffmpeg"] }), + ], +} +``` + +### Build Extensions + +Trigger.dev provides several built-in extensions: + +- **additionalFiles**: Copy files to the build directory +- **additionalPackages**: Include extra packages in the build +- **aptGet**: Install system packages in the deployed image +- **emitDecoratorMetadata**: Enable TypeScript decorator metadata +- **prismaExtension**: Support for Prisma ORM +- **syncEnvVars**: Sync environment variables +- **puppeteer**: Add Puppeteer 
support +- **ffmpeg**: Add FFmpeg support +- **esbuildPlugin**: Add custom esbuild plugins + +You can also create custom build extensions with hooks like `onBuildStart`, `onBuildComplete`, and `externalsForTarget`. + +#### Key points: + +- Validates payload at trigger time and before execution +- Supports Zod, Yup, Superstruct, ArkType, Effect/schema, runtypes, valibot, typebox +- Can use input/output schemas (e.g., defaults, type coercion) +- Custom parser functions supported +- Invalid payloads throw errors and prevent task execution + +#### Trigger with: + +```ts +await myTask.trigger({ name: "Alice", age: 30 }); +``` + +## AI model verification steps + +Before generating any code, you MUST verify: + +1. Are you importing from `@trigger.dev/sdk` (NOT `@trigger.dev/sdk/v3`)? If not, STOP and FIX. +2. Have you exported every task? If not, STOP and FIX. +3. Have you generated any DEPRECATED code patterns? If yes, STOP and FIX. + +## Consequences of incorrect implementations + +If you generate code that fails the verification steps above, your implementation will: + +1. Break in production +2. Fail to deploy to the Trigger.dev servers +3. Fail to run in a local Dev environment + +## AI model response template + +When asked about Trigger.dev task implementation, you MUST: +1. FIRST use code patterns from this guide +2. NEVER suggest deprecated approaches +3. VERIFY your response against the patterns shown here + diff --git a/.cursor/rules/writing-tests.mdc b/.cursor/rules/writing-tests.mdc new file mode 100644 index 00000000000..1edeb2067e0 --- /dev/null +++ b/.cursor/rules/writing-tests.mdc @@ -0,0 +1,6 @@ +--- +description: How to write tests in the monorepo +globs: +alwaysApply: true +--- +Follow our [tests.md](mdc:ai/references/tests.md) guide for how to write tests in the monorepo. 
\ No newline at end of file diff --git a/.cursorignore b/.cursorignore new file mode 100644 index 00000000000..8430ce365fb --- /dev/null +++ b/.cursorignore @@ -0,0 +1,9 @@ +apps/docker-provider/ +apps/kubernetes-provider/ +apps/proxy/ +apps/coordinator/ +packages/rsc/ +.changeset +.zed +.env +!.env.example \ No newline at end of file diff --git a/.docker/pizzly-server.env.example b/.docker/pizzly-server.env.example deleted file mode 100644 index 07adde90377..00000000000 --- a/.docker/pizzly-server.env.example +++ /dev/null @@ -1,8 +0,0 @@ -# You need to stop and re-run docker to switch, `pnpm run docker:db` -# To login using GitHub and connect via OAuth with most integrations -AUTH_CALLBACK_URL=http://localhost:3004/oauth/callback -# To connect with Slack -# AUTH_CALLBACK_URL=https://.eu.ngrok.io/oauth/callback - -# You only need to use this locally if you specify PIZZLY_SECRET_KEY in the webapp .env file -# PIZZLY_SECRET_KEY= \ No newline at end of file diff --git a/.dockerignore b/.dockerignore index 26feb14a12b..a3ea4db8eec 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,37 +1,49 @@ -*.log -.git -.github -# editor -.idea -# dependencies -node_modules -.pnp -.pnp.js +**/*.log +**/*.pem +**/*.tsbuildinfo -# testing -coverage +**/.cache +**/.env +**/.next +**/.output +**/.trigger +**/.tshy +**/.tshy-build +**/.turbo +**/.vercel +**/.wrangler -# next.js -.next/ -build +**/dist +**/node_modules -# packages -build -dist -packages/**/dist +**/generated/prisma -# misc -.DS_Store -*.pem - -.turbo -.vercel -.cache -.output -apps/**/public/build +apps/webapp/build +apps/webapp/public/build cypress/screenshots cypress/videos apps/**/styles/tailwind.css -packages/**/styles/tailwind.css \ No newline at end of file +packages/**/styles/tailwind.css + +.changeset +.DS_Store +.git +.github +.idea +.pnp +.pnp.js +.vscode + +coverage +build +docs +examples +out +references + +CHANGESETS.md +CONTRIBUTING.md +README.md +LICENSE diff --git a/.env.example b/.env.example new file 
mode 100644 index 00000000000..c6980d7d77a --- /dev/null +++ b/.env.example @@ -0,0 +1,164 @@ +# YOU MIGHT LIKE TO MODIFY THESE VARIABLES +SESSION_SECRET=abcdef1234 +MAGIC_LINK_SECRET=abcdef1234 +ENCRYPTION_KEY=ae13021afef0819c3a307ad487071c06 # Must be a random 16 byte hex string. You can generate an encryption key by running `openssl rand -hex 16` in your terminal +LOGIN_ORIGIN=http://localhost:3030 +DATABASE_URL=postgresql://postgres:postgres@localhost:5432/postgres?schema=public +# This sets the URL used for direct connections to the database and should only be needed in limited circumstances +# See: https://www.prisma.io/docs/reference/api-reference/prisma-schema-reference#fields:~:text=the%20shadow%20database.-,directUrl,-No +DIRECT_URL=${DATABASE_URL} +REMIX_APP_PORT=3030 +APP_ENV=development +APP_ORIGIN=http://localhost:3030 +ELECTRIC_ORIGIN=http://localhost:3060 +NODE_ENV=development + +# Clickhouse +CLICKHOUSE_URL=http://default:password@localhost:8123 +RUN_REPLICATION_CLICKHOUSE_URL=http://default:password@localhost:8123 +RUN_REPLICATION_ENABLED=1 +# Store task run spans/traces in ClickHouse so the dashboard trace view is +# populated in local dev. The local stack is ClickHouse-backed (see above), so +# leaving this unset falls back to the "postgres" store and dev run traces show +# up empty even though the run itself appears. +EVENT_REPOSITORY_DEFAULT_STORE=clickhouse_v2 + +# Set this to UTC because Node.js uses the system timezone +TZ="UTC" + +# Redis is used for the v3 queuing and v2 concurrency control +REDIS_HOST="localhost" +REDIS_PORT="6379" +REDIS_TLS_DISABLED="true" + +DEV_OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:3030/otel" +DEV_OTEL_BATCH_PROCESSING_ENABLED="0" + +# Realtime streams v2 (Sessions, chat.agent, large stream backfills) backed +# by S2 (https://s2.dev). The `s2` service in docker/docker-compose.yml runs +# the open-source s2-lite binary and pre-creates a basin named `trigger-local` +# (see docker/config/s2-spec.json). 
Comment these out to fall back to v1 +# (Redis-only) streams; Sessions and chat.agent then become unavailable. +REALTIME_STREAMS_S2_BASIN=trigger-local +REALTIME_STREAMS_S2_ACCESS_TOKEN=ignored +REALTIME_STREAMS_S2_ENDPOINT=http://localhost:4566/v1 +REALTIME_STREAMS_S2_SKIP_ACCESS_TOKENS=true +REALTIME_STREAMS_DEFAULT_VERSION=v2 + +# Running multiple instances side by side (worktrees, branch experiments) +# +# Every host port in docker/docker-compose.yml is `${VAR:-default}` and the +# project name comes from `COMPOSE_PROJECT_NAME`. To stand up a second stack +# alongside the default one, uncomment the block below in this clone's `.env` +# (pick any offset that doesn't clash with anything else running), then update +# the URL/PORT vars further up to match. Default values are commented for +# reference. +# +# --- core (pnpm run docker) --- +# COMPOSE_PROJECT_NAME=triggerdotdev-docker-alt +# CONTAINER_PREFIX=alt- +# POSTGRES_HOST_PORT=15432 # default 5432 +# REDIS_HOST_PORT=16379 # default 6379 +# ELECTRIC_HOST_PORT=13060 # default 3060 +# MINIO_API_HOST_PORT=19005 # default 9005 +# MINIO_CONSOLE_HOST_PORT=19006 # default 9006 +# CLICKHOUSE_HTTP_HOST_PORT=18123 # default 8123 +# CLICKHOUSE_TCP_HOST_PORT=19000 # default 9000 +# S2_HOST_PORT=14566 # default 4566 +# REMIX_APP_PORT=13030 # default 3030 +# --- extras (only needed if you also run `pnpm run docker:full`) --- +# ELECTRIC_SHARD_1_HOST_PORT=13061 # default 3061 +# CH_UI_HOST_PORT=15521 # default 5521 +# TOXIPROXY_PROXY_HOST_PORT=40303 # default 30303 +# TOXIPROXY_API_HOST_PORT=18474 # default 8474 +# NGINX_H2_HOST_PORT=18443 # default 8443 +# OTEL_GRPC_HOST_PORT=14317 # default 4317 +# OTEL_HTTP_HOST_PORT=14318 # default 4318 +# OTEL_PROMETHEUS_HOST_PORT=18889 # default 8889 +# PROMETHEUS_HOST_PORT=19090 # default 9090 +# GRAFANA_HOST_PORT=13001 # default 3001 +# (and update DATABASE_URL / CLICKHOUSE_URL / REDIS_PORT / APP_ORIGIN / +# LOGIN_ORIGIN / ELECTRIC_ORIGIN / REALTIME_STREAMS_S2_ENDPOINT to match) + +# 
When the domain is set to `localhost` the CLI deploy command will only --load the image by default and not --push it +DEPLOY_REGISTRY_HOST=localhost:5000 + +# OPTIONAL VARIABLES +# This is used for validating emails that are allowed to log in. Every email that does not match this regex will be rejected. +# WHITELISTED_EMAILS="^(authorized@yahoo\.com|authorized@gmail\.com)$" +# Accounts with these emails will get global admin rights. This grants access to the admin UI. +# ADMIN_EMAILS="^(admin@example\.com|another-admin@example\.com)$" +# This is used for logging in via GitHub. You can leave these commented out if you don't want to use GitHub for authentication. +# AUTH_GITHUB_CLIENT_ID= +# AUTH_GITHUB_CLIENT_SECRET= + +# Configure an email transport to allow users to sign in to Trigger.dev via a Magic Link. +# If none are configured, emails will print to the console instead. +# Uncomment one of the following blocks to allow delivery of emails. + +# Resend +### Visit https://resend.com, create an account and get your API key. Then insert it below along with your From and Reply To email addresses. Visit https://resend.com/docs for more information. +# EMAIL_TRANSPORT=resend +# FROM_EMAIL= +# REPLY_TO_EMAIL= +# RESEND_API_KEY= + +# Generic SMTP +### Enter the configuration provided by your mail provider. 
Visit https://nodemailer.com/smtp/ for more information +### SMTP_SECURE = false will use STARTTLS when connecting to a server that supports it (usually port 587) +# EMAIL_TRANSPORT=smtp +# FROM_EMAIL= +# REPLY_TO_EMAIL= +# SMTP_HOST= +# SMTP_PORT=587 +# SMTP_SECURE=false +# SMTP_USER= +# SMTP_PASSWORD= + +# AWS Simple Email Service +### Authentication is configured using the default Node.JS credentials provider chain (https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/Package/-aws-sdk-credential-providers/#fromnodeproviderchain) +# EMAIL_TRANSPORT=aws-ses +# FROM_EMAIL= +# REPLY_TO_EMAIL= + +# CLOUD VARIABLES +POSTHOG_PROJECT_KEY= + +# DEPOT_ORG_ID= +# DEPOT_TOKEN= +# DEV_OTEL_EXPORTER_OTLP_ENDPOINT="http://0.0.0.0:4318" +# These are needed for the object store (for handling large payloads/outputs) +# +# Default provider +# OBJECT_STORE_BASE_URL=http://localhost:9005 +# OBJECT_STORE_BUCKET=packets +# OBJECT_STORE_ACCESS_KEY_ID=minioadmin +# OBJECT_STORE_SECRET_ACCESS_KEY=minioadmin +# OBJECT_STORE_REGION=us-east-1 +# OBJECT_STORE_SERVICE=s3 +# +# OBJECT_STORE_DEFAULT_PROTOCOL=s3 # Only specify this if you're going to migrate object storage and set protocol values below +# Named providers (protocol-prefixed data) - optional for multi-provider support +# OBJECT_STORE_S3_BASE_URL=https://s3.amazonaws.com +# OBJECT_STORE_S3_ACCESS_KEY_ID= +# OBJECT_STORE_S3_SECRET_ACCESS_KEY= +# OBJECT_STORE_S3_REGION=us-east-1 +# OBJECT_STORE_S3_SERVICE=s3 +# +# OBJECT_STORE_R2_BASE_URL=https://{bucket}.{accountId}.r2.cloudflarestorage.com +# OBJECT_STORE_R2_ACCESS_KEY_ID= +# OBJECT_STORE_R2_SECRET_ACCESS_KEY= +# OBJECT_STORE_R2_REGION=auto +# OBJECT_STORE_R2_SERVICE=s3 +# CHECKPOINT_THRESHOLD_IN_MS=10000 + +# These control the server-side internal telemetry +# INTERNAL_OTEL_TRACE_EXPORTER_URL= +# INTERNAL_OTEL_TRACE_LOGGING_ENABLED=1 +# INTERNAL_OTEL_TRACE_INSTRUMENT_PRISMA_ENABLED=0 + +# Enable local observability stack (requires `pnpm run docker:full` to bring up 
otel-collector + prometheus + grafana) +# Uncomment these to send metrics to the local Prometheus via OTEL Collector: +# INTERNAL_OTEL_METRIC_EXPORTER_ENABLED=1 +# INTERNAL_OTEL_METRIC_EXPORTER_URL=http://localhost:4318/v1/metrics +# INTERNAL_OTEL_METRIC_EXPORTER_INTERVAL_MS=15000 \ No newline at end of file diff --git a/.eslintrc.js b/.eslintrc.js deleted file mode 100644 index af283916494..00000000000 --- a/.eslintrc.js +++ /dev/null @@ -1,14 +0,0 @@ -module.exports = { - root: true, - // This tells ESLint to load the config from the package `eslint-config-custom` - extends: ["custom"], - settings: { - next: { - rootDir: ["apps/*/"], - }, - }, - parserOptions: { - sourceType: "module", - ecmaVersion: 2020, - }, -}; diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000000..37ed4f64c2c --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,3 @@ +# These are supported funding model platforms + +github: [triggerdotdev] diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000000..c5d01d19193 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,38 @@ +name: 🐞 Bug Report +description: Create a bug report to help us improve +title: "bug: " +labels: ["🐞 unconfirmed bug"] +body: + - type: textarea + attributes: + label: Provide environment information + description: | + Run this command in your project root and paste the results: + ```bash + npx envinfo --system --binaries + ``` + + validations: + required: true + - type: textarea + attributes: + label: Describe the bug + description: A clear and concise description of the bug, as well as what you expected to happen when encountering it. + validations: + required: true + - type: input + attributes: + label: Reproduction repo + description: If applicable, please provide a link to a reproduction repo or a Stackblitz / CodeSandbox project. 
Your issue may be closed if this is not provided and we are unable to reproduce the issue. If your bug is a docs issue, link the appropriate page. + validations: + required: true + - type: textarea + attributes: + label: To reproduce + description: Describe how to reproduce your bug. Steps, code snippets, reproduction repos etc. + validations: + required: true + - type: textarea + attributes: + label: Additional information + description: Add any other information related to the bug here, screenshots if applicable. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000000..30843790111 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: Ask a Question + url: https://trigger.dev/discord + about: Ask questions and discuss with other community members diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 00000000000..bcb3dee02aa --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,27 @@ +name: Feature Request +description: Suggest an idea for this project +title: "feat: " +labels: ["🌟 enhancement"] +body: + - type: textarea + attributes: + label: Is your feature request related to a problem? Please describe. + description: A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + validations: + required: true + - type: textarea + attributes: + label: Describe the solution you'd like to see + description: A clear and concise description of what you want to happen. + validations: + required: true + - type: textarea + attributes: + label: Describe alternate solutions + description: A clear and concise description of any alternative solutions or features you've considered. 
+ validations: + required: true + - type: textarea + attributes: + label: Additional information + description: Add any other information related to the feature here. If your feature request is related to any issues or discussions, link them here. diff --git a/.github/ISSUE_TEMPLATE/instrumentation_request.yml b/.github/ISSUE_TEMPLATE/instrumentation_request.yml new file mode 100644 index 00000000000..157e226fa3f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/instrumentation_request.yml @@ -0,0 +1,21 @@ +name: OpenTelemetry Auto-Instrumentation Request +description: Suggest an SDK that you'd like to be auto-instrumented in the Run log view +title: "auto-instrumentation: " +labels: ["🌟 enhancement"] +body: + - type: textarea + attributes: + label: What API or SDK would you like to have automatic spans for? + description: A clear description of which API or SDK you'd like, and links to it. + validations: + required: true + - type: textarea + attributes: + label: Is there an existing OpenTelemetry auto-instrumentation package? + description: You can search for existing ones – https://opentelemetry.io/ecosystem/registry/?component=instrumentation&language=js + validations: + required: true + - type: textarea + attributes: + label: Additional information + description: Add any other information related to the feature here. If your feature request is related to any issues or discussions, link them here. diff --git a/.github/ISSUE_TEMPLATE/vouch-request.yml b/.github/ISSUE_TEMPLATE/vouch-request.yml new file mode 100644 index 00000000000..9ffe04a8984 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/vouch-request.yml @@ -0,0 +1,28 @@ +name: Vouch Request +description: Request to be vouched as a contributor +labels: ["vouch-request"] +body: + - type: markdown + attributes: + value: | + ## Vouch Request + + We use [vouch](https://github.com/mitchellh/vouch) to manage contributor trust. PRs from unvouched users are automatically closed. + + To get vouched, fill out this form. 
A maintainer will review your request and vouch for you by commenting on this issue. + - type: textarea + id: context + attributes: + label: Why do you want to contribute? + description: Tell us a bit about yourself and what you'd like to work on. + placeholder: "I'd like to fix a bug I found in..." + validations: + required: true + - type: textarea + id: prior-work + attributes: + label: Prior contributions or relevant experience + description: Links to previous open source work, relevant projects, or anything that helps us understand your background. + placeholder: "https://github.com/..." + validations: + required: false diff --git a/.github/VOUCHED.td b/.github/VOUCHED.td new file mode 100644 index 00000000000..e0604cf3438 --- /dev/null +++ b/.github/VOUCHED.td @@ -0,0 +1,26 @@ +# Vouched contributors for Trigger.dev +# See: https://github.com/mitchellh/vouch +# +# Org members +0ski +D-K-P +ericallam +matt-aitken +mpcgrid +myftija +nicktrn +samejr +isshaddad +# Bots +devin-ai-integration[bot] +dependabot[bot] +# Outside contributors +gautamsi +capaj +chengzp +bharathkumar39293 +bhekanik +jrossi +ThullyoCunha +ConProgramming +saasjesus \ No newline at end of file diff --git a/.github/actions/get-image-tag/action.yml b/.github/actions/get-image-tag/action.yml new file mode 100644 index 00000000000..7f1505a0c11 --- /dev/null +++ b/.github/actions/get-image-tag/action.yml @@ -0,0 +1,91 @@ +name: "#️⃣ Get image tag (action)" + +description: This action gets the image tag from the commit ref or input (if provided) + +outputs: + tag: + description: The image tag + value: ${{ steps.get_tag.outputs.tag }} + is_semver: + description: Whether the tag is a semantic version + value: ${{ steps.check_semver.outputs.is_semver }} + +inputs: + tag: + description: The image tag. If this is set it will return the tag as is. 
+ required: false + default: "" + +runs: + using: "composite" + steps: + - name: "#️⃣ Get image tag (step)" + id: get_tag + shell: bash + run: | + if [[ -n "${INPUTS_TAG}" ]]; then + tag="${INPUTS_TAG}" + elif [[ "${GITHUB_REF_TYPE}" == "tag" ]]; then + if [[ "${GITHUB_REF_NAME}" == infra-*-* ]]; then + env=$(echo ${GITHUB_REF_NAME} | cut -d- -f2) + sha=$(echo "${GITHUB_SHA}" | head -c7) + ts=$(date +%s) + tag=${env}-${sha}-${ts} + elif [[ "${GITHUB_REF_NAME}" == re2-*-* ]]; then + env=$(echo ${GITHUB_REF_NAME} | cut -d- -f2) + sha=$(echo "${GITHUB_SHA}" | head -c7) + ts=$(date +%s) + tag=${env}-${sha}-${ts} + elif [[ "${GITHUB_REF_NAME}" == v.docker.* ]]; then + version="${GITHUB_REF_NAME#v.docker.}" + tag="v${version}" + elif [[ "${GITHUB_REF_NAME}" == build-* ]]; then + tag="${GITHUB_REF_NAME#build-}" + else + echo "Invalid git tag: ${GITHUB_REF_NAME}" + exit 1 + fi + elif [[ "${GITHUB_REF_NAME}" == "main" ]]; then + tag="main" + else + echo "Invalid git ref: ${GITHUB_REF}" + exit 1 + fi + echo "tag=${tag}" >> "$GITHUB_OUTPUT" + env: + INPUTS_TAG: ${{ inputs.tag }} + + - name: 🔍 Check for validity + id: check_validity + shell: bash + env: + tag: ${{ steps.get_tag.outputs.tag }} + run: | + if [[ "${tag}" =~ ^[a-z0-9]+([._-][a-z0-9]+)*$ ]]; then + echo "Tag is valid: ${tag}" + else + echo "Tag is not valid: ${tag}" + exit 1 + fi + + - name: 🆚 Check for semver + id: check_semver + shell: bash + env: + tag: ${{ steps.get_tag.outputs.tag }} + # Will match most semver formats except build metadata, i.e. 
v1.2.3+build.1 + # Valid matches: + # v1.2.3 + # v1.2.3-alpha + # v1.2.3-alpha.1 + # v1.2.3-rc.1 + # v1.2.3-beta-1 + run: | + if [[ "${tag}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+(-[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?$ ]]; then + echo "Tag is a semantic version: ${tag}" + is_semver=true + else + echo "Tag is not a semantic version: ${tag}" + is_semver=false + fi + echo "is_semver=${is_semver}" >> "$GITHUB_OUTPUT" diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 00000000000..2beb7606fa4 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1 @@ +This is the repo for Trigger.dev, a background jobs platform written in TypeScript. Our webapp at apps/webapp is a Remix 2.1 app that uses Node.js v20. Our SDK is an isomorphic TypeScript SDK at packages/trigger-sdk. Always prefer using isomorphic code like fetch, ReadableStream, etc. instead of Node.js specific code. Our tests are all vitest. We use prisma in internal-packages/database for our database interactions using PostgreSQL. For TypeScript, we usually use types over interfaces. We use zod a lot in packages/core and in the webapp. Avoid enums. Use strict mode. No default exports, use function declarations. 
diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000000..7bb64f36744 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,12 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly + cooldown: + default-days: 7 + groups: + github-actions: + patterns: + - "*" diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 00000000000..279bab91a79 --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,12 @@ +"📌 area: cli": + - any: ["cli/**/*"] + +"📌 area: t3-app": + - any: ["cli/template/**/*"] + +"📚 documentation": + - any: ["www/**/*"] + - any: ["**/*.md"] + +"📌 area: ci": + - any: [".github/**/*"] \ No newline at end of file diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000000..eec6c728208 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,27 @@ +Closes # + +## ✅ Checklist + +- [ ] I have followed every step in the [contributing guide](https://github.com/triggerdotdev/trigger.dev/blob/main/CONTRIBUTING.md) +- [ ] The PR title follows the convention. +- [ ] I ran and tested the code works + +--- + +## Testing + +_[Describe the steps you took to test this change]_ + +--- + +## Changelog + +_[Short description of what has changed]_ + +--- + +## Screenshots + +_[Screenshots]_ + +💯 diff --git a/.github/test/README.md b/.github/test/README.md new file mode 100644 index 00000000000..1e8383fda56 --- /dev/null +++ b/.github/test/README.md @@ -0,0 +1,70 @@ +# GitHub Action Tests + +This directory contains necessary files to allow local testing of GitHub Actions workflows, composite actions, etc. You will need to install [act](https://github.com/nektos/act) to perform tests. + +## Workflow tests + +Trigger specific workflow files by specifying their full path: + +``` +act -W .github/workflows/release.yml +``` + +You will likely need to override any custom runners we use, e.g. buildjet. 
For example: + +``` +override=catthehacker/ubuntu:act-latest + +act -W .github/workflows/release.yml \ + -P buildjet-8vcpu-ubuntu-2204=$override + +# override multiple images at the same time +act -W .github/workflows/release.yml \ + -P buildjet-8vcpu-ubuntu-2204=$override \ + -P buildjet-16vcpu-ubuntu-2204=$override +``` + +Trigger with specific event payloads to test pushing to branches or tags: + +``` +override=catthehacker/ubuntu:act-latest + +# simulate push to main +act -W .github/workflows/publish.yml \ + -P buildjet-8vcpu-ubuntu-2204=$override \ + -P buildjet-16vcpu-ubuntu-2204=$override \ + -e .github/test/events/push-main.json + +# simulate a `build-` prefixed tag +act -W .github/workflows/publish.yml \ + -P buildjet-8vcpu-ubuntu-2204=$override \ + -P buildjet-16vcpu-ubuntu-2204=$override \ + -e .github/test/events/push-tag-build.json +``` + +By default, `act` will send a push event. To trigger a different event: + +``` +# basic syntax +act ... + +# simulate a pull request +act pull_request + +# only trigger a specific workflow +act pull_request -W .github/workflows/pr_checks.yml +``` + +## Composite action tests + +The composite (custom) action tests can be run by triggering the `test-actions` workflow: + +``` +act -W .github/test/test-actions.yml +``` + +## Helpful flags + +- `--pull=false` - perform fully offline tests if all images are already present +- `-j ` - run the specified job only +- `-l push` - list all workflows with push triggers diff --git a/.github/test/events/push-main.json b/.github/test/events/push-main.json new file mode 100644 index 00000000000..ccb4cb1c174 --- /dev/null +++ b/.github/test/events/push-main.json @@ -0,0 +1,3 @@ +{ + "ref": "refs/heads/main" +} diff --git a/.github/test/events/push-tag-build.json b/.github/test/events/push-tag-build.json new file mode 100644 index 00000000000..9490c181abf --- /dev/null +++ b/.github/test/events/push-tag-build.json @@ -0,0 +1,3 @@ +{ + "ref": "refs/tags/build-buildtag" +} diff --git 
a/.github/test/events/push-tag-docker-nonsemver.json b/.github/test/events/push-tag-docker-nonsemver.json new file mode 100644 index 00000000000..5ce2d8dcf38 --- /dev/null +++ b/.github/test/events/push-tag-docker-nonsemver.json @@ -0,0 +1,3 @@ +{ + "ref": "refs/tags/v.docker.nonsemver" +} diff --git a/.github/test/events/push-tag-docker.json b/.github/test/events/push-tag-docker.json new file mode 100644 index 00000000000..7b55610ca2e --- /dev/null +++ b/.github/test/events/push-tag-docker.json @@ -0,0 +1,3 @@ +{ + "ref": "refs/tags/v.docker.1.2.3" +} diff --git a/.github/test/events/push-tag-infra-prod.json b/.github/test/events/push-tag-infra-prod.json new file mode 100644 index 00000000000..7d4bb3a0bb8 --- /dev/null +++ b/.github/test/events/push-tag-infra-prod.json @@ -0,0 +1,3 @@ +{ + "ref": "refs/tags/infra-prod-anything" +} diff --git a/.github/test/events/push-tag-infra-test.json b/.github/test/events/push-tag-infra-test.json new file mode 100644 index 00000000000..78eeefbe41a --- /dev/null +++ b/.github/test/events/push-tag-infra-test.json @@ -0,0 +1,3 @@ +{ + "ref": "refs/tags/infra-test-anything" +} diff --git a/.github/test/events/push-tag-semver.json b/.github/test/events/push-tag-semver.json new file mode 100644 index 00000000000..3fb65c92073 --- /dev/null +++ b/.github/test/events/push-tag-semver.json @@ -0,0 +1,3 @@ +{ + "ref": "refs/tags/1.2.3" +} diff --git a/.github/test/events/push-tag.json b/.github/test/events/push-tag.json new file mode 100644 index 00000000000..26496f80874 --- /dev/null +++ b/.github/test/events/push-tag.json @@ -0,0 +1,3 @@ +{ + "ref": "refs/tags/standard-tag" +} diff --git a/.github/test/test-actions.yml b/.github/test/test-actions.yml new file mode 100644 index 00000000000..0d913ebc0e1 --- /dev/null +++ b/.github/test/test-actions.yml @@ -0,0 +1,152 @@ +name: Test Actions + +on: + workflow_dispatch: + +jobs: + get-image-tag-none: + runs-on: ubuntu-latest + continue-on-error: true + steps: + - name: Checkout code + uses: 
actions/checkout@v4 + + - name: Log current ref + run: | + echo "ref: ${{ github.ref }}" + echo "ref_type: ${{ github.ref_type }}" + echo "ref_name: ${{ github.ref_name }}" + + - name: Run without input tag + id: get_tag + # this step may fail depending on the current ref + continue-on-error: true + uses: ./.github/actions/get-image-tag + + - name: Verify output + run: | + echo "${{ toJson(steps.get_tag) }}" + + get-image-tag-null: + runs-on: ubuntu-latest + continue-on-error: true + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Log current ref + run: | + echo "ref: ${{ github.ref }}" + echo "ref_type: ${{ github.ref_type }}" + echo "ref_name: ${{ github.ref_name }}" + + - name: Run without input tag + id: get_tag + uses: ./.github/actions/get-image-tag + # this step may fail depending on the current ref + continue-on-error: true + with: + # this should behave exactly as when no tag is provided + tag: null + + - name: Verify output + run: | + echo "${{ toJson(steps.get_tag) }}" + + get-image-tag-override: + runs-on: ubuntu-latest + continue-on-error: true + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Run with tag override + id: get_tag + uses: ./.github/actions/get-image-tag + with: + tag: "abc-123" + + - name: Verify output + run: | + echo "${{ toJson(steps.get_tag) }}" + + get-image-tag-invalid-string: + runs-on: ubuntu-latest + continue-on-error: true + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Run with invalid string + id: get_tag + uses: ./.github/actions/get-image-tag + # this step is expected to fail + continue-on-error: true + with: + # does not end with alphanumeric character + tag: "abc-123-" + + - name: Fail job if previous step did not fail + if: steps.get_tag.outcome != 'failure' + run: exit 1 + + - name: Verify output + run: | + echo "${{ toJson(steps.get_tag) }}" + + get-image-tag-prerelease: + runs-on: ubuntu-latest + continue-on-error: true + steps: + - name: Checkout 
code + uses: actions/checkout@v4 + + - name: Run with prerelease semver + id: get_tag + uses: ./.github/actions/get-image-tag + with: + tag: "v1.2.3-beta.4" + + - name: Verify output + run: | + echo "${{ toJson(steps.get_tag) }}" + + get-image-tag-semver: + runs-on: ubuntu-latest + continue-on-error: true + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Run with basic semver + id: get_tag + uses: ./.github/actions/get-image-tag + with: + tag: "v1.2.3" + + - name: Verify output + run: | + echo "${{ toJson(steps.get_tag) }}" + + get-image-tag-invalid-semver: + runs-on: ubuntu-latest + continue-on-error: true + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Run with invalid semver + id: get_tag + uses: ./.github/actions/get-image-tag + # this step is expected to fail + continue-on-error: true + with: + tag: "v1.2.3-" + + - name: Fail job if previous step did not fail + if: steps.get_tag.outcome != 'failure' + run: exit 1 + + - name: Verify output + run: | + echo "${{ toJson(steps.get_tag) }}" diff --git a/.github/workflows/changesets-pr.yml b/.github/workflows/changesets-pr.yml new file mode 100644 index 00000000000..e80ab04e7f1 --- /dev/null +++ b/.github/workflows/changesets-pr.yml @@ -0,0 +1,99 @@ +name: 🦋 Changesets PR + +on: + push: + branches: + - main + paths: + - "packages/**" + - ".changeset/**" + - ".server-changes/**" + - "package.json" + - "pnpm-lock.yaml" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + release-pr: + name: Create Release PR + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + checks: write + if: github.repository == 'triggerdotdev/trigger.dev' + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 # zizmor: ignore[artipacked] changesets/action pushes the release branch; no artifact upload here so no leak path + with: + fetch-depth: 0 + + - name: Setup pnpm + 
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + + - name: Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Create release PR + id: changesets + uses: changesets/action@63a615b9cd06ba9a3e6d13796c7fbcb080a60a0b # v1.8.0 + with: + version: pnpm run changeset:version + commit: "chore: release" + title: "chore: release" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Update PR title and enhance body + if: steps.changesets.outputs.published != 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + PR_NUMBER=$(gh pr list --head changeset-release/main --json number --jq '.[0].number') + if [ -n "$PR_NUMBER" ]; then + git fetch origin changeset-release/main + # we arbitrarily reference the version of the cli package here; it is the same for all package releases + VERSION=$(git show origin/changeset-release/main:packages/cli-v3/package.json | jq -r '.version') + gh pr edit "$PR_NUMBER" --title "chore: release v$VERSION" + + # Enhance the PR body with a clean, deduplicated summary + RAW_BODY=$(gh pr view "$PR_NUMBER" --json body --jq '.body') + ENHANCED_BODY=$(CHANGESET_PR_BODY="$RAW_BODY" node scripts/enhance-release-pr.mjs "$VERSION") + if [ -n "$ENHANCED_BODY" ]; then + gh api repos/triggerdotdev/trigger.dev/pulls/"$PR_NUMBER" \ + -X PATCH \ + -f body="$ENHANCED_BODY" + fi + fi + + # The changesets bot authors release PRs with GITHUB_TOKEN, which by GitHub + # design cannot trigger downstream workflows. That leaves the required + # "All PR Checks" status permanently Expected and the PR unmergeable. + # The release PR only bumps package.json + lockfile + CHANGELOGs from + # changesets already on main, so we self-report the required check as + # success. 
If a human ever pushes to changeset-release/main, the real + # pr_checks.yml fires and its result overwrites this one (last write wins + # for the same context on the same SHA). + - name: Self-report "All PR Checks" success on release PR + if: steps.changesets.outputs.published != 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + PR_NUMBER=$(gh pr list --head changeset-release/main --json number --jq '.[0].number') + if [ -z "$PR_NUMBER" ]; then exit 0; fi + HEAD_SHA=$(gh pr view "$PR_NUMBER" --json headRefOid --jq '.headRefOid') + gh api -X POST repos/${{ github.repository }}/check-runs \ + -f name="All PR Checks" \ + -f head_sha="$HEAD_SHA" \ + -f status=completed \ + -f conclusion=success \ + -f 'output[title]=Auto-pass for changeset release PR' \ + -f 'output[summary]=Required check auto-satisfied for changeset-release/main PRs. Full CI ran on the underlying commits before they landed on main.' diff --git a/.github/workflows/check-review-md.yml b/.github/workflows/check-review-md.yml new file mode 100644 index 00000000000..fb093ac9a1c --- /dev/null +++ b/.github/workflows/check-review-md.yml @@ -0,0 +1,92 @@ +name: 🔎 REVIEW.md Drift Audit + +on: + pull_request: + types: [opened, ready_for_review, synchronize] + paths-ignore: + - "docs/**" + - ".changeset/**" + - ".server-changes/**" + +concurrency: + group: review-md-drift-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + audit: + if: >- + github.event.pull_request.draft == false && + github.event.pull_request.head.repo.full_name == github.repository + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + id-token: write + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@787c5a0ce96a9a6cfb050ea0c8f4c05f2447c251 # v1.0.133 + with: + anthropic_api_key: 
${{ secrets.ANTHROPIC_API_KEY }} + use_sticky_comment: true + allowed_bots: "devin-ai-integration[bot]" + + claude_args: | + --max-turns 30 + --allowedTools "Read,Glob,Grep,Bash(git diff:*)" + + prompt: | + You are auditing this PR for drift against `.claude/REVIEW.md`. + + ## Context + + `.claude/REVIEW.md` is the repo's source of truth for what AI / agent code reviewers should treat as critical findings (rolling-deploy safety, hot-table indexes, recovery-path queries, testcontainers usage, Lua versioning, etc.). It is consumed by review agents to calibrate severity. If REVIEW.md goes stale, every future agent review degrades. + + ## Strategy — read this first + + You have a hard turn budget. Spend it on signal, not coverage. The audit is allowed to miss things; it is NOT allowed to time out. + + 1. Read `.claude/REVIEW.md` once, in full. + 2. Run `git diff origin/main...HEAD --name-only` to get the list of changed files. Do NOT read the diff content yet. + 3. Scan the file-list for relevance to REVIEW.md scope. Relevance signals: changes to Prisma schema, Redis / queue / Lua code, hot tables, recovery / restart loops, new packages, deletions of paths REVIEW.md cites. Skim everything else. + 4. Open at most **5 files** total — only the ones most likely to surface a real signal. If nothing in the file-list looks relevant to any REVIEW.md rule, do NOT read any files; go straight to the verdict. + 5. Form a verdict and stop. Do not exhaust the turn budget exploring. + + Large PRs (>50 files changed) are a strong signal to be MORE selective, not more thorough. Pick 3-5 files at most. + + ## What to look for + + - **Stale references** — does any REVIEW.md rule cite a file, directory, function, table, Prisma model, or package name that has been removed or renamed in this PR (or is already gone from `main`)? + - **Contradictions** — does code in this PR clearly violate a current REVIEW.md rule? (Don't re-review the PR. Only flag if REVIEW.md and the PR plainly disagree.) 
+ - **Missing rules** — does this PR introduce a new pattern future reviewers should know about? Examples: a new hot table, a new Lua-script versioning convention, a new safety wrapper, a new "must always check" invariant. + - **Obsolete rules** — has the repo moved past a constraint REVIEW.md still asserts? (e.g. a deprecated path is gone, a pattern is now linted, V1 code is deleted.) + + ## Response format + + If nothing needs changing: + + ✅ REVIEW.md looks current for this PR. + + Otherwise: + + 📝 **REVIEW.md updates suggested:** + + - **[stale]** `` — + - **[contradiction]** `` — + - **[missing]** under `##
` — + - **[obsolete]** `` — + + ## Rules + + - Maximum 3 suggestions per audit. Pick the highest-signal ones. + - Only flag things that would actually mislead a future reviewer. Style and wording do not count. + - Do NOT review the PR itself. Do NOT propose rules outside REVIEW.md's existing sections. + - Do NOT propose rules for one-off PR specifics that don't generalize to future PRs. + - If REVIEW.md does not exist in the repo, respond with `(skip)` and stop. + - When in doubt between "one more file read" and "finish now" — finish now. diff --git a/.github/workflows/claude-md-audit.yml b/.github/workflows/claude-md-audit.yml new file mode 100644 index 00000000000..32240ba5ea8 --- /dev/null +++ b/.github/workflows/claude-md-audit.yml @@ -0,0 +1,72 @@ +name: 📝 CLAUDE.md Audit + +on: + pull_request: + types: [opened, ready_for_review, synchronize] + paths-ignore: + - "docs/**" + - ".changeset/**" + - ".server-changes/**" + - "**/*.md" + +concurrency: + group: claude-md-audit-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + audit: + if: >- + github.event.pull_request.draft == false && + github.event.pull_request.head.repo.full_name == github.repository + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + issues: write + id-token: write + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@787c5a0ce96a9a6cfb050ea0c8f4c05f2447c251 # v1.0.133 + with: + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + use_sticky_comment: true + allowed_bots: "devin-ai-integration[bot]" + + claude_args: | + --max-turns 15 + --allowedTools "Read,Glob,Grep,Bash(git diff:*)" + + prompt: | + You are reviewing a PR to check whether any CLAUDE.md files or .claude/rules/ files need updating. + + ## Your task + + 1. 
Run `git diff origin/main...HEAD --name-only` to see which files changed in this PR. + 2. For each changed directory, check if there's a CLAUDE.md in that directory or a parent directory. + 3. Determine if any CLAUDE.md or .claude/rules/ file should be updated based on the changes. Consider: + - New files/directories that aren't covered by existing documentation + - Changed architecture or patterns that contradict current CLAUDE.md guidance + - New dependencies, services, or infrastructure that Claude should know about + - Renamed or moved files that are referenced in CLAUDE.md + - Changes to build commands, test patterns, or development workflows + + ## Response format + + If NO updates are needed, respond with exactly: + ✅ CLAUDE.md files look current for this PR. + + If updates ARE needed, respond with a short list: + 📝 **CLAUDE.md updates suggested:** + - `path/to/CLAUDE.md`: [what should be added/changed] + - `.claude/rules/file.md`: [what should be added/changed] + + Keep suggestions specific and brief. Only flag things that would actually mislead Claude in future sessions. + Do NOT suggest updates for trivial changes (bug fixes, small refactors within existing patterns). + Do NOT suggest creating new CLAUDE.md files - only updates to existing ones. 
diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 100644 index 00000000000..1c783e7ef6d --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,71 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + issues: write + id-token: write + actions: read # Required for Claude to read CI results on PRs + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@787c5a0ce96a9a6cfb050ea0c8f4c05f2447c251 # v1.0.133 + with: + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + + # This is an optional setting that allows Claude to read CI results on PRs + additional_permissions: | + actions: read + + claude_args: | + --model claude-opus-4-5-20251101 + --allowedTools 
"Bash(pnpm:*),Bash(turbo:*),Bash(git:*),Bash(gh:*),Bash(npx:*),Bash(docker:*),Edit,MultiEdit,Read,Write,Glob,Grep,LS,Task" + + # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. + # prompt: 'Update the pull request description to include a summary of changes.' + + # Optional: Add claude_args to customize behavior and configuration + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://code.claude.com/docs/en/cli-reference for available options + # claude_args: '--allowed-tools Bash(gh pr:*)' diff --git a/.github/workflows/dependabot-critical-alerts.yml b/.github/workflows/dependabot-critical-alerts.yml new file mode 100644 index 00000000000..a71b14bebf9 --- /dev/null +++ b/.github/workflows/dependabot-critical-alerts.yml @@ -0,0 +1,83 @@ +name: Dependabot Critical Alerts + +on: + schedule: + - cron: "0 8 * * *" # Daily 08:00 UTC + workflow_dispatch: + inputs: + severity: + description: "Severity to alert on" + type: choice + options: + - critical + - high + - medium + - low + default: critical + +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: false + +permissions: + contents: read + +jobs: + alert: + name: Post critical alerts + runs-on: ubuntu-latest + environment: dependabot-summary + env: + SEVERITY: ${{ inputs.severity || 'critical' }} + steps: + - name: Fetch alerts + id: alerts + env: + GH_TOKEN: ${{ secrets.DEPENDABOT_ALERTS_TOKEN }} + REPO: ${{ github.repository }} + run: | + set -euo pipefail + gh api -X GET "/repos/$REPO/dependabot/alerts" \ + -F state=open -F severity="$SEVERITY" --paginate > pages.json + jq -s 'add' pages.json > alerts.json + TOTAL=$(jq 'length' alerts.json) + echo "total=$TOTAL" >> "$GITHUB_OUTPUT" + if [ "$TOTAL" = "0" ]; then + exit 0 + fi + LIST=$(jq -r ' + map("• <\(.html_url)|#\(.number)> *\(.dependency.package.name)* - \(.security_advisory.summary)") + | join("\n") + ' 
alerts.json) + { + echo "list<<EOF" + echo "$LIST" + echo "EOF" + } >> "$GITHUB_OUTPUT" + + - name: Build Slack payload + if: steps.alerts.outputs.total != '0' + env: + REPO: ${{ github.repository }} + CHANNEL: ${{ vars.SLACK_CHANNEL_ID }} + TOTAL: ${{ steps.alerts.outputs.total }} + LIST: ${{ steps.alerts.outputs.list }} + run: | + jq -n \ + --arg channel "$CHANNEL" \ + --arg repo "$REPO" \ + --arg total "$TOTAL" \ + --arg list "$LIST" \ + --arg severity "$SEVERITY" \ + '{ + channel: $channel, + text: ":bufo-alarma: `\($repo)` - *\($total) open \($severity) alert(s)*\n\($list)\n\n" + }' > payload.json + + - name: Post Slack alert + if: steps.alerts.outputs.total != '0' + uses: slackapi/slack-github-action@45a88b9581bfab2566dc881e2cd66d334e621e2c # v3.0.3 + with: + method: chat.postMessage + token: ${{ secrets.SLACK_BOT_TOKEN }} + payload-file-path: payload.json diff --git a/.github/workflows/dependabot-weekly-summary.yml b/.github/workflows/dependabot-weekly-summary.yml new file mode 100644 index 00000000000..fb2717e2fb0 --- /dev/null +++ b/.github/workflows/dependabot-weekly-summary.yml @@ -0,0 +1,206 @@ +name: Dependabot Weekly Summary + +on: + schedule: + - cron: "0 8 * * 1" # Mon 08:00 UTC + workflow_dispatch: + +# Single-purpose monitoring workflow; serialise on workflow name only - we never +# want two concurrent summary runs racing to post the same digest. +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: false + +permissions: + contents: read # gh CLI baseline + pull-requests: read # gh pr list (open dependabot PRs) + actions: read # gh run list / view (parse latest dependabot run logs) + +jobs: + summary: + name: Post weekly Dependabot summary + runs-on: ubuntu-latest + environment: dependabot-summary + env: + # Severities surface in the actions list when their remaining TTR drops + # below this many days. Override via repo/env var ACTION_THRESHOLD_DAYS.
+ THRESHOLD_DAYS: ${{ vars.ACTION_THRESHOLD_DAYS || '7' }} + steps: + - name: Fetch alerts and compute summaries + id: alerts + env: + GH_TOKEN: ${{ secrets.DEPENDABOT_ALERTS_TOKEN }} + REPO: ${{ github.repository }} + run: | + if ! gh api -X GET "/repos/$REPO/dependabot/alerts" --paginate > pages.json 2> err.txt; then + echo "total=?" >> "$GITHUB_OUTPUT" + ERR=$(head -c 200 err.txt | tr '\n' ' ') + echo "by_severity=:x: _failed to fetch alerts: ${ERR}_" >> "$GITHUB_OUTPUT" + echo "actions=:x: _alerts unavailable_" >> "$GITHUB_OUTPUT" + exit 0 + fi + jq -s '[.[][] | select(.state == "open")]' pages.json > open.json + + TOTAL=$(jq 'length' open.json) + echo "total=$TOTAL" >> "$GITHUB_OUTPUT" + + if [ "$TOTAL" = "0" ]; then + echo "by_severity=:white_check_mark: No open alerts." >> "$GITHUB_OUTPUT" + echo "actions=_None_" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Severity breakdown - real newlines so jq --arg in the payload + # builder encodes them as proper \n in JSON (Slack renders as breaks). + BY_SEV=$(jq -r ' + group_by(.security_advisory.severity) + | map({sev: .[0].security_advisory.severity, + count: length, + weight: ({"critical":0,"high":1,"medium":2,"low":3}[.[0].security_advisory.severity])}) + | sort_by(.weight) + | map("• *\(.count)* \(.sev)") + | join("\n") + ' open.json) + { + echo "by_severity<<EOF" + echo "$BY_SEV" + echo "EOF" + } >> "$GITHUB_OUTPUT" + + # Actions: alerts within THRESHOLD_DAYS of their TTR (P0=7d, P1=30d, P2=90d, P3=no deadline) + # Grouped by (package, severity); shows earliest deadline per group.
+ ACTIONS=$(jq -r --argjson threshold "$THRESHOLD_DAYS" ' + [.[] + | (.security_advisory.severity) as $sev + | ({"critical":7,"high":30,"medium":90,"low":null}[$sev]) as $ttr + | select($ttr != null) + | ((now - (.created_at | fromdateiso8601)) / 86400 | floor) as $age + | {pkg: .dependency.package.name, sev: $sev, remaining: ($ttr - $age)} + ] + | group_by([.pkg, .sev]) + | map({pkg: .[0].pkg, sev: .[0].sev, count: length, min_remaining: ([.[].remaining] | min)}) + | map(select(.min_remaining < $threshold)) + | sort_by(.min_remaining) + | if length == 0 then "_None_" + else (map( + "• *\(.pkg)* (\(.sev))" + + (if .count > 1 then " ×\(.count)" else "" end) + " - " + + (if .min_remaining < 0 then "*OVERDUE* by \(-.min_remaining)d" + else "\(.min_remaining)d remaining" end) + ) | join("\n")) + end + ' open.json) + { + echo "actions<<EOF" + echo "$ACTIONS" + echo "EOF" + } >> "$GITHUB_OUTPUT" + + - name: Fetch open dependabot PRs + id: prs + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + REPO_URL: https://github.com/${{ github.repository }} + run: | + if ! PR_JSON=$(gh pr list --repo "$REPO" --state open --author "app/dependabot" --json number,title 2> err.txt); then + ERR=$(head -c 200 err.txt | tr '\n' ' ') + echo "list=:x: _failed to fetch PRs: ${ERR}_" >> "$GITHUB_OUTPUT" + exit 0 + fi + LIST=$(echo "$PR_JSON" | jq -r --arg url "$REPO_URL" ' + if length == 0 then "_None_" + else (map("• <\($url)/pull/\(.number)|#\(.number)> \(.title)") | join("\n")) + end + ') + { + echo "list<<EOF" + echo "$LIST" + echo "EOF" + } >> "$GITHUB_OUTPUT" + + - name: Find latest npm dependabot run + id: latest + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + run: | + # Repos without a dependabot.yml have no "Dependabot Updates" workflow; + # treat the lookup failure as "no recent run found" rather than failing. + if !
RUN_ID=$(gh run list --repo "$REPO" --workflow "Dependabot Updates" --status success --limit 30 --json databaseId,name --jq 'first(.[] | select(.name | startswith("npm_and_yarn")) | .databaseId) // empty' 2>/dev/null); then + RUN_ID="" + fi + echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT" + + - name: Extract stuck deps (only if actions pending) + id: stuck + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + RUN_ID: ${{ steps.latest.outputs.run_id }} + ACTIONS: ${{ steps.alerts.outputs.actions }} + run: | + # Skip the stuck section entirely when nothing in the actions list + # - keeps the digest tidy when there's nothing to actually act on. + if [ "$ACTIONS" = "_None_" ]; then + echo "section=" >> "$GITHUB_OUTPUT" + exit 0 + fi + HEADER=$'\n\n*Couldn\'t auto-fix (need manual `pnpm.overrides`):*\n' + if [ -z "$RUN_ID" ]; then + { + echo "section<> "$GITHUB_OUTPUT" + exit 0 + fi + gh run view "$RUN_ID" --repo "$REPO" --log > log.txt 2>&1 || true + STUCK=$(grep -oE "No update possible for [^[:space:]]+ [0-9][^[:space:]]*" log.txt | sed 's/No update possible for //' | sort -u || true) + if [ -z "$STUCK" ]; then + { + echo "section<> "$GITHUB_OUTPUT" + exit 0 + fi + LIST=$(echo "$STUCK" | awk 'NR>1{printf "\n"} {printf "• *%s* %s", $1, $2}') + { + echo "section<> "$GITHUB_OUTPUT" + + - name: Build Slack payload + env: + REPO: ${{ github.repository }} + CHANNEL: ${{ vars.SLACK_CHANNEL_ID }} + TOTAL: ${{ steps.alerts.outputs.total }} + BY_SEVERITY: ${{ steps.alerts.outputs.by_severity }} + PRS_LIST: ${{ steps.prs.outputs.list }} + ACTIONS: ${{ steps.alerts.outputs.actions }} + STUCK: ${{ steps.stuck.outputs.section }} + run: | + # Build payload via jq so PR titles or error strings containing + # quotes/backslashes/newlines can't break the JSON. 
+ jq -n \ + --arg channel "$CHANNEL" \ + --arg repo "$REPO" \ + --arg total "$TOTAL" \ + --arg by_severity "$BY_SEVERITY" \ + --arg prs_list "$PRS_LIST" \ + --arg actions "$ACTIONS" \ + --arg stuck "$STUCK" \ + --arg threshold "$THRESHOLD_DAYS" \ + '{ + channel: $channel, + text: ":calendar: *Weekly Dependabot summary* - `\($repo)`\n\n*Open alerts (\($total)):*\n\($by_severity)\n\n*Open Dependabot PRs:*\n\($prs_list)\n\n*Actions needed (<\($threshold)d remaining):*\n\($actions)\($stuck)\n\n" + }' > payload.json + + - name: Post Slack summary + uses: slackapi/slack-github-action@45a88b9581bfab2566dc881e2cd66d334e621e2c # v3.0.3 + with: + method: chat.postMessage + token: ${{ secrets.SLACK_BOT_TOKEN }} + payload-file-path: payload.json diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000000..0cac7c8595f --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,44 @@ +name: 📚 Docs Checks + +on: + push: + branches: + - main + paths: + - "docs/**" + pull_request: + types: [opened, synchronize, reopened] + paths: + - "docs/**" + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + check-broken-links: + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./docs + steps: + - name: 📥 Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: 📦 Cache npm + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: | + ~/.npm + key: | + ${{ runner.os }}-mintlify + restore-keys: | + ${{ runner.os }}-mintlify + + - name: 🔗 Check for broken links + run: npx mintlify@4.0.393 broken-links diff --git a/.github/workflows/e2e-webapp-auth-full.yml b/.github/workflows/e2e-webapp-auth-full.yml new file mode 100644 index 00000000000..de9d66c07e9 --- /dev/null +++ 
b/.github/workflows/e2e-webapp-auth-full.yml @@ -0,0 +1,120 @@ +name: "🛡️ E2E Tests: Webapp Auth (full)" + +# Comprehensive RBAC auth test suite — see TRI-8731. Runs separately from +# the smoke e2e-webapp.yml because it covers every route family with a +# pass/fail matrix and would otherwise dominate per-PR CI time. +# +# Triggered: +# - Manually via workflow_dispatch. +# - Nightly via schedule. +# - On pull requests touching auth-relevant files only (paths filter). + +permissions: + contents: read + +on: + workflow_dispatch: + schedule: + - cron: "0 4 * * *" # 04:00 UTC daily + pull_request: + paths: + - "apps/webapp/app/services/routeBuilders/**" + - "apps/webapp/app/services/rbac.server.ts" + - "apps/webapp/app/services/apiAuth.server.ts" + - "apps/webapp/app/services/personalAccessToken.server.ts" + - "apps/webapp/app/services/sessionStorage.server.ts" + - "apps/webapp/app/routes/api.v*.**" + - "apps/webapp/app/routes/realtime.v*.**" + - "apps/webapp/test/**/*.e2e.full.test.ts" + - "apps/webapp/test/setup/global-e2e-full-setup.ts" + - "apps/webapp/test/helpers/sharedTestServer.ts" + - "apps/webapp/test/helpers/seedTestSession.ts" + - "apps/webapp/vitest.e2e.full.config.ts" + - "internal-packages/rbac/**" + - "packages/plugins/**" + - ".github/workflows/e2e-webapp-auth-full.yml" + +jobs: + e2eAuthFull: + name: "🛡️ E2E Auth Tests (full)" + runs-on: ubuntu-latest + timeout-minutes: 30 + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + steps: + - name: 🔧 Disable IPv6 + run: | + sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.lo.disable_ipv6=1 + + - name: 🔧 Configure docker address pool + run: | + CONFIG='{ + "default-address-pools" : [ + { + "base" : "172.17.0.0/12", + "size" : 20 + }, + { + "base" : "192.168.0.0/16", + "size" : 24 + } + ] + }' + mkdir -p /etc/docker + echo "$CONFIG" | sudo tee /etc/docker/daemon.json + + - name: 🔧 Restart docker daemon + run: sudo 
systemctl restart docker + + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + # Don't leave the GITHUB_TOKEN in .git/config — this job + # doesn't need to push and the persisted creds would be + # readable from any subsequent step (zizmor/artipacked). + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + - name: 🐳 Login to DockerHub + if: ${{ env.DOCKERHUB_USERNAME }} + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: 🐳 Skipping DockerHub login (no secrets available) + if: ${{ !env.DOCKERHUB_USERNAME }} + run: echo "DockerHub login skipped because secrets are not available." 
+ + - name: 🐳 Pre-pull testcontainer images + if: ${{ env.DOCKERHUB_USERNAME }} + run: | + docker pull postgres:14 + docker pull redis:7.2 + docker pull testcontainers/ryuk:0.11.0 + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🏗️ Build Webapp + run: pnpm run build --filter webapp + + - name: 🛡️ Run Webapp Full Auth E2E Tests + run: cd apps/webapp && pnpm exec vitest run --config vitest.e2e.full.config.ts --reporter=default + env: + WEBAPP_TEST_VERBOSE: "1" diff --git a/.github/workflows/e2e-webapp.yml b/.github/workflows/e2e-webapp.yml new file mode 100644 index 00000000000..f306a86cd28 --- /dev/null +++ b/.github/workflows/e2e-webapp.yml @@ -0,0 +1,97 @@ +name: "🧪 E2E Tests: Webapp" + +permissions: + contents: read + +on: + workflow_call: + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false + +jobs: + e2eTests: + name: "🧪 E2E Tests: Webapp" + runs-on: ubuntu-latest + timeout-minutes: 20 + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + steps: + - name: 🔧 Disable IPv6 + run: | + sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.lo.disable_ipv6=1 + + - name: 🔧 Configure docker address pool + run: | + CONFIG='{ + "default-address-pools" : [ + { + "base" : "172.17.0.0/12", + "size" : 20 + }, + { + "base" : "192.168.0.0/16", + "size" : 24 + } + ] + }' + mkdir -p /etc/docker + echo "$CONFIG" | sudo tee /etc/docker/daemon.json + + - name: 🔧 Restart docker daemon + run: sudo systemctl restart docker + + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: 
actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + # ..to avoid rate limits when pulling images + - name: 🐳 Login to DockerHub + if: ${{ env.DOCKERHUB_USERNAME }} + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: 🐳 Skipping DockerHub login (no secrets available) + if: ${{ !env.DOCKERHUB_USERNAME }} + run: echo "DockerHub login skipped because secrets are not available." + + - name: 🐳 Pre-pull testcontainer images + if: ${{ env.DOCKERHUB_USERNAME }} + run: | + echo "Pre-pulling Docker images with authenticated session..." + docker pull postgres:14 + docker pull redis:7.2 + docker pull testcontainers/ryuk:0.11.0 + echo "Image pre-pull complete" + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🏗️ Build Webapp + run: pnpm run build --filter webapp + + - name: 🧪 Run Webapp E2E Tests + run: cd apps/webapp && pnpm exec vitest run --config vitest.e2e.config.ts --reporter=default + env: + WEBAPP_TEST_VERBOSE: "1" diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 00000000000..a70f0400e0a --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,60 @@ +name: "E2E" + +permissions: + contents: read + +on: + workflow_call: + inputs: + package: + description: The identifier of the job to run + default: webapp + required: false + type: string + +jobs: + cli-v3: + name: "🧪 CLI v3 tests (${{ matrix.os }} - ${{ matrix.package-manager }})" + if: inputs.package == 'cli-v3' || inputs.package == '' + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + package-manager: ["npm", "pnpm"] + steps: + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: 
+ fetch-depth: 0 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile --filter trigger.dev... + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🔧 Build v3 cli monorepo dependencies + run: pnpm run build --filter trigger.dev^... + + - name: 🔧 Build worker template files + run: pnpm --filter trigger.dev run --if-present build:workers + + - name: Enable corepack + run: corepack enable + + - name: Run E2E Tests + shell: bash + run: | + LOG=debug PM=${{ matrix.package-manager }} pnpm --filter trigger.dev run test:e2e diff --git a/.github/workflows/helm-prerelease.yml b/.github/workflows/helm-prerelease.yml new file mode 100644 index 00000000000..ff2c8f5a614 --- /dev/null +++ b/.github/workflows/helm-prerelease.yml @@ -0,0 +1,200 @@ +name: 🧭 Helm Chart Prerelease + +on: + pull_request: + types: [opened, synchronize, reopened] + paths: + - "hosting/k8s/helm/**" + push: + branches: + - main + paths: + - "hosting/k8s/helm/**" + workflow_dispatch: + inputs: + app_version: + description: "Override appVersion (e.g. 'main', 'v4.4.4'). Leave empty to keep Chart.yaml value." 
+ required: false + type: string + default: "" + +concurrency: + group: helm-prerelease-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + REGISTRY: ghcr.io + CHART_NAME: trigger + +jobs: + lint-and-test: + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Set up Helm + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5.0.0 + with: + version: "3.18.3" + + - name: Build dependencies + run: helm dependency build ./hosting/k8s/helm/ + + - name: Extract dependency charts + run: | + cd ./hosting/k8s/helm/ + for file in ./charts/*.tgz; do echo "Extracting $file"; tar -xzf "$file" -C ./charts; done + + - name: Lint Helm Chart + run: | + helm lint ./hosting/k8s/helm/ + + - name: Render templates + run: | + helm template test-release ./hosting/k8s/helm/ \ + --values ./hosting/k8s/helm/values.yaml \ + --output-dir ./helm-output + + - name: Validate manifests + uses: docker://ghcr.io/yannh/kubeconform:v0.7.0@sha256:85dbef6b4b312b99133decc9c6fc9495e9fc5f92293d4ff3b7e1b30f5611823c + with: + entrypoint: "/kubeconform" + args: "-summary -output json ./helm-output" + + prerelease: + needs: lint-and-test + if: | + (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) || + github.event_name == 'push' || + github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + pull-requests: write + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Set up Helm + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5.0.0 + with: + version: "3.18.3" + + - name: Build dependencies + run: helm dependency build ./hosting/k8s/helm/ + + - name: Extract dependency charts 
+ run: | + cd ./hosting/k8s/helm/ + for file in ./charts/*.tgz; do echo "Extracting $file"; tar -xzf "$file" -C ./charts; done + + - name: Log in to Container Registry + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Generate prerelease version + id: version + run: | + BASE_VERSION=$(grep '^version:' ./hosting/k8s/helm/Chart.yaml | awk '{print $2}') + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + PR_NUMBER=${{ github.event.pull_request.number }} + SHORT_SHA=$(echo "${{ github.event.pull_request.head.sha }}" | cut -c1-7) + PRERELEASE_VERSION="${BASE_VERSION}-pr${PR_NUMBER}.${SHORT_SHA}" + elif [[ "${{ github.event_name }}" == "push" ]]; then + SHORT_SHA=$(echo "${GITHUB_SHA}" | cut -c1-7) + PRERELEASE_VERSION="${BASE_VERSION}-main.${SHORT_SHA}" + else + SHORT_SHA=$(echo "${GITHUB_SHA}" | cut -c1-7) + REF_SLUG=$(echo "${GITHUB_REF_NAME}" | tr '/' '-' | tr -cd 'a-zA-Z0-9-') + if [[ -z "$REF_SLUG" ]]; then + REF_SLUG="manual" + fi + PRERELEASE_VERSION="${BASE_VERSION}-${REF_SLUG}.${SHORT_SHA}" + fi + echo "version=$PRERELEASE_VERSION" >> "$GITHUB_OUTPUT" + echo "Prerelease version: $PRERELEASE_VERSION" + + - name: Update Chart.yaml with prerelease version + run: | + sed -i "s/^version:.*/version: ${STEPS_VERSION_OUTPUTS_VERSION}/" ./hosting/k8s/helm/Chart.yaml + env: + STEPS_VERSION_OUTPUTS_VERSION: ${{ steps.version.outputs.version }} + + - name: Override appVersion + if: github.event_name == 'workflow_dispatch' && inputs.app_version != '' + env: + APP_VERSION: ${{ inputs.app_version }} + run: | + yq -i '.appVersion = strenv(APP_VERSION)' ./hosting/k8s/helm/Chart.yaml + + - name: Package Helm Chart + run: | + helm package ./hosting/k8s/helm/ --destination /tmp/ + + - name: Push Helm Chart to GHCR + run: | + VERSION="${STEPS_VERSION_OUTPUTS_VERSION}" + CHART_PACKAGE="/tmp/${{ env.CHART_NAME 
}}-${VERSION}.tgz" + + # Push to GHCR OCI registry + helm push "$CHART_PACKAGE" "oci://${{ env.REGISTRY }}/${{ github.repository_owner }}/charts" + env: + STEPS_VERSION_OUTPUTS_VERSION: ${{ steps.version.outputs.version }} + + - name: Write run summary + run: | + { + echo "### 🧭 Helm Chart Prerelease Published" + echo "" + echo "**Version:** \`${STEPS_VERSION_OUTPUTS_VERSION}\`" + echo "" + echo "**Install:**" + echo '```bash' + echo "helm upgrade --install trigger \\" + echo " oci://${{ env.REGISTRY }}/${{ github.repository_owner }}/charts/${{ env.CHART_NAME }} \\" + echo " --version \"${STEPS_VERSION_OUTPUTS_VERSION}\"" + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + env: + STEPS_VERSION_OUTPUTS_VERSION: ${{ steps.version.outputs.version }} + + - name: Find existing comment + if: github.event_name == 'pull_request' + uses: peter-evans/find-comment@b30e6a3c0ed37e7c023ccd3f1db5c6c0b0c23aad # v4.0.0 + id: find-comment + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: "github-actions[bot]" + body-includes: "Helm Chart Prerelease Published" + + - name: Create or update PR comment + if: github.event_name == 'pull_request' + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5.0.0 + with: + comment-id: ${{ steps.find-comment.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body: | + ### 🧭 Helm Chart Prerelease Published + + **Version:** `${{ steps.version.outputs.version }}` + + **Install:** + ```bash + helm upgrade --install trigger \ + oci://ghcr.io/${{ github.repository_owner }}/charts/trigger \ + --version "${{ steps.version.outputs.version }}" + ``` + + > ⚠️ This is a prerelease for testing. Do not use in production. 
+ edit-mode: replace diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml new file mode 100644 index 00000000000..95805539807 --- /dev/null +++ b/.github/workflows/pr_checks.yml @@ -0,0 +1,178 @@ +name: 🤖 PR Checks + +on: + pull_request: + types: [opened, synchronize, reopened] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: read + +jobs: + changes: + name: Detect changes + runs-on: ubuntu-latest + outputs: + code: ${{ steps.code_filter.outputs.code }} + typecheck_self: ${{ steps.filter.outputs.typecheck_self }} + webapp: ${{ steps.filter.outputs.webapp }} + packages: ${{ steps.filter.outputs.packages }} + internal: ${{ steps.filter.outputs.internal }} + cli: ${{ steps.filter.outputs.cli }} + sdk: ${{ steps.filter.outputs.sdk }} + steps: + # `code` uses `every` semantics so the negation patterns actually subtract. + # With the default `some` quantifier, `**` matches every file and the + # subsequent `!...` patterns are no-ops (each pattern is OR'd, not AND'd). 
+ - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1 + id: code_filter + with: + predicate-quantifier: every + filters: | + code: + - '**' + - '!docs/**' + - '!.changeset/**' + - '!hosting/**' + - '!.github/**' + - '!**/*.md' + - '!**/.env.example' + - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1 + id: filter + with: + filters: | + typecheck_self: + - '.github/workflows/pr_checks.yml' + - '.github/workflows/typecheck.yml' + webapp: + - 'apps/webapp/**' + - 'packages/**' + - 'internal-packages/**' + - '.github/workflows/pr_checks.yml' + - '.github/workflows/unit-tests-webapp.yml' + - '.github/workflows/e2e-webapp.yml' + - '.configs/**' + - 'package.json' + - 'pnpm-lock.yaml' + - 'pnpm-workspace.yaml' + - 'turbo.json' + packages: + - 'packages/**' + - '.github/workflows/pr_checks.yml' + - '.github/workflows/unit-tests-packages.yml' + - '.configs/**' + - 'package.json' + - 'pnpm-lock.yaml' + - 'pnpm-workspace.yaml' + - 'turbo.json' + internal: + - 'internal-packages/**' + - 'packages/**' + - '.github/workflows/pr_checks.yml' + - '.github/workflows/unit-tests-internal.yml' + - '.configs/**' + - 'package.json' + - 'pnpm-lock.yaml' + - 'pnpm-workspace.yaml' + - 'turbo.json' + cli: + - 'packages/cli-v3/**' + - 'packages/build/**' + - 'packages/core/**' + - 'packages/schema-to-json/**' + - '.github/workflows/pr_checks.yml' + - '.github/workflows/e2e.yml' + - '.configs/**' + - 'package.json' + - 'pnpm-lock.yaml' + - 'pnpm-workspace.yaml' + - 'turbo.json' + sdk: + - 'packages/trigger-sdk/**' + - 'packages/core/**' + - '.github/workflows/pr_checks.yml' + - '.github/workflows/sdk-compat.yml' + - '.configs/**' + - 'package.json' + - 'pnpm-lock.yaml' + - 'pnpm-workspace.yaml' + - 'turbo.json' + + typecheck: + needs: changes + if: needs.changes.outputs.code == 'true' || needs.changes.outputs.typecheck_self == 'true' + uses: ./.github/workflows/typecheck.yml + + webapp: + needs: changes + if: needs.changes.outputs.webapp == 
'true' + uses: ./.github/workflows/unit-tests-webapp.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + + e2e-webapp: + needs: changes + if: needs.changes.outputs.webapp == 'true' + uses: ./.github/workflows/e2e-webapp.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + + packages: + needs: changes + if: needs.changes.outputs.packages == 'true' + uses: ./.github/workflows/unit-tests-packages.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + + internal: + needs: changes + if: needs.changes.outputs.internal == 'true' + uses: ./.github/workflows/unit-tests-internal.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + + e2e: + needs: changes + if: needs.changes.outputs.cli == 'true' + uses: ./.github/workflows/e2e.yml + with: + package: cli-v3 + + sdk-compat: + needs: changes + if: needs.changes.outputs.sdk == 'true' + uses: ./.github/workflows/sdk-compat.yml + + all-checks: + name: All PR Checks + needs: + - changes + - typecheck + - webapp + - e2e-webapp + - packages + - internal + - e2e + - sdk-compat + if: always() + runs-on: ubuntu-latest + steps: + - name: Verify all checks + run: | + if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then + echo "One or more checks failed" + exit 1 + fi + if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then + echo "One or more checks were cancelled" + exit 1 + fi + echo "All checks passed or were skipped due to path filters" diff --git a/.github/workflows/preview-dispatch.yml b/.github/workflows/preview-dispatch.yml new file mode 100644 index 00000000000..3f26c66cf33 --- /dev/null +++ b/.github/workflows/preview-dispatch.yml @@ -0,0 +1,76 @@ +name: 🌱 Preview environment dispatch + +# Opt-in per-PR preview 
environments + +on: + pull_request: + types: [opened, reopened, synchronize, closed, labeled, unlabeled] + +# Serialize a PR's events so dispatches arrive in order. Cloud-side concurrency +# collapses by branch but can't fix out-of-order arrival — e.g. a push racing a +# close could cancel the in-flight destroy and leak the preview. One short API +# call, so queuing is cheap; cancel-in-progress: false lets an in-flight +# dispatch finish (GitHub keeps only the latest pending, the desired behavior). +concurrency: + group: preview-dispatch-${{ github.event.pull_request.number }} + cancel-in-progress: false + +permissions: {} + +jobs: + dispatch: + name: Dispatch preview-deploy to cloud + runs-on: ubuntu-latest + # label added -> create + # new commit while labeled -> update + # label removed / PR closed -> destroy + if: >- + github.event.pull_request.head.repo.full_name == github.repository && + ( + (github.event.action == 'labeled' && github.event.label.name == 'preview') || + (github.event.action == 'unlabeled' && github.event.label.name == 'preview') || + ( + contains(github.event.pull_request.labels.*.name, 'preview') && + contains(fromJSON('["opened","reopened","synchronize","closed"]'), github.event.action) + ) + ) + steps: + - name: Build dispatch payload + id: payload + env: + ACTION: ${{ github.event.action }} + BRANCH: ${{ github.event.pull_request.head.ref }} + COMMIT: ${{ github.event.pull_request.head.sha }} + run: | + set -euo pipefail + # Map the GitHub PR action to the cloud pipeline's lifecycle event. + case "$ACTION" in + labeled | opened | reopened) EVENT=opened ;; + synchronize) EVENT=synchronize ;; + unlabeled | closed) EVENT=closed ;; + *) echo "unexpected action: $ACTION" >&2; exit 1 ;; + esac + # jq --arg JSON-escapes every value, so a branch name containing + # quotes/braces can't break or inject into the client payload. 
+ payload=$(jq -nc \ + --arg b "$BRANCH" \ + --arg c "$COMMIT" \ + --arg e "$EVENT" \ + '{branch_name: $b, commit: $c, pull_request_event: $e}') + { + echo "client_payload=$payload" + echo "summary=$EVENT for $BRANCH @ ${COMMIT:0:7}" + } >> "$GITHUB_OUTPUT" + + - name: Log dispatch + env: + SUMMARY: ${{ steps.payload.outputs.summary }} + run: echo "Dispatching preview-deploy event ($SUMMARY)" + + - name: Send repository_dispatch + uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1 + with: + token: ${{ secrets.CROSS_REPO_PAT }} + repository: triggerdotdev/cloud + event-type: preview-deploy + client-payload: ${{ steps.payload.outputs.client_payload }} diff --git a/.github/workflows/preview-packages.yml b/.github/workflows/preview-packages.yml new file mode 100644 index 00000000000..f4dd5b39930 --- /dev/null +++ b/.github/workflows/preview-packages.yml @@ -0,0 +1,83 @@ +name: 📦 Preview packages (pkg.pr.new) + +# Publishes installable preview builds of the public @trigger.dev/* packages +# for every push to a branch, via https://pkg.pr.new. These are NOT published +# to npm — pkg.pr.new serves them by commit SHA and drops install instructions +# in a comment on the associated PR, e.g. +# npm i https://pkg.pr.new/@trigger.dev/sdk@<commit-sha> +# +# Prerequisites: +# - The pkg.pr.new GitHub App must be installed on triggerdotdev/trigger.dev +# (https://github.com/apps/pkg-pr-new). Publishing fails until it is. +# +# Fork note: pkg.pr.new authenticates with a GitHub Actions OIDC token, which +# GitHub does not issue to pull_request workflows from forks. This `push` +# trigger therefore covers branches pushed to this repo (the core team), not +# external fork PRs. Adding fork coverage would require a workflow_run two-stage +# setup. 
+ +on: + push: + branches-ignore: + - main + - changeset-release/main + paths: + - "package.json" + - "packages/**" + - "pnpm-lock.yaml" + - "pnpm-workspace.yaml" + - "turbo.json" + - ".github/workflows/preview-packages.yml" + - "scripts/stamp-preview-version.mjs" + - "scripts/updateVersion.ts" + +concurrency: + group: preview-packages-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + id-token: write # OIDC token used by pkg.pr.new to authenticate the publish + +jobs: + publish: + name: Build and publish previews + runs-on: ubuntu-latest + if: github.repository == 'triggerdotdev/trigger.dev' + steps: + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.0 + cache: "pnpm" + + - name: 📥 Install dependencies + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma client + run: pnpm run generate + + # Stamp a unique 0.0.0-preview-<sha> version before building so it can't + # collide with real npm versions and so updateVersion.ts bakes it into the + # runtime VERSION constant. See scripts/stamp-preview-version.mjs. 
+ - name: 🏷️ Stamp preview version + run: node scripts/stamp-preview-version.mjs + env: + GITHUB_SHA: ${{ github.sha }} + + - name: 🔨 Build packages + run: pnpm run build --filter "@trigger.dev/*" --filter "trigger.dev" + + - name: 🚀 Publish previews to pkg.pr.new + run: pnpm exec pkg-pr-new publish --pnpm --compact --commentWithSha './packages/*' diff --git a/.github/workflows/publish-webapp.yml b/.github/workflows/publish-webapp.yml new file mode 100644 index 00000000000..5a604e26082 --- /dev/null +++ b/.github/workflows/publish-webapp.yml @@ -0,0 +1,145 @@ +name: "🐳 Publish Webapp" + +permissions: + contents: read + packages: write + id-token: write + attestations: write + +on: + workflow_call: + inputs: + image_tag: + description: The image tag to publish + type: string + required: false + default: "" + image_registry: + description: The registry namespace to publish under (e.g. ghcr.io/) + type: string + required: false + default: "" + outputs: + version: + description: The published image tag + value: ${{ jobs.publish.outputs.version }} + short_sha: + description: Short commit SHA of the published build + value: ${{ jobs.publish.outputs.short_sha }} + image_repo: + description: The image repository the build was published to (without tag) + value: ${{ jobs.publish.outputs.image_repo }} + digest: + description: Multi-arch index digest (sha256:...) 
of the published image + value: ${{ jobs.publish.outputs.digest }} + secrets: + SENTRY_AUTH_TOKEN: + required: false + +jobs: + publish: + runs-on: ubuntu-latest + env: + PRISMA_ENGINES_CHECKSUM_IGNORE_MISSING: 1 + outputs: + version: ${{ steps.get_tag.outputs.tag }} + short_sha: ${{ steps.get_commit.outputs.sha_short }} + image_repo: ${{ steps.set_tags.outputs.image_repo }} + digest: ${{ steps.build_push.outputs.digest }} + steps: + - name: 🏭 Setup Depot CLI + uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1.7.1 + + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + submodules: recursive + persist-credentials: false + + - name: "#️⃣ Get the image tag" + id: get_tag + uses: ./.github/actions/get-image-tag + with: + tag: ${{ inputs.image_tag }} + + - name: 🔢 Get the commit hash + id: get_commit + run: | + echo "sha_short=$(echo "${GITHUB_SHA}" | cut -c1-7)" >> "$GITHUB_OUTPUT" + + - name: 📛 Set the tags + id: set_tags + run: | + # The registry namespace is resolved by the caller (defaulting to + # ghcr.io/<owner>, overridable via the IMAGE_REGISTRY repository + # variable); the webapp image lives at <registry>/<repo>. A fork + # therefore publishes to its own package automatically. 
+ image_tags=$REF_WITHOUT_TAG:${STEPS_GET_TAG_OUTPUTS_TAG} + + # when pushing the mutable main tag, also push an immutable-by-convention + # full-commit-sha tag so a commit can be resolved to a specific digest + if [[ "${STEPS_GET_TAG_OUTPUTS_TAG}" == "main" ]]; then + image_tags=$image_tags,$REF_WITHOUT_TAG:${GITHUB_SHA} + fi + + echo "image_tags=${image_tags}" >> "$GITHUB_OUTPUT" + echo "image_repo=${REF_WITHOUT_TAG}" >> "$GITHUB_OUTPUT" + env: + REF_WITHOUT_TAG: ${{ format('{0}/{1}', inputs.image_registry || vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner), github.event.repository.name) }} + STEPS_GET_TAG_OUTPUTS_TAG: ${{ steps.get_tag.outputs.tag }} + STEPS_GET_TAG_OUTPUTS_IS_SEMVER: ${{ steps.get_tag.outputs.is_semver }} + + - name: 📝 Set the build info + id: set_build_info + run: | + { + tag="${STEPS_GET_TAG_OUTPUTS_TAG}" + if [[ "${STEPS_GET_TAG_OUTPUTS_IS_SEMVER}" == true ]]; then + echo "BUILD_APP_VERSION=${tag}" + fi + echo "BUILD_GIT_SHA=${GITHUB_SHA}" + echo "BUILD_GIT_REF_NAME=${GITHUB_REF_NAME}" + echo "BUILD_TIMESTAMP_SECONDS=$(date +%s)" + echo "BUILD_TIMESTAMP_RFC3339=$(date -u +%Y-%m-%dT%H:%M:%SZ)" + } >> "$GITHUB_OUTPUT" + env: + STEPS_GET_TAG_OUTPUTS_TAG: ${{ steps.get_tag.outputs.tag }} + STEPS_GET_TAG_OUTPUTS_IS_SEMVER: ${{ steps.get_tag.outputs.is_semver }} + + - name: 🐙 Login to GitHub Container Registry + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: 🐳 Build image and push to GitHub Container Registry + id: build_push + uses: depot/build-push-action@98e78adca7817480b8185f474a400b451d74e287 # v1.18.0 + with: + file: ./docker/Dockerfile + platforms: linux/amd64,linux/arm64 + tags: ${{ steps.set_tags.outputs.image_tags }} + push: true + build-args: | + BUILD_APP_VERSION=${{ steps.set_build_info.outputs.BUILD_APP_VERSION }} + BUILD_GIT_SHA=${{ 
steps.set_build_info.outputs.BUILD_GIT_SHA }} + BUILD_GIT_REF_NAME=${{ steps.set_build_info.outputs.BUILD_GIT_REF_NAME }} + BUILD_TIMESTAMP_SECONDS=${{ steps.set_build_info.outputs.BUILD_TIMESTAMP_SECONDS }} + BUILD_TIMESTAMP_RFC3339=${{ steps.set_build_info.outputs.BUILD_TIMESTAMP_RFC3339 }} + SENTRY_RELEASE=${{ steps.set_build_info.outputs.BUILD_GIT_SHA }} + SENTRY_ORG=triggerdev + SENTRY_PROJECT=trigger-cloud + secrets: | + sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }} + + - name: 🪪 Attest build provenance + # Image is already pushed by this point — don't fail releases (and the + # downstream publish-helm job) on a Sigstore/GHCR-referrer hiccup. Real + # config errors still surface as a step warning in the workflow run. + continue-on-error: true + uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0 + with: + subject-name: ${{ steps.set_tags.outputs.image_repo }} + subject-digest: ${{ steps.build_push.outputs.digest }} + push-to-registry: true diff --git a/.github/workflows/publish-worker-v4.yml b/.github/workflows/publish-worker-v4.yml new file mode 100644 index 00000000000..85ca903a8d6 --- /dev/null +++ b/.github/workflows/publish-worker-v4.yml @@ -0,0 +1,99 @@ +name: "⚒️ Publish Worker (v4)" + +on: + workflow_call: + inputs: + image_tag: + description: The image tag to publish + type: string + required: false + default: "" + image_registry: + description: The registry namespace to publish under (e.g. ghcr.io/) + type: string + required: false + default: "" + push: + tags: + - "re2-test-*" + - "re2-prod-*" + +permissions: + id-token: write + packages: write + contents: read + +jobs: + # check-branch: + # runs-on: ubuntu-latest + # steps: + # - name: Fail if re2-prod-* is pushed from a non-main branch + # if: startsWith(github.ref_name, 're2-prod-') && github.base_ref != 'main' + # run: | + # echo "🚫 re2-prod-* tags can only be pushed from the main branch." 
+ # exit 1 + build: + # needs: check-branch + strategy: + matrix: + package: [supervisor] + runs-on: ubuntu-latest + env: + DOCKER_BUILDKIT: "1" + steps: + - name: 🏭 Setup Depot CLI + uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1.7.1 + + - name: ⬇️ Checkout git repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: 📦 Get image repo + id: get_repository + env: + PACKAGE: ${{ matrix.package }} + run: | + if [[ "$PACKAGE" == *-provider ]]; then + repo="provider/${PACKAGE%-provider}" + else + repo="$PACKAGE" + fi + echo "repo=${repo}" >> "$GITHUB_OUTPUT" + + - name: "#️⃣ Get image tag" + id: get_tag + uses: ./.github/actions/get-image-tag + with: + tag: ${{ inputs.image_tag }} + + - name: 📛 Set tags to push + id: set_tags + run: | + # Resolved by the caller when invoked from publish.yml; falls back to the + # IMAGE_REGISTRY repository variable (or ghcr.io/<owner>) for the direct + # push triggers above, so a fork publishes to its own namespace. 
+ ref_without_tag=${IMAGE_REGISTRY}/${STEPS_GET_REPOSITORY_OUTPUTS_REPO} + image_tags=$ref_without_tag:${STEPS_GET_TAG_OUTPUTS_TAG} + + echo "image_tags=${image_tags}" >> "$GITHUB_OUTPUT" + env: + IMAGE_REGISTRY: ${{ inputs.image_registry || vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner) }} + STEPS_GET_REPOSITORY_OUTPUTS_REPO: ${{ steps.get_repository.outputs.repo }} + STEPS_GET_TAG_OUTPUTS_TAG: ${{ steps.get_tag.outputs.tag }} + STEPS_GET_TAG_OUTPUTS_IS_SEMVER: ${{ steps.get_tag.outputs.is_semver }} + + - name: 🐙 Login to GitHub Container Registry + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: 🐳 Build image and push to GitHub Container Registry + uses: depot/build-push-action@98e78adca7817480b8185f474a400b451d74e287 # v1.18.0 + with: + file: ./apps/${{ matrix.package }}/Containerfile + platforms: linux/amd64,linux/arm64 + tags: ${{ steps.set_tags.outputs.image_tags }} + push: true diff --git a/.github/workflows/publish-worker.yml b/.github/workflows/publish-worker.yml new file mode 100644 index 00000000000..f443e5dab1e --- /dev/null +++ b/.github/workflows/publish-worker.yml @@ -0,0 +1,105 @@ +name: "⚒️ Publish Worker" + +on: + workflow_call: + inputs: + image_tag: + description: The image tag to publish + type: string + required: false + default: "" + image_registry: + description: The registry namespace to publish under (e.g. 
ghcr.io/) + type: string + required: false + default: "" + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false + push: + tags: + - "infra-dev-*" + - "infra-test-*" + - "infra-prod-*" + +permissions: + packages: write + contents: read + +jobs: + build: + strategy: + matrix: + package: [coordinator, docker-provider, kubernetes-provider] + runs-on: ubuntu-latest + env: + DOCKER_BUILDKIT: "1" + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + steps: + - name: ⬇️ Checkout git repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: 📦 Get image repo + id: get_repository + env: + PACKAGE: ${{ matrix.package }} + run: | + if [[ "$PACKAGE" == *-provider ]]; then + repo="provider/${PACKAGE%-provider}" + else + repo="$PACKAGE" + fi + echo "repo=${repo}" >> "$GITHUB_OUTPUT" + + - id: get_tag + uses: ./.github/actions/get-image-tag + with: + tag: ${{ inputs.image_tag }} + + - name: 🐋 Set up Docker Buildx + uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 + + # ..to avoid rate limits when pulling images + - name: 🐳 Login to DockerHub + if: ${{ env.DOCKERHUB_USERNAME }} + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: 🚢 Build Container Image + run: | + docker build -t infra_image -f ./apps/${{ matrix.package }}/Containerfile . 
+ + # ..to push image + - name: 🐙 Login to GitHub Container Registry + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: 🐙 Push to GitHub Container Registry + run: | + docker tag infra_image "$REGISTRY/$REPOSITORY:$IMAGE_TAG" + docker push "$REGISTRY/$REPOSITORY:$IMAGE_TAG" + env: + # Resolved by the caller when invoked from publish.yml; falls back to the + # IMAGE_REGISTRY repository variable (or ghcr.io/<owner>) for the direct + # push triggers above, so a fork publishes to its own namespace. + REGISTRY: ${{ inputs.image_registry || vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner) }} + REPOSITORY: ${{ steps.get_repository.outputs.repo }} + IMAGE_TAG: ${{ steps.get_tag.outputs.tag }} + + # - name: 🐙 Push 'v3' tag to GitHub Container Registry + # if: steps.get_tag.outputs.is_semver == 'true' + # run: | + # docker tag infra_image "$REGISTRY/$REPOSITORY:v3" + # docker push "$REGISTRY/$REPOSITORY:v3" + # env: + # REGISTRY: ghcr.io/triggerdotdev + # REPOSITORY: ${{ steps.get_repository.outputs.repo }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 00000000000..2f2744c7702 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,163 @@ +name: 🚀 Publish Trigger.dev Docker + +on: + workflow_dispatch: + workflow_call: + inputs: + image_tag: + description: The image tag to publish + required: true + type: string + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false + SENTRY_AUTH_TOKEN: + required: false + CROSS_REPO_PAT: + required: false + push: + branches: + - main + tags: + - "v.docker.*" + - "build-*" + paths: + - ".github/actions/**/*.yml" + - ".github/workflows/publish.yml" + - ".github/workflows/typecheck.yml" + - ".github/workflows/unit-tests.yml" + - ".github/workflows/e2e.yml" + - 
".github/workflows/publish-webapp.yml" + - ".github/workflows/publish-worker.yml" + - "packages/**" + - "!packages/**/*.md" + - "!packages/**/*.eslintrc" + - "internal-packages/**" + - "apps/**" + - "!apps/**/*.md" + - "!apps/**/*.eslintrc" + - "pnpm-lock.yaml" + - "pnpm-workspace.yaml" + - "turbo.json" + - "docker/Dockerfile" + - "docker/scripts/**" + - "tests/**" + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + +env: + AWS_REGION: us-east-1 + +jobs: + typecheck: + uses: ./.github/workflows/typecheck.yml + + units: + uses: ./.github/workflows/unit-tests.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + + publish-webapp: + needs: [typecheck] + permissions: + contents: read + packages: write + id-token: write + attestations: write + uses: ./.github/workflows/publish-webapp.yml + secrets: + SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} + with: + image_tag: ${{ inputs.image_tag }} + # Target registry namespace. Defaults to ghcr.io/ so a fork publishes + # to its own namespace; set the IMAGE_REGISTRY repository variable to override. 
+ image_registry: ${{ vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner) }} + + publish-worker: + needs: [typecheck] + permissions: + contents: read + packages: write + uses: ./.github/workflows/publish-worker.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + with: + image_tag: ${{ inputs.image_tag }} + image_registry: ${{ vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner) }} + + publish-worker-v4: + needs: [typecheck] + permissions: + contents: read + packages: write + id-token: write + uses: ./.github/workflows/publish-worker-v4.yml + with: + image_tag: ${{ inputs.image_tag }} + image_registry: ${{ vars.IMAGE_REGISTRY || format('ghcr.io/{0}', github.repository_owner) }} + + # OS-level CVE scan of the image just published above. Report-only (writes to + # the run summary); runs alongside the worker publishes and never blocks them. + scan-webapp: + needs: [publish-webapp] + permissions: + contents: read + packages: read # pull the just-published image from GHCR + uses: ./.github/workflows/trivy-image-webapp.yml + with: + image-ref: ${{ needs.publish-webapp.outputs.image_repo }}:${{ needs.publish-webapp.outputs.version }} + + # Announce the freshly published mutable `main` webapp image to subscriber + # repos via repository_dispatch, handing them a digest-pinned ref to build or + # deploy from. The repo, ref prefix, and dispatch target all default to the + # canonical values and can be overridden by repository variables. + # + # `push` only: release builds reach publish.yml via workflow_call (from + # release.yml) with an explicit image_tag while github.ref_name is still + # `main`, so gate on the event to avoid dispatching — and failing on the + # absent CROSS_REPO_PAT — during a release. 
+ dispatch-main-image: + name: 📣 Dispatch main image + needs: [publish-webapp] + if: github.repository == (vars.MAIN_IMAGE_DISPATCH_REPO || 'triggerdotdev/trigger.dev') && github.event_name == 'push' && startsWith(github.ref_name, vars.MAIN_IMAGE_DISPATCH_REF_PREFIX || 'main') + runs-on: ubuntu-latest + permissions: {} + steps: + - name: Build dispatch payload + id: payload + env: + IMAGE_REPO: ${{ needs.publish-webapp.outputs.image_repo }} + DIGEST: ${{ needs.publish-webapp.outputs.digest }} + COMMIT: ${{ github.sha }} + run: | + set -euo pipefail + # Pin to the exact multi-arch index just pushed so subscribers resolve a + # single immutable artifact rather than chasing the moving `main` tag. + if [[ -z "${DIGEST}" ]]; then + echo "::error::publish-webapp produced no image digest; refusing to dispatch" + exit 1 + fi + image="${IMAGE_REPO}@${DIGEST}" + # jq --arg JSON-escapes every value, so the ref/commit can't break out of + # or inject into the client payload. + payload=$(jq -nc \ + --arg img "$image" \ + --arg c "$COMMIT" \ + '{image: $img, commit: $c}') + echo "client_payload=$payload" >> "$GITHUB_OUTPUT" + + - name: Send repository_dispatch + uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1 + with: + token: ${{ secrets.CROSS_REPO_PAT }} + repository: ${{ vars.MAIN_IMAGE_DISPATCH_TARGET || 'triggerdotdev/cloud' }} + event-type: main-image-published + client-payload: ${{ steps.payload.outputs.client_payload }} diff --git a/.github/workflows/release-helm.yml b/.github/workflows/release-helm.yml new file mode 100644 index 00000000000..13d28545e7f --- /dev/null +++ b/.github/workflows/release-helm.yml @@ -0,0 +1,158 @@ +name: 🧭 Helm Chart Release + +on: + push: + tags: + - 'helm-v*' + workflow_call: + inputs: + chart_version: + description: 'Chart version to release' + required: true + type: string + workflow_dispatch: + inputs: + chart_version: + description: 'Chart version to release' + required: true + type: string + +env: 
+ REGISTRY: ghcr.io + CHART_NAME: trigger + +jobs: + lint-and-test: + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Set up Helm + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5.0.0 + with: + version: "3.18.3" + + - name: Build dependencies + run: helm dependency build ./hosting/k8s/helm/ + + - name: Extract dependency charts + run: | + cd ./hosting/k8s/helm/ + for file in ./charts/*.tgz; do echo "Extracting $file"; tar -xzf "$file" -C ./charts; done + + - name: Lint Helm Chart + run: | + helm lint ./hosting/k8s/helm/ + + - name: Render templates + run: | + helm template test-release ./hosting/k8s/helm/ \ + --values ./hosting/k8s/helm/values.yaml \ + --output-dir ./helm-output + + - name: Validate manifests + uses: docker://ghcr.io/yannh/kubeconform:v0.7.0@sha256:85dbef6b4b312b99133decc9c6fc9495e9fc5f92293d4ff3b7e1b30f5611823c + with: + entrypoint: '/kubeconform' + args: "-summary -output json ./helm-output" + + release: + needs: lint-and-test + runs-on: ubuntu-latest + permissions: + contents: write # for gh-release + packages: write + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Set up Helm + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5.0.0 + with: + version: "3.18.3" + + - name: Build dependencies + run: helm dependency build ./hosting/k8s/helm/ + + - name: Extract dependency charts + run: | + cd ./hosting/k8s/helm/ + for file in ./charts/*.tgz; do echo "Extracting $file"; tar -xzf "$file" -C ./charts; done + + - name: Log in to Container Registry + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract 
version from tag or input + id: version + run: | + if [ -n "${INPUTS_CHART_VERSION}" ]; then + VERSION="${INPUTS_CHART_VERSION}" + else + VERSION="${GITHUB_REF_NAME}" + VERSION="${VERSION#helm-v}" + fi + echo "version=$VERSION" >> "$GITHUB_OUTPUT" + echo "Releasing version: $VERSION" + env: + INPUTS_CHART_VERSION: ${{ inputs.chart_version }} + + - name: Check Chart.yaml version matches release version + run: | + VERSION="${STEPS_VERSION_OUTPUTS_VERSION}" + CHART_VERSION=$(grep '^version:' ./hosting/k8s/helm/Chart.yaml | awk '{print $2}') + echo "Chart.yaml version: $CHART_VERSION" + echo "Release version: $VERSION" + if [ "$CHART_VERSION" != "$VERSION" ]; then + echo "❌ Chart.yaml version does not match release version!" + exit 1 + fi + echo "✅ Chart.yaml version matches release version." + env: + STEPS_VERSION_OUTPUTS_VERSION: ${{ steps.version.outputs.version }} + + - name: Package Helm Chart + run: | + helm package ./hosting/k8s/helm/ --destination /tmp/ + + - name: Push Helm Chart to GHCR + run: | + VERSION="${STEPS_VERSION_OUTPUTS_VERSION}" + CHART_PACKAGE="/tmp/${{ env.CHART_NAME }}-${VERSION}.tgz" + + # Push to GHCR OCI registry + helm push "$CHART_PACKAGE" "oci://${{ env.REGISTRY }}/${{ github.repository_owner }}/charts" + env: + STEPS_VERSION_OUTPUTS_VERSION: ${{ steps.version.outputs.version }} + + - name: Create GitHub Release + id: release + uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0 + with: + tag_name: helm-v${{ steps.version.outputs.version }} + name: "Helm Chart ${{ steps.version.outputs.version }}" + body: | + ### Installation + ```bash + helm upgrade --install trigger \ + oci://${{ env.REGISTRY }}/${{ github.repository_owner }}/charts/${{ env.CHART_NAME }} \ + --version "${{ steps.version.outputs.version }}" + ``` + + ### Changes + See commit history for detailed changes in this release. 
+ files: | + /tmp/${{ env.CHART_NAME }}-${{ steps.version.outputs.version }}.tgz + token: ${{ secrets.GITHUB_TOKEN }} + draft: true + prerelease: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000000..e3b339dfca7 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,328 @@ +name: 🦋 Changesets Release + +on: + pull_request: + types: [closed] + branches: + - main + workflow_dispatch: + inputs: + type: + description: "Select release type" + required: true + type: choice + options: + - release + - prerelease + default: "prerelease" + ref: + description: "The ref (branch, tag, or SHA) to checkout and release from" + required: true + type: string + prerelease_tag: + description: "The npm dist-tag for the prerelease (e.g., 'v4-prerelease')" + required: false + type: string + default: "prerelease" + +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: false + +jobs: + show-release-summary: + name: 📋 Release Summary + runs-on: ubuntu-latest + permissions: {} + if: | + github.repository == 'triggerdotdev/trigger.dev' && + github.event_name == 'pull_request' && + github.event.pull_request.merged == true && + github.event.pull_request.head.ref == 'changeset-release/main' + steps: + - name: Show release summary + env: + PR_BODY: ${{ github.event.pull_request.body }} + run: | + echo "$PR_BODY" | sed -n '/^# Releases/,$p' >> "$GITHUB_STEP_SUMMARY" + + release: + name: 🚀 Release npm packages + runs-on: ubuntu-latest + environment: npm-publish + permissions: + contents: write + packages: write + id-token: write + if: | + github.repository == 'triggerdotdev/trigger.dev' && + ( + (github.event_name == 'workflow_dispatch' && github.event.inputs.type == 'release') || + (github.event_name == 'pull_request' && github.event.pull_request.merged == true && github.event.pull_request.head.ref == 'changeset-release/main') + ) + outputs: + published: ${{ steps.changesets.outputs.published }} + 
published_packages: ${{ steps.changesets.outputs.publishedPackages }} + published_package_version: ${{ steps.get_version.outputs.package_version }} + is_prerelease: ${{ steps.get_version.outputs.is_prerelease }} + steps: + - name: Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 # zizmor: ignore[artipacked] needs persisted git creds for tag push; no artifact upload here so no leak path + with: + fetch-depth: 0 + ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.ref || github.sha }} + + - name: Verify ref is on main + if: github.event_name == 'workflow_dispatch' + run: | + if ! git merge-base --is-ancestor "${GITHUB_EVENT_INPUTS_REF}" origin/main; then + echo "Error: ref must be an ancestor of main (i.e., already merged)" + exit 1 + fi + env: + GITHUB_EVENT_INPUTS_REF: ${{ github.event.inputs.ref }} + + - name: Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + # npm v11.5.1 or newer is required for OIDC support + # https://github.blog/changelog/2025-07-31-npm-trusted-publishing-with-oidc-is-generally-available/#whats-new + - name: Setup npm 11.x for OIDC + run: npm install -g npm@11.6.4 + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Generate Prisma client + run: pnpm run generate + + - name: Build + run: pnpm run build --filter "@trigger.dev/*" --filter "trigger.dev" + + - name: Type check + run: pnpm run typecheck --filter "@trigger.dev/*" --filter "trigger.dev" + + - name: Publish + id: changesets + uses: changesets/action@63a615b9cd06ba9a3e6d13796c7fbcb080a60a0b # v1.8.0 + with: + publish: pnpm run changeset:release + createGithubReleases: false + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Show package version + if: 
steps.changesets.outputs.published == 'true' + id: get_version + run: | + package_version=$(echo "${STEPS_CHANGESETS_OUTPUTS_PUBLISHEDPACKAGES}" | jq -r '.[0].version') + echo "package_version=${package_version}" >> "$GITHUB_OUTPUT" + # Any semver with a hyphen is a prerelease (e.g. 4.5.0-rc.0, 0.0.0-snapshot-...) + if [[ "${package_version}" == *-* ]]; then + echo "is_prerelease=true" >> "$GITHUB_OUTPUT" + else + echo "is_prerelease=false" >> "$GITHUB_OUTPUT" + fi + env: + STEPS_CHANGESETS_OUTPUTS_PUBLISHEDPACKAGES: ${{ steps.changesets.outputs.publishedPackages }} + + - name: Create unified GitHub release + if: steps.changesets.outputs.published == 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + RELEASE_PR_BODY: ${{ github.event.pull_request.body }} + STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION: ${{ steps.get_version.outputs.package_version }} + STEPS_GET_VERSION_OUTPUTS_IS_PRERELEASE: ${{ steps.get_version.outputs.is_prerelease }} + run: | + VERSION="${STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION}" + node scripts/generate-github-release.mjs "$VERSION" > /tmp/release-body.md + PRERELEASE_FLAG="" + if [ "${STEPS_GET_VERSION_OUTPUTS_IS_PRERELEASE}" = "true" ]; then + PRERELEASE_FLAG="--prerelease" + fi + gh release create "v${VERSION}" \ + --title "trigger.dev v${VERSION}" \ + --notes-file /tmp/release-body.md \ + --target main \ + $PRERELEASE_FLAG + + - name: Create and push Docker tag + if: steps.changesets.outputs.published == 'true' + run: | + set -e + git tag "v.docker.${STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION}" + git push origin "v.docker.${STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION}" + env: + STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION: ${{ steps.get_version.outputs.package_version }} + + - name: Create and push Helm chart tag + if: steps.changesets.outputs.published == 'true' + run: | + set -e + git tag "helm-v${STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION}" + git push origin "helm-v${STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION}" + env: + 
STEPS_GET_VERSION_OUTPUTS_PACKAGE_VERSION: ${{ steps.get_version.outputs.package_version }} + + # Trigger Docker builds directly via workflow_call since tags pushed with + # GITHUB_TOKEN don't trigger other workflows (GitHub Actions limitation). + publish-docker: + name: 🐳 Publish Docker images + needs: release + if: needs.release.outputs.published == 'true' + permissions: + contents: read + packages: write + id-token: write + attestations: write + uses: ./.github/workflows/publish.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} + with: + image_tag: v${{ needs.release.outputs.published_package_version }} + + # Trigger Helm chart release directly via workflow_call (same GITHUB_TOKEN + # limitation as the Docker path). Runs after Docker images are published so + # the chart never references images that don't exist yet. + publish-helm: + name: 🧭 Publish Helm chart + needs: [release, publish-docker] + if: needs.release.outputs.published == 'true' + permissions: + contents: write + packages: write + uses: ./.github/workflows/release-helm.yml + with: + chart_version: ${{ needs.release.outputs.published_package_version }} + + # After Docker images are published, update the GitHub release with the exact GHCR tag URL. + # The GHCR package version ID is only known after the image is pushed, so we query for it here. 
+ update-release: + name: 🔗 Update release Docker link + needs: [release, publish-docker] + if: needs.release.outputs.published == 'true' + runs-on: ubuntu-latest + permissions: + contents: write + packages: read + steps: + - name: Update GitHub release with Docker image link + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NEEDS_RELEASE_OUTPUTS_PUBLISHED_PACKAGE_VERSION: ${{ needs.release.outputs.published_package_version }} + run: | + set -e + VERSION="${NEEDS_RELEASE_OUTPUTS_PUBLISHED_PACKAGE_VERSION}" + TAG="v${VERSION}" + + # Query GHCR for the version ID matching this tag + VERSION_ID=$(gh api --paginate -H "Accept: application/vnd.github+json" \ + /orgs/triggerdotdev/packages/container/trigger.dev/versions \ + --jq ".[] | select(.metadata.container.tags[] == \"${TAG}\") | .id" \ + | head -1) + + if [ -z "$VERSION_ID" ]; then + echo "Warning: Could not find GHCR version ID for tag ${TAG}, skipping update" + exit 0 + fi + + DOCKER_URL="https://github.com/triggerdotdev/trigger.dev/pkgs/container/trigger.dev/${VERSION_ID}?tag=${TAG}" + GENERIC_URL="https://github.com/triggerdotdev/trigger.dev/pkgs/container/trigger.dev" + + # Get current release body and replace the generic link with the tag-specific one. + # Use word boundary after GENERIC_URL (closing paren) to avoid matching URLs that + # already have a version ID appended (idempotent on re-runs). + gh release view "${TAG}" --repo triggerdotdev/trigger.dev --json body --jq '.body' > /tmp/release-body.md + sed -i "s|${GENERIC_URL})|${DOCKER_URL})|g" /tmp/release-body.md + + gh release edit "${TAG}" --repo triggerdotdev/trigger.dev --notes-file /tmp/release-body.md + + # Dispatch changelog entry creation to the marketing site repo. + # Runs after update-release so the GitHub release body already has the exact Docker image URL. 
+ dispatch-changelog: + name: 📝 Dispatch changelog PR + needs: [release, update-release] + if: needs.release.outputs.published == 'true' && needs.release.outputs.is_prerelease != 'true' + runs-on: ubuntu-latest + permissions: {} + steps: + - uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1 + with: + token: ${{ secrets.CROSS_REPO_PAT }} + repository: triggerdotdev/trigger.dev-site-v3 + event-type: new-release + client-payload: '{"version": "${{ needs.release.outputs.published_package_version }}"}' + + # The prerelease job needs to be on the same workflow file due to a limitation related to how npm verifies OIDC claims. + prerelease: + name: 🧪 Prerelease + runs-on: ubuntu-latest + environment: npm-publish + permissions: + contents: read + id-token: write + if: github.repository == 'triggerdotdev/trigger.dev' && github.event_name == 'workflow_dispatch' && github.event.inputs.type == 'prerelease' + steps: + - name: Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + ref: ${{ github.event.inputs.ref }} + persist-credentials: false + + - name: Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + # npm v11.5.1 or newer is required for OIDC support + # https://github.blog/changelog/2025-07-31-npm-trusted-publishing-with-oidc-is-generally-available/#whats-new + - name: Setup npm 11.x for OIDC + run: npm install -g npm@11.6.4 + + - name: Download deps + run: pnpm install --frozen-lockfile + + - name: Generate Prisma Client + run: pnpm run generate + + - name: Exit changeset pre mode (if active) + run: | + if [ -f .changeset/pre.json ]; then + echo "Repo is in changeset pre mode; exiting so snapshot release can run" + pnpm exec changeset pre exit + fi + + - name: 
Snapshot version + run: pnpm exec changeset version --snapshot "${GITHUB_EVENT_INPUTS_PRERELEASE_TAG}" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_EVENT_INPUTS_PRERELEASE_TAG: ${{ github.event.inputs.prerelease_tag }} + + - name: Clean + run: pnpm run clean --filter "@trigger.dev/*" --filter "trigger.dev" + + - name: Build + run: pnpm run build --filter "@trigger.dev/*" --filter "trigger.dev" + + - name: Publish prerelease + run: pnpm exec changeset publish --no-git-tag --snapshot --tag "${GITHUB_EVENT_INPUTS_PRERELEASE_TAG}" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_EVENT_INPUTS_PRERELEASE_TAG: ${{ github.event.inputs.prerelease_tag }} diff --git a/.github/workflows/sdk-compat.yml b/.github/workflows/sdk-compat.yml new file mode 100644 index 00000000000..1510af23181 --- /dev/null +++ b/.github/workflows/sdk-compat.yml @@ -0,0 +1,182 @@ +name: "🔌 SDK Compatibility Tests" + +permissions: + contents: read + +on: + workflow_call: + +jobs: + node-compat: + name: "Node.js ${{ matrix.node }} (${{ matrix.os }})" + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + node: ["20.20", "22.12"] + + steps: + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: ${{ matrix.node }} + cache: "pnpm" + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🔨 Build SDK dependencies + shell: bash + run: pnpm run build --filter '@trigger.dev/sdk^...' 
+ + - name: 🔨 Build SDK + shell: bash + run: pnpm run build --filter '@trigger.dev/sdk' + + - name: 🧪 Run SDK Compatibility Tests + shell: bash + run: pnpm --filter @internal/sdk-compat-tests test + + bun-compat: + name: "Bun Runtime" + runs-on: ubuntu-latest + steps: + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + - name: 🥟 Setup Bun + uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0 + with: + bun-version: latest + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🔨 Build SDK dependencies + run: pnpm run build --filter @trigger.dev/sdk^... 
+ + - name: 🔨 Build SDK + run: pnpm run build --filter @trigger.dev/sdk + + - name: 🧪 Run Bun Compatibility Test + working-directory: internal-packages/sdk-compat-tests/src/fixtures/bun + run: bun run test.ts + + deno-compat: + name: "Deno Runtime" + runs-on: ubuntu-latest + steps: + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + - name: 🦕 Setup Deno + uses: denoland/setup-deno@667a34cdef165d8d2b2e98dde39547c9daac7282 # v2.0.4 + with: + deno-version: v2.x + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🔨 Build SDK dependencies + run: pnpm run build --filter @trigger.dev/sdk^... 
+ + - name: 🔨 Build SDK + run: pnpm run build --filter @trigger.dev/sdk + + - name: 🔗 Link node_modules for Deno fixture + working-directory: internal-packages/sdk-compat-tests/src/fixtures/deno + run: ln -s ../../../../../node_modules node_modules + + - name: 🧪 Run Deno Compatibility Test + working-directory: internal-packages/sdk-compat-tests/src/fixtures/deno + run: deno run --allow-read --allow-env --allow-sys test.ts + + cloudflare-compat: + name: "Cloudflare Workers" + runs-on: ubuntu-latest + steps: + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🔨 Build SDK dependencies + run: pnpm run build --filter @trigger.dev/sdk^... + + - name: 🔨 Build SDK + run: pnpm run build --filter @trigger.dev/sdk + + - name: 📥 Install Cloudflare fixture deps + working-directory: internal-packages/sdk-compat-tests/src/fixtures/cloudflare-worker + run: pnpm install + + - name: 🧪 Run Cloudflare Workers Compatibility Test (dry-run) + working-directory: internal-packages/sdk-compat-tests/src/fixtures/cloudflare-worker + run: npx wrangler deploy --dry-run --outdir dist diff --git a/.github/workflows/trivy-image-webapp.yml b/.github/workflows/trivy-image-webapp.yml new file mode 100644 index 00000000000..7dae65ef2bf --- /dev/null +++ b/.github/workflows/trivy-image-webapp.yml @@ -0,0 +1,75 @@ +name: Trivy Image Scan (webapp) + +# OS-level CVE scan of a published webapp image. 
Called by the publish pipeline +# (publish.yml) to scan each build right after it's pushed to GHCR — so every +# main build and every release is scanned, not rebuilt. Also runnable ad-hoc +# via workflow_dispatch against any image ref. +# +# Report-only: writes a table to the run summary. No SARIF upload, no gate. +# Library/dependency CVEs are covered by Dependabot, so this is restricted to +# OS packages (`vuln-type: os`) to avoid double-reporting. + +on: + workflow_call: + inputs: + image-ref: + description: "Full image ref to scan (e.g. ghcr.io/triggerdotdev/trigger.dev:main)" + type: string + required: true + workflow_dispatch: + inputs: + image-ref: + description: "Full image ref to scan" + type: string + required: false + default: "ghcr.io/triggerdotdev/trigger.dev:main" + +permissions: {} + +concurrency: + group: trivy-image-webapp-${{ inputs.image-ref }} + cancel-in-progress: true + +jobs: + scan: + name: Scan + runs-on: ubuntu-latest + permissions: + contents: read + packages: read # pull the image from GHCR + steps: + # Authenticate to GHCR so the scan also works for private images + # (GITHUB_TOKEN isn't forwarded to Docker automatically). Harmless for + # public images. Pairs with the packages: read permission above. + - name: Log in to GitHub Container Registry + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Run Trivy image scan + uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0 + with: + scan-type: image + image-ref: ${{ inputs.image-ref }} + # vuln-type maps to --pkg-types: OS packages only (library deps are + # Dependabot's job). ignore-unfixed drops vulns with no patch yet. 
+ vuln-type: os + ignore-unfixed: true + severity: HIGH,CRITICAL + format: table + output: trivy-image-webapp.txt + + - name: Job summary + if: always() + env: + IMAGE_REF: ${{ inputs.image-ref }} + run: | + { + echo "## Trivy Image Scan (webapp) — \`${IMAGE_REF}\`" + echo '```' + # GitHub step summary is capped at 1 MiB; truncate large reports. + head -c 900000 trivy-image-webapp.txt 2>/dev/null || echo "(no report produced)" + echo '```' + } >> "$GITHUB_STEP_SUMMARY" diff --git a/.github/workflows/typecheck.yml b/.github/workflows/typecheck.yml new file mode 100644 index 00000000000..91ec46f3a9a --- /dev/null +++ b/.github/workflows/typecheck.yml @@ -0,0 +1,43 @@ +name: "ʦ TypeScript" + +on: + workflow_call: + +permissions: + contents: read + +jobs: + typecheck: + runs-on: ubuntu-latest + + steps: + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🔎 Type check + run: pnpm run typecheck + env: + NODE_OPTIONS: --max-old-space-size=8192 + + - name: 🔎 Check exports + run: pnpm run check-exports diff --git a/.github/workflows/unit-tests-internal.yml b/.github/workflows/unit-tests-internal.yml new file mode 100644 index 00000000000..e2aae11b846 --- /dev/null +++ b/.github/workflows/unit-tests-internal.yml @@ -0,0 +1,160 @@ +name: "🧪 Unit Tests: Internal" + +permissions: + contents: read + +on: + workflow_call: + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false + +jobs: + unitTests: + name: "🧪 Unit Tests: Internal" + 
runs-on: ubuntu-latest + strategy: + # one flaky shard shouldn't cancel its siblings - lets us re-run only the failed shard + fail-fast: false + matrix: + shardIndex: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + shardTotal: [12] + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + SHARD_INDEX: ${{ matrix.shardIndex }} + SHARD_TOTAL: ${{ matrix.shardTotal }} + steps: + - name: 🔧 Disable IPv6 + run: | + sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.lo.disable_ipv6=1 + + - name: 🔧 Configure docker address pool + run: | + CONFIG='{ + "default-address-pools" : [ + { + "base" : "172.17.0.0/12", + "size" : 20 + }, + { + "base" : "192.168.0.0/16", + "size" : 24 + } + ] + }' + mkdir -p /etc/docker + echo "$CONFIG" | sudo tee /etc/docker/daemon.json + + - name: 🔧 Restart docker daemon + run: sudo systemctl restart docker + + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + # ..to avoid rate limits when pulling images + - name: 🐳 Login to DockerHub + if: ${{ env.DOCKERHUB_USERNAME }} + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: 🐳 Skipping DockerHub login (no secrets available) + if: ${{ !env.DOCKERHUB_USERNAME }} + run: echo "DockerHub login skipped because secrets are not available." + + - name: 🐳 Pre-pull testcontainer images + if: ${{ env.DOCKERHUB_USERNAME }} + run: | + # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake. 
+ pull() { + for attempt in 1 2 3; do + docker pull "$1" && return 0 + echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s" + sleep 10 + done + echo "::error::docker pull $1 failed after 3 attempts" + return 1 + } + echo "Pre-pulling Docker images with authenticated session..." + pull postgres:14 + pull clickhouse/clickhouse-server:25.4-alpine + pull redis:7.2 + pull testcontainers/ryuk:0.14.0 + pull electricsql/electric:1.2.4 + echo "Image pre-pull complete" + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🧪 Run Internal Unit Tests + run: pnpm run test:internal --reporter=default --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --passWithNoTests + + - name: Gather all reports + if: ${{ !cancelled() }} + run: | + mkdir -p .vitest-reports + find . -type f -path '*/.vitest-reports/blob-*.json' \ + -exec bash -c 'src="$1"; basename=$(basename "$src"); pkg=$(dirname "$src" | sed "s|^\./||;s|/\.vitest-reports$||;s|/|_|g"); cp "$src" ".vitest-reports/${pkg}-${basename}"' _ {} \; + + - name: Upload blob reports to GitHub Actions Artifacts + if: ${{ !cancelled() }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: internal-blob-report-${{ matrix.shardIndex }} + path: .vitest-reports/* + include-hidden-files: true + retention-days: 1 + + merge-reports: + name: "📊 Merge Reports" + if: ${{ !cancelled() }} + needs: [unitTests] + runs-on: ubuntu-latest + steps: + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + # no cache 
enabled, we're not installing deps + + - name: Download blob reports from GitHub Actions Artifacts + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + path: .vitest-reports + pattern: internal-blob-report-* + merge-multiple: true + + - name: Merge reports + run: pnpm dlx vitest@4.1.7 run --merge-reports --pass-with-no-tests diff --git a/.github/workflows/unit-tests-packages.yml b/.github/workflows/unit-tests-packages.yml new file mode 100644 index 00000000000..6642f2443c4 --- /dev/null +++ b/.github/workflows/unit-tests-packages.yml @@ -0,0 +1,160 @@ +name: "🧪 Unit Tests: Packages" + +permissions: + contents: read + +on: + workflow_call: + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false + +jobs: + unitTests: + name: "🧪 Unit Tests: Packages" + runs-on: ubuntu-latest + strategy: + # one flaky shard shouldn't cancel its siblings - lets us re-run only the failed shard + fail-fast: false + matrix: + shardIndex: [1, 2, 3] + shardTotal: [3] + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + SHARD_INDEX: ${{ matrix.shardIndex }} + SHARD_TOTAL: ${{ matrix.shardTotal }} + steps: + - name: 🔧 Disable IPv6 + run: | + sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.lo.disable_ipv6=1 + + - name: 🔧 Configure docker address pool + run: | + CONFIG='{ + "default-address-pools" : [ + { + "base" : "172.17.0.0/12", + "size" : 20 + }, + { + "base" : "192.168.0.0/16", + "size" : 24 + } + ] + }' + mkdir -p /etc/docker + echo "$CONFIG" | sudo tee /etc/docker/daemon.json + + - name: 🔧 Restart docker daemon + run: sudo systemctl restart docker + + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 
10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + # ..to avoid rate limits when pulling images + - name: 🐳 Login to DockerHub + if: ${{ env.DOCKERHUB_USERNAME }} + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: 🐳 Skipping DockerHub login (no secrets available) + if: ${{ !env.DOCKERHUB_USERNAME }} + run: echo "DockerHub login skipped because secrets are not available." + + - name: 🐳 Pre-pull testcontainer images + if: ${{ env.DOCKERHUB_USERNAME }} + run: | + # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake. + pull() { + for attempt in 1 2 3; do + docker pull "$1" && return 0 + echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s" + sleep 10 + done + echo "::error::docker pull $1 failed after 3 attempts" + return 1 + } + echo "Pre-pulling Docker images with authenticated session..." + pull postgres:14 + pull clickhouse/clickhouse-server:25.4-alpine + pull redis:7.2 + pull testcontainers/ryuk:0.14.0 + pull electricsql/electric:1.2.4 + echo "Image pre-pull complete" + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🧪 Run Package Unit Tests + run: pnpm run test:packages --reporter=default --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --passWithNoTests + + - name: Gather all reports + if: ${{ !cancelled() }} + run: | + mkdir -p .vitest-reports + find . 
-type f -path '*/.vitest-reports/blob-*.json' \ + -exec bash -c 'src="$1"; basename=$(basename "$src"); pkg=$(dirname "$src" | sed "s|^\./||;s|/\.vitest-reports$||;s|/|_|g"); cp "$src" ".vitest-reports/${pkg}-${basename}"' _ {} \; + + - name: Upload blob reports to GitHub Actions Artifacts + if: ${{ !cancelled() }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: packages-blob-report-${{ matrix.shardIndex }} + path: .vitest-reports/* + include-hidden-files: true + retention-days: 1 + + merge-reports: + name: "📊 Merge Reports" + if: ${{ !cancelled() }} + needs: [unitTests] + runs-on: ubuntu-latest + steps: + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + # no cache enabled, we're not installing deps + + - name: Download blob reports from GitHub Actions Artifacts + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + path: .vitest-reports + pattern: packages-blob-report-* + merge-multiple: true + + - name: Merge reports + run: pnpm dlx vitest@4.1.7 run --merge-reports --pass-with-no-tests diff --git a/.github/workflows/unit-tests-webapp.yml b/.github/workflows/unit-tests-webapp.yml new file mode 100644 index 00000000000..dc1cc978f35 --- /dev/null +++ b/.github/workflows/unit-tests-webapp.yml @@ -0,0 +1,169 @@ +name: "🧪 Unit Tests: Webapp" + +permissions: + contents: read + +on: + workflow_call: + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false + +jobs: + unitTests: + name: "🧪 Unit Tests: Webapp" + runs-on: ubuntu-latest + strategy: + # one flaky shard shouldn't cancel its 
siblings - lets us re-run only the failed shard + fail-fast: false + matrix: + shardIndex: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + shardTotal: [10] + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + SHARD_INDEX: ${{ matrix.shardIndex }} + SHARD_TOTAL: ${{ matrix.shardTotal }} + steps: + - name: 🔧 Disable IPv6 + run: | + sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1 + sudo sysctl -w net.ipv6.conf.lo.disable_ipv6=1 + + - name: 🔧 Configure docker address pool + run: | + CONFIG='{ + "default-address-pools" : [ + { + "base" : "172.17.0.0/12", + "size" : 20 + }, + { + "base" : "192.168.0.0/16", + "size" : 24 + } + ] + }' + mkdir -p /etc/docker + echo "$CONFIG" | sudo tee /etc/docker/daemon.json + + - name: 🔧 Restart docker daemon + run: sudo systemctl restart docker + + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + cache: "pnpm" + + # ..to avoid rate limits when pulling images + - name: 🐳 Login to DockerHub + if: ${{ env.DOCKERHUB_USERNAME }} + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: 🐳 Skipping DockerHub login (no secrets available) + if: ${{ !env.DOCKERHUB_USERNAME }} + run: echo "DockerHub login skipped because secrets are not available." + + - name: 🐳 Pre-pull testcontainer images + if: ${{ env.DOCKERHUB_USERNAME }} + run: | + # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake. 
+ pull() { + for attempt in 1 2 3; do + docker pull "$1" && return 0 + echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s" + sleep 10 + done + echo "::error::docker pull $1 failed after 3 attempts" + return 1 + } + echo "Pre-pulling Docker images with authenticated session..." + pull postgres:14 + pull clickhouse/clickhouse-server:25.4-alpine + pull redis:7.2 + pull testcontainers/ryuk:0.14.0 + pull electricsql/electric:1.2.4 + pull minio/minio:latest + echo "Image pre-pull complete" + + - name: 📥 Download deps + run: pnpm install --frozen-lockfile + + - name: 📀 Generate Prisma Client + run: pnpm run generate + + - name: 🧪 Run Webapp Unit Tests + run: pnpm run test:webapp --reporter=default --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --passWithNoTests + env: + DATABASE_URL: postgresql://postgres:postgres@localhost:5432/postgres + DIRECT_URL: postgresql://postgres:postgres@localhost:5432/postgres + SESSION_SECRET: "secret" + MAGIC_LINK_SECRET: "secret" + ENCRYPTION_KEY: "dummy-encryption-keeeey-32-bytes" + DEPLOY_REGISTRY_HOST: "docker.io" + CLICKHOUSE_URL: "http://default:password@localhost:8123" + + - name: Gather all reports + if: ${{ !cancelled() }} + run: | + mkdir -p .vitest-reports + find . 
-type f -path '*/.vitest-reports/blob-*.json' \ + -exec bash -c 'src="$1"; basename=$(basename "$src"); pkg=$(dirname "$src" | sed "s|^\./||;s|/\.vitest-reports$||;s|/|_|g"); cp "$src" ".vitest-reports/${pkg}-${basename}"' _ {} \; + + - name: Upload blob reports to GitHub Actions Artifacts + if: ${{ !cancelled() }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: webapp-blob-report-${{ matrix.shardIndex }} + path: .vitest-reports/* + include-hidden-files: true + retention-days: 1 + + merge-reports: + name: "📊 Merge Reports" + if: ${{ !cancelled() }} + needs: [unitTests] + runs-on: ubuntu-latest + steps: + - name: ⬇️ Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + persist-credentials: false + + - name: ⎔ Setup pnpm + uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 + with: + version: 10.33.2 + + - name: ⎔ Setup node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: 20.20.2 + # no cache enabled, we're not installing deps + + - name: Download blob reports from GitHub Actions Artifacts + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + path: .vitest-reports + pattern: webapp-blob-report-* + merge-multiple: true + + - name: Merge reports + run: pnpm dlx vitest@4.1.7 run --merge-reports --pass-with-no-tests diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml new file mode 100644 index 00000000000..96e76279c82 --- /dev/null +++ b/.github/workflows/unit-tests.yml @@ -0,0 +1,34 @@ +name: "🧪 Unit Tests" + +permissions: + contents: read + +on: + workflow_call: + secrets: + DOCKERHUB_USERNAME: + required: false + DOCKERHUB_TOKEN: + required: false + +jobs: + webapp: + uses: ./.github/workflows/unit-tests-webapp.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ 
secrets.DOCKERHUB_TOKEN }} + e2e-webapp: + uses: ./.github/workflows/e2e-webapp.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + packages: + uses: ./.github/workflows/unit-tests-packages.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + internal: + uses: ./.github/workflows/unit-tests-internal.yml + secrets: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} diff --git a/.github/workflows/vouch-check-pr.yml b/.github/workflows/vouch-check-pr.yml new file mode 100644 index 00000000000..d854b1e0ce6 --- /dev/null +++ b/.github/workflows/vouch-check-pr.yml @@ -0,0 +1,50 @@ +name: Vouch - Check PR + +on: + pull_request_target: # zizmor: ignore[dangerous-triggers] needed to comment/close fork PRs; safe because we never check out PR HEAD ref so no fork-controlled code runs + types: [opened, reopened] + +permissions: {} + +jobs: + check-vouch: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write # auto-close unvouched PRs + issues: read + steps: + - uses: mitchellh/vouch/action/check-pr@c6d80ead49839655b61b422700b7a3bc9d0804a9 # v1.4.2 + with: + pr-number: ${{ github.event.pull_request.number }} + auto-close: true + require-vouch: true + env: + GH_TOKEN: ${{ github.token }} + + require-draft: + needs: check-vouch + permissions: + pull-requests: write # close non-draft PRs with a comment + if: > + github.event.pull_request.draft == false && + github.event.pull_request.author_association != 'MEMBER' && + github.event.pull_request.author_association != 'OWNER' && + github.event.pull_request.author_association != 'COLLABORATOR' && + github.event.pull_request.user.login != 'devin-ai-integration[bot]' && + github.event.pull_request.user.login != 'dependabot[bot]' && + github.event.pull_request.user.login != 'github-actions[bot]' + runs-on: ubuntu-latest 
+ steps: + - name: Close non-draft PR + env: + GH_TOKEN: ${{ github.token }} + run: | + STATE=$(gh pr view ${{ github.event.pull_request.number }} --repo ${{ github.repository }} --json state -q '.state') + if [ "$STATE" != "OPEN" ]; then + echo "PR is already closed, skipping." + exit 0 + fi + gh pr close ${{ github.event.pull_request.number }} \ + --repo ${{ github.repository }} \ + --comment "Thanks for your contribution! We require all external PRs to be opened in **draft** status first so you can address CodeRabbit review comments and ensure CI passes before requesting a review. Please re-open this PR as a draft. See [CONTRIBUTING.md](https://github.com/${{ github.repository }}/blob/main/CONTRIBUTING.md#pr-workflow) for details." diff --git a/.github/workflows/vouch-manage-by-issue.yml b/.github/workflows/vouch-manage-by-issue.yml new file mode 100644 index 00000000000..51bce367b3e --- /dev/null +++ b/.github/workflows/vouch-manage-by-issue.yml @@ -0,0 +1,24 @@ +name: Vouch - Manage by Issue + +on: + issue_comment: + types: [created] + +permissions: + contents: write + issues: write + +jobs: + manage: + runs-on: ubuntu-latest + if: >- + contains(github.event.comment.body, 'vouch') || + contains(github.event.comment.body, 'denounce') || + contains(github.event.comment.body, 'unvouch') + steps: + - uses: mitchellh/vouch/action/manage-by-issue@c6d80ead49839655b61b422700b7a3bc9d0804a9 # v1.4.2 + with: + comment-id: ${{ github.event.comment.id }} + issue-id: ${{ github.event.issue.number }} + env: + GH_TOKEN: ${{ github.token }} diff --git a/.github/workflows/workflow-checks.yml b/.github/workflows/workflow-checks.yml new file mode 100644 index 00000000000..a11918c04fe --- /dev/null +++ b/.github/workflows/workflow-checks.yml @@ -0,0 +1,51 @@ +name: Workflow Checks + +on: + push: + branches: [main] + paths: + - '.github/workflows/**' + - '.github/actions/**' + - '.github/zizmor.yml' + pull_request: + paths: + - '.github/workflows/**' + - '.github/actions/**' + - 
'.github/zizmor.yml' + +permissions: {} + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + actionlint: + name: Actionlint + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Run actionlint + uses: docker://rhysd/actionlint:1.7.12@sha256:b1934ee5f1c509618f2508e6eb47ee0d3520686341fec936f3b79331f9315667 + + zizmor: + name: Zizmor + runs-on: ubuntu-latest + permissions: + security-events: write # Upload SARIF to GitHub Security tab + contents: read # Read workflow files for analysis + actions: read # Read workflow run metadata + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Run zizmor + uses: zizmorcore/zizmor-action@5f14fd08f7cf1cb1609c1e344975f152c7ee938d # v0.5.6 diff --git a/.github/zizmor.yml b/.github/zizmor.yml new file mode 100644 index 00000000000..2fcbb540127 --- /dev/null +++ b/.github/zizmor.yml @@ -0,0 +1,5 @@ +rules: + unpinned-uses: + config: + policies: + '*': hash-pin diff --git a/.gitignore b/.gitignore index 75fcfff0f45..d5f0c945ad1 100644 --- a/.gitignore +++ b/.gitignore @@ -12,10 +12,12 @@ coverage # next.js .next/ out/ -build dist packages/**/dist +# vendored bundles (generated during build) +packages/**/src/**/vendor + # Tailwind apps/**/styles/tailwind.css packages/**/styles/tailwind.css @@ -30,12 +32,10 @@ yarn-debug.log* yarn-error.log* # local env files -.env.docker +.env +.env.* .docker/*.env -.env.local -.env.development.local -.env.test.local -.env.production.local +!.env.example # turbo .turbo @@ -44,4 +44,34 @@ yarn-error.log* .env .output apps/**/public/build -.tests-container-id.txt \ No newline at end of file +.tests-container-id.txt +.sentryclirc +.buildt + +**/tmp/ +/test-results/ +/playwright-report/ 
+/playwright/.cache/ + +.cosine +.trigger +.tshy* +.yarn +*.tsbuildinfo +/packages/cli-v3/src/package.json +.husky +/packages/react-hooks/src/package.json +/packages/core/src/package.json +/packages/trigger-sdk/src/package.json +/packages/python/src/package.json +**/.claude/settings.local.json +.claude/architecture/ +.claude/docs-plans/ +.claude/review-guides/ +.claude/scheduled_tasks.lock +.mcp.log +.mcp.json +.cursor/debug.log +ailogger-output.log +# per-package vitest timing capture (transient; merged into root test-timings.json) +.vitest-timing.json diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000000..ecf08cb1a4d --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "internal-packages/otlp-importer/protos"] + path = internal-packages/otlp-importer/protos + url = https://github.com/open-telemetry/opentelemetry-proto.git diff --git a/.infisical.json b/.infisical.json new file mode 100755 index 00000000000..7668c8c4c8e --- /dev/null +++ b/.infisical.json @@ -0,0 +1 @@ +{ "workspaceId": "63e5e42daf9a537ba8d9503c" } diff --git a/.npmrc b/.npmrc deleted file mode 100644 index 4c679147a28..00000000000 --- a/.npmrc +++ /dev/null @@ -1 +0,0 @@ -link-workspace-packages=false \ No newline at end of file diff --git a/.nvmrc b/.nvmrc index fb457f39d53..c675bca8de0 100644 --- a/.nvmrc +++ b/.nvmrc @@ -1 +1 @@ -v16.19.0 +v20.20.2 diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000000..a34447dd45d --- /dev/null +++ b/.prettierignore @@ -0,0 +1,10 @@ +node_modules +.env +.env.local +pnpm-lock.yaml +tailwind.css +.babelrc.json +**/.react-email/ +**/storybook-static/ +**/.changeset/ +**/dist/ \ No newline at end of file diff --git a/integrations/github/src/types.ts b/.server-changes/.gitkeep similarity index 100% rename from integrations/github/src/types.ts rename to .server-changes/.gitkeep diff --git a/.server-changes/README.md b/.server-changes/README.md new file mode 100644 index 00000000000..2b0eeade36b --- 
/dev/null +++ b/.server-changes/README.md @@ -0,0 +1,89 @@ +# Server Changes + +This directory tracks changes to server-only components (webapp, supervisor, coordinator, etc.) that are not captured by changesets. Changesets only track published npm packages — server changes would otherwise go undocumented. + +## When to add a file + +**Server-only PRs**: If your PR only changes `apps/webapp/`, `apps/supervisor/`, `apps/coordinator/`, or other server components (and does NOT change anything in `packages/`), add a `.server-changes/` file. + +**Mixed PRs** (both packages and server): Just add a changeset as usual. No `.server-changes/` file needed — the changeset covers it. + +**Package-only PRs**: Just add a changeset as usual. + +## File format + +Create a markdown file with a descriptive name: + +``` +.server-changes/fix-batch-queue-stalls.md +``` + +With this format: + +```markdown +--- +area: webapp +type: fix +--- + +Speed up batch queue processing by removing stalls and fixing retry race +``` + +### Fields + +- **area** (required): `webapp` | `supervisor` | `coordinator` | `kubernetes-provider` | `docker-provider` +- **type** (required): `feature` | `fix` | `improvement` | `breaking` + +### Description + +The body text (below the frontmatter) is a one-line description of the change. Keep it concise — it will appear in release notes. + +### Writing guidance + +These entries are public-facing - they ship verbatim in user-visible release notes. A few rules to keep them clean: + +- **One sentence is usually enough.** The body is the bullet in the changelog. If you need a paragraph, you're probably describing the implementation rather than the change. +- **Describe behavior, not implementation.** Skip internal scopes, middleware names, library specifics, framework internals. Users care about what's different for them, not how it's wired. 
+- **Never name internal tools or infra.** Observability stacks, internal services, infra components, monitoring backends, CI surfaces, AWS specifics - none of these belong in user-facing notes. + +## Lifecycle + +1. Engineer adds a `.server-changes/` file in their PR +2. Files accumulate on `main` as PRs merge +3. The changeset release PR includes these in its summary +4. After the release merges, CI cleans up the consumed files + +## Examples + +**New feature:** + +```markdown +--- +area: webapp +type: feature +--- + +TRQL query language and the Query page +``` + +**Bug fix:** + +```markdown +--- +area: webapp +type: fix +--- + +Fix schedule limit counting for orgs with custom limits +``` + +**Improvement:** + +```markdown +--- +area: webapp +type: improvement +--- + +Use the replica for API auth queries to reduce primary load +``` diff --git a/.server-changes/bulk-action-cursor-pagination.md b/.server-changes/bulk-action-cursor-pagination.md new file mode 100644 index 00000000000..5f506493d11 --- /dev/null +++ b/.server-changes/bulk-action-cursor-pagination.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Fix run pagination that could duplicate or skip runs: the query orders by `(created_at, run_id)` but the cursor cut on `run_id` alone, which diverges when run_id order doesn't match created_at order (e.g. bulk replay re-processing runs). Cursors now encode the composite key as an opaque token and cut on the matching tuple; legacy bare-run_id cursors stay supported for in-flight pagination. diff --git a/.server-changes/cancel-stale-delayed-snapshots.md b/.server-changes/cancel-stale-delayed-snapshots.md new file mode 100644 index 00000000000..9a167c613b1 --- /dev/null +++ b/.server-changes/cancel-stale-delayed-snapshots.md @@ -0,0 +1,6 @@ +--- +area: supervisor +type: fix +--- + +Cancel pending delayed snapshots when a run completes or disconnects, preventing stale snapshots from pausing microVMs that have moved on to new work. 
diff --git a/.server-changes/compute-network-labels.md b/.server-changes/compute-network-labels.md new file mode 100644 index 00000000000..874081885d5 --- /dev/null +++ b/.server-changes/compute-network-labels.md @@ -0,0 +1,6 @@ +--- +area: supervisor +type: feature +--- + +Forward per-run identity labels to the compute provider on create and restore, letting network policy select runs (e.g. private link). diff --git a/.server-changes/compute-org-label.md b/.server-changes/compute-org-label.md new file mode 100644 index 00000000000..9306a0e2dc3 --- /dev/null +++ b/.server-changes/compute-org-label.md @@ -0,0 +1,8 @@ +--- +area: supervisor +type: improvement +--- + +Compute workload manager now sets an `org` label on every run (create + +restore) for network-policy selection, instead of a plan-gated label. The +Kubernetes workload manager is unchanged. diff --git a/.server-changes/dev-cli-disconnect.md b/.server-changes/dev-cli-disconnect.md new file mode 100644 index 00000000000..a0790d70765 --- /dev/null +++ b/.server-changes/dev-cli-disconnect.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Added `/engine/v1/dev/disconnect` endpoint to auto-cancel runs when the CLI disconnects. A maximum of 500 runs can be cancelled. Uses the bulk action system when there are more than 25 runs to cancel. \ No newline at end of file diff --git a/.server-changes/env-vars-page-scope-values-to-visible-environments.md b/.server-changes/env-vars-page-scope-values-to-visible-environments.md new file mode 100644 index 00000000000..067c04661b7 --- /dev/null +++ b/.server-changes/env-vars-page-scope-values-to-visible-environments.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Speed up the environment variables page for projects with many archived preview branches. The page now only loads variable values for the environments it displays instead of every value ever created, including those left behind by archived branches.
diff --git a/.server-changes/hipaa-addon-pricing-cta.md b/.server-changes/hipaa-addon-pricing-cta.md new file mode 100644 index 00000000000..8dc4a41f8b2 --- /dev/null +++ b/.server-changes/hipaa-addon-pricing-cta.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Request a HIPAA BAA add-on directly from any paid pricing tier in the dashboard. diff --git a/.server-changes/include-prisma-cli-in-prod-image.md b/.server-changes/include-prisma-cli-in-prod-image.md new file mode 100644 index 00000000000..888544239fe --- /dev/null +++ b/.server-changes/include-prisma-cli-in-prod-image.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Fix database migrations failing to run in the production image because the Prisma CLI was missing from the build. diff --git a/.server-changes/mollifier-decision-enrolled-org-labels.md b/.server-changes/mollifier-decision-enrolled-org-labels.md new file mode 100644 index 00000000000..b9e8a11f84a --- /dev/null +++ b/.server-changes/mollifier-decision-enrolled-org-labels.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Add bounded `enrolled` and `org` labels to the `mollifier.decisions` metric so per-enrolled-org pass-through vs mollify is visible (the `org` label is attached only for the enrolled cohort to keep cardinality bounded). diff --git a/.server-changes/react-router-route-matching-perf.md b/.server-changes/react-router-route-matching-perf.md new file mode 100644 index 00000000000..a264835af55 --- /dev/null +++ b/.server-changes/react-router-route-matching-perf.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Speed up the dashboard and API under high request load by memoizing react-router's per-request route matching, which previously re-flattened, re-ranked, and recompiled the entire route table on every request. 
diff --git a/.server-changes/realtime-replica-read-consistency.md b/.server-changes/realtime-replica-read-consistency.md new file mode 100644 index 00000000000..d23c73e682d --- /dev/null +++ b/.server-changes/realtime-replica-read-consistency.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Realtime feed reads now wait out measured read-replica lag and retry stale reads, so subscribers receive each change's current content instead of trailing one change behind when a read replica races the write. diff --git a/.server-changes/realtime-runs-subscription-scalability.md b/.server-changes/realtime-runs-subscription-scalability.md new file mode 100644 index 00000000000..5de00aae675 --- /dev/null +++ b/.server-changes/realtime-runs-subscription-scalability.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Add a new backend for the realtime runs feed (single runs, tags, and batches) that scales under high concurrency, available behind a feature flag diff --git a/.server-changes/require-plugins-fail-fast.md b/.server-changes/require-plugins-fail-fast.md new file mode 100644 index 00000000000..591cd47c402 --- /dev/null +++ b/.server-changes/require-plugins-fail-fast.md @@ -0,0 +1,8 @@ +--- +area: webapp +type: feature +--- + +Add `REQUIRE_PLUGINS=1` env var. When set, the RBAC plugin loader throws instead of silently falling back to the default implementation if the plugin module fails to load (missing, broken transitive dep, etc.). The webapp's `/healthcheck` route now resolves the lazy plugin controller so the throw surfaces during readiness probes — a deploy where the plugin didn't load fails the probe and is rolled back. + +Self-hosters leave `REQUIRE_PLUGINS` unset and continue to use the fallback when no plugin is installed. 
diff --git a/.server-changes/retry-transient-instance-create-failures.md b/.server-changes/retry-transient-instance-create-failures.md new file mode 100644 index 00000000000..f7b9c7afd11 --- /dev/null +++ b/.server-changes/retry-transient-instance-create-failures.md @@ -0,0 +1,6 @@ +--- +area: supervisor +type: fix +--- + +Retry transient instance create failures during cold starts instead of waiting minutes for the run to be requeued. diff --git a/.server-changes/runs-backward-pagination-slice.md b/.server-changes/runs-backward-pagination-slice.md new file mode 100644 index 00000000000..41695f4e159 --- /dev/null +++ b/.server-changes/runs-backward-pagination-slice.md @@ -0,0 +1,14 @@ +--- +area: webapp +type: fix +--- + +Fix an off-by-one in `ClickHouseRunsRepository.listRunIds` backward pagination. +When paging backward with more rows before the page (`hasMore`), the displayed +page was sliced as `rows.slice(1, size + 1)`, which dropped the row closest to +the cursor and kept the extra "has-more" sentinel — returning a page that +straddled two logical pages (one row from the correct previous page plus one +from the page before it). The result set is always the first `page.size` rows +(the sentinel is the trailing element in both directions), so the slice is now +`rows.slice(0, size)` for forward and backward alike. Forward pagination and the +cursor values were already correct and are unchanged. 
diff --git a/.server-changes/runs-bulk-action-no-reload.md b/.server-changes/runs-bulk-action-no-reload.md new file mode 100644 index 00000000000..1926ab20b75 --- /dev/null +++ b/.server-changes/runs-bulk-action-no-reload.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Stop reloading the runs list when opening or closing the bulk action inspector diff --git a/.server-changes/sanitize-agent-view-urls.md b/.server-changes/sanitize-agent-view-urls.md new file mode 100644 index 00000000000..c534a03623d --- /dev/null +++ b/.server-changes/sanitize-agent-view-urls.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Sanitize URLs from streamed agent and tool data before rendering them in the dashboard's Agent view, so an unsafe scheme such as `javascript:` can no longer produce a clickable link or image source. diff --git a/.server-changes/scheduled-run-region-display.md b/.server-changes/scheduled-run-region-display.md new file mode 100644 index 00000000000..dca41c4341e --- /dev/null +++ b/.server-changes/scheduled-run-region-display.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Scheduled runs now show under their correct region in the dashboard, run details, and the API, and match region filters, instead of appearing under a separate region. diff --git a/.server-changes/session-route-hardening.md b/.server-changes/session-route-hardening.md new file mode 100644 index 00000000000..2734b35a784 --- /dev/null +++ b/.server-changes/session-route-hardening.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Hardening fixes for realtime sessions: stricter authorization on snapshot URLs and out-channel appends, environment-scoped message delivery for waiting runs, and idempotent appends via the X-Part-Id header. Session creation now rejects expired sessions, externalId can no longer be changed after creation, and the sessions list returns friendly run ids. 
diff --git a/.server-changes/snapshots-since-replica-primary-fallback.md b/.server-changes/snapshots-since-replica-primary-fallback.md new file mode 100644 index 00000000000..9b8257f6410 --- /dev/null +++ b/.server-changes/snapshots-since-replica-primary-fallback.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Run snapshot polling no longer errors or pays extra latency when the database read replica hasn't yet replicated the snapshot the runner is polling from (`RUN_ENGINE_READ_REPLICA_SNAPSHOTS_SINCE_ENABLED`): the read is briefly retried on the replica and served from the primary if it still hasn't caught up. Polling also now rejects a since-snapshot id that doesn't belong to the run being polled. diff --git a/.server-changes/trace-export-formats.md b/.server-changes/trace-export-formats.md new file mode 100644 index 00000000000..ff15483003f --- /dev/null +++ b/.server-changes/trace-export-formats.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Export a run's full trace from the run page as a downloadable Log, Markdown, or JSON Lines file, or copy it to the clipboard for pasting into an AI assistant. The export streams straight from the store, so even very large runs export reliably. diff --git a/.server-changes/trace-page-payload-diet.md b/.server-changes/trace-page-payload-diet.md new file mode 100644 index 00000000000..9f84e4b22db --- /dev/null +++ b/.server-changes/trace-page-payload-diet.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Shrinks the run trace page loader payload by keeping raw span events server-side and makes large trace trees render more efficiently. Also adds an optional `TRACE_VIEW_EMERGENCY_SPAN_CAP` env var that clamps trace summary and detailed summary span limits on both event store paths. 
diff --git a/.server-changes/trigger-worker-queue-db-error-leak.md b/.server-changes/trigger-worker-queue-db-error-leak.md new file mode 100644 index 00000000000..9725ef9f2eb --- /dev/null +++ b/.server-changes/trigger-worker-queue-db-error-leak.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Stop `trigger()` from leaking raw database connection errors to API clients during a database outage; infrastructure errors now return a generic, retryable 500. diff --git a/.vouch.yml b/.vouch.yml new file mode 100644 index 00000000000..ec6e85aa705 --- /dev/null +++ b/.vouch.yml @@ -0,0 +1,4 @@ +vouch: + - github: edosrecki + - github: GautamBytes + - github: ConProgramming diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 00000000000..ec85d436e9a --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,4 @@ +{ + "recommendations": ["bierner.comment-tagged-templates"], + "unwantedRecommendations": [] +} diff --git a/.vscode/launch.json b/.vscode/launch.json index 5578de126bb..1044443e197 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -5,18 +5,79 @@ "version": "0.2.0", "configurations": [ { + "type": "node-terminal", + "request": "launch", + "name": "Debug WebApp", "command": "pnpm run dev --filter webapp", - "name": "Run webapp", + "envFile": "${workspaceFolder}/.env", + "cwd": "${workspaceFolder}", + "sourceMaps": true + }, + { + "type": "node-terminal", "request": "launch", + "name": "Debug realtimeStreams.test.ts", + "command": "pnpm run test -t RealtimeStreams", + "envFile": "${workspaceFolder}/.env", + "cwd": "${workspaceFolder}/apps/webapp", + "sourceMaps": true + }, + { "type": "node-terminal", - "cwd": "${workspaceFolder}" + "request": "launch", + "name": "Debug triggerTask.test.ts", + "command": "pnpm run test --run ./test/engine/triggerTask.test.ts", + "envFile": "${workspaceFolder}/.env", + "cwd": "${workspaceFolder}/apps/webapp", + "sourceMaps": true + }, + { + "type": "node-terminal", + "request": 
"launch", + "name": "Debug opened test file", + "command": "pnpm run test -- ./${relativeFile}", + "envFile": "${workspaceFolder}/.env", + "cwd": "${workspaceFolder}", + "sourceMaps": true }, { "type": "chrome", "request": "launch", "name": "Chrome webapp", - "url": "http://localhost:3000", + "url": "http://localhost:3030", "webRoot": "${workspaceFolder}/apps/webapp/app" + }, + { + "type": "node", + "request": "attach", + "name": "Attach to Trigger.dev CLI (v3)", + "port": 9229, + "restart": true, + "skipFiles": ["<node_internals>/**"] + }, + { + "type": "node-terminal", + "request": "launch", + "name": "Debug CLI e2e tests", + "command": "MOD=otel-telemetry-loader pnpm run test:e2e", + "cwd": "${workspaceFolder}/packages/cli-v3", + "sourceMaps": true + }, + { + "type": "node-terminal", + "request": "launch", + "name": "Debug RunEngine tests", + "command": "pnpm run test ./src/engine/tests/releaseConcurrencyTokenBucketQueue.test.ts -t 'Should retrieve metrics for all queues via getQueueMetrics'", + "cwd": "${workspaceFolder}/internal-packages/run-engine", + "sourceMaps": true + }, + { + "type": "node-terminal", + "request": "launch", + "name": "Debug RunQueue tests", + "command": "pnpm run test ./src/run-queue/index.test.ts --run", + "cwd": "${workspaceFolder}/internal-packages/run-engine", + "sourceMaps": true } ] } diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000000..f969bb6d5de --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,11 @@ +{ + "deno.enablePaths": ["runtime_tests/tests/deno"], + "debug.toolBarLocation": "commandCenter", + "typescript.tsdk": "node_modules/typescript/lib", + "search.exclude": { + "**/node_modules/**": true, + "packages/cli-v3/e2e": true + }, + "vitest.disableWorkspaceWarning": true, + "chat.agent.maxRequests": 10000 +} diff --git a/.warp/triggerdotdev.yaml.example b/.warp/triggerdotdev.yaml.example deleted file mode 100644 index b2bd5aea7f3..00000000000 --- a/.warp/triggerdotdev.yaml.example +++ /dev/null
@@ -1,61 +0,0 @@ -# Warp Launch Configuration -# -# -# Use this to start a certain configuration of windows, tabs, and panes -# Open the launch configuration palette to access and open any launch configuration -# -# This file defines your launch configuration -# More on how to do so here: -# https://docs.warp.dev/features/session-management/launch-configurations -# -# All launch configurations are stored under ~/.warp/launch_configurations/ -# Edit them anytime! -# -# You can also add commands that run on-start for your launch configurations like so: -# --- -# name: Example with Command -# windows: -# - tabs: -# - layout: -# cwd: /Users/warp-user/project -# commands: -# - exec: code . - ---- -name: Triggerdotdev -windows: - - tabs: - - title: webapp - layout: - split_direction: vertical - panes: - - cwd: /Users/eric/code/triggerdotdev/trigger.dev - commands: - - exec: pnpm run dev --filter webapp - - cwd: /Users/eric/code/triggerdotdev/trigger.dev/apps/webapp - - title: ngrok - layout: - split_direction: horizontal - panes: - - cwd: /Users/eric/code/triggerdotdev/trigger.dev - commands: - - exec: ./scripts/proxy-webapp.sh - - cwd: /Users/eric/code/triggerdotdev/trigger.dev - commands: - - exec: ./scripts/proxy-pizzly.sh - - title: wss and examples - layout: - split_direction: horizontal - panes: - - cwd: /Users/eric/code/triggerdotdev/trigger.dev/apps/wss - commands: - - exec: LOG_LEVEL=debug pnpm run dev - - cwd: /Users/eric/code/triggerdotdev/trigger.dev/examples - - title: emails - layout: - split_direction: horizontal - panes: - - cwd: /Users/eric/Work/Git/APIHero/trigger.dev/packages/emails - commands: - - exec: pnpm run dev - - cwd: /Users/eric/Work/Git/APIHero/trigger.dev/packages/emails diff --git a/.zed/tasks.json b/.zed/tasks.json new file mode 100644 index 00000000000..8612e16bfb1 --- /dev/null +++ b/.zed/tasks.json @@ -0,0 +1,45 @@ +[ + { + "label": "Build packages", + "command": "pnpm run build --filter \"@trigger.dev/*\" --filter trigger.dev", + 
//"args": [], + // Env overrides for the command, will be appended to the terminal's environment from the settings. + "env": { "foo": "bar" }, + // Current working directory to spawn the command into, defaults to current project root. + //"cwd": "/path/to/working/directory", + // Whether to use a new terminal tab or reuse the existing one to spawn the process, defaults to `false`. + "use_new_terminal": false, + // Whether to allow multiple instances of the same task to be run, or rather wait for the existing ones to finish, defaults to `false`. + "allow_concurrent_runs": false, + // What to do with the terminal pane and tab, after the command was started: + // * `always` — always show the task's pane, and focus the corresponding tab in it (default) + // * `no_focus` — always show the task's pane, add the task's tab in it, but don't focus it + // * `never` — do not alter focus, but still add/reuse the task's tab in its pane + "reveal": "always", + // What to do with the terminal pane and tab, after the command has finished: + // * `never` — Do nothing when the command finishes (default) + // * `always` — always hide the terminal tab, hide the pane also if it was the last tab in it + // * `on_success` — hide the terminal tab on task success only, otherwise behaves similar to `always` + "hide": "never", + // Which shell to use when running a task inside the terminal. + // May take 3 values: + // 1. (default) Use the system's default terminal configuration in /etc/passwd + // "shell": "system" + // 2. A program: + // "shell": { + // "program": "sh" + // } + // 3. A program with arguments: + // "shell": { + // "with_arguments": { + // "program": "/bin/bash", + // "args": ["--login"] + // } + // } + "shell": "system", + // Whether to show the task line in the output of the spawned task, defaults to `true`. + "show_summary": true, + // Whether to show the command line in the output of the spawned task, defaults to `true`. 
+ "show_output": true + } +] diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000000..8ff9f18663c --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,69 @@ +# Guidance for Coding Agents + +This repository is a pnpm monorepo managed with Turbo. It contains multiple apps and packages that make up the Trigger.dev platform and SDK. + +## Repository layout +- `apps/webapp` – Remix application that serves as the main API and dashboard. +- `apps/supervisor` – Node application for executing built tasks. +- `packages/*` – Published packages such as `@trigger.dev/sdk`, the CLI (`trigger.dev`), and shared libraries. +- `internal-packages/*` – Internal-only packages used by the webapp and other apps. +- Example/reference projects for manual testing live in a separate repo: [`triggerdotdev/references`](https://github.com/triggerdotdev/references). +- `ai/references` – Contains additional documentation including an overview (`repo.md`) and testing guidelines (`tests.md`). + +See `ai/references/repo.md` for a more complete explanation of the workspaces. + +## Development setup +1. Install dependencies with `pnpm i` (pnpm `10.33.2` and Node.js `20.20.2` are required). +2. Copy `.env.example` to `.env` and generate a random 16 byte hex string for `ENCRYPTION_KEY` (`openssl rand -hex 16`). Update other secrets if needed. +3. Start the local services with Docker: + ```bash + pnpm run docker + ``` + Add `:full` (`pnpm run docker:full`) for the optional observability + chaos tooling. See `docker/docker-compose.extras.yml`. +4. Run database migrations: + ```bash + pnpm run db:migrate + ``` +5. Build the webapp, CLI and SDK packages: + ```bash + pnpm run build --filter webapp && pnpm run build --filter trigger.dev && pnpm run build --filter @trigger.dev/sdk + ``` +6. Launch the development server: + ```bash + pnpm run dev --filter webapp + ``` + The webapp runs on http://localhost:3030. + +For full setup instructions see `CONTRIBUTING.md`. + +## Running tests +- Unit tests use **vitest**.
Run all tests: + ```bash + pnpm run test + ``` +- Run tests for a specific workspace (example for `webapp`): + ```bash + pnpm run test --filter webapp + ``` +- Prefer running a single test file from within its directory: + ```bash + cd apps/webapp + pnpm run test ./src/components/Button.test.ts + ``` + If packages in that workspace need to be built first, run `pnpm run build --filter webapp`. + +Refer to `ai/references/tests.md` for details on writing tests. Tests should avoid mocks or stubs and use the helpers from `@internal/testcontainers` when Redis or Postgres are needed. + +## Coding style +- Formatting is enforced using Prettier. Run `pnpm run format` before committing. +- Follow the existing project conventions. Test files live beside the files under test and use descriptive `describe` and `it` blocks. +- Do not commit directly to the `main` branch. All changes should be made in a separate branch and go through a pull request. + +## Additional docs +- The root `README.md` describes Trigger.dev and links to documentation. +- The `docs` workspace contains our documentation site, which can be run locally with: + ```bash + pnpm run dev --filter docs + ``` +- The [`triggerdotdev/references`](https://github.com/triggerdotdev/references) repo's README explains how to create new reference projects for manual testing. + diff --git a/CHANGESETS.md b/CHANGESETS.md index d1d9a6dc5d4..2e225b9ad34 100644 --- a/CHANGESETS.md +++ b/CHANGESETS.md @@ -1,26 +1,70 @@ -# Changesets +# Changesets and Server Changes -Trigger.dev uses [changesets](https://github.com/changesets/changesets) to manage updated our packages and releasing them to npm. +Trigger.dev uses [changesets](https://github.com/changesets/changesets) to manage package versions and releasing them to npm. For server-only changes, we use a lightweight `.server-changes/` convention. 
-## Adding a changeset +## Adding a changeset (package changes) -To add a changeset, use `pnpm run changeset:add` and follow the instructions [here](https://github.com/changesets/changesets/blob/main/docs/adding-a-changeset.md). Please only ever select one of our public packages when adding a changeset, which currently are: +To add a changeset, use `pnpm run changeset:add` and follow the instructions [here](https://github.com/changesets/changesets/blob/main/docs/adding-a-changeset.md). Please only ever select one of our public packages when adding a changeset. -- `@trigger.dev/sdk` -- `@trigger.dev/integration-sdk` -- `@trigger.dev/github` -- `@trigger.dev/slack` -- `@trigger.dev/shopify` -- `@trigger.dev/resend` +## Adding a server change (server-only changes) -## Release instructions +If your PR only changes server components (`apps/webapp/`, `apps/supervisor/`, etc.) and does NOT change any published packages, add a `.server-changes/` file instead of a changeset: -Based on the instructions [here](https://github.com/changesets/changesets/blob/main/docs/intro-to-using-changesets.md) +```sh +cat > .server-changes/fix-batch-queue-stalls.md << 'EOF' +--- +area: webapp +type: fix +--- -1. Run `pnpm run changeset:version` -2. Run `pnpm run changeset:release` +Speed up batch queue processing by removing stalls and fixing retry race +EOF +``` + +- `area`: `webapp` | `supervisor` | `coordinator` | `kubernetes-provider` | `docker-provider` +- `type`: `feature` | `fix` | `improvement` | `breaking` + +For **mixed PRs** (both packages and server): just add a changeset. No `.server-changes/` file needed. + +See `.server-changes/README.md` for full documentation. 
+ +## When to add which + +| PR changes | What to add | +|---|---| +| Only packages (`packages/`) | Changeset (`pnpm run changeset:add`) | +| Only server (`apps/`) | `.server-changes/` file | +| Both packages and server | Just the changeset | + +## Release instructions (CI) + +Please follow the best-practice of adding changesets in the same commit as the code making the change with `pnpm run changeset:add`, as it will allow our release.yml CI workflow to function properly: + +- Anytime new changesets are added in a commit in the `main` branch, the [changesets-pr.yml](./.github/workflows/changesets-pr.yml) workflow will run and will automatically create/update a PR with a fresh run of `pnpm run changeset:version`. +- The release PR body is automatically enhanced with a clean, deduplicated summary that includes both package changes and `.server-changes/` entries. +- Consumed `.server-changes/` files are removed on the `changeset-release/main` branch — the same way changesets deletes `.changeset/*.md` files. When the release PR merges, they're gone from main. +- When the version PR is merged into `main`, the [release.yml](./.github/workflows/release.yml) workflow will automatically build, release packages to npm, and create a single unified GitHub release. ## Pre-release instructions -- Switch into pre-release mode by running `pnpm run changeset:next`. -- Switch back into normal mode by running `pnpm run changeset:normal`. +1. Add changesets as usual `pnpm run changeset:add` +2. Switch to pre-release mode by running `pnpm run changeset:next` +3. Create version `pnpm run changeset:version` +4. Release `pnpm run changeset:release` +5. Switch back to normal mode by running `pnpm run changeset:normal` + +## Snapshot instructions + +1. Update the `.changeset/config.json` file to set the `"changelog"` field to this: + +```json +"changelog": "@changesets/cli/changelog", +``` + +2. Do a temporary commit (do NOT push this, you should undo it after) + +3. 
Run `./scripts/publish-prerelease.sh prerelease` + +You can choose a different tag if you want, but usually `prerelease` is fine. + +4. Undo the commit where you updated the config.json file. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000000..c0fd82fb368 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,264 @@ +# CLAUDE.md + +This file provides guidance to Claude Code when working with this repository. Subdirectory CLAUDE.md files provide deeper context when you navigate into specific areas. + +## Build and Development Commands + +This is a pnpm 10.33.2 monorepo using Turborepo. Run commands from root with `pnpm run`. + +**Adding dependencies:** Edit `package.json` directly instead of using `pnpm add`, then run `pnpm i` from the repo root. See `.claude/rules/package-installation.md` for the full process. + +```bash +pnpm run docker # Core dev services (Postgres, Redis, Electric, MinIO, ClickHouse, s2-lite) +# pnpm run docker:full # Same + observability stack (Prometheus, Grafana, OTEL) and chaos tooling +pnpm run db:migrate # Run database migrations +pnpm run db:seed # Seed the database (required for reference projects) + +# Build packages (required before running) +pnpm run build --filter webapp && pnpm run build --filter trigger.dev && pnpm run build --filter @trigger.dev/sdk + +pnpm run dev --filter webapp # Run webapp (http://localhost:3030) +pnpm run dev --filter trigger.dev --filter "@trigger.dev/*" # Watch CLI and packages +``` + +### Verifying Changes + +The verification command depends on where the change lives: + +- **Apps and internal packages** (`apps/*`, `internal-packages/*`): Use `typecheck`. **Never use `build`** for these — building proves almost nothing about correctness. +- **Public packages** (`packages/*`): Use `build`.
+ +```bash +# Apps and internal packages — use typecheck +pnpm run typecheck --filter webapp # ~1-2 minutes +pnpm run typecheck --filter @internal/run-engine + +# Public packages — use build +pnpm run build --filter @trigger.dev/sdk +pnpm run build --filter @trigger.dev/core +``` + +Only run typecheck/build after major changes (new files, significant refactors, schema changes). For small edits, trust the types and let CI catch issues. + +## Testing + +We use vitest exclusively. **Never mock anything** - use testcontainers instead. + +```bash +pnpm run test --filter webapp # All tests for a package +cd internal-packages/run-engine +pnpm run test ./src/engine/tests/ttl.test.ts --run # Single test file +pnpm run build --filter @internal/run-engine # May need to build deps first +``` + +Test files go next to source files (e.g., `MyService.ts` -> `MyService.test.ts`). + +### Testcontainers for Redis/PostgreSQL + +```typescript +import { redisTest, postgresTest, containerTest } from "@internal/testcontainers"; + +redisTest("should use redis", async ({ redisOptions }) => { + /* ... */ +}); +postgresTest("should use postgres", async ({ prisma }) => { + /* ... */ +}); +containerTest("should use both", async ({ prisma, redisOptions }) => { + /* ... */ +}); +``` + +## Code Style + +### Imports + +**Prefer static imports over dynamic imports.** Only use dynamic `import()` when: +- Circular dependencies cannot be resolved otherwise +- Code splitting is genuinely needed for performance +- The module must be loaded conditionally at runtime + +Dynamic imports add unnecessary overhead in hot paths and make code harder to analyze. If you find yourself using `await import()`, ask if a regular `import` statement would work instead. 
+ +## Changesets and Server Changes + +When modifying any public package (`packages/*` or `integrations/*`), add a changeset: + +```bash +pnpm run changeset:add +``` + +- Default to **patch** for bug fixes and minor changes +- Confirm with maintainers before selecting **minor** (new features) +- **Never** select major without explicit approval + +When modifying only server components (`apps/webapp/`, `apps/supervisor/`, etc.) with no package changes, add a `.server-changes/` file instead. See `.server-changes/README.md` for format and documentation. + +## Dependency Pinning + +Zod is pinned to a single version across the entire monorepo (currently `3.25.76`). When adding zod to a new or existing package, use the **exact same version** as the rest of the repo - never a different version or a range. Mismatched zod versions cause runtime type incompatibilities (e.g., schemas from one package can't be used as body validators in another). + +## Architecture Overview + +### Request Flow + +User API call -> Webapp routes -> Services -> RunEngine -> Redis Queue -> Supervisor -> Container execution -> Results back through RunEngine -> ClickHouse (analytics) + PostgreSQL (state) + +### Apps + +- **apps/webapp**: Remix 2.17.4 app - main API, dashboard, orchestration. Uses Express server. +- **apps/supervisor**: Manages task execution containers (Docker/Kubernetes). + +### Public Packages + +- **packages/trigger-sdk** (`@trigger.dev/sdk`): Main SDK for writing tasks +- **packages/cli-v3** (`trigger.dev`): CLI - also bundles code that goes into customer task images +- **packages/core** (`@trigger.dev/core`): Shared types. **Import subpaths only** (never root). 
+- **packages/build** (`@trigger.dev/build`): Build extensions and types +- **packages/react-hooks**: React hooks for realtime and triggering +- **packages/redis-worker** (`@trigger.dev/redis-worker`): Redis-based background job system + +### Internal Packages + +- **internal-packages/database**: Prisma 6.14.0 client and schema (PostgreSQL) +- **internal-packages/clickhouse**: ClickHouse client, schema migrations, analytics queries +- **internal-packages/run-engine**: "Run Engine 2.0" - core run lifecycle management +- **internal-packages/redis**: Redis client creation utilities (ioredis) +- **internal-packages/testcontainers**: Test helpers for Redis/PostgreSQL containers +- **internal-packages/schedule-engine**: Durable cron scheduling +- **internal-packages/zodworker**: Graphile-worker wrapper (DEPRECATED - use redis-worker) + +### Legacy V1 Engine Code + +The `apps/webapp/app/v3/` directory name is misleading - most code there is actively used by V2. Only specific files are V1-only legacy (MarQS queue, triggerTaskV1, cancelTaskRunV1, etc.). See `apps/webapp/CLAUDE.md` for the exact list. When you encounter V1/V2 branching in services, only modify V2 code paths. All new work uses Run Engine 2.0 (`@internal/run-engine`) and redis-worker. + +### Documentation + +Docs live in `docs/` as a Mintlify site (MDX format). See `docs/CLAUDE.md` for conventions. + +### Reference Projects + +Reference/example projects for testing SDK and platform features live in a separate repo: [`triggerdotdev/references`](https://github.com/triggerdotdev/references). Clone it alongside this repo and use its `projects/hello-world` to manually test changes before submitting PRs. See that repo's README for setup and linking to a local monorepo build. 
+ +## Docker Image Guidelines + +When updating Docker image references: + +- **Always use multiplatform/index digests**, not architecture-specific digests +- Architecture-specific digests cause CI failures on different build environments +- Use the digest from the main Docker Hub page, not from a specific OS/ARCH variant + +## Writing Trigger.dev Tasks + +Always import from `@trigger.dev/sdk`. Never use `@trigger.dev/sdk/v3` or deprecated `client.defineJob`. + +```typescript +import { task } from "@trigger.dev/sdk"; + +export const myTask = task({ + id: "my-task", + run: async (payload: { message: string }) => { + // Task logic + }, +}); +``` + +### SDK Documentation Rules + +The `rules/` directory contains versioned SDK documentation distributed via the SDK installer. Current version: `rules/manifest.json`. Do NOT update `rules/` or `.claude/skills/trigger-dev-tasks/` unless explicitly asked - these are maintained in separate dedicated passes. + +## Testing with the hello-world Reference Project + +The reference projects live in the separate [`triggerdotdev/references`](https://github.com/triggerdotdev/references) repo - clone it alongside this repo. + +First-time setup: + +1. `pnpm run db:seed` to seed the database (creates the References org + hello-world project) +2. Build the CLI/packages you want to test: `pnpm run build --filter trigger.dev` +3. 
In your `references` clone, follow its README to link to your local monorepo build, then authorize: `cd projects/hello-world && pnpm exec trigger login -a http://localhost:3030` + +Running (from your `references` clone): `cd projects/hello-world && pnpm exec trigger dev` + +## Local Task Testing Workflow + +### Step 1: Start Webapp in Background + +```bash +# Run from repo root with run_in_background: true +pnpm run dev --filter webapp +curl -s http://localhost:3030/healthcheck # Verify running +``` + +### Step 2: Start Trigger Dev in Background + +```bash +# in your triggerdotdev/references clone +cd projects/hello-world && pnpm exec trigger dev +# Wait for "Local worker ready [node]" +``` + +### Step 3: Trigger and Monitor Tasks via MCP + +``` +mcp__trigger__get_current_worker(projectRef: "proj_rrkpdguyagvsoktglnod", environment: "dev") +mcp__trigger__trigger_task(projectRef: "proj_rrkpdguyagvsoktglnod", environment: "dev", taskId: "hello-world", payload: {"message": "Hello"}) +mcp__trigger__list_runs(projectRef: "proj_rrkpdguyagvsoktglnod", environment: "dev", taskIdentifier: "hello-world", limit: 5) +``` + +Dashboard: http://localhost:3030/orgs/references-9dfd/projects/hello-world-97DT/env/dev/runs + + + +# Skill mappings — when working in these areas, load the linked skill file into context. + +skills: + +- task: "Using agentcrumbs for debug tracing, adding crumbs, trails, markers, querying traces, or stripping debug code before merge" + load: "node_modules/agentcrumbs/skills/agentcrumbs/SKILL.md" +- task: "Setting up agentcrumbs in the project, initializing namespace catalog, running crumbs init" +load: "node_modules/agentcrumbs/skills/agentcrumbs/init/SKILL.md" + + +## agentcrumbs + +Add crumbs as you write code — not just when debugging. Mark lines with +`// @crumbs` or wrap blocks in `// #region @crumbs`. They stay on the +branch throughout development and are stripped by `agentcrumbs strip` +before merge. 
+ +### Namespaces + +| Namespace | Description | Path | +| ----------------- | ---------------------------------------------- | ----------------------------------- | +| `webapp` | Main Remix app — API, dashboard, orchestration | `apps/webapp` | +| `supervisor` | Task execution container manager | `apps/supervisor` | +| `sdk` | Main SDK for writing tasks | `packages/trigger-sdk` | +| `cli` | CLI and task image bundling | `packages/cli-v3` | +| `core` | Shared types and subpath exports | `packages/core` | +| `build` | Build extensions and types | `packages/build` | +| `react-hooks` | React hooks for realtime/triggering | `packages/react-hooks` | +| `redis-worker` | Redis-based background job system | `packages/redis-worker` | +| `run-engine` | Run Engine 2.0 — core run lifecycle | `internal-packages/run-engine` | +| `database` | Prisma client and schema | `internal-packages/database` | +| `clickhouse` | ClickHouse client, migrations, analytics | `internal-packages/clickhouse` | +| `schedule-engine` | Durable cron scheduling | `internal-packages/schedule-engine` | +| `run-queue` | Run queue management | `internal-packages/run-queue` | + +Do not invent new namespaces — pick from this table or ask first. + +### For PR reviewers + +Do not flag `// @crumbs` markers, `#region @crumbs` blocks, or agentcrumbs +imports in reviews. These are temporary debug instrumentation stripped +before merge. Data logged in crumbs (IDs, names, values) never reaches +production. + +### CLI + +```bash +pnpm exec agentcrumbs collect # start collector (multi-service) +pnpm exec agentcrumbs tail --app trigger # live tail +pnpm exec agentcrumbs clear --app trigger # remove crumbs before merge +``` + +The preferred way to query for crumbs is to use `pnpm exec agentcrumbs query --app trigger` with the `--limit` option and cursor pagination, and clear existing crumbs before reproducing a bug via `pnpm exec agentcrumbs clear --app trigger`. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000000..32c9b3a9b11 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,43 @@ +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery and unwelcome sexual attention or advances +- Trolling, insulting/derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or electronic address, without explicit permission +- Other conduct that could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
+ +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned with this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project email address, posting via an official social media account, or acting as an appointed representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. + +Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the Contributor Covenant, version 2.0, available at [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html](https://www.contributor-covenant.org/version/2/0/code_of_conduct.html). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..cddb974417d --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,316 @@ +# Contributing to Trigger.dev + +Thank you for taking the time to contribute to Trigger.dev. Your involvement is not just welcomed, but we encourage it! 🚀 + +Please take some time to read this guide to understand contributing best practices for Trigger.dev.
Note that we use [vouch](https://github.com/mitchellh/vouch) to manage contributor trust, so you'll need to be vouched before opening a PR. + +Thank you for helping us make Trigger.dev even better! 🤩 + +> **Important:** We only accept PRs that address a single issue. Please do not submit PRs containing multiple unrelated fixes or features. If you have multiple contributions, open a separate PR for each one. + +## Getting vouched (required before opening a PR) + +We use [vouch](https://github.com/mitchellh/vouch) to manage contributor trust. **PRs from unvouched users are automatically closed.** + +Before you open your first pull request, you need to be vouched by a maintainer. Here's how: + +1. Open a [Vouch Request](https://github.com/triggerdotdev/trigger.dev/issues/new?template=vouch-request.yml) issue. +2. Tell us what you'd like to work on and share any relevant background. +3. A maintainer will review your request and vouch for you by commenting on the issue. +4. Once vouched, your PRs will be accepted normally. + +If you're unsure whether you're already vouched, go ahead and open a PR — the check will tell you. + +## Developing + +The development branch is `main`. This is the branch that all pull +requests should be made against. The changes on the `main` +branch are tagged into a release periodically. + +### Prerequisites + +- [Node.js](https://nodejs.org/en) version 20.20.2 +- [pnpm package manager](https://pnpm.io/installation) version 10.33.2 +- [Docker](https://www.docker.com/get-started/) +- [protobuf](https://github.com/protocolbuffers/protobuf) + +### Setup + +1. Clone the repo into a public GitHub repository or [fork the repo](https://github.com/triggerdotdev/trigger.dev/fork). If you plan to distribute the code, keep the source code public to comply with the [Apache Licence 2.0](https://github.com/triggerdotdev/trigger.dev/blob/main/LICENSE). 
+ + ``` + git clone https://github.com/YOUR_USERNAME/trigger.dev.git + ``` + + > If you are on Windows, run the following command in Git Bash with admin privileges: + > `git clone -c core.symlinks=true https://github.com/YOUR_USERNAME/trigger.dev.git` + +2. Navigate to the project folder + ``` + cd trigger.dev + ``` +3. Ensure you are on the correct version of Node.js (20.20.2). If you are using `nvm`, there is an `.nvmrc` file that will automatically select the correct version of Node.js when you navigate to the repository. + +4. Run `corepack enable` to use the correct version of pnpm (`10.33.2`) as specified in the root `package.json` file. + +5. Install the required packages using pnpm. + ``` + pnpm i + ``` +6. Create your `.env` file + ``` + cp .env.example .env + ``` +7. Open it and generate a new value for `ENCRYPTION_KEY`: + + `ENCRYPTION_KEY` is used to two-way encrypt OAuth access tokens and so you'll probably want to actually generate a unique value, and it must be a random 16 byte hex string. You can generate one with the following command: + + ```sh + openssl rand -hex 16 + ``` + + Feel free to update `SESSION_SECRET` and `MAGIC_LINK_SECRET` as well using the same method. + +8. Start Docker. This starts the core dev services (Postgres, Redis, Electric, MinIO, ClickHouse, s2-lite) and runs the ClickHouse migrator once on first start. If this is your first time using Docker, consider going through this [guide](DOCKER_INSTALLATION.md). + + ``` + pnpm run docker + ``` + + For the observability stack (Prometheus, Grafana, OTEL collector) and other optional tooling (Toxiproxy, nginx-h2, ch-ui, extra electric shard), use `pnpm run docker:full` instead. See `docker/docker-compose.extras.yml` for the full list. + +9. Migrate the database + ``` + pnpm run db:migrate + ``` +10. Build the webapp, CLI, and SDK + ``` + pnpm run build --filter webapp --filter trigger.dev --filter @trigger.dev/sdk + ``` +11. Seed the database.
This creates a local user, a `References` org, and the reference projects (including `hello-world`) with stable IDs. + ``` + pnpm run db:seed + ``` +12. Run the app. See the section below. + +## Running + +1. You can run the app with: + + ``` + pnpm run dev --filter webapp + ``` + + It should run on port `3030`: [http://localhost:3030](http://localhost:3030/) + +2. Once the app is running click the magic link button and enter your email. You will automatically be logged in, since you are running locally. Create an Org and your first project in the dashboard. + +## Manual testing using hello-world + +The `hello-world` reference project (and the others) live in a separate repo: +[`triggerdotdev/references`](https://github.com/triggerdotdev/references). Clone it +alongside this repo. It's the staging ground for testing changes to the SDK +(`@trigger.dev/sdk` at `/packages/trigger-sdk`), the Core package +(`@trigger.dev/core` at `/packages/core`), the CLI (`trigger.dev` at +`/packages/cli-v3`) and the platform (the Remix app at `/apps/webapp`). +To exercise your local monorepo changes, the reference project links to your local +build — see the references repo's README for the `pnpm run link` flow. + +> Paths below such as `projects/hello-world` are relative to your `references` +> clone, not this repo. + +### First-time setup + +First, make sure you are running the webapp according to the instructions above. The seed step from setup already created a `hello-world` project under the `References` org with the stable ref `proj_rrkpdguyagvsoktglnod` — log in at http://localhost:3030 with any email to access it. Then: + +1. Build the CLI and packages (skip if you already ran the build step in setup) + +```sh +pnpm run build --filter trigger.dev --filter "@trigger.dev/*" +``` + +2. 
In your `references` clone, link to your local monorepo build (see its README), then change into `projects/hello-world` and authorize the CLI to the local server: + +```sh +cd projects/hello-world +cp .env.example .env +pnpm exec trigger login -a http://localhost:3030 +``` + +This will open a new browser window and authorize the CLI against your local user account. + +You can optionally pass a `--profile` flag to the `login` command, which will allow you to use the CLI with separate accounts/servers. We suggest using a profile called `local` for your local development: + +```sh +cd projects/hello-world +pnpm exec trigger login -a http://localhost:3030 --profile local +# later when you run the dev or deploy command: +pnpm exec trigger dev --profile local +pnpm exec trigger deploy --profile local +``` + +### Running + +The following steps should be followed any time you start working on a new feature you want to test: + +1. Make sure the webapp is running on localhost:3030 + +2. In this repo, open a terminal window and build the CLI and packages and watch for changes (the reference project links against this build) + +```sh +pnpm run dev --filter trigger.dev --filter "@trigger.dev/*" +``` + +3. Open another terminal window, and change into `projects/hello-world` in your `references` clone. + +4. Run the `dev` command, which will register all the local tasks with the platform and allow you to start testing task execution: + +```sh +# in /projects/hello-world +pnpm exec trigger dev +``` + +If you want additional debug logging, you can use the `--log-level debug` flag: + +```sh +# in /projects/hello-world +pnpm exec trigger dev --log-level debug +``` + +5. If you make any changes in the CLI/Core/SDK, you'll need to `CTRL+C` to exit the `dev` command and restart it to pickup changes. Any changes to the files inside the reference project's `src/trigger` dir will automatically be rebuilt by the `dev` command. + +6. 
Navigate to the `hello-world` project in your local dashboard at localhost:3030 and you should see the list of tasks. + +7. Go to the "Test" page in the sidebar and select a task. Then enter a payload and click "Run test". You can tell what the payloads should be by looking at the relevant task file inside the reference project's `src/trigger` folder. Many of them accept an empty payload. + +8. Feel free to add additional files in the reference project's `src/trigger` dir to test out specific aspects of the system, or add in edge cases. + +## Adding and running migrations + +1. Modify `internal-packages/database/prisma/schema.prisma`. +2. Change directory to the database package: + + ```sh + cd internal-packages/database + ``` + +3. Create a migration: + + ``` + pnpm run db:migrate:dev:create + ``` + + This creates a migration file. Check the migration file does only what you want. If you're adding any database indexes they must use `CONCURRENTLY`, otherwise they'll lock the table when executed. + +4. Run the migration: + + ``` + pnpm run db:migrate:deploy + pnpm run generate + ``` + + This executes the migrations against your database and applies changes to the database schema(s), and then regenerates the Prisma client. + +5. Commit the generated migration files as well as the changes to `schema.prisma`. +6. If you're using VSCode you may need to restart the TypeScript server in the webapp to get updated type inference. Open a TypeScript file, then open the Command Palette (View > Command Palette) and run `TypeScript: Restart TS server`. + +## Making a pull request + +**If you get errors, be sure to fix them before committing.** + +> **Note:** We may close PRs if we decide that the cost of integrating the change outweighs the benefits. To improve the chances of your PR getting accepted, follow the guidelines below. + +### PR workflow + +1. **Always open your PR in draft status first.** Do not mark it as "Ready for Review" until the steps below are complete. +2. 
**Address all CodeRabbit code review comments.** Our CI runs an automated code review via CodeRabbit. Go through each comment and either fix the issue or resolve it with a comment explaining why no change is needed. +3. **Wait for all CI checks to pass.** Do not mark the PR as "Ready for Review" until every check is green. +4. **Then mark the PR as "Ready for Review"** so a maintainer can take a look. + +### Cost/benefit analysis for risky changes + +If your change touches core infrastructure, modifies widely-used code paths, or could introduce regressions, consider doing a brief cost/benefit analysis and including it in the PR description. Explain what the benefit is to users and why the risk is worth it. This goes a long way toward helping maintainers evaluate your contribution. + +### General guidelines + +- Be sure to [check the "Allow edits from maintainers" option](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork) while creating your PR. +- If your PR refers to or fixes an issue, be sure to add `refs #XXX` or `fixes #XXX` to the PR description, replacing `XXX` with the respective issue number. See more about [Linking a pull request to an issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue). +- Be sure to fill the PR Template accordingly. + +## Adding changesets + +We use [changesets](https://github.com/changesets/changesets) to manage our package versions and changelogs. If you've never used changesets before, first read [their guide here](https://github.com/changesets/changesets/blob/main/docs/adding-a-changeset.md). + +If you are contributing a change to any packages in this monorepo (anything in either the `/packages` or `/integrations` directories), then you will need to add a changeset to your Pull Requests before they can be merged. 
+ +To add a changeset, run the following command in the root of the repo + +```sh +pnpm run changeset:add +``` + +Here's an example of creating a `patch` changeset for the `@trigger.dev/github` and `@trigger.dev/slack` packages (click to view): + +[![asciicast](https://asciinema.org/a/599228.svg)](https://asciinema.org/a/599228) + +You will be prompted to select which packages to include in the changeset. Only select the packages that you have made changes for. + +Most of the time the changes you'll make are likely to be categorized as patch releases. If you feel like there is the need for a minor or major release of the package based on the changes being made, add the changeset as such and it will be discussed during PR review. + +## Adding server changes + +Changesets only track published npm packages. If your PR only changes server components (`apps/webapp/`, `apps/supervisor/`, `apps/coordinator/`, etc.) with no package changes, add a `.server-changes/` file so the change appears in release notes. + +Create a markdown file with a descriptive name: + +```sh +cat > .server-changes/fix-batch-queue-stalls.md << 'EOF' +--- +area: webapp +type: fix +--- + +Speed up batch queue processing by removing stalls and fixing retry race +EOF +``` + +**Fields:** +- `area` (required): `webapp` | `supervisor` | `coordinator` | `kubernetes-provider` | `docker-provider` +- `type` (required): `feature` | `fix` | `improvement` | `breaking` + +The body text (below the frontmatter) is a one-line description of the change. Keep it concise — it will appear in release notes. + +**When to add which:** + +| PR changes | What to add | +|---|---| +| Only packages (`packages/`) | Changeset | +| Only server (`apps/`) | `.server-changes/` file | +| Both packages and server | Just the changeset | + +See `.server-changes/README.md` for more details. 
+ +## Troubleshooting + +### EADDRINUSE: address already in use :::3030 + +When receiving the following error message: + +```sh +webapp:dev: Error: listen EADDRINUSE: address already in use :::3030 +``` + +The process running on port `3030` should be destroyed. + +1. Get the `PID` of the process running on PORT `3030` + ```sh + lsof -i :3030 + ``` +2. Kill the process + ```sh + sudo kill -9 <PID> + ``` + +### Running two clones side by side (worktree, branch experiment) + +The default `pnpm run docker` uses the project name `triggerdotdev-docker` and the standard host ports (5432, 6379, 3060, 4566, 8123, 9000, 9005, 9006). To stand up a second instance in another clone without clashing, set a different `COMPOSE_PROJECT_NAME` and the offset host ports in that clone's `.env`. The "Running multiple instances side by side" block in `.env.example` lists every overridable env var with its default for reference; uncomment the lines you need and update `DATABASE_URL` / `CLICKHOUSE_URL` / `REDIS_PORT` / `APP_ORIGIN` / `LOGIN_ORIGIN` / `ELECTRIC_ORIGIN` / `REALTIME_STREAMS_S2_ENDPOINT` to match. diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md deleted file mode 100644 index 573e4615317..00000000000 --- a/DEPLOYMENT.md +++ /dev/null @@ -1,32 +0,0 @@ -# Trigger.dev Deployment Guide - -## StreamNative Cloud - Hosted Pulsar - -### Generic client credentials - -```sh -snctl auth export-service-account webapp --key-file webapp-credentials.json -snctl auth export-service-account websocketserver --key-file wss-credentials.json -``` - -### Topics - -#### Triggers (`persistent://triggerdotdev/workflows/triggers`) - -Events that trigger workflows to run. These are sent by the "platform" and read by the Web Socket Servers, which then coordinate with the hosts for running the workflows - -#### Run Commands (`persistent://triggerdotdev/workflows/run-commands`) - -These are events that come from hosts and are published by the Web Socket Servers, e.g. 
Sending Integration Requests, Sending Logs, Initializing a Delay - -#### Run Command Responses (`persistent://triggerdotdev/workflows/run-command-responses`) - -These are events that come from the platform and are read by the Web Socket Servers, to resolve or reject a previous Run Command - -#### Integration Requests (`persistent://triggerdotdev/queues/integration-requests`) - -This is an internal queue used by the platform to perform integration requests, and retry them. - -#### App Task Queue (`persistent://triggerdotdev/queues/background-tasks`) - -This is an internal queue used by the platform to do tasks in a queue. Basically a background job system. diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md deleted file mode 100644 index fdf3ca4d4cb..00000000000 --- a/DEVELOPMENT.md +++ /dev/null @@ -1,229 +0,0 @@ -# Initial setup - -## Prerequisites - -### Pulsar requirements - -1. Ensure you have Homebrew installed by running `which brew` in terminal. If it's not found then you should install it: https://brew.sh/. Run `which brew` again to check it's found. If it's not you may need to [add it your path](https://stackoverflow.com/questions/36657321/after-installing-homebrew-i-get-zsh-command-not-found-brew) - -2. Run `brew install libpulsar` to install the C++ libraries that the pulsar-client depends on - -3. Make sure you have Python installed on your machine by running `which python3` in terminal. - -4. If python isn't found then you should install it: https://www.python.org/downloads/. In a new terminal window run `which python3` again. - -5. Run `npm config set python /the/path/from/the/which/python3/command` inserting the path from step 2 or 3 - -6. Install node-gyp: `npm install -g node-gyp` - -7. Make sure you have the Xcode command line tools installed by running `xcode-select --install` from the terminal. If it says they're already installed then you're set. - -8. 
Run this in the terminal: - -```sh -export CPLUS_INCLUDE_PATH="$CPLUS_INCLUDE_PATH:$(brew --prefix)/include" -export LIBRARY_PATH="$LIBRARY_PATH:$(brew --prefix)/lib" -export PULSAR_CPP_DIR=/opt/homebrew/Cellar/libpulsar/3.1.0 -``` - -9. Run `pnpm install` in the same terminal window. - -## Building and running the webapp - -> **Warning** -> All the following commands should be launched from the **monorepo root directory** - -1. Install the dependencies. - ```bash - pnpm install - ``` -2. Optionally, if you are testing auth (pizzly) or webhooks (webapp) then you'll need to use ngrok to proxy internet traffic to your local machine - - Get access to ngrok and then follow the instructions here to get it setup: https://ngrok.com/download (use homebrew) and make sure to authenticate. - - Then run the following scripts to start proxying: - - ```sh - ./scripts/proxy-pizzly.sh dan-pizzly-dev - ./scripts/proxy-webapp.sh dan-trigger-dev - ``` - -3. Environment variables. You will need to create copies of the `.env.example` files in `app/webapp` - - ```sh - cp ./apps/webapp/.env.example ./apps/webapp/.env - ``` - - Then you will need to fill in the fields with real values. - - You also need to create the `pizzly-server.env` files under the `.docker` directory: - - ```sh - cp ./.docker/pizzly-server.env.example ./.docker/pizzly-server.env - ``` - - Next, update the `AUTH_CALLBACK_URL` env var in the `pizzly-server.env` env file with the value provided to the `./scripts/proxy-pizzly.sh` command. Using the example above the `AUTH_CALLBACK_URL` would be `AUTH_CALLBACK_URL=https://dan-pizzly-dev.eu.ngrok.io/oauth/callback`. - - If you aren't proxying pizzly according to step 2, then leave the `pizzly-server.env` file empty. - - If you are proxying the webapp according to step 2 then in `webapp/.env`, set the `APP_ORIGIN` to the `NGROK_SUBDOMAIN` provided to the `./scripts/proxy-webapp.sh` command, e.g. `APP_ORIGIN=https://dan-trigger-dev.eu.ngrok.io` - -4. 
Start postgresql, pulsar, and pizzly server - - ```bash - pnpm run docker:db - ``` - - > **Note:** The npm script will complete while Docker sets up the container in the background. Ensure that Docker has finished and your container is running before proceeding. - -5. Generate prisma schema - ```bash - pnpm run generate - ``` -6. Run the Prisma migration to the database - - ```bash - pnpm run db:migrate:deploy - ``` - -7. Run the first build (with dependencies via the `...` option) - - ```bash - pnpm run build --filter=webapp... - ``` - - **Running simply `pnpm run build` will build everything, including the Remix app.** - -8. Run the Remix dev server - -```bash -pnpm run dev --filter=webapp -``` - -## Attaching an API integration using Pizzly - -```bash -PIZZLY_HOSTPORT=http://localhost:3004 npx pizzly config:create github github "repo,user" -``` - -## Tests, Typechecks, Lint, Install packages... - -Check the `turbo.json` file to see the available pipelines. - -- Run the Cypress tests and Dev - ```bash - pnpm run test:e2e:dev --filter=webapp - ``` -- Lint everything - ```bash - pnpm run lint - ``` -- Typecheck the whole monorepo - ```bash - pnpm run typecheck - ``` -- Test the whole monorepo - ```bash - pnpm run test - or - pnpm run test:dev - ``` -- How to install an npm package in the Remix app ? - ```bash - pnpm add dayjs --filter webapp - ``` -- Tweak the tsconfigs, eslint configs in the `config-package` folder. Any package or app will then extend from these configs. - -# Running a workflow locally - -## After pulling a change - -1. Ensure there are no database migrations to run - -```bash -pnpm run db:migrate:dev -``` - -2. Generate the Prisma database client - -```bash -pnpm run generate -``` - -3. Install packages - -```bash -pnpm install -``` - -4. Build everything - -```bash -pnpm run build -``` - -5. Install packages again, this makes sure the local packages are linked - -```bash -pnpm install -``` - -## Running the servers - -1. 
Ensure the docker containers are running - -```bash -pnpm run docker:db -``` - -2. Run the webapp - -```bash -pnpm run dev --filter=webapp -``` - -3. Run the Web Socket Server - -```bash -pnpm run dev --filter=wss -``` - -4. Build all the @trigger.dev/\* packages - -```bash -pnpm run dev --filter="@trigger.dev/*" -``` - -## Running the smoke test - -1. Run the smoke test workflow - -```bash -cd ./examples/smoke-test -pnpm run dev -``` - -2. Running the workflow requires you to send data to the local API. - -You can use this cURL command to send a `user.created` event. This will run the workflow and generate the corresponding logs. - -```bash -curl --request POST \ - --url http://localhost:3000/api/v1/events \ - --header 'Authorization: Bearer trigger_dev_zC25mKNn6c0q' \ - --header 'Content-Type: application/json' \ - --data '{ - "name": "user.created", - "payload": { - "id": "123" - } -}' -``` - -## Dependency & Package graph - -![Dependency Graph](assets/dependencyGraph.png) - -## Warp Launch Configuration - -Setup a custom launch configuration for the Warp terminal ([docs here](https://docs.warp.dev/features/sessions/launch-configurations)) by copying the `.warp/triggerdotdev.yaml.example` file to `~/.warp/launch_configurations/triggerdotdev.yaml`. Make sure you edit the file and replace `` and `` with your custom ngrok subdomains. diff --git a/DOCKER_INSTALLATION.md b/DOCKER_INSTALLATION.md new file mode 100644 index 00000000000..7e135bd6f84 --- /dev/null +++ b/DOCKER_INSTALLATION.md @@ -0,0 +1,135 @@ +This guide covers installing Docker and Docker Compose. If you're looking for instructions for running Trigger.dev in docker, [see here](https://github.com/triggerdotdev/docker). + +## Setting up Docker for the first time. + +In the contributing guide of Trigger.dev, there's a section that requires you to start Docker. + +If you don't have Docker installed on your machine, you'll run into some complications (errors). 
+ +Below are the steps on how you can avoid that. + +First you need to set up Docker Compose, as it is the underlying tool that the `pnpm run docker` command runs behind the scenes. + +## Linux + +To install Docker Compose on Linux Ubuntu, you can follow these steps: + +1. Create the Docker config directory and cli-plugins subdirectory: + + ```shell + DOCKER_CONFIG=${DOCKER_CONFIG:-$HOME/.docker} + mkdir -p $DOCKER_CONFIG/cli-plugins + ``` + +2. Download the Docker Compose plugin: + + ```shell + curl -SL "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o $DOCKER_CONFIG/cli-plugins/docker-compose + ``` + + Note: + + - To install for all users, replace `$DOCKER_CONFIG/cli-plugins` with `/usr/local/lib/docker/cli-plugins` + +3. Set the appropriate permissions to make the Docker Compose plugin executable: + + ```shell + chmod +x $DOCKER_CONFIG/cli-plugins/docker-compose + ``` + + If you installed for all users: + + ```shell + sudo chmod +x /usr/local/lib/docker/cli-plugins/docker-compose + ``` + +4. Verify that Docker Compose has been successfully installed: + + ```shell + docker compose version + ``` + + You should see output similar to: + + ``` + Docker Compose version vX.Y.Z + ``` + +After following these steps, you should have Docker Compose installed on your Ubuntu system, and you can use it by running `docker compose` commands in the terminal. + +Once you've verified that the `docker compose` package is installed, proceed to start Docker with `pnpm run docker`. + +You'll probably get an error similar to the one below: + +```shell +Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running? + ELIFECYCLE  Command failed with exit code 1. +``` + +The error message suggests that the Docker daemon is not running on your system. The Docker daemon is responsible for managing and running Docker containers. 
+ +To resolve this issue, you may need to install Docker properly on your Ubuntu system. Here are the steps to install Docker on Ubuntu: + +1. Update the package index on your system by running the following command: + + ```shell + sudo apt update + ``` + +2. Install the necessary packages to allow apt to use repositories over HTTPS: + + ```shell + sudo apt install apt-transport-https ca-certificates curl software-properties-common + ``` + +3. Add the official Docker GPG key to your system by running the following command: + + ```shell + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + ``` + +4. Add the Docker repository to the APT sources list: + + ```shell + echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + ``` + +5. Update the package index again: + + ```shell + sudo apt update + ``` + +6. Install Docker by running the following command: + + ```shell + sudo apt install docker-ce docker-ce-cli containerd.io + ``` + +7. After the installation is complete, verify that Docker is installed correctly by running the following command: + + ```shell + docker --version + ``` + + This command should display the version information of Docker without any errors. + +Once Docker is installed and verified, you should be able to start the Docker daemon and run the `pnpm run docker` command without encountering any issues. + +## Windows + +1. Download the Docker Desktop installer from the Docker website: [Docker Desktop for Windows](https://www.docker.com/products/docker-desktop) + +2. Run the installer and follow the instructions to install Docker Desktop. + +3. After installation, Docker Desktop should be running automatically. + +## macOS + +1. 
Download the Docker Desktop installer from the Docker website: [Docker Desktop for Mac](https://www.docker.com/products/docker-desktop) + +2. Run the installer and follow the instructions to install Docker Desktop. + +3. After installation, Docker Desktop should be running automatically. + +Please note that the instructions provided above are for the most common scenarios. For specific versions or different distributions, it's always a good idea to consult the official Docker documentation for the respective operating systems. diff --git a/LICENSE b/LICENSE index a1e6cd1e3f8..5e468e50785 100644 --- a/LICENSE +++ b/LICENSE @@ -1,25 +1,201 @@ -Copyright (c) 2022 API Hero Inc. - -Portions of this software are licensed as follows: - -- All content that resides under any "ee/" directory of this repository, if such directories exists, are licensed under the license defined in "ee/LICENSE". -- All third party components incorporated into the Trigger.dev Software are licensed under the original license provided by the owner of the applicable component. -- Content outside of the above mentioned directories or restrictions above is available under the "MIT Expat" license as defined below. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, 
in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [2023] [Trigger.dev] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/README.md b/README.md index 3954fda9007..0d7f1ca2930 100644 --- a/README.md +++ b/README.md @@ -1,204 +1,130 @@
-![Hero](https://raw.githubusercontent.com/triggerdotdev/trigger.dev/eebe37109e33beae6390ee19029fce8a5934c84b/apps/webapp/public/images/logo-banner.png) +![Trigger.dev logo](https://content.trigger.dev/github-header-banner.jpg) -[![Twitter](https://img.shields.io/twitter/url/https/twitter.com/triggerdotdev.svg?style=social&label=Follow%20%40trigger.dev)](https://twitter.com/triggerdotdev) [![YouTube Channel Subscribers](https://img.shields.io/youtube/channel/subscribers/UCu-PdxpWtIrrd7vW0N5T6ZA?style=social)](https://www.youtube.com/@triggerdotdev) -[![GitHub Repo stars](https://img.shields.io/github/stars/triggerdotdev/trigger.dev?style=social)](https://github.com/triggerdotdev/trigger.dev) - -[Website](https://trigger.dev) | [Community](https://discord.gg/JtBAxBr2m3) | [Docs](https://docs.trigger.dev) -
- - -# **✨ Trigger.dev** -### **The developer-first open source Zapier alternative.** +### Build and deploy fully‑managed AI agents and workflows +[Website](https://trigger.dev) | [Docs](https://trigger.dev/docs) | [Issues](https://github.com/triggerdotdev/trigger.dev/issues) | [Example projects](https://github.com/triggerdotdev/examples) | [Feature requests](https://triggerdev.featurebase.app/) | [Public roadmap](https://triggerdev.featurebase.app/roadmap) | [Self-hosting](https://trigger.dev/docs/self-hosting/overview) -Trigger.dev is an open source platform that makes it easy for developers to create event-driven background tasks directly in their code. Build, test and run workflows locally using our SDK. Subscribe to webhooks, schedule jobs, run background jobs and add long delays easily and reliably. In our web app you get full visibility of every run your workflow has ever made making it easier to monitor and debug. +[![Open Source](https://img.shields.io/badge/Open%20Source-%E2%9D%A4-red.svg)](https://github.com/triggerdotdev/trigger.dev) +[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/triggerdotdev/trigger.dev/blob/main/LICENSE) +[![npm](https://img.shields.io/npm/v/@trigger.dev/sdk.svg?label=npm)](https://www.npmjs.com/package/@trigger.dev/sdk) +[![SDK downloads](https://img.shields.io/npm/dm/@trigger.dev/sdk.svg?label=SDK%20downloads)](https://www.npmjs.com/package/@trigger.dev/sdk) -  +[![Twitter Follow](https://img.shields.io/twitter/follow/triggerdotdev?style=social)](https://twitter.com/triggerdotdev) +[![Discord](https://img.shields.io/discord/1066956501299777596?logo=discord&logoColor=white&color=7289da)](https://discord.gg/nkqV9xBYWy) +[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/triggerdotdev/trigger.dev) +[![GitHub stars](https://img.shields.io/github/stars/triggerdotdev/trigger.dev?style=social)](https://github.com/triggerdotdev/trigger.dev) -# **⭐️ Features:** + -- 👂 Easily 
subscribe to [webhooks](https://docs.trigger.dev/triggers/webhooks) - — they work locally without tunnelling. -- 🔥 Fire your own [custom events](https://docs.trigger.dev/triggers/custom-events)—a single event can trigger multiple workflows. -- 📆 [Schedule workflows](https://docs.trigger.dev/triggers/scheduled)—easily repeat tasks or use CRON syntax for advanced cases. -- 🚦 Add [long delays](https://docs.trigger.dev/functions/delays) inside workflows (up to a year) and they will pick up where they left off. -- 🤝 When your server goes down [it’s not a problem](https://docs.trigger.dev/guides/resumability), workflows will reconnect and continue. -- 🪧 [View every step of every run](https://docs.trigger.dev/viewing-runs), with data, previews and errors. -- 👋 Connect to and authenticate with APIs using our custom integrations. -- 🚗 If you have a custom use case, we support [Fetch for calling any HTTP endpoint](https://docs.trigger.dev/functions/fetch) or [webhooks](https://docs.trigger.dev/triggers/webhooks) for subscribing to events from APIs. -- 📡 All API calls are automatically retried with exponential back off. -- 😀 TypeScript SDK, so whether you’re using JavaScript or TypeScript you will have a great experience. +## About Trigger.dev -  +Trigger.dev is the open-source platform for building AI workflows in TypeScript. Long-running tasks with retries, queues, observability, and elastic scaling. 
-# **🌱 Documentation:** +## The platform designed for building AI agents -- [Getting Started with Trigger.dev](https://docs.trigger.dev/getting-started) -- Example workflows - - [Welcome email drip campaign using Resend and Slack](https://docs.trigger.dev/examples/resend) - - [Post to Slack when a GitHub issue is created or modified](https://docs.trigger.dev/examples/slack) - - [Create a new product on Shopify](https://docs.trigger.dev/examples/shopify) - - [When a GitHub repo is starred, post information about the user to Slack](https://docs.trigger.dev/examples/github) -- Triggers: - - [Webhooks](https://docs.trigger.dev/triggers/webhooks) - - [Custom events](https://docs.trigger.dev/triggers/custom-events) - - [Scheduled](https://docs.trigger.dev/triggers/scheduled) -- Functions: - - [Fetch](https://docs.trigger.dev/functions/fetch) - - [Logging](https://docs.trigger.dev/functions/logging) - - [Delays](https://docs.trigger.dev/functions/delays) - - [Send event](https://docs.trigger.dev/functions/send-event) - - [Loops, conditionals, etc](https://docs.trigger.dev/functions/loops-conditionals-etc) +Build [AI agents](https://trigger.dev/product/ai-agents) using all the frameworks, services and LLMs you're used to, deploy them to Trigger.dev and get durable, long-running tasks with retries, queues, observability, and elastic scaling out of the box. -  +- **Long-running without timeouts**: Execute your tasks with absolutely no timeouts, unlike AWS Lambda, Vercel, and other serverless platforms. -# 🔬 **Anatomy of a workflow** +- **Durability, retries & queues**: Build rock solid agents and AI applications using our durable tasks, retries, queues and idempotency. -* You create workflows in code on your server using our SDK -* Each API integration is a separate package, e.g. `@trigger.dev/slack` -* Each workflow has an event that triggers it, e.g. 
`github.events.newStarEvent`, `scheduleEvent`, `customEvent` -* Each workflow has a `run` function that is called when the event is triggered -* If we don't have an integration for the API you want to use, you can use `fetch` to call any HTTP endpoint and `webhookEvent` to subscribe to webhooks +- **True runtime freedom**: Customize your deployed tasks with system packages – run browsers, Python scripts, FFmpeg and more. -## **Example workflows** +- **Human-in-the-loop**: Programmatically pause your tasks until a human can approve, reject or give feedback. -
Post to Slack when a GitHub issue is created or modified - +- **Realtime apps & streaming**: Move your background jobs to the foreground by subscribing to runs or streaming AI responses to your app. -_Integrations required: Slack, GitHub_ +- **Observability & monitoring**: Each run has full tracing and logs. Configure error alerts to catch bugs fast. -```ts -import { Trigger } from "@trigger.dev/sdk"; -import * as github from "@trigger.dev/github"; -import * as slack from "@trigger.dev/slack"; - -new Trigger({ - id: "new-github-star-to-slack", - name: "New GitHub Star: triggerdotdev/trigger.dev", - apiKey: "", - on: github.events.newStarEvent({ - repo: "triggerdotdev/trigger.dev", - }), - run: async (event) => { - await slack.postMessage("github-stars", { - channelName: "github-stars", - text: `New GitHub star from \n<${event.sender.html_url}|${event.sender.login}>`, - }); - }, -}).listen(); -``` +## Key features: -
+- **[JavaScript and TypeScript SDK](https://trigger.dev/docs/tasks/overview)** - Build background tasks using familiar programming models +- **[Long-running tasks](https://trigger.dev/docs/runs/max-duration)** - Handle resource-heavy tasks without timeouts +- **[Durable cron schedules](https://trigger.dev/docs/tasks/scheduled#scheduled-tasks-cron)** - Create and attach recurring schedules of up to a year +- **[Trigger.dev Realtime](https://trigger.dev/docs/realtime/overview)** - Trigger, subscribe to, and get real-time updates for runs, with LLM streaming support +- **[Build extensions](https://trigger.dev/docs/config/extensions/overview#build-extensions)** - Hook directly into the build system and customize the build process. Run Python scripts, FFmpeg, browsers, and more. +- **[React hooks](https://trigger.dev/docs/frontend/react-hooks#react-hooks)** - Interact with the Trigger.dev API on your frontend using our React hooks package +- **[Batch triggering](https://trigger.dev/docs/triggering#tasks-batchtrigger)** - Use batchTrigger() to initiate multiple runs of a task with custom payloads and options +- **[Structured inputs / outputs](https://trigger.dev/docs/tasks/schemaTask#schematask)** - Define precise data schemas for your tasks with runtime payload validation +- **[Waits](https://trigger.dev/docs/wait)** - Add waits to your tasks to pause execution for a specified duration +- **[Preview branches](https://trigger.dev/docs/deployment/preview-branches)** - Create isolated environments for testing and development. 
Integrates with Vercel and git workflows +- **[Waitpoints](https://trigger.dev/docs/wait-for-token#wait-for-token)** - Add human-in-the-loop judgment at critical decision points without disrupting workflow +- **[Concurrency & queues](https://trigger.dev/docs/queue-concurrency#concurrency-and-queues)** - Set concurrency rules to manage how multiple tasks execute +- **[Multiple environments](https://trigger.dev/docs/how-it-works#dev-mode)** - Support for DEV, PREVIEW, STAGING, and PROD environments +- **[No infrastructure to manage](https://trigger.dev/docs/how-it-works#trigger-dev-architecture)** - Auto-scaling infrastructure that eliminates timeouts and server management +- **[Automatic retries](https://trigger.dev/docs/errors-retrying)** - If your task encounters an uncaught error, we automatically attempt to run it again +- **[Checkpointing](https://trigger.dev/docs/how-it-works#the-checkpoint-resume-system)** - Tasks are inherently durable, thanks to our checkpointing feature +- **[Versioning](https://trigger.dev/docs/versioning)** - Atomic versioning allows you to deploy new versions without affecting running tasks +- **[Machines](https://trigger.dev/docs/machines)** - Configure the number of vCPUs and GBs of RAM you want the task to use +- **[Observability & monitoring](https://trigger.dev/product/observability-and-monitoring)** - Monitor every aspect of your tasks' performance with comprehensive logging and visualization tools +- **[Logging & tracing](https://trigger.dev/docs/logging)** - Comprehensive logging and tracing for all your tasks +- **[Tags](https://trigger.dev/docs/tags#tags)** - Attach up to ten tags to each run, allowing you to filter via the dashboard, realtime, and the SDK +- **[Run metadata](https://trigger.dev/docs/runs/metadata#run-metadata)** - Attach metadata to runs which updates as the run progresses and is available to use in your frontend for live updates +- **[Bulk actions](https://trigger.dev/docs/bulk-actions)** - Perform actions 
on multiple runs simultaneously, including replaying and cancelling +- **[Real-time alerts](https://trigger.dev/docs/troubleshooting-alerts#alerts)** - Choose your preferred notification method for run failures and deployments -
Welcome email drip campaign - +## Write tasks in your codebase -_Integrations required: Slack, Resend_ +Create tasks where they belong: in your codebase. Version control, localhost, test and review like you're already used to. ```ts -import { customEvent, Trigger, sendEvent } from "@trigger.dev/sdk"; -import * as resend from "@trigger.dev/resend"; -import * as slack from "@trigger.dev/slack"; -import React from "react"; -import { z } from "zod"; -import { getUser } from "../db"; -import { InactiveEmail, TipsEmail, WelcomeEmail } from "./email-templates"; - -new Trigger({ - id: "welcome-email-campaign", - name: "Welcome email drip campaign", - apiKey: "", - on: customEvent({ - name: "user.created", - schema: z.object({ - userId: z.string(), - }), - }), - async run(event, context) { - //get the user data from the database - const user = await getUser(event.userId); - - await slack.postMessage("send-to-slack", { - channelName: "new-users", - text: `New user signed up: ${user.name} (${user.email})`, - }); - - //Send the first email - const welcomeResponse = await resend.sendEmail("welcome-email", { - from: "Trigger.dev ", - replyTo: "James ", - to: user.email, - subject: "Welcome to Trigger.dev", - react: , - }); - await context.logger.debug( - `Sent welcome email to ${welcomeResponse.to} with id ${welcomeResponse.id}` - ); - - //wait 1 day, check if the user has created a workflow and send the appropriate email - await context.waitFor("wait-a-while", { days: 1 }); - const updatedUser = await getUser(event.userId); - - if (updatedUser.hasOnboarded) { - await resend.sendEmail("onboarding-complete", { - from: "Trigger.dev ", - replyTo: "James ", - to: updatedUser.email, - subject: "Pro tips for workflows", - react: , - }); - } else { - await resend.sendEmail("onboarding-incomplete", { - from: "Trigger.dev ", - replyTo: "James ", - to: updatedUser.email, - subject: "Help with your first workflow", - react: , - }); - } +import { task } from "@trigger.dev/sdk"; + +//1. 
You need to export each task +export const helloWorld = task({ + //2. Use a unique id for each task + id: "hello-world", + //3. The run function is the main function of the task + run: async (payload: { message: string }) => { + //4. You can write code that runs for a long time here, there are no timeouts + console.log(payload.message); }, -}).listen(); +}); ``` -
+## Deployment -[More examples here](https://docs.trigger.dev/examples/examples) +Use our SDK to write tasks in your codebase. There's no infrastructure to manage, your tasks automatically scale and connect to our cloud. Or you can always self-host. -  +## Environments -# 👀 **Viewing runs:** +We support `Development`, `Staging`, `Preview`, and `Production` environments, allowing you to test your tasks before deploying them to production. -One of the most powerful features of Trigger.dev is the [runs page](https://docs.trigger.dev/viewing-runs). All of the steps in a workflow, including the initial event, can be viewed in detail. See the status / output of each step, the logs, rich previews, errors and much more. +## Full visibility of every job run -![Viewing runs](https://github.com/triggerdotdev/trigger.dev/raw/main/apps/docs/images/run-succeeded.png) +View every task in every run so you can tell exactly what happened. We provide a full trace view of every task run so you can see what happened at every step. +![Trace view image](https://content.trigger.dev/trace-view.png) -  +# Getting started -# **🏠 Running Trigger.dev locally:** +The quickest way to get started is to create an account and project in our [web app](https://cloud.trigger.dev), and follow the instructions in the onboarding. Build and deploy your first task in minutes. -To run Trigger.dev locally, [follow these steps](https://github.com/triggerdotdev/trigger.dev/blob/main/DEVELOPMENT.md). +### Useful links: -  +- [Quick start](https://trigger.dev/docs/quick-start) - get up and running in minutes +- [How it works](https://trigger.dev/docs/how-it-works) - understand how Trigger.dev works under the hood +- [Guides and examples](https://trigger.dev/docs/guides/introduction) - walk-through guides and code examples for popular frameworks and use cases -# **👏 Contributing:** +## Self-hosting -We are open source and love contributions! 
+If you prefer to self-host Trigger.dev, you can follow our [self-hosting guides](https://trigger.dev/docs/self-hosting/overview): -- Request a feature in our [Discord community](https://discord.gg/JtBAxBr2m3) -- Open a PR +- [Docker self-hosting guide](https://trigger.dev/docs/self-hosting/docker) - use Docker Compose to spin up a Trigger.dev instance +- [Kubernetes self-hosting guide](https://trigger.dev/docs/self-hosting/kubernetes) - use our official Helm chart to deploy Trigger.dev to your Kubernetes cluster -  +## Support and community -# **🧘‍♂️ Self-hosting guide:** +We have a large active community in our official [Discord server](https://trigger.dev/discord) for support, including a dedicated channel for self-hosting. -_coming soon..._ +## Development -  +To setup and develop locally or contribute to the open source project, follow our [development guide](./CONTRIBUTING.md). -# **📧 Support & contact:** +## Meet the Amazing People Behind This Project: -- Join our [Discord community](https://discord.gg/JtBAxBr2m3) -- If you have any other questions, get in touch at [hello@trigger.dev](mailto:hello@trigger.dev) + + + diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 00000000000..8ba3ecb5007 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,36 @@ +## Guide on releasing a new version + +### Automated release (v4+) + +Releases are fully automated via CI: + +1. PRs merge to `main` with changesets (for package changes) and/or `.server-changes/` files (for server-only changes). +2. The [changesets-pr.yml](./.github/workflows/changesets-pr.yml) workflow automatically creates/updates the `changeset-release/main` PR with version bumps and an enhanced summary of all changes. Consumed `.server-changes/` files are removed on the release branch (same approach changesets uses for `.changeset/` files — they're deleted on the branch, so merging the PR cleans them up). +3. When ready to release, merge the changeset release PR into `main`. +4. 
The [release.yml](./.github/workflows/release.yml) workflow automatically: + - Publishes all packages to npm + - Creates a single unified GitHub release (e.g., "trigger.dev v4.3.4") + - Tags and triggers Docker image builds + - After Docker images are pushed, updates the GitHub release with the exact GHCR tag link + +### What engineers need to do + +- **Package changes**: Add a changeset with `pnpm run changeset:add` +- **Server-only changes**: Add a `.server-changes/` file (see `.server-changes/README.md`) +- **Mixed PRs**: Just the changeset is enough + +See `CHANGESETS.md` for full details on changesets and server changes. + +### Legacy release (v3) + +1. Merge the changeset PR into main, making sure to cancel both the release and publish GitHub Actions from that merge. +2. Pull the changes locally into main +3. Run `pnpm i` which will update the pnpm lock file with the new versions +4. Create a commit with "Release 3.x.x" and push. This will build and release the packages +5. Create a git tag on that release commit with v.docker.3.x.x and push the tag to origin. This will publish the `v3.x.x` docker image to GitHub Container Registry. +6. Once the image is built and pushed, create a new GitHub release and select the tag you just created, along with the previous tag that was released. +7. This will generate some release notes. Edit out the package changes and leave only the server changes. +8. Name the release `@trigger.dev/docker@3.x.x` +9. Include the package link (e.g. https://github.com/triggerdotdev/trigger.dev/pkgs/container/trigger.dev/278459584?tag=v3.x.x) +10. Once the packages have been published, head over to the [v2-legacy repo](https://github.com/triggerdotdev/v2-legacy.trigger.dev) and follow the instructions in the README for creating a matching release. +11. 
Before deploying to cloud, compare the differences in the previously created release and double check to see if there are any migrations with indexes created concurrently, and make sure to run those before deploying. diff --git a/ai/references/migrations.md b/ai/references/migrations.md new file mode 100644 index 00000000000..c6fbf79e9d7 --- /dev/null +++ b/ai/references/migrations.md @@ -0,0 +1,121 @@ +## Creating and applying migrations + +We use prisma migrations to manage the database schema. Please follow these steps when editing the `internal-packages/database/prisma/schema.prisma` file: + +Edit the `schema.prisma` file to add or modify the schema. + +Create a new migration file but don't apply it yet: + +```bash +cd internal-packages/database +pnpm run db:migrate:dev:create --name "add_new_column_to_table" +``` + +The migration file will be created in the `prisma/migrations` directory, but it will have a bunch of edits to the schema that are not needed and will need to be removed before we can apply the migration. 
Here's an example of what the migration file might look like: + +```sql +-- AlterEnum +ALTER TYPE "public"."TaskRunExecutionStatus" ADD VALUE 'DELAYED'; + +-- AlterTable +ALTER TABLE "public"."TaskRun" ADD COLUMN "debounce" JSONB; + +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToBackgroundWorkerFile" ADD CONSTRAINT "_BackgroundWorkerToBackgroundWorkerFile_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_BackgroundWorkerToBackgroundWorkerFile_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToTaskQueue" ADD CONSTRAINT "_BackgroundWorkerToTaskQueue_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_BackgroundWorkerToTaskQueue_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_TaskRunToTaskRunTag" ADD CONSTRAINT "_TaskRunToTaskRunTag_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_TaskRunToTaskRunTag_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_WaitpointRunConnections" ADD CONSTRAINT "_WaitpointRunConnections_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_WaitpointRunConnections_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_completedWaitpoints" ADD CONSTRAINT "_completedWaitpoints_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_completedWaitpoints_AB_unique"; + +-- CreateIndex +CREATE INDEX "SecretStore_key_idx" ON "public"."SecretStore"("key" text_pattern_ops); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_id_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "id" DESC); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_createdAt_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "createdAt" DESC); +``` + +All the following lines should be removed: + +```sql +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToBackgroundWorkerFile" ADD CONSTRAINT "_BackgroundWorkerToBackgroundWorkerFile_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX 
"public"."_BackgroundWorkerToBackgroundWorkerFile_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToTaskQueue" ADD CONSTRAINT "_BackgroundWorkerToTaskQueue_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_BackgroundWorkerToTaskQueue_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_TaskRunToTaskRunTag" ADD CONSTRAINT "_TaskRunToTaskRunTag_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_TaskRunToTaskRunTag_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_WaitpointRunConnections" ADD CONSTRAINT "_WaitpointRunConnections_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_WaitpointRunConnections_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_completedWaitpoints" ADD CONSTRAINT "_completedWaitpoints_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_completedWaitpoints_AB_unique"; + +-- CreateIndex +CREATE INDEX "SecretStore_key_idx" ON "public"."SecretStore"("key" text_pattern_ops); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_id_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "id" DESC); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_createdAt_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "createdAt" DESC); +``` + +Leaving only this: + +```sql +-- AlterEnum +ALTER TYPE "public"."TaskRunExecutionStatus" ADD VALUE 'DELAYED'; + +-- AlterTable +ALTER TABLE "public"."TaskRun" ADD COLUMN "debounce" JSONB; +``` + +After editing the migration file, apply the migration: + +```bash +cd internal-packages/database +pnpm run db:migrate:deploy && pnpm run generate +``` diff --git a/ai/references/repo.md b/ai/references/repo.md new file mode 100644 index 00000000000..6e0ff056716 --- /dev/null +++ b/ai/references/repo.md @@ -0,0 +1,37 @@ +## Repo Overview + +This is a pnpm 10.33.2 monorepo that uses turborepo @turbo.json. 
The following workspaces are relevant + +## Apps + +- /apps/webapp is a remix app that is the main API and dashboard for trigger.dev +- /apps/supervisor is a node.js app that handles the execution of built tasks, interaction with the webapp through internal "engine" APIs, as well as interfacing with things like docker or kubernetes, to execute the code. + +## Public Packages + +- /packages/trigger-sdk is the `@trigger.dev/sdk` main SDK package. +- /packages/cli-v3 is the `trigger.dev` CLI package. See our [CLI dev command](https://trigger.dev/docs/cli-dev.md) and [Deployment](https://trigger.dev/docs/deployment/overview.md) docs for more information. +- /packages/core is the `@trigger.dev/core` package that is shared across the SDK and other packages +- /packages/build defines the types and prebuilt build extensions for trigger.dev. See our [build extensions docs](https://trigger.dev/docs/config/extensions/overview.md) for more information. +- /packages/react-hooks defines some useful react hooks like our realtime hooks. See our [Realtime hooks](https://trigger.dev/docs/frontend/react-hooks/realtime.md) and our [Trigger hooks](https://trigger.dev/docs/frontend/react-hooks/triggering.md) for more information. +- /packages/redis-worker is the `@trigger.dev/redis-worker` package that implements a custom background job/worker system powered by redis for offloading work to the background, used in the webapp and also in the Run Engine 2.0. + +## Internal Packages + +- /internal-packages/\* are packages that are used internally only, not published, and usually they have a tsc build step and are used in the webapp +- /internal-packages/database is the `@trigger.dev/database` package that exports a prisma client, has the schema file, and exports a few other helpers. 
+- /internal-packages/run-engine is the `@internal/run-engine` package that is "Run Engine 2.0" and handles moving a run all the way through its lifecycle +- /internal-packages/redis is the `@internal/redis` package that exports Redis types and the `createRedisClient` function to unify how we create redis clients in the repo. It's not used everywhere yet, but it's the preferred way to create redis clients from now on. +- /internal-packages/testcontainers is the `@internal/testcontainers` package that exports a few useful functions for spinning up local testcontainers when writing vitest tests. See our [tests.md](./tests.md) file for more information. +- /internal-packages/zodworker is the `@internal/zodworker` package that implements a wrapper around graphile-worker that allows us to use zod to validate our background jobs. We are moving away from using graphile-worker as our background job system, replacing it with our own redis-worker package. + +## References + +- /references/\* are test workspaces that we use to write and test the system. Not quite e2e tests or automated, but just a useful place to help develop new features + +## Other + +- /docs is our trigger.dev/docs mintlify documentation site +- /docker/Dockerfile is the one that creates the main trigger.dev published image +- /docker/docker-compose.yml is the file we run locally to start postgresql, redis, and electric when we are doing local development. You can run it with `pnpm run docker` +- /CONTRIBUTING.md defines the steps it takes for OSS contributors to start contributing. diff --git a/ai/references/tests.md b/ai/references/tests.md new file mode 100644 index 00000000000..2bb236c75bc --- /dev/null +++ b/ai/references/tests.md @@ -0,0 +1,86 @@ +## Running Tests + +We use vitest exclusively for testing. 
To execute tests for a particular workspace, run the following command: + +```bash +pnpm run test --filter webapp +``` + +Prefer running tests on a single file (and first cd'ing into the directory): + +```bash +cd apps/webapp +pnpm run test ./src/components/Button.test.ts +``` + +If you are cd'ing into a directory, you may have to build dependencies first: + +```bash +pnpm run build --filter webapp +cd apps/webapp +pnpm run test ./src/components/Button.test.ts +``` + +## Writing Tests + +We use vitest for testing. We almost NEVER mock anything. Start with a top-level "describe", and have multiple "it" statements inside of it. + +New test files should be placed right next to the file being tested. For example: + +- Source file: `./src/services/MyService.ts` +- Test file: `./src/services/MyService.test.ts` + +When writing anything that needs redis or postgresql, we have some internal "testcontainers" that are used to spin up a local postgres instance, a redis instance, or both. + +redisTest: + +```typescript +import { redisTest } from "@internal/testcontainers"; +import { createRedisClient } from "@internal/redis"; + +describe("redisTest", () => { + redisTest("should use redis", async ({ redisOptions }) => { + const redis = createRedisClient(redisOptions); + + await redis.set("test", "test"); + const result = await redis.get("test"); + expect(result).toEqual("test"); + }); +}); +``` + +postgresTest: + +```typescript +import { postgresTest } from "@internal/testcontainers"; + +describe("postgresTest", () => { + postgresTest("should use postgres", async ({ prisma }) => { + // prisma is an instance of PrismaClient + }); +}); +``` + +containerTest: + +```typescript +import { containerTest } from "@internal/testcontainers"; + +describe("containerTest", () => { + containerTest("should use container", async ({ prisma, redisOptions }) => { + // container has both prisma and redis + }); +}); +``` + +## Dos and Don'ts + +- Do not mock anything. +- Do not use mocks in tests. 
+- Do not use spies in tests. +- Do not use stubs in tests. +- Do not use fakes in tests. +- Do not use sinon in tests. +- Structure each test with a setup, action, and assertion style. +- Feel free to write long test names. +- If there is any randomness in the code under test, use `seedrandom` to make it deterministic by allowing the caller to provide a seed. diff --git a/ailogger-output.log b/ailogger-output.log new file mode 100644 index 00000000000..e69de29bb2d diff --git a/apps/coordinator/.env.example b/apps/coordinator/.env.example new file mode 100644 index 00000000000..77377ab3cfd --- /dev/null +++ b/apps/coordinator/.env.example @@ -0,0 +1,4 @@ +HTTP_SERVER_PORT=8020 +PLATFORM_ENABLED=true +PLATFORM_WS_PORT=3030 +SECURE_CONNECTION=false \ No newline at end of file diff --git a/apps/coordinator/.gitignore b/apps/coordinator/.gitignore new file mode 100644 index 00000000000..5c84119d635 --- /dev/null +++ b/apps/coordinator/.gitignore @@ -0,0 +1,3 @@ +dist/ +node_modules/ +.env \ No newline at end of file diff --git a/apps/coordinator/Containerfile b/apps/coordinator/Containerfile new file mode 100644 index 00000000000..9e973675ab9 --- /dev/null +++ b/apps/coordinator/Containerfile @@ -0,0 +1,60 @@ +# syntax=docker/dockerfile:labs + +FROM node:20-bookworm-slim@sha256:72f2f046a5f8468db28730b990b37de63ce93fd1a72a40f531d6aa82afdf0d46 AS node-20 + +WORKDIR /app + +FROM node-20 AS pruner + +COPY --chown=node:node . . +RUN npx -q turbo@1.10.9 prune --scope=coordinator --docker +RUN find . -name "node_modules" -type d -prune -exec rm -rf '{}' + + +FROM node-20 AS base + +RUN apt-get update \ + && apt-get install -y buildah ca-certificates dumb-init docker.io busybox \ + && rm -rf /var/lib/apt/lists/* + +COPY --chown=node:node .gitignore .gitignore +COPY --from=pruner --chown=node:node /app/out/json/ . 
+COPY --from=pruner --chown=node:node /app/out/pnpm-lock.yaml ./pnpm-lock.yaml +COPY --from=pruner --chown=node:node /app/out/pnpm-workspace.yaml ./pnpm-workspace.yaml + +FROM base AS dev-deps +RUN corepack enable +ENV NODE_ENV development + +RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store pnpm fetch --no-frozen-lockfile +RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store pnpm install --ignore-scripts --no-frozen-lockfile + +FROM base AS builder +RUN corepack enable + +COPY --from=pruner --chown=node:node /app/out/full/ . +COPY --from=dev-deps --chown=node:node /app/ . +COPY --chown=node:node turbo.json turbo.json + +RUN pnpm run -r --filter @trigger.dev/core bundle-vendor && pnpm run -r --filter coordinator build:bundle + +FROM alpine AS cri-tools + +WORKDIR /cri-tools + +ARG CRICTL_VERSION=v1.29.0 +ARG CRICTL_CHECKSUM=sha256:d16a1ffb3938f5a19d5c8f45d363bd091ef89c0bc4d44ad16b933eede32fdcbb +ADD --checksum=${CRICTL_CHECKSUM} \ + https://github.com/kubernetes-sigs/cri-tools/releases/download/${CRICTL_VERSION}/crictl-${CRICTL_VERSION}-linux-amd64.tar.gz . +RUN tar zxvf crictl-${CRICTL_VERSION}-linux-amd64.tar.gz + +FROM base AS runner + +RUN corepack enable +ENV NODE_ENV production + +COPY --from=cri-tools --chown=node:node /cri-tools/crictl /usr/local/bin +COPY --from=builder --chown=node:node /app/apps/coordinator/dist/index.mjs ./index.mjs + +EXPOSE 8000 + +CMD [ "/usr/bin/dumb-init", "--", "/usr/local/bin/node", "./index.mjs" ] diff --git a/apps/coordinator/README.md b/apps/coordinator/README.md new file mode 100644 index 00000000000..fa6da2462bd --- /dev/null +++ b/apps/coordinator/README.md @@ -0,0 +1,3 @@ +# Coordinator + +Sits between the platform and tasks. Facilitates communication and checkpointing, amongst other things. 
diff --git a/apps/coordinator/package.json b/apps/coordinator/package.json new file mode 100644 index 00000000000..3b4240bd37d --- /dev/null +++ b/apps/coordinator/package.json @@ -0,0 +1,30 @@ +{ + "name": "coordinator", + "private": true, + "version": "0.0.1", + "description": "", + "main": "dist/index.cjs", + "scripts": { + "build": "npm run build:bundle", + "build:bundle": "esbuild src/index.ts --bundle --outfile=dist/index.mjs --platform=node --format=esm --target=esnext --banner:js=\"import { createRequire } from 'module';const require = createRequire(import.meta.url);\"", + "build:image": "docker build -f Containerfile . -t coordinator", + "dev": "tsx --no-warnings=ExperimentalWarning --require dotenv/config --watch src/index.ts", + "start": "tsx src/index.ts", + "typecheck": "tsc --noEmit" + }, + "keywords": [], + "author": "", + "license": "MIT", + "dependencies": { + "@trigger.dev/core": "workspace:*", + "nanoid": "^5.0.6", + "prom-client": "^15.1.0", + "socket.io": "4.7.4", + "tinyexec": "^0.3.0" + }, + "devDependencies": { + "dotenv": "^16.4.2", + "esbuild": "^0.19.11", + "tsx": "^4.7.0" + } +} \ No newline at end of file diff --git a/apps/coordinator/src/chaosMonkey.ts b/apps/coordinator/src/chaosMonkey.ts new file mode 100644 index 00000000000..e2bc147674f --- /dev/null +++ b/apps/coordinator/src/chaosMonkey.ts @@ -0,0 +1,88 @@ +import { setTimeout as timeout } from "node:timers/promises"; + +class ChaosMonkeyError extends Error { + constructor(message: string) { + super(message); + this.name = "ChaosMonkeyError"; + } +} + +export class ChaosMonkey { + private chaosEventRate = 0.2; + private delayInSeconds = 45; + + constructor( + private enabled = false, + private disableErrors = false, + private disableDelays = false + ) { + if (this.enabled) { + console.log("🍌 Chaos monkey enabled"); + } + } + + static Error = ChaosMonkeyError; + + enable() { + this.enabled = true; + console.log("🍌 Chaos monkey enabled"); + } + + disable() { + this.enabled = false; 
+ console.log("🍌 Chaos monkey disabled"); + } + + async call({ + throwErrors = !this.disableErrors, + addDelays = !this.disableDelays, + }: { + throwErrors?: boolean; + addDelays?: boolean; + } = {}) { + if (!this.enabled) { + return; + } + + const random = Math.random(); + + if (random > this.chaosEventRate) { + // Don't interfere with normal operation + return; + } + + const chaosEvents: Array<() => Promise<void>> = []; + + if (addDelays) { + chaosEvents.push(async () => { + console.log("🍌 Chaos monkey: Add delay"); + + await timeout(this.delayInSeconds * 1000); + }); + } + + if (throwErrors) { + chaosEvents.push(async () => { + console.log("🍌 Chaos monkey: Throw error"); + + throw new ChaosMonkey.Error("🍌 Chaos monkey: Throw error"); + }); + } + + if (chaosEvents.length === 0) { + console.error("🍌 Chaos monkey: No events selected"); + return; + } + + const randomIndex = Math.floor(Math.random() * chaosEvents.length); + + const chaosEvent = chaosEvents[randomIndex]; + + if (!chaosEvent) { + console.error("🍌 Chaos monkey: No event found"); + return; + } + + await chaosEvent(); + } +} diff --git a/apps/coordinator/src/checkpointer.ts b/apps/coordinator/src/checkpointer.ts new file mode 100644 index 00000000000..b5d4b52a252 --- /dev/null +++ b/apps/coordinator/src/checkpointer.ts @@ -0,0 +1,708 @@ +import { ExponentialBackoff } from "@trigger.dev/core/v3/apps"; +import { testDockerCheckpoint } from "@trigger.dev/core/v3/serverOnly"; +import { nanoid } from "nanoid"; +import fs from "node:fs/promises"; +import { ChaosMonkey } from "./chaosMonkey"; +import { Buildah, Crictl, Exec } from "./exec"; +import { setTimeout } from "node:timers/promises"; +import { TempFileCleaner } from "./cleaner"; +import { numFromEnv, boolFromEnv } from "./util"; +import { SimpleStructuredLogger } from "@trigger.dev/core/v3/utils/structuredLogger"; + +type CheckpointerInitializeReturn = { + canCheckpoint: boolean; + willSimulate: boolean; +}; + +type CheckpointAndPushOptions = { + runId: 
string; + leaveRunning?: boolean; + projectRef: string; + deploymentVersion: string; + shouldHeartbeat?: boolean; + attemptNumber?: number; +}; + +type CheckpointAndPushResult = + | { success: true; checkpoint: CheckpointData } + | { + success: false; + reason?: "CANCELED" | "ERROR" | "SKIP_RETRYING"; + }; + +type CheckpointData = { + location: string; + docker: boolean; +}; + +type CheckpointerOptions = { + dockerMode: boolean; + forceSimulate: boolean; + heartbeat: (runId: string) => void; + registryHost?: string; + registryNamespace?: string; + registryTlsVerify?: boolean; + disableCheckpointSupport?: boolean; + checkpointPath?: string; + simulateCheckpointFailure?: boolean; + simulateCheckpointFailureSeconds?: number; + simulatePushFailure?: boolean; + simulatePushFailureSeconds?: number; + chaosMonkey?: ChaosMonkey; +}; + +async function getFileSize(filePath: string): Promise<number> { + try { + const stats = await fs.stat(filePath); + return stats.size; + } catch (error) { + console.error("Error getting file size:", error); + return -1; + } +} + +async function getParsedFileSize(filePath: string) { + const sizeInBytes = await getFileSize(filePath); + + let message = `Size in bytes: ${sizeInBytes}`; + + if (sizeInBytes > 1024 * 1024) { + const sizeInMB = (sizeInBytes / 1024 / 1024).toFixed(2); + message = `Size in MB (rounded): ${sizeInMB}`; + } else if (sizeInBytes > 1024) { + const sizeInKB = (sizeInBytes / 1024).toFixed(2); + message = `Size in KB (rounded): ${sizeInKB}`; + } + + return { + path: filePath, + sizeInBytes, + message, + }; +} + +export class Checkpointer { + #initialized = false; + #canCheckpoint = false; + #dockerMode: boolean; + + #logger = new SimpleStructuredLogger("checkpointer"); + + #failedCheckpoints = new Map<string, unknown>(); + + // Indexed by run ID + #runAbortControllers = new Map< + string, + { signal: AbortSignal; abort: AbortController["abort"] } + >(); + + private registryHost: string; + private registryNamespace: string; + private registryTlsVerify: 
boolean; + + private disableCheckpointSupport: boolean; + + private simulateCheckpointFailure: boolean; + private simulateCheckpointFailureSeconds: number; + private simulatePushFailure: boolean; + private simulatePushFailureSeconds: number; + + private chaosMonkey: ChaosMonkey; + private tmpCleaner?: TempFileCleaner; + + constructor(private opts: CheckpointerOptions) { + this.#dockerMode = opts.dockerMode; + + this.registryHost = opts.registryHost ?? "localhost:5000"; + this.registryNamespace = opts.registryNamespace ?? "trigger"; + this.registryTlsVerify = opts.registryTlsVerify ?? true; + + this.disableCheckpointSupport = opts.disableCheckpointSupport ?? false; + + this.simulateCheckpointFailure = opts.simulateCheckpointFailure ?? false; + this.simulateCheckpointFailureSeconds = opts.simulateCheckpointFailureSeconds ?? 300; + this.simulatePushFailure = opts.simulatePushFailure ?? false; + this.simulatePushFailureSeconds = opts.simulatePushFailureSeconds ?? 300; + + this.chaosMonkey = opts.chaosMonkey ?? new ChaosMonkey(!!process.env.CHAOS_MONKEY_ENABLED); + this.tmpCleaner = this.#createTmpCleaner(); + } + + async init(): Promise<CheckpointerInitializeReturn> { + if (this.#initialized) { + return this.#getInitReturn(this.#canCheckpoint); + } + + this.#logger.log(`${this.#dockerMode ? 
"Docker" : "Kubernetes"} mode`); + + if (this.#dockerMode) { + const testCheckpoint = await testDockerCheckpoint(); + + if (testCheckpoint.ok) { + return this.#getInitReturn(true); + } + + this.#logger.error(testCheckpoint.message, { error: testCheckpoint.error }); + return this.#getInitReturn(false); + } + + const canLogin = await Buildah.canLogin(this.registryHost); + + if (!canLogin) { + this.#logger.error(`No checkpoint support: Not logged in to registry ${this.registryHost}`); + } + + return this.#getInitReturn(canLogin); + } + + #getInitReturn(canCheckpoint: boolean): CheckpointerInitializeReturn { + this.#canCheckpoint = canCheckpoint; + + if (canCheckpoint) { + if (!this.#initialized) { + this.#logger.log("Full checkpoint support!"); + } + } + + this.#initialized = true; + + const willSimulate = this.#dockerMode && (!this.#canCheckpoint || this.opts.forceSimulate); + + if (willSimulate) { + this.#logger.log("Simulation mode enabled. Containers will be paused, not checkpointed.", { + forceSimulate: this.opts.forceSimulate, + }); + } + + return { + canCheckpoint, + willSimulate, + }; + } + + #getImageRef(projectRef: string, deploymentVersion: string, shortCode: string) { + return `${this.registryHost}/${this.registryNamespace}/${projectRef}:${deploymentVersion}.prod-${shortCode}`; + } + + #getExportLocation(projectRef: string, deploymentVersion: string, shortCode: string) { + const basename = `${projectRef}-${deploymentVersion}-${shortCode}`; + + if (this.#dockerMode) { + return basename; + } else { + return Crictl.getExportLocation(basename); + } + } + + async checkpointAndPush( + opts: CheckpointAndPushOptions, + delayMs?: number + ): Promise<CheckpointData | undefined> { + const start = performance.now(); + this.#logger.log(`checkpointAndPush() start`, { start, opts }); + + const { runId } = opts; + + let interval: NodeJS.Timer | undefined; + if (opts.shouldHeartbeat) { + interval = setInterval(() => { + this.#logger.log("Sending heartbeat", { runId }); + this.opts.heartbeat(runId); 
+ }, 20_000); + } + + const controller = new AbortController(); + const signal = controller.signal; + const abort = controller.abort.bind(controller); + + const onAbort = () => { + this.#logger.error("Checkpoint aborted", { runId, options: opts }); + }; + + signal.addEventListener("abort", onAbort, { once: true }); + + const removeCurrentAbortController = () => { + const controller = this.#runAbortControllers.get(runId); + + // Ensure only the current controller is removed + if (controller && controller.signal === signal) { + this.#runAbortControllers.delete(runId); + } + + // Remove the abort listener in case it hasn't fired + signal.removeEventListener("abort", onAbort); + }; + // The guards run inside the try so every early return reaches the finally, which stops the heartbeat interval and removes the abort listener + try { + if (!this.#dockerMode && !this.#canCheckpoint) { + this.#logger.error("No checkpoint support. Simulation requires docker."); + this.#failCheckpoint(runId, "NO_SUPPORT"); + return; + } + + if (this.#isRunCheckpointing(runId)) { + this.#logger.error("Checkpoint procedure already in progress", { options: opts }); + this.#failCheckpoint(runId, "IN_PROGRESS"); + return; + } + + // This is a new checkpoint, clear any last failure for this run + this.#clearFailedCheckpoint(runId); + + if (this.disableCheckpointSupport) { + this.#logger.error("Checkpoint support disabled", { options: opts }); + this.#failCheckpoint(runId, "DISABLED"); + return; + } + + this.#runAbortControllers.set(runId, { signal, abort }); + + const result = await this.#checkpointAndPushWithBackoff(opts, { delayMs, signal }); + + const end = performance.now(); + this.#logger.log(`checkpointAndPush() end`, { + start, + end, + diff: end - start, + diffWithoutDelay: end - start - (delayMs ?? 
0), + opts, + success: result.success, + delayMs, + }); + + if (!result.success) { + return; + } + + return result.checkpoint; + } finally { + if (opts.shouldHeartbeat) { + // @ts-ignore - Some kind of node incompatible type issue + clearInterval(interval); + } + removeCurrentAbortController(); + } + } + + #isRunCheckpointing(runId: string) { + return this.#runAbortControllers.has(runId); + } + + cancelAllCheckpointsForRun(runId: string): boolean { + this.#logger.log("cancelAllCheckpointsForRun: call", { runId }); + + // If the last checkpoint failed, pretend we canceled it + // This ensures tasks don't wait for external resume messages to continue + if (this.#hasFailedCheckpoint(runId)) { + this.#logger.log("cancelAllCheckpointsForRun: hasFailedCheckpoint", { runId }); + this.#clearFailedCheckpoint(runId); + return true; + } + + const controller = this.#runAbortControllers.get(runId); + + if (!controller) { + this.#logger.debug("cancelAllCheckpointsForRun: no abort controller", { runId }); + return false; + } + + const { abort, signal } = controller; + + if (signal.aborted) { + this.#logger.debug("cancelAllCheckpointsForRun: signal already aborted", { runId }); + return false; + } + + abort("cancelCheckpoint()"); + this.#runAbortControllers.delete(runId); + + return true; + } + + async #checkpointAndPushWithBackoff( + { + runId, + leaveRunning = true, // This mirrors kubernetes behaviour more accurately + projectRef, + deploymentVersion, + attemptNumber, + }: CheckpointAndPushOptions, + { delayMs, signal }: { delayMs?: number; signal: AbortSignal } + ): Promise<CheckpointAndPushResult> { + if (delayMs && delayMs > 0) { + this.#logger.log("Delaying checkpoint", { runId, delayMs }); + + try { + await setTimeout(delayMs, undefined, { signal }); + } catch (error) { + this.#logger.log("Checkpoint canceled during initial delay", { runId }); + return { success: false, reason: "CANCELED" }; + } + } + + this.#logger.log("Checkpointing with backoff", { + runId, + leaveRunning, + projectRef, + 
deploymentVersion, + }); + + const backoff = new ExponentialBackoff() + .type("EqualJitter") + .base(3) + .max(3 * 3600) + .maxElapsed(48 * 3600); + + for await (const { delay, retry } of backoff) { + try { + if (retry > 0) { + this.#logger.error("Retrying checkpoint", { + runId, + retry, + delay, + }); + + try { + await setTimeout(delay.milliseconds, undefined, { signal }); + } catch (error) { + this.#logger.log("Checkpoint canceled during retry delay", { runId }); + return { success: false, reason: "CANCELED" }; + } + } + + const result = await this.#checkpointAndPush( + { + runId, + leaveRunning, + projectRef, + deploymentVersion, + attemptNumber, + }, + { signal } + ); + + if (result.success) { + return result; + } + + if (result.reason === "CANCELED") { + this.#logger.log("Checkpoint canceled, won't retry", { runId }); + // Don't fail the checkpoint, as it was canceled + return result; + } + + if (result.reason === "SKIP_RETRYING") { + this.#logger.log("Skipping retrying", { runId }); + return result; + } + + continue; + } catch (error) { + this.#logger.error("Checkpoint error", { + retry, + runId, + delay, + error: error instanceof Error ? 
error.message : error, + }); + } + } + + this.#logger.error(`Checkpoint failed after exponential backoff`, { + runId, + leaveRunning, + projectRef, + deploymentVersion, + }); + this.#failCheckpoint(runId, "ERROR"); + + return { success: false, reason: "ERROR" }; + } + + async #checkpointAndPush( + { + runId, + leaveRunning = true, // This mirrors kubernetes behaviour more accurately + projectRef, + deploymentVersion, + attemptNumber, + }: CheckpointAndPushOptions, + { signal }: { signal: AbortSignal } + ): Promise<CheckpointAndPushResult> { + await this.init(); + + const options = { + runId, + leaveRunning, + projectRef, + deploymentVersion, + attemptNumber, + }; + + const shortCode = nanoid(8); + const imageRef = this.#getImageRef(projectRef, deploymentVersion, shortCode); + const exportLocation = this.#getExportLocation(projectRef, deploymentVersion, shortCode); + + const buildah = new Buildah({ id: `${runId}-${shortCode}`, abortSignal: signal }); + const crictl = new Crictl({ id: `${runId}-${shortCode}`, abortSignal: signal }); + + const cleanup = async () => { + const metadata = { + runId, + exportLocation, + imageRef, + }; + + if (this.#dockerMode) { + this.#logger.debug("Skipping cleanup in docker mode", metadata); + return; + } + + this.#logger.log("Cleaning up", metadata); + + try { + await buildah.cleanup(); + await crictl.cleanup(); + } catch (error) { + this.#logger.error("Error during cleanup", { ...metadata, error }); + } + }; + + try { + await this.chaosMonkey.call(); + + this.#logger.log("checkpointAndPush: checkpointing", { options }); + + const containterName = this.#getRunContainerName(runId); + + // Create checkpoint (docker) + if (this.#dockerMode) { + await this.#createDockerCheckpoint( + signal, + runId, + exportLocation, + leaveRunning, + attemptNumber + ); + + this.#logger.log("checkpointAndPush: checkpoint created", { + runId, + location: exportLocation, + }); + + return { + success: true, + checkpoint: { + location: exportLocation, + docker: true, + }, + }; + } + 
+ // Create checkpoint (CRI) + if (!this.#canCheckpoint) { + this.#logger.error("No checkpoint support in kubernetes mode."); + return { success: false, reason: "SKIP_RETRYING" }; + } + + const containerId = await crictl.ps(containterName, true); + + if (!containerId.stdout) { + this.#logger.error("could not find container id", { options, containterName }); + return { success: false, reason: "SKIP_RETRYING" }; + } + + const start = performance.now(); + + if (this.simulateCheckpointFailure) { + if (performance.now() < this.simulateCheckpointFailureSeconds * 1000) { + this.#logger.error("Simulating checkpoint failure", { options }); + throw new Error("SIMULATE_CHECKPOINT_FAILURE"); + } + } + + // Create checkpoint + await crictl.checkpoint(containerId.stdout, exportLocation); + const postCheckpoint = performance.now(); + + // Print checkpoint size + const size = await getParsedFileSize(exportLocation); + this.#logger.log("checkpoint archive created", { size, options }); + + // Create image from checkpoint + const workingContainer = await buildah.from("scratch"); + const postFrom = performance.now(); + + await buildah.add(workingContainer.stdout, exportLocation, "/"); + const postAdd = performance.now(); + + await buildah.config(workingContainer.stdout, [ + `io.kubernetes.cri-o.annotations.checkpoint.name=${shortCode}`, + ]); + const postConfig = performance.now(); + + await buildah.commit(workingContainer.stdout, imageRef); + const postCommit = performance.now(); + + if (this.simulatePushFailure) { + if (performance.now() < this.simulatePushFailureSeconds * 1000) { + this.#logger.error("Simulating push failure", { options }); + throw new Error("SIMULATE_PUSH_FAILURE"); + } + } + + // Push checkpoint image + await buildah.push(imageRef, this.registryTlsVerify); + const postPush = performance.now(); + + const perf = { + "crictl checkpoint": postCheckpoint - start, + "buildah from": postFrom - postCheckpoint, + "buildah add": postAdd - postFrom, + "buildah config": 
postConfig - postAdd, + "buildah commit": postCommit - postConfig, + "buildah push": postPush - postCommit, + }; + + this.#logger.log("Checkpointed and pushed image to:", { location: imageRef, perf }); + + return { + success: true, + checkpoint: { + location: imageRef, + docker: false, + }, + }; + } catch (error) { + if (error instanceof Exec.Result) { + if (error.aborted) { + this.#logger.error("Checkpoint canceled: Exec", { options }); + + return { success: false, reason: "CANCELED" }; + } else { + this.#logger.error("Checkpoint command error", { options, error }); + + return { success: false, reason: "ERROR" }; + } + } + + this.#logger.error("Unhandled checkpoint error", { + options, + error: error instanceof Error ? error.message : error, + }); + + return { success: false, reason: "ERROR" }; + } finally { + await cleanup(); + + if (signal.aborted) { + this.#logger.error("Checkpoint canceled: Cleanup", { options }); + + // Overrides any prior return value + return { success: false, reason: "CANCELED" }; + } + } + } + + async unpause(runId: string, attemptNumber?: number): Promise<void> { + try { + const containterNameWithAttempt = this.#getRunContainerName(runId, attemptNumber); + const exec = new Exec({ logger: this.#logger }); + await exec.x("docker", ["unpause", containterNameWithAttempt]); + } catch (error) { + this.#logger.error("[Docker] Error during unpause", { runId, attemptNumber, error }); + } + } + + async #createDockerCheckpoint( + abortSignal: AbortSignal, + runId: string, + exportLocation: string, + leaveRunning: boolean, + attemptNumber?: number + ) { + const containterNameWithAttempt = this.#getRunContainerName(runId, attemptNumber); + const exec = new Exec({ logger: this.#logger, abortSignal }); + + try { + if (this.opts.forceSimulate || !this.#canCheckpoint) { + this.#logger.log("Simulating checkpoint"); + + await exec.x("docker", ["pause", containterNameWithAttempt]); + + return; + } + + if (this.simulateCheckpointFailure) { + if (performance.now() 
< this.simulateCheckpointFailureSeconds * 1000) { + this.#logger.error("Simulating checkpoint failure", { + runId, + exportLocation, + leaveRunning, + attemptNumber, + }); + + throw new Error("SIMULATE_CHECKPOINT_FAILURE"); + } + } + + const args = ["checkpoint", "create"]; + + if (leaveRunning) { + args.push("--leave-running"); + } + + args.push(containterNameWithAttempt, exportLocation); + + await exec.x("docker", args); + } catch (error) { + this.#logger.error("Failed while creating docker checkpoint", { exportLocation }); + throw error; + } + } + + #failCheckpoint(runId: string, error: unknown) { + this.#failedCheckpoints.set(runId, error); + } + + #clearFailedCheckpoint(runId: string) { + this.#failedCheckpoints.delete(runId); + } + + #hasFailedCheckpoint(runId: string) { + return this.#failedCheckpoints.has(runId); + } + + #getRunContainerName(suffix: string, attemptNumber?: number) { + return `task-run-${suffix}${attemptNumber && attemptNumber > 1 ? `-att${attemptNumber}` : ""}`; + } + + #createTmpCleaner() { + if (!boolFromEnv("TMP_CLEANER_ENABLED", false)) { + return; + } + + const defaultPaths = [Buildah.tmpDir, Crictl.checkpointDir].filter(Boolean); + const pathsOverride = process.env.TMP_CLEANER_PATHS_OVERRIDE?.split(",").filter(Boolean) ?? []; + const paths = pathsOverride.length ? 
pathsOverride : defaultPaths; + + if (paths.length === 0) { + this.#logger.error("TempFileCleaner enabled but no paths to clean", { + defaultPaths, + pathsOverride, + TMP_CLEANER_PATHS_OVERRIDE: process.env.TMP_CLEANER_PATHS_OVERRIDE, + }); + + return; + } + const cleaner = new TempFileCleaner({ + paths, + maxAgeMinutes: numFromEnv("TMP_CLEANER_MAX_AGE_MINUTES", 60), + intervalSeconds: numFromEnv("TMP_CLEANER_INTERVAL_SECONDS", 300), + leadingEdge: boolFromEnv("TMP_CLEANER_LEADING_EDGE", false), + }); + + cleaner.start(); + + return cleaner; + } +} diff --git a/apps/coordinator/src/cleaner.ts b/apps/coordinator/src/cleaner.ts new file mode 100644 index 00000000000..58cfd24bb70 --- /dev/null +++ b/apps/coordinator/src/cleaner.ts @@ -0,0 +1,106 @@ +import { SimpleStructuredLogger } from "@trigger.dev/core/v3/utils/structuredLogger"; +import { Exec } from "./exec"; +import { setTimeout } from "timers/promises"; + +interface TempFileCleanerOptions { + paths: string[]; + maxAgeMinutes: number; + intervalSeconds: number; + leadingEdge?: boolean; +} + +export class TempFileCleaner { + private enabled = false; + + private logger: SimpleStructuredLogger; + private exec: Exec; + + constructor(private opts: TempFileCleanerOptions) { + this.logger = new SimpleStructuredLogger("tmp-cleaner", undefined, { ...this.opts }); + this.exec = new Exec({ logger: this.logger }); + } + + async start() { + this.logger.log("TempFileCleaner.start"); + this.enabled = true; + + if (!this.opts.leadingEdge) { + await this.wait(); + } + + while (this.enabled) { + try { + await this.clean(); + } catch (error) { + this.logger.error("error during tick", { error }); + } + + await this.wait(); + } + } + + stop() { + this.logger.log("TempFileCleaner.stop"); + this.enabled = false; + } + + private wait() { + return setTimeout(this.opts.intervalSeconds * 1000); + } + + private async clean() { + for (const path of this.opts.paths) { + try { + await this.cleanSingle(path); + } catch (error) { + 
this.logger.error("error while cleaning", { path, error }); + } + } + } + + private async cleanSingle(startingPoint: string) { + const maxAgeMinutes = this.opts.maxAgeMinutes; + + const ignoreStartingPoint = ["!", "-path", startingPoint]; + const onlyDirectDescendants = ["-maxdepth", "1"]; + const onlyOldFiles = ["-mmin", `+${maxAgeMinutes}`]; + + const baseArgs = [ + startingPoint, + ...ignoreStartingPoint, + ...onlyDirectDescendants, + ...onlyOldFiles, + ]; + + const duArgs = ["-exec", "du", "-ch", "{}", "+"]; + const rmArgs = ["-exec", "rm", "-rf", "{}", "+"]; + + const du = this.x("find", [...baseArgs, ...duArgs]); + const duOutput = await du; + + const duLines = duOutput.stdout.trim().split("\n"); + const fileCount = duLines.length - 1; // last line is the total + const fileSize = duLines.at(-1)?.trim().split(/\s+/)[0]; + + if (fileCount === 0) { + this.logger.log("nothing to delete", { startingPoint, maxAgeMinutes }); + return; + } + + this.logger.log("deleting old files", { fileCount, fileSize, startingPoint, maxAgeMinutes }); + + const rm = this.x("find", [...baseArgs, ...rmArgs]); + const rmOutput = await rm; + + if (rmOutput.stderr.length > 0) { + this.logger.error("delete unsuccessful", { rmOutput }); + return; + } + + this.logger.log("deleted old files", { fileCount, fileSize, startingPoint, maxAgeMinutes }); + } + + private get x() { + return this.exec.x.bind(this.exec); + } +} diff --git a/apps/coordinator/src/exec.ts b/apps/coordinator/src/exec.ts new file mode 100644 index 00000000000..b905723c0f8 --- /dev/null +++ b/apps/coordinator/src/exec.ts @@ -0,0 +1,293 @@ +import { SimpleStructuredLogger } from "@trigger.dev/core/v3/utils/structuredLogger"; +import { randomUUID } from "crypto"; +import { homedir } from "os"; +import { type Result, x } from "tinyexec"; + +class TinyResult { + pid?: number; + exitCode?: number; + aborted: boolean; + killed: boolean; + + constructor(result: Result) { + this.pid = result.pid; + this.exitCode = result.exitCode; + 
this.aborted = result.aborted; + this.killed = result.killed; + } +} + +interface ExecOptions { + logger?: SimpleStructuredLogger; + abortSignal?: AbortSignal; + logOutput?: boolean; + trimArgs?: boolean; + neverThrow?: boolean; +} + +export class Exec { + private logger: SimpleStructuredLogger; + private abortSignal: AbortSignal | undefined; + + private logOutput: boolean; + private trimArgs: boolean; + private neverThrow: boolean; + + constructor(opts: ExecOptions) { + this.logger = opts.logger ?? new SimpleStructuredLogger("exec"); + this.abortSignal = opts.abortSignal; + + this.logOutput = opts.logOutput ?? true; + this.trimArgs = opts.trimArgs ?? true; + this.neverThrow = opts.neverThrow ?? false; + } + + async x( + command: string, + args?: string[], + opts?: { neverThrow?: boolean; ignoreAbort?: boolean } + ) { + const argsTrimmed = this.trimArgs ? args?.map((arg) => arg.trim()) : args; + + const commandWithFirstArg = `${command}${argsTrimmed?.length ? ` ${argsTrimmed[0]}` : ""}`; + this.logger.debug(`exec: ${commandWithFirstArg}`, { command, args, argsTrimmed }); + + const result = x(command, argsTrimmed, { + signal: opts?.ignoreAbort ? 
undefined : this.abortSignal, + // We don't use this as it doesn't cover killed and aborted processes + // throwOnError: true, + }); + + const output = await result; + + const metadata = { + command, + argsRaw: args, + argsTrimmed, + globalOpts: { + trimArgs: this.trimArgs, + neverThrow: this.neverThrow, + hasAbortSignal: !!this.abortSignal, + }, + localOpts: opts, + stdout: output.stdout, + stderr: output.stderr, + pid: result.pid, + exitCode: result.exitCode, + aborted: result.aborted, + killed: result.killed, + }; + + if (this.logOutput) { + this.logger.debug(`output: ${commandWithFirstArg}`, metadata); + } + + if (this.neverThrow || opts?.neverThrow) { + return output; + } + + if (result.aborted) { + this.logger.error(`aborted: ${commandWithFirstArg}`, metadata); + throw new TinyResult(result); + } + + if (result.killed) { + this.logger.error(`killed: ${commandWithFirstArg}`, metadata); + throw new TinyResult(result); + } + + if (result.exitCode !== 0) { + this.logger.error(`non-zero exit: ${commandWithFirstArg}`, metadata); + throw new TinyResult(result); + } + + return output; + } + + static Result = TinyResult; +} + +interface BuildahOptions { + id?: string; + abortSignal?: AbortSignal; +} + +export class Buildah { + private id: string; + private logger: SimpleStructuredLogger; + private exec: Exec; + + private containers = new Set<string>(); + private images = new Set<string>(); + + constructor(opts: BuildahOptions) { + this.id = opts.id ?? 
randomUUID(); + this.logger = new SimpleStructuredLogger("buildah", undefined, { id: this.id }); + + this.exec = new Exec({ + logger: this.logger, + abortSignal: opts.abortSignal, + }); + + this.logger.log("initiaized", { opts }); + } + + private get x() { + return this.exec.x.bind(this.exec); + } + + async from(baseImage: string) { + const output = await this.x("buildah", ["from", baseImage]); + this.containers.add(output.stdout); + return output; + } + + async add(container: string, src: string, dest: string) { + return await this.x("buildah", ["add", container, src, dest]); + } + + async config(container: string, annotations: string[]) { + const args = ["config"]; + + for (const annotation of annotations) { + args.push(`--annotation=${annotation}`); + } + + args.push(container); + + return await this.x("buildah", args); + } + + async commit(container: string, imageRef: string) { + const output = await this.x("buildah", ["commit", container, imageRef]); + this.images.add(output.stdout); + return output; + } + + async push(imageRef: string, registryTlsVerify?: boolean) { + return await this.x("buildah", [ + "push", + `--tls-verify=${String(!!registryTlsVerify)}`, + imageRef, + ]); + } + + async cleanup() { + if (this.containers.size > 0) { + try { + const output = await this.x("buildah", ["rm", ...this.containers], { ignoreAbort: true }); + this.containers.clear(); + + if (output.stderr.length > 0) { + this.logger.error("failed to remove some containers", { output }); + } + } catch (error) { + this.logger.error("failed to clean up containers", { error, containers: this.containers }); + } + } else { + this.logger.debug("no containers to clean up"); + } + + if (this.images.size > 0) { + try { + const output = await this.x("buildah", ["rmi", ...this.images], { ignoreAbort: true }); + this.images.clear(); + + if (output.stderr.length > 0) { + this.logger.error("failed to remove some images", { output }); + } + } catch (error) { + this.logger.error("failed to clean up 
images", { error, images: this.images }); + } + } else { + this.logger.debug("no images to clean up"); + } + } + + static async canLogin(registryHost: string) { + try { + await x("buildah", ["login", "--get-login", registryHost], { throwOnError: true }); + return true; + } catch (error) { + return false; + } + } + + static get tmpDir() { + return process.env.TMPDIR ?? "/var/tmp"; + } + + static get storageRootDir() { + return process.getuid?.() === 0 + ? "/var/lib/containers/storage" + : `${homedir()}/.local/share/containers/storage`; + } +} + +interface CrictlOptions { + id?: string; + abortSignal?: AbortSignal; +} + +export class Crictl { + private id: string; + private logger: SimpleStructuredLogger; + private exec: Exec; + + private archives = new Set<string>(); + + constructor(opts: CrictlOptions) { + this.id = opts.id ?? randomUUID(); + this.logger = new SimpleStructuredLogger("crictl", undefined, { id: this.id }); + + this.exec = new Exec({ + logger: this.logger, + abortSignal: opts.abortSignal, + }); + + this.logger.log("initiaized", { opts }); + } + + private get x() { + return this.exec.x.bind(this.exec); + } + + async ps(containerName: string, quiet?: boolean) { + return await this.x("crictl", ["ps", "--name", containerName, quiet ? 
"--quiet" : ""]); + } + + async checkpoint(containerId: string, exportLocation: string) { + const output = await this.x("crictl", [ + "checkpoint", + `--export=${exportLocation}`, + containerId, + ]); + this.archives.add(exportLocation); + return output; + } + + async cleanup() { + if (this.archives.size > 0) { + try { + const output = await this.x("rm", ["-v", ...this.archives], { ignoreAbort: true }); + this.archives.clear(); + + if (output.stderr.length > 0) { + this.logger.error("failed to remove some archives", { output }); + } + } catch (error) { + this.logger.error("failed to clean up archives", { error, archives: this.archives }); + } + } else { + this.logger.debug("no archives to clean up"); + } + } + + static getExportLocation(identifier: string) { + return `${this.checkpointDir}/${identifier}.tar`; + } + + static get checkpointDir() { + return process.env.CRI_CHECKPOINT_DIR ?? "/checkpoints"; + } +} diff --git a/apps/coordinator/src/index.ts b/apps/coordinator/src/index.ts new file mode 100644 index 00000000000..815012fe048 --- /dev/null +++ b/apps/coordinator/src/index.ts @@ -0,0 +1,1781 @@ +import { createServer } from "node:http"; +import { Server } from "socket.io"; +import { + CoordinatorToPlatformMessages, + CoordinatorToProdWorkerMessages, + omit, + PlatformToCoordinatorMessages, + ProdWorkerSocketData, + ProdWorkerToCoordinatorMessages, + WaitReason, +} from "@trigger.dev/core/v3"; +import { ZodNamespace } from "@trigger.dev/core/v3/zodNamespace"; +import { ZodSocketConnection } from "@trigger.dev/core/v3/zodSocket"; +import { ExponentialBackoff, HttpReply, getTextBody } from "@trigger.dev/core/v3/apps"; +import { ChaosMonkey } from "./chaosMonkey"; +import { Checkpointer } from "./checkpointer"; +import { boolFromEnv, numFromEnv, safeJsonParse } from "./util"; + +import { collectDefaultMetrics, register, Gauge } from "prom-client"; +import { SimpleStructuredLogger } from "@trigger.dev/core/v3/utils/structuredLogger"; +collectDefaultMetrics(); + 
+const HTTP_SERVER_PORT = Number(process.env.HTTP_SERVER_PORT || 8020); +const NODE_NAME = process.env.NODE_NAME || "coordinator"; /* passed to the root logger below as `nodeName` */ +const DEFAULT_RETRY_DELAY_THRESHOLD_IN_MS = 30_000; /* initial value of #delayThresholdInMs; overwritten when a DYNAMIC_CONFIG message arrives */ + +const PLATFORM_ENABLED = ["1", "true"].includes(process.env.PLATFORM_ENABLED ?? "true"); /* on by default; any value other than "1" or "true" disables the platform connection */ +const PLATFORM_HOST = process.env.PLATFORM_HOST || "127.0.0.1"; +const PLATFORM_WS_PORT = process.env.PLATFORM_WS_PORT || 3030; /* string from the environment or the number 3030; converted with Number() when connecting */ +const PLATFORM_SECRET = process.env.PLATFORM_SECRET || "coordinator-secret"; /* used as the platform socket authToken */ +const SECURE_CONNECTION = ["1", "true"].includes(process.env.SECURE_CONNECTION ?? "false"); /* off by default; only "1" or "true" turn it on */ + /* For the parseInt-based settings below, an unset, non-numeric or zero value falls back to the default after `||`. */ +const TASK_RUN_COMPLETED_WITH_ACK_TIMEOUT_MS = + parseInt(process.env.TASK_RUN_COMPLETED_WITH_ACK_TIMEOUT_MS || "") || 30_000; +const TASK_RUN_COMPLETED_WITH_ACK_MAX_RETRIES = + parseInt(process.env.TASK_RUN_COMPLETED_WITH_ACK_MAX_RETRIES || "") || 7; + /* Passed as the second argument of checkpointAndPush for WAIT_FOR_TASK and WAIT_FOR_BATCH respectively. */ +const WAIT_FOR_TASK_CHECKPOINT_DELAY_MS = + parseInt(process.env.WAIT_FOR_TASK_CHECKPOINT_DELAY_MS || "") || 0; +const WAIT_FOR_BATCH_CHECKPOINT_DELAY_MS = + parseInt(process.env.WAIT_FOR_BATCH_CHECKPOINT_DELAY_MS || "") || 0; + +const logger = new SimpleStructuredLogger("coordinator", undefined, { nodeName: NODE_NAME }); /* NOTE(review): the three flags below use `!!`, so any non-empty value (including "false" or "0") counts as true, unlike the boolFromEnv settings used for the checkpointer; confirm this is intended */ +const chaosMonkey = new ChaosMonkey( + !!process.env.CHAOS_MONKEY_ENABLED, + !!process.env.CHAOS_MONKEY_DISABLE_ERRORS, + !!process.env.CHAOS_MONKEY_DISABLE_DELAYS +); + /* Rejection reasons for the checkpoint-readiness promise: readyToCheckpoint reports the first as "timeout" and the second as "canceled". */ +class CheckpointReadinessTimeoutError extends Error {} +class CheckpointCancelError extends Error {} + /** Holds the platform socket, the prod-worker socket namespace and the checkpointer, and passes messages between the platform and connected worker sockets. */ +class TaskCoordinator { + #httpServer: ReturnType; + #internalHttpServer: ReturnType; + /* Checkpointer configured from the environment: dockerMode is true when KUBERNETES_PORT is unset, and its heartbeat callback is bound to #sendRunHeartbeat. */ + #checkpointer = new Checkpointer({ + dockerMode: !process.env.KUBERNETES_PORT, + forceSimulate: boolFromEnv("FORCE_CHECKPOINT_SIMULATION", false), + heartbeat: this.#sendRunHeartbeat.bind(this), + registryHost: process.env.REGISTRY_HOST, + registryNamespace: process.env.REGISTRY_NAMESPACE, + registryTlsVerify: boolFromEnv("REGISTRY_TLS_VERIFY", true), + disableCheckpointSupport: boolFromEnv("DISABLE_CHECKPOINT_SUPPORT", false), + simulatePushFailure: 
boolFromEnv("SIMULATE_PUSH_FAILURE", false), + simulatePushFailureSeconds: numFromEnv("SIMULATE_PUSH_FAILURE_SECONDS", 300), + simulateCheckpointFailure: boolFromEnv("SIMULATE_CHECKPOINT_FAILURE", false), + simulateCheckpointFailureSeconds: numFromEnv("SIMULATE_CHECKPOINT_FAILURE_SECONDS", 300), + chaosMonkey, + }); + /* Stays undefined until the first DYNAMIC_CONFIG message from the platform creates it. */ + #prodWorkerNamespace?: ZodNamespace< + typeof ProdWorkerToCoordinatorMessages, + typeof CoordinatorToProdWorkerMessages, + typeof ProdWorkerSocketData + >; /* Undefined when PLATFORM_ENABLED is false. */ + #platformSocket?: ZodSocketConnection< + typeof CoordinatorToPlatformMessages, + typeof PlatformToCoordinatorMessages + >; + /* Keyed by run id: the resolve/reject pair of the readiness promise a run is currently waiting on before it can be checkpointed. */ + #checkpointableTasks = new Map< + string, + { resolve: (value: void) => void; reject: (err?: any) => void } + >(); + /* Failed runs whose retry delay is below this threshold are completed without a checkpoint. */ + #delayThresholdInMs: number = DEFAULT_RETRY_DELAY_THRESHOLD_IN_MS; + /** Creates both HTTP servers, starts checkpointer initialisation, opens the platform socket and registers the connected-tasks gauge. NOTE(review): `init()` is awaited elsewhere in this class but not here, so a rejection at this call site would go unhandled; confirm this is acceptable. */ + constructor( + private port: number, + private host = "0.0.0.0" + ) { + this.#httpServer = this.#createHttpServer(); + this.#internalHttpServer = this.#createInternalHttpServer(); + + this.#checkpointer.init(); + this.#platformSocket = this.#createPlatformSocket(); + + const connectedTasksTotal = new Gauge({ + name: "daemon_connected_tasks_total", // don't change this without updating dashboard config + help: "The number of tasks currently connected.", + collect: () => { + connectedTasksTotal.set(this.#prodWorkerNamespace?.namespace.sockets.size ?? 
0); + }, + }); + register.registerMetric(connectedTasksTotal); + } + + /** + * Ensures no extra header value is undefined before the headers are handed to the platform socket. + * + * @returns the same `headers` object, unchanged + * @throws Error naming the first header whose value is undefined + */ + #returnValidatedExtraHeaders(headers: Record) { + for (const [key, value] of Object.entries(headers)) { + if (value === undefined) { + throw new Error(`Extra header is undefined: ${key}`); + } + } + + return headers; + } + + // MARK: SOCKET: PLATFORM + /** + * Opens the socket connection to the platform and registers the handlers for platform-to-coordinator messages. + * Most handlers look up the worker socket for the run or attempt named in the message and forward the message to it; DYNAMIC_CONFIG updates the checkpoint delay threshold and creates the prod-worker namespace the first time it is received. + * Every handler logs through its own `log` child so that entries carry the event name and message fields. + * + * @returns the connection, or undefined when PLATFORM_ENABLED is false + */ + #createPlatformSocket() { + if (!PLATFORM_ENABLED) { + logger.log("INFO: platform connection disabled"); + return; + } + + const extraHeaders = this.#returnValidatedExtraHeaders({ + "x-supports-dynamic-config": "yes", + }); + + const host = PLATFORM_HOST; + const port = Number(PLATFORM_WS_PORT); + + const platformLogger = new SimpleStructuredLogger("socket-platform", undefined, { + namespace: "coordinator", + }); + + platformLogger.log("connecting", { host, port }); + platformLogger.debug("connecting with extra headers", { extraHeaders }); + + const platformConnection = new ZodSocketConnection({ + namespace: "coordinator", + host, + port, + secure: SECURE_CONNECTION, + extraHeaders, + clientMessages: CoordinatorToPlatformMessages, + serverMessages: PlatformToCoordinatorMessages, + authToken: PLATFORM_SECRET, + logHandlerPayloads: false, + handlers: { + // This is used by resumeAttempt + RESUME_AFTER_DEPENDENCY: async (message) => { + const log = platformLogger.child({ + eventName: "RESUME_AFTER_DEPENDENCY", + ...omit(message, "completions", "executions"), + completions: message.completions.map((c) => ({ + id: c.id, + ok: c.ok, + })), + executions: message.executions.length, + }); + + log.log("Handling RESUME_AFTER_DEPENDENCY"); + + const taskSocket = await this.#getAttemptSocket(message.attemptFriendlyId); + + if (!taskSocket) { + log.debug("Socket for attempt not found"); + return; + } + + log.addFields({ socketId: taskSocket.id, socketData: taskSocket.data }); + log.log("Found task socket for RESUME_AFTER_DEPENDENCY"); + + await chaosMonkey.call(); + + // In case the task resumes before the checkpoint is created + 
this.#cancelCheckpoint(message.runId, { + event: "RESUME_AFTER_DEPENDENCY", + completions: message.completions.length, + }); + + taskSocket.emit("RESUME_AFTER_DEPENDENCY", message); + }, + // This is used by sharedQueueConsumer + RESUME_AFTER_DEPENDENCY_WITH_ACK: async (message) => { + const log = platformLogger.child({ + eventName: "RESUME_AFTER_DEPENDENCY_WITH_ACK", + ...omit(message, "completions", "executions"), + completions: message.completions.map((c) => ({ + id: c.id, + ok: c.ok, + })), + executions: message.executions.length, + }); + + log.log("Handling RESUME_AFTER_DEPENDENCY_WITH_ACK"); + + const taskSocket = await this.#getAttemptSocket(message.attemptFriendlyId); + + if (!taskSocket) { + log.debug("Socket for attempt not found"); + return { + success: false, + error: { + name: "SocketNotFoundError", + message: "Socket for attempt not found", + }, + }; + } + + log.addFields({ socketId: taskSocket.id, socketData: taskSocket.data }); + log.log("Found task socket for RESUME_AFTER_DEPENDENCY_WITH_ACK"); + + //if this is set, we want to kill the process because it will be resumed with the checkpoint from the queue + if (taskSocket.data.requiresCheckpointResumeWithMessage) { + log.log("RESUME_AFTER_DEPENDENCY_WITH_ACK: Checkpoint is set so going to nack"); + + return { + success: false, + error: { + name: "CheckpointMessagePresentError", + message: + "Checkpoint message is present, so we need to kill the process and resume from the queue.", + }, + }; + } + + await chaosMonkey.call(); + + // In case the task resumes before the checkpoint is created + this.#cancelCheckpoint(message.runId, { + event: "RESUME_AFTER_DEPENDENCY_WITH_ACK", + completions: message.completions.length, + }); + + taskSocket.emit("RESUME_AFTER_DEPENDENCY", message); + + return { + success: true, + }; + }, + RESUME_AFTER_DURATION: async (message) => { + const log = platformLogger.child({ + eventName: "RESUME_AFTER_DURATION", + ...message, + }); + + log.log("Handling 
RESUME_AFTER_DURATION"); + + const taskSocket = await this.#getAttemptSocket(message.attemptFriendlyId); + + if (!taskSocket) { + log.debug("Socket for attempt not found"); + return; + } + + log.addFields({ socketId: taskSocket.id, socketData: taskSocket.data }); + log.log("Found task socket for RESUME_AFTER_DURATION"); + + await chaosMonkey.call(); + + taskSocket.emit("RESUME_AFTER_DURATION", message); + }, + REQUEST_ATTEMPT_CANCELLATION: async (message) => { + const log = platformLogger.child({ + eventName: "REQUEST_ATTEMPT_CANCELLATION", + ...message, + }); + + log.log("Handling REQUEST_ATTEMPT_CANCELLATION"); + + const taskSocket = await this.#getAttemptSocket(message.attemptFriendlyId); + + if (!taskSocket) { + log.debug("Socket for attempt not found"); + return; + } + + log.addFields({ socketId: taskSocket.id, socketData: taskSocket.data }); + log.log("Found task socket for REQUEST_ATTEMPT_CANCELLATION"); + + taskSocket.emit("REQUEST_ATTEMPT_CANCELLATION", message); + }, + REQUEST_RUN_CANCELLATION: async (message) => { + const log = platformLogger.child({ + eventName: "REQUEST_RUN_CANCELLATION", + ...message, + }); + + log.log("Handling REQUEST_RUN_CANCELLATION"); + + const taskSocket = await this.#getRunSocket(message.runId); + + if (!taskSocket) { + log.debug("Socket for run not found"); + return; + } + + log.addFields({ socketId: taskSocket.id, socketData: taskSocket.data }); + log.log("Found task socket for REQUEST_RUN_CANCELLATION"); + + this.#cancelCheckpoint(message.runId, { event: "REQUEST_RUN_CANCELLATION", ...message }); + + if (message.delayInMs) { + taskSocket.emit("REQUEST_EXIT", { + version: "v2", + delayInMs: message.delayInMs, + }); + } else { + // If there's no delay, assume the worker doesn't support non-v1 messages + taskSocket.emit("REQUEST_EXIT", { + version: "v1", + }); + } + }, + READY_FOR_RETRY: async (message) => { + const log = platformLogger.child({ + eventName: "READY_FOR_RETRY", + ...message, + }); + + log.log("Handling READY_FOR_RETRY"); + + const taskSocket = 
await this.#getRunSocket(message.runId); + + if (!taskSocket) { + log.debug("Socket for run not found"); + return; + } + + log.addFields({ socketId: taskSocket.id, socketData: taskSocket.data }); + log.log("Found task socket for READY_FOR_RETRY"); + + await chaosMonkey.call(); + + taskSocket.emit("READY_FOR_RETRY", message); + }, + DYNAMIC_CONFIG: async (message) => { + const log = platformLogger.child({ + eventName: "DYNAMIC_CONFIG", + ...message, + }); + + log.log("Handling DYNAMIC_CONFIG"); + + this.#delayThresholdInMs = message.checkpointThresholdInMs; + + // The first time we receive a dynamic config, the worker namespace will be created + if (!this.#prodWorkerNamespace) { + const io = new Server(this.#httpServer); + this.#prodWorkerNamespace = this.#createProdWorkerNamespace(io); + } + }, + }, + }); + + return platformConnection; + } + + /** + * Finds the connected worker socket whose `data.runId` equals `runId`. + * + * @returns the first matching socket, or undefined when none matches or the worker namespace has not been created yet + */ + async #getRunSocket(runId: string) { + const sockets = (await this.#prodWorkerNamespace?.fetchSockets()) ?? []; + + for (const socket of sockets) { + if (socket.data.runId === runId) { + return socket; + } + } + } + + /** + * Finds the connected worker socket whose `data.attemptFriendlyId` equals `attemptFriendlyId`. + * + * @returns the first matching socket, or undefined when none matches or the worker namespace has not been created yet + */ + async #getAttemptSocket(attemptFriendlyId: string) { + const sockets = (await this.#prodWorkerNamespace?.fetchSockets()) ?? []; + + for (const socket of sockets) { + if (socket.data.attemptFriendlyId === attemptFriendlyId) { + return socket; + } + } + } + + // MARK: SOCKET: WORKERS + #createProdWorkerNamespace(io: Server) { + const provider = new ZodNamespace({ + io, + name: "prod-worker", + clientMessages: ProdWorkerToCoordinatorMessages, + serverMessages: CoordinatorToProdWorkerMessages, + socketData: ProdWorkerSocketData, + postAuth: async (socket, next, logger) => { + function setSocketDataFromHeader( + dataKey: keyof typeof socket.data, + headerName: string, + required: boolean = true + ) { + const value = socket.handshake.headers[headerName]; + + if (value) { + socket.data[dataKey] = Array.isArray(value) ? 
value[0] : value; + return; + } + + if (required) { + logger.error("missing required header", { headerName }); + throw new Error("missing header"); + } + } + + try { + setSocketDataFromHeader("podName", "x-pod-name"); + setSocketDataFromHeader("contentHash", "x-trigger-content-hash"); + setSocketDataFromHeader("projectRef", "x-trigger-project-ref"); + setSocketDataFromHeader("runId", "x-trigger-run-id"); + setSocketDataFromHeader("attemptFriendlyId", "x-trigger-attempt-friendly-id", false); + setSocketDataFromHeader("attemptNumber", "x-trigger-attempt-number", false); + setSocketDataFromHeader("envId", "x-trigger-env-id"); + setSocketDataFromHeader("deploymentId", "x-trigger-deployment-id"); + setSocketDataFromHeader("deploymentVersion", "x-trigger-deployment-version"); + } catch (error) { + logger.error("setSocketDataFromHeader error", { error }); + socket.disconnect(true); + return; + } + + logger.debug("success", socket.data); + + next(); + }, + onConnection: async (socket, handler, sender) => { + const logger = new SimpleStructuredLogger("ns-prod-worker", undefined, { + namespace: "prod-worker", + socketId: socket.id, + socketData: socket.data, + }); + + const getSocketMetadata = () => { + return { + attemptFriendlyId: socket.data.attemptFriendlyId, + attemptNumber: socket.data.attemptNumber, + requiresCheckpointResumeWithMessage: socket.data.requiresCheckpointResumeWithMessage, + }; + }; + + const getAttemptNumber = () => { + return socket.data.attemptNumber ? 
parseInt(socket.data.attemptNumber) : undefined; + }; + + const exitRun = () => { + logger.log("exitRun", getSocketMetadata()); + + socket.emit("REQUEST_EXIT", { + version: "v1", + }); + }; + + const crashRun = async (error: { name: string; message: string; stack?: string }) => { + logger.error("crashRun", { ...getSocketMetadata(), error }); + + try { + this.#platformSocket?.send("RUN_CRASHED", { + version: "v1", + runId: socket.data.runId, + error, + }); + } finally { + exitRun(); + } + }; + + const checkpointInProgress = () => { + return this.#checkpointableTasks.has(socket.data.runId); + }; + + const readyToCheckpoint = async ( + reason: WaitReason | "RETRY" + ): Promise< + | { + success: true; + } + | { + success: false; + reason?: string; + } + > => { + const log = logger.child(getSocketMetadata()); + + log.log("readyToCheckpoint", { runId: socket.data.runId, reason }); + + if (checkpointInProgress()) { + return { + success: false, + reason: "checkpoint in progress", + }; + } + + let timeout: NodeJS.Timeout | undefined = undefined; + + const CHECKPOINTABLE_TIMEOUT_SECONDS = 20; + + const isCheckpointable = new Promise((resolve, reject) => { + // We set a reasonable timeout to prevent waiting forever + timeout = setTimeout( + () => reject(new CheckpointReadinessTimeoutError()), + CHECKPOINTABLE_TIMEOUT_SECONDS * 1000 + ); + + this.#checkpointableTasks.set(socket.data.runId, { resolve, reject }); + }); + + try { + await isCheckpointable; + this.#checkpointableTasks.delete(socket.data.runId); + + return { + success: true, + }; + } catch (error) { + log.error("Error while waiting for checkpointable state", { error }); + + if (error instanceof CheckpointReadinessTimeoutError) { + logger.error( + `Failed to become checkpointable in ${CHECKPOINTABLE_TIMEOUT_SECONDS}s for ${reason}`, + { runId: socket.data.runId } + ); + + return { + success: false, + reason: "timeout", + }; + } + + if (error instanceof CheckpointCancelError) { + return { + success: false, + reason: 
"canceled", + }; + } + + return { + success: false, + reason: typeof error === "string" ? error : "unknown", + }; + } finally { + clearTimeout(timeout); + } + }; + + const updateAttemptFriendlyId = (attemptFriendlyId: string) => { + socket.data.attemptFriendlyId = attemptFriendlyId; + }; + + const updateAttemptNumber = (attemptNumber: string | number) => { + socket.data.attemptNumber = String(attemptNumber); + }; + + this.#platformSocket?.send("LOG", { + metadata: socket.data, + text: "connected", + }); + + socket.on("TEST", (message, callback) => { + logger.log("Handling TEST", { eventName: "TEST", ...getSocketMetadata(), ...message }); + + try { + callback(); + } catch (error) { + logger.error("TEST error", { error }); + } + }); + + // Deprecated: Only workers without support for lazy attempts use this + socket.on("READY_FOR_EXECUTION", async (message) => { + const log = logger.child({ + eventName: "READY_FOR_EXECUTION", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling READY_FOR_EXECUTION"); + + try { + const executionAck = await this.#platformSocket?.sendWithAck( + "READY_FOR_EXECUTION", + message + ); + + if (!executionAck) { + log.error("no execution ack"); + + await crashRun({ + name: "ReadyForExecutionError", + message: "No execution ack", + }); + + return; + } + + if (!executionAck.success) { + log.error("failed to get execution payload"); + + await crashRun({ + name: "ReadyForExecutionError", + message: "Failed to get execution payload", + }); + + return; + } + + socket.emit("EXECUTE_TASK_RUN", { + version: "v1", + executionPayload: executionAck.payload, + }); + + updateAttemptFriendlyId(executionAck.payload.execution.attempt.id); + updateAttemptNumber(executionAck.payload.execution.attempt.number); + } catch (error) { + log.error("READY_FOR_EXECUTION error", { error }); + + await crashRun({ + name: "ReadyForExecutionError", + message: + error instanceof Error ? 
`Unexpected error: ${error.message}` : "Unexpected error", + }); + + return; + } + }); + + // MARK: LAZY ATTEMPT + socket.on("READY_FOR_LAZY_ATTEMPT", async (message) => { + const log = logger.child({ + eventName: "READY_FOR_LAZY_ATTEMPT", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling READY_FOR_LAZY_ATTEMPT"); + + try { + const lazyAttempt = await this.#platformSocket?.sendWithAck("READY_FOR_LAZY_ATTEMPT", { + ...message, + envId: socket.data.envId, + }); + + if (!lazyAttempt) { + log.error("no lazy attempt ack"); + + await crashRun({ + name: "ReadyForLazyAttemptError", + message: "No lazy attempt ack", + }); + + return; + } + + if (!lazyAttempt.success) { + log.error("failed to get lazy attempt payload", { reason: lazyAttempt.reason }); + + await crashRun({ + name: "ReadyForLazyAttemptError", + message: "Failed to get lazy attempt payload", + }); + + return; + } + + await chaosMonkey.call(); + + const lazyPayload = { + ...lazyAttempt.lazyPayload, + metrics: [ + ...(message.startTime + ? [ + { + name: "start", + event: "lazy_payload", + timestamp: message.startTime, + duration: Date.now() - message.startTime, + }, + ] + : []), + ], + }; + + socket.emit("EXECUTE_TASK_RUN_LAZY_ATTEMPT", { + version: "v1", + lazyPayload, + }); + } catch (error) { + if (error instanceof ChaosMonkey.Error) { + log.error("ChaosMonkey error, won't crash run"); + return; + } + + log.error("READY_FOR_LAZY_ATTEMPT error", { error }); + + // await crashRun({ + // name: "ReadyForLazyAttemptError", + // message: + // error instanceof Error ? 
`Unexpected error: ${error.message}` : "Unexpected error", + // }); + + return; + } + }); + + // MARK: RESUME READY + socket.on("READY_FOR_RESUME", async (message) => { + const log = logger.child({ + eventName: "READY_FOR_RESUME", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling READY_FOR_RESUME"); + + try { + updateAttemptFriendlyId(message.attemptFriendlyId); + + if (message.version === "v2") { + updateAttemptNumber(message.attemptNumber); + } + + this.#platformSocket?.send("READY_FOR_RESUME", { ...message, version: "v1" }); + } catch (error) { + log.error("READY_FOR_RESUME error", { error }); + + await crashRun({ + name: "ReadyForResumeError", + message: + error instanceof Error ? `Unexpected error: ${error.message}` : "Unexpected error", + }); + + return; + } + }); + + // MARK: RUN COMPLETED + socket.on("TASK_RUN_COMPLETED", async (message, callback) => { + const log = logger.child({ + eventName: "TASK_RUN_COMPLETED", + ...getSocketMetadata(), + ...omit(message, "completion", "execution"), + completion: { + id: message.completion.id, + ok: message.completion.ok, + }, + }); + + log.log("Handling TASK_RUN_COMPLETED"); + + try { + const { completion, execution } = message; + + // Cancel all in-progress checkpoints (if any) + this.#cancelCheckpoint(socket.data.runId, { + event: "TASK_RUN_COMPLETED", + attemptNumber: execution.attempt.number, + }); + + await chaosMonkey.call({ throwErrors: false }); + + const sendCompletionWithAck = async (): Promise => { + try { + const response = await this.#platformSocket?.sendWithAck( + "TASK_RUN_COMPLETED_WITH_ACK", + { + version: "v2", + execution, + completion, + }, + TASK_RUN_COMPLETED_WITH_ACK_TIMEOUT_MS + ); + + if (!response) { + log.error("TASK_RUN_COMPLETED_WITH_ACK: no response"); + return false; + } + + if (!response.success) { + log.error("TASK_RUN_COMPLETED_WITH_ACK: error response", { + error: response.error, + }); + return false; + } + + log.log("TASK_RUN_COMPLETED_WITH_ACK: successful 
response"); + return true; + } catch (error) { + log.error("TASK_RUN_COMPLETED_WITH_ACK: threw error", { error }); + return false; + } + }; + + const completeWithoutCheckpoint = async (shouldExit: boolean) => { + const supportsRetryCheckpoints = message.version === "v1"; + + callback({ willCheckpointAndRestore: false, shouldExit }); + + if (supportsRetryCheckpoints) { + // This is only here for backwards compat + this.#platformSocket?.send("TASK_RUN_COMPLETED", { + version: "v1", + execution, + completion, + }); + } else { + // 99.99% of runs should end up here + + const completedWithAckBackoff = new ExponentialBackoff("FullJitter").maxRetries( + TASK_RUN_COMPLETED_WITH_ACK_MAX_RETRIES + ); + + const result = await completedWithAckBackoff.execute( + async ({ retry, delay, elapsedMs }) => { + logger.log("TASK_RUN_COMPLETED_WITH_ACK: sending with backoff", { + retry, + delay, + elapsedMs, + }); + + const success = await sendCompletionWithAck(); + + if (!success) { + throw new Error("Failed to send completion with ack"); + } + } + ); + + if (!result.success) { + logger.error("TASK_RUN_COMPLETED_WITH_ACK: failed to send with backoff", result); + return; + } + + logger.log("TASK_RUN_COMPLETED_WITH_ACK: sent with backoff", result); + } + }; + + if (completion.ok) { + await completeWithoutCheckpoint(true); + return; + } + + if ( + completion.error.type === "INTERNAL_ERROR" && + completion.error.code === "TASK_RUN_CANCELLED" + ) { + await completeWithoutCheckpoint(true); + return; + } + + if (completion.retry === undefined) { + await completeWithoutCheckpoint(true); + return; + } + + if (completion.retry.delay < this.#delayThresholdInMs) { + await completeWithoutCheckpoint(false); + + // Prevents runs that fail fast from never sending a heartbeat + this.#sendRunHeartbeat(socket.data.runId); + + return; + } + + if (message.version === "v2") { + await completeWithoutCheckpoint(true); + return; + } + + const { canCheckpoint, willSimulate } = await this.#checkpointer.init(); + 
+ const willCheckpointAndRestore = canCheckpoint || willSimulate; + + if (!willCheckpointAndRestore) { + await completeWithoutCheckpoint(false); + return; + } + + // The worker will then put itself in a checkpointable state + callback({ willCheckpointAndRestore: true, shouldExit: false }); + + const ready = await readyToCheckpoint("RETRY"); + + if (!ready.success) { + log.error("Failed to become checkpointable", { reason: ready.reason }); + + return; + } + + const checkpoint = await this.#checkpointer.checkpointAndPush({ + runId: socket.data.runId, + projectRef: socket.data.projectRef, + deploymentVersion: socket.data.deploymentVersion, + shouldHeartbeat: true, + }); + + if (!checkpoint) { + log.error("Failed to checkpoint"); + await completeWithoutCheckpoint(false); + return; + } + + log.addFields({ checkpoint }); + + this.#platformSocket?.send("TASK_RUN_COMPLETED", { + version: "v1", + execution, + completion, + checkpoint, + }); + + if (!checkpoint.docker || !willSimulate) { + exitRun(); + } + } catch (error) { + log.error("TASK_RUN_COMPLETED error", { error }); + + await crashRun({ + name: "TaskRunCompletedError", + message: + error instanceof Error ? 
`Unexpected error: ${error.message}` : "Unexpected error", + }); + + return; + } + }); + + // MARK: TASK FAILED + socket.on("TASK_RUN_FAILED_TO_RUN", async ({ completion }) => { + const log = logger.child({ + eventName: "TASK_RUN_FAILED_TO_RUN", + ...getSocketMetadata(), + completion: { + id: completion.id, + ok: completion.ok, + }, + }); + + log.log("Handling TASK_RUN_FAILED_TO_RUN"); + + try { + // Cancel all in-progress checkpoints (if any) + this.#cancelCheckpoint(socket.data.runId, { + event: "TASK_RUN_FAILED_TO_RUN", + errorType: completion.error.type, + }); + + this.#platformSocket?.send("TASK_RUN_FAILED_TO_RUN", { + version: "v1", + completion, + }); + + exitRun(); + } catch (error) { + log.error("TASK_RUN_FAILED_TO_RUN error", { error }); + + return; + } + }); + + // MARK: CHECKPOINT + socket.on("READY_FOR_CHECKPOINT", async (message) => { + const log = logger.child({ + eventName: "READY_FOR_CHECKPOINT", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling READY_FOR_CHECKPOINT"); + + try { + const checkpointable = this.#checkpointableTasks.get(socket.data.runId); + + if (!checkpointable) { + log.error("No checkpoint scheduled"); + return; + } + + checkpointable.resolve(); + } catch (error) { + log.error("READY_FOR_CHECKPOINT error", { error }); + + return; + } + }); + + // MARK: CXX CHECKPOINT + socket.on("CANCEL_CHECKPOINT", async (message, callback) => { + const log = logger.child({ + eventName: "CANCEL_CHECKPOINT", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling CANCEL_CHECKPOINT"); + + try { + if (message.version === "v1") { + this.#cancelCheckpoint(socket.data.runId, { event: "CANCEL_CHECKPOINT", ...message }); + // v1 has no callback + return; + } + + const checkpointCanceled = this.#cancelCheckpoint(socket.data.runId, { + event: "CANCEL_CHECKPOINT", + ...message, + }); + + callback({ version: "v2", checkpointCanceled }); + } catch (error) { + log.error("CANCEL_CHECKPOINT error", { error }); + } + }); + + // MARK: 
DURATION WAIT + socket.on("WAIT_FOR_DURATION", async (message, callback) => { + const log = logger.child({ + eventName: "WAIT_FOR_DURATION", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling WAIT_FOR_DURATION"); + + try { + await chaosMonkey.call({ throwErrors: false }); + + if (checkpointInProgress()) { + log.error("Checkpoint already in progress"); + callback({ willCheckpointAndRestore: false }); + return; + } + + const { canCheckpoint, willSimulate } = await this.#checkpointer.init(); + + const willCheckpointAndRestore = canCheckpoint || willSimulate; + + callback({ willCheckpointAndRestore }); + + if (!willCheckpointAndRestore) { + return; + } + + const ready = await readyToCheckpoint("WAIT_FOR_DURATION"); + + if (!ready.success) { + log.error("Failed to become checkpointable", { reason: ready.reason }); + return; + } + + const runId = socket.data.runId; + const attemptNumber = getAttemptNumber(); + + const checkpoint = await this.#checkpointer.checkpointAndPush({ + runId, + projectRef: socket.data.projectRef, + deploymentVersion: socket.data.deploymentVersion, + attemptNumber, + }); + + if (!checkpoint) { + // The task container will keep running until the wait duration has elapsed + log.error("Failed to checkpoint"); + return; + } + + log.addFields({ checkpoint }); + + const ack = await this.#platformSocket?.sendWithAck("CHECKPOINT_CREATED", { + version: "v1", + runId: socket.data.runId, + attemptFriendlyId: message.attemptFriendlyId, + docker: checkpoint.docker, + location: checkpoint.location, + reason: { + type: "WAIT_FOR_DURATION", + ms: message.ms, + now: message.now, + }, + }); + + if (ack?.keepRunAlive) { + log.log("keeping run alive after duration checkpoint"); + + if (checkpoint.docker && willSimulate) { + // The container is still paused so we need to unpause it + log.log("unpausing container after duration checkpoint"); + this.#checkpointer.unpause(runId, attemptNumber); + } + + return; + } + + if (!checkpoint.docker || 
!willSimulate) { + exitRun(); + } + } catch (error) { + log.error("WAIT_FOR_DURATION error", { error }); + + await crashRun({ + name: "WaitForDurationError", + message: + error instanceof Error ? `Unexpected error: ${error.message}` : "Unexpected error", + }); + + return; + } + }); + + // MARK: TASK WAIT + socket.on("WAIT_FOR_TASK", async (message, callback) => { + const log = logger.child({ + eventName: "WAIT_FOR_TASK", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling WAIT_FOR_TASK"); + + try { + await chaosMonkey.call({ throwErrors: false }); + + if (checkpointInProgress()) { + log.error("Checkpoint already in progress"); + callback({ willCheckpointAndRestore: false }); + return; + } + + const { canCheckpoint, willSimulate } = await this.#checkpointer.init(); + + const willCheckpointAndRestore = canCheckpoint || willSimulate; + + callback({ willCheckpointAndRestore }); + + if (!willCheckpointAndRestore) { + return; + } + + // Workers with v1 schemas don't signal when they're ready to checkpoint for dependency waits + if (message.version === "v2") { + const ready = await readyToCheckpoint("WAIT_FOR_TASK"); + + if (!ready.success) { + log.error("Failed to become checkpointable", { reason: ready.reason }); + return; + } + } + + const runId = socket.data.runId; + const attemptNumber = getAttemptNumber(); + + const checkpoint = await this.#checkpointer.checkpointAndPush( + { + runId, + projectRef: socket.data.projectRef, + deploymentVersion: socket.data.deploymentVersion, + attemptNumber, + }, + WAIT_FOR_TASK_CHECKPOINT_DELAY_MS + ); + + if (!checkpoint) { + log.error("Failed to checkpoint"); + return; + } + + log.addFields({ checkpoint }); + + log.log("WAIT_FOR_TASK checkpoint created"); + + //setting this means we can only resume from a checkpoint + socket.data.requiresCheckpointResumeWithMessage = `location:${checkpoint.location}-docker:${checkpoint.docker}`; + log.log("WAIT_FOR_TASK set requiresCheckpointResumeWithMessage"); + + const ack = 
await this.#platformSocket?.sendWithAck("CHECKPOINT_CREATED", { + version: "v1", + runId: socket.data.runId, + attemptFriendlyId: message.attemptFriendlyId, + docker: checkpoint.docker, + location: checkpoint.location, + reason: { + type: "WAIT_FOR_TASK", + friendlyId: message.friendlyId, + }, + }); + + if (ack?.keepRunAlive) { + socket.data.requiresCheckpointResumeWithMessage = undefined; + log.log("keeping run alive after task checkpoint"); + + if (checkpoint.docker && willSimulate) { + // The container is still paused so we need to unpause it + log.log("unpausing container after duration checkpoint"); + this.#checkpointer.unpause(runId, attemptNumber); + } + + return; + } + + if (!checkpoint.docker || !willSimulate) { + exitRun(); + } + } catch (error) { + log.error("WAIT_FOR_TASK error", { error }); + + await crashRun({ + name: "WaitForTaskError", + message: + error instanceof Error ? `Unexpected error: ${error.message}` : "Unexpected error", + }); + + return; + } + }); + + // MARK: BATCH WAIT + socket.on("WAIT_FOR_BATCH", async (message, callback) => { + const log = logger.child({ + eventName: "WAIT_FOR_BATCH", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling WAIT_FOR_BATCH", message); + + try { + await chaosMonkey.call({ throwErrors: false }); + + if (checkpointInProgress()) { + log.error("Checkpoint already in progress"); + callback({ willCheckpointAndRestore: false }); + return; + } + + const { canCheckpoint, willSimulate } = await this.#checkpointer.init(); + + const willCheckpointAndRestore = canCheckpoint || willSimulate; + + callback({ willCheckpointAndRestore }); + + if (!willCheckpointAndRestore) { + return; + } + + // Workers with v1 schemas don't signal when they're ready to checkpoint for dependency waits + if (message.version === "v2") { + const ready = await readyToCheckpoint("WAIT_FOR_BATCH"); + + if (!ready.success) { + log.error("Failed to become checkpointable", { reason: ready.reason }); + return; + } + } + + const runId = 
socket.data.runId; + const attemptNumber = getAttemptNumber(); + + const checkpoint = await this.#checkpointer.checkpointAndPush( + { + runId, + projectRef: socket.data.projectRef, + deploymentVersion: socket.data.deploymentVersion, + attemptNumber, + }, + WAIT_FOR_BATCH_CHECKPOINT_DELAY_MS + ); + + if (!checkpoint) { + log.error("Failed to checkpoint"); + return; + } + + log.addFields({ checkpoint }); + + log.log("WAIT_FOR_BATCH checkpoint created"); + + //setting this means we can only resume from a checkpoint + socket.data.requiresCheckpointResumeWithMessage = `location:${checkpoint.location}-docker:${checkpoint.docker}`; + log.log("WAIT_FOR_BATCH set checkpoint"); + + const ack = await this.#platformSocket?.sendWithAck("CHECKPOINT_CREATED", { + version: "v1", + runId: socket.data.runId, + attemptFriendlyId: message.attemptFriendlyId, + docker: checkpoint.docker, + location: checkpoint.location, + reason: { + type: "WAIT_FOR_BATCH", + batchFriendlyId: message.batchFriendlyId, + runFriendlyIds: message.runFriendlyIds, + }, + }); + + if (ack?.keepRunAlive) { + socket.data.requiresCheckpointResumeWithMessage = undefined; + log.log("keeping run alive after batch checkpoint"); + + if (checkpoint.docker && willSimulate) { + // The container is still paused so we need to unpause it + log.log("unpausing container after batch checkpoint"); + this.#checkpointer.unpause(runId, attemptNumber); + } + + return; + } + + if (!checkpoint.docker || !willSimulate) { + exitRun(); + } + } catch (error) { + log.error("WAIT_FOR_BATCH error", { error }); + + await crashRun({ + name: "WaitForBatchError", + message: + error instanceof Error ? 
`Unexpected error: ${error.message}` : "Unexpected error", + }); + + return; + } + }); + + // MARK: INDEX + socket.on("INDEX_TASKS", async (message, callback) => { + const log = logger.child({ + eventName: "INDEX_TASKS", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling INDEX_TASKS"); + + try { + const workerAck = await this.#platformSocket?.sendWithAck("CREATE_WORKER", { + version: "v2", + projectRef: socket.data.projectRef, + envId: socket.data.envId, + deploymentId: message.deploymentId, + metadata: { + contentHash: socket.data.contentHash, + packageVersion: message.packageVersion, + tasks: message.tasks, + }, + supportsLazyAttempts: message.version !== "v1" && message.supportsLazyAttempts, + }); + + if (!workerAck) { + log.debug("no worker ack while indexing"); + } + + callback({ success: !!workerAck?.success }); + } catch (error) { + log.error("INDEX_TASKS error", { error }); + callback({ success: false }); + } + }); + + // MARK: INDEX FAILED + socket.on("INDEXING_FAILED", async (message) => { + const log = logger.child({ + eventName: "INDEXING_FAILED", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling INDEXING_FAILED"); + + try { + this.#platformSocket?.send("INDEXING_FAILED", { + version: "v1", + deploymentId: message.deploymentId, + error: message.error, + }); + } catch (error) { + log.error("INDEXING_FAILED error", { error }); + } + }); + + // MARK: CREATE ATTEMPT + socket.on("CREATE_TASK_RUN_ATTEMPT", async (message, callback) => { + const log = logger.child({ + eventName: "CREATE_TASK_RUN_ATTEMPT", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling CREATE_TASK_RUN_ATTEMPT"); + + try { + await chaosMonkey.call({ throwErrors: false }); + + const createAttempt = await this.#platformSocket?.sendWithAck( + "CREATE_TASK_RUN_ATTEMPT", + { + runId: message.runId, + envId: socket.data.envId, + } + ); + + if (!createAttempt?.success) { + log.debug("no ack while creating attempt", { reason: createAttempt?.reason 
}); + callback({ success: false, reason: createAttempt?.reason }); + return; + } + + updateAttemptFriendlyId(createAttempt.executionPayload.execution.attempt.id); + updateAttemptNumber(createAttempt.executionPayload.execution.attempt.number); + + callback({ + success: true, + executionPayload: createAttempt.executionPayload, + }); + } catch (error) { + log.error("CREATE_TASK_RUN_ATTEMPT error", { error }); + callback({ + success: false, + reason: + error instanceof Error ? `Unexpected error: ${error.message}` : "Unexpected error", + }); + } + }); + + socket.on("UNRECOVERABLE_ERROR", async (message) => { + const log = logger.child({ + eventName: "UNRECOVERABLE_ERROR", + ...getSocketMetadata(), + error: message.error, + }); + + log.log("Handling UNRECOVERABLE_ERROR"); + + try { + await crashRun(message.error); + } catch (error) { + log.error("UNRECOVERABLE_ERROR error", { error }); + } + }); + + socket.on("SET_STATE", async (message) => { + const log = logger.child({ + eventName: "SET_STATE", + ...getSocketMetadata(), + ...message, + }); + + log.log("Handling SET_STATE"); + + try { + if (message.attemptFriendlyId) { + updateAttemptFriendlyId(message.attemptFriendlyId); + } + + if (message.attemptNumber) { + updateAttemptNumber(message.attemptNumber); + } + } catch (error) { + log.error("SET_STATE error", { error }); + } + }); + }, + onDisconnect: async (socket, handler, sender, logger) => { + try { + this.#platformSocket?.send("LOG", { + metadata: socket.data, + text: "disconnect", + }); + } catch (error) { + logger.error("onDisconnect error", { error }); + } + }, + handlers: { + TASK_HEARTBEAT: async (message) => { + this.#platformSocket?.send("TASK_HEARTBEAT", message); + }, + TASK_RUN_HEARTBEAT: async (message) => { + this.#sendRunHeartbeat(message.runId); + }, + }, + }); + + return provider; + } + + #sendRunHeartbeat(runId: string) { + this.#platformSocket?.send("TASK_RUN_HEARTBEAT", { + version: "v1", + runId, + }); + } + + #cancelCheckpoint(runId: string, 
reason?: any): boolean { + logger.log("cancelCheckpoint: call", { runId, reason }); + + const checkpointWait = this.#checkpointableTasks.get(runId); + + if (checkpointWait) { + // Stop waiting for task to reach checkpointable state + checkpointWait.reject(new CheckpointCancelError()); + } + + // Cancel checkpointing procedure + const checkpointCanceled = this.#checkpointer.cancelAllCheckpointsForRun(runId); + + logger.log("cancelCheckpoint: result", { + runId, + reason, + checkpointCanceled, + hadCheckpointWait: !!checkpointWait, + }); + + return checkpointCanceled; + } + + // MARK: HTTP SERVER + #createHttpServer() { + const httpServer = createServer(async (req, res) => { + logger.log(`[${req.method}]`, { url: req.url }); + + const reply = new HttpReply(res); + + switch (req.url) { + case "/health": { + return reply.text("ok"); + } + case "/metrics": { + return reply.text(await register.metrics(), 200, register.contentType); + } + default: { + return reply.empty(404); + } + } + }); + + httpServer.on("clientError", (err, socket) => { + socket.end("HTTP/1.1 400 Bad Request\r\n\r\n"); + }); + + httpServer.on("listening", () => { + logger.log("server listening on port", { port: HTTP_SERVER_PORT }); + }); + + return httpServer; + } + + #createInternalHttpServer() { + const httpServer = createServer(async (req, res) => { + logger.log(`[${req.method}]`, { url: req.url }); + + const reply = new HttpReply(res); + + switch (req.url) { + case "/whoami": { + return reply.text(NODE_NAME); + } + case "/checkpoint/duration": { + try { + const body = await getTextBody(req); + const json = safeJsonParse(body); + + if (typeof json !== "object" || !json) { + return reply.text("Invalid body", 400); + } + + if (!("runId" in json) || typeof json.runId !== "string") { + return reply.text("Missing or invalid: runId", 400); + } + + if (!("now" in json) || typeof json.now !== "number") { + return reply.text("Missing or invalid: now", 400); + } + + if (!("ms" in json) || typeof json.ms !== 
"number") { + return reply.text("Missing or invalid: ms", 400); + } + + let keepRunAlive = false; + if ("keepRunAlive" in json && typeof json.keepRunAlive === "boolean") { + keepRunAlive = json.keepRunAlive; + } + + let async = false; + if ("async" in json && typeof json.async === "boolean") { + async = json.async; + } + + const { runId, now, ms } = json; + + if (!runId) { + return reply.text("Missing runId", 400); + } + + const runSocket = await this.#getRunSocket(runId); + if (!runSocket) { + return reply.text("Run socket not found", 404); + } + + const { data } = runSocket; + + console.log("Manual duration checkpoint", data); + + if (async) { + reply.text("Creating checkpoint in the background", 202); + } + + const checkpoint = await this.#checkpointer.checkpointAndPush({ + runId: data.runId, + projectRef: data.projectRef, + deploymentVersion: data.deploymentVersion, + attemptNumber: data.attemptNumber ? parseInt(data.attemptNumber) : undefined, + }); + + if (!checkpoint) { + return reply.text("Failed to checkpoint", 500); + } + + if (!data.attemptFriendlyId) { + return reply.text("Socket data missing attemptFriendlyId", 500); + } + + const ack = await this.#platformSocket?.sendWithAck("CHECKPOINT_CREATED", { + version: "v1", + runId, + attemptFriendlyId: data.attemptFriendlyId, + docker: checkpoint.docker, + location: checkpoint.location, + reason: { + type: "WAIT_FOR_DURATION", + ms, + now, + }, + }); + + if (ack?.keepRunAlive || keepRunAlive) { + return reply.json({ + message: `keeping run ${runId} alive after checkpoint`, + checkpoint, + requestJson: json, + platformAck: ack, + }); + } + + runSocket.emit("REQUEST_EXIT", { + version: "v1", + }); + + return reply.json({ + message: `checkpoint created for run ${runId}`, + checkpoint, + requestJson: json, + platformAck: ack, + }); + } catch (error) { + return reply.json({ + message: `error`, + error, + }); + } + } + case "/checkpoint/manual": { + try { + const body = await getTextBody(req); + const json = 
safeJsonParse(body); + + if (typeof json !== "object" || !json) { + return reply.text("Invalid body", 400); + } + + if (!("runId" in json) || typeof json.runId !== "string") { + return reply.text("Missing or invalid: runId", 400); + } + + let restoreAtUnixTimeMs: number | undefined; + if ("restoreAtUnixTimeMs" in json && typeof json.restoreAtUnixTimeMs === "number") { + restoreAtUnixTimeMs = json.restoreAtUnixTimeMs; + } + + let keepRunAlive = false; + if ("keepRunAlive" in json && typeof json.keepRunAlive === "boolean") { + keepRunAlive = json.keepRunAlive; + } + + let async = false; + if ("async" in json && typeof json.async === "boolean") { + async = json.async; + } + + const { runId } = json; + + if (!runId) { + return reply.text("Missing runId", 400); + } + + const runSocket = await this.#getRunSocket(runId); + if (!runSocket) { + return reply.text("Run socket not found", 404); + } + + const { data } = runSocket; + + console.log("Manual checkpoint", data); + + if (async) { + reply.text("Creating checkpoint in the background", 202); + } + + const checkpoint = await this.#checkpointer.checkpointAndPush({ + runId: data.runId, + projectRef: data.projectRef, + deploymentVersion: data.deploymentVersion, + attemptNumber: data.attemptNumber ? 
parseInt(data.attemptNumber) : undefined, + }); + + if (!checkpoint) { + return reply.text("Failed to checkpoint", 500); + } + + if (!data.attemptFriendlyId) { + return reply.text("Socket data missing attemptFriendlyId", 500); + } + + const ack = await this.#platformSocket?.sendWithAck("CHECKPOINT_CREATED", { + version: "v1", + runId, + attemptFriendlyId: data.attemptFriendlyId, + docker: checkpoint.docker, + location: checkpoint.location, + reason: { + type: "MANUAL", + restoreAtUnixTimeMs, + }, + }); + + if (ack?.keepRunAlive || keepRunAlive) { + return reply.json({ + message: `keeping run ${runId} alive after checkpoint`, + checkpoint, + requestJson: json, + platformAck: ack, + }); + } + + runSocket.emit("REQUEST_EXIT", { + version: "v1", + }); + + return reply.json({ + message: `checkpoint created for run ${runId}`, + checkpoint, + requestJson: json, + platformAck: ack, + }); + } catch (error) { + return reply.json({ + message: `error`, + error, + }); + } + } + default: { + return reply.empty(404); + } + } + }); + + httpServer.on("clientError", (err, socket) => { + socket.end("HTTP/1.1 400 Bad Request\r\n\r\n"); + }); + + httpServer.on("listening", () => { + logger.log("internal server listening on port", { port: HTTP_SERVER_PORT + 100 }); + }); + + return httpServer; + } + + listen() { + this.#httpServer.listen(this.port, this.host); + this.#internalHttpServer.listen(this.port + 100, "127.0.0.1"); + } +} + +const coordinator = new TaskCoordinator(HTTP_SERVER_PORT); +coordinator.listen(); diff --git a/apps/coordinator/src/util.ts b/apps/coordinator/src/util.ts new file mode 100644 index 00000000000..18464f230b6 --- /dev/null +++ b/apps/coordinator/src/util.ts @@ -0,0 +1,31 @@ +export const boolFromEnv = (env: string, defaultValue: boolean): boolean => { + const value = process.env[env]; + + if (!value) { + return defaultValue; + } + + return ["1", "true"].includes(value); +}; + +export const numFromEnv = (env: string, defaultValue: number): number => { + const 
value = process.env[env]; + + if (!value) { + return defaultValue; + } + + const parsed = parseInt(value, 10); + + // A non-numeric value (e.g. "abc") parses to NaN; fall back to the default instead of returning NaN + return Number.isNaN(parsed) ? defaultValue : parsed; +}; + +/** + * Parses a JSON string without throwing. + * + * @returns `undefined` when `json` is missing or empty, `null` when it is not valid JSON, otherwise the parsed value. + */ +export function safeJsonParse(json?: string): unknown { + if (!json) { + return; + } + + try { + return JSON.parse(json); + } catch (e) { + return null; + } +} diff --git a/apps/coordinator/tsconfig.json b/apps/coordinator/tsconfig.json new file mode 100644 index 00000000000..e03fd024126 --- /dev/null +++ b/apps/coordinator/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "es2020", + "module": "commonjs", + "esModuleInterop": true, + "resolveJsonModule": true, + "forceConsistentCasingInFileNames": true, + "strict": true, + "skipLibCheck": true, + "paths": { + "@trigger.dev/core/v3": ["../../packages/core/src/v3"], + "@trigger.dev/core/v3/*": ["../../packages/core/src/v3/*"] + } + } +} diff --git a/apps/docker-provider/.env.example b/apps/docker-provider/.env.example new file mode 100644 index 00000000000..75c54083d1a --- /dev/null +++ b/apps/docker-provider/.env.example @@ -0,0 +1,11 @@ +HTTP_SERVER_PORT=8050 + +PLATFORM_WS_PORT=3030 +PLATFORM_SECRET=provider-secret +SECURE_CONNECTION=false + +OTEL_EXPORTER_OTLP_ENDPOINT=http://0.0.0.0:3030/otel + +# Use this if you are on macOS +# COORDINATOR_HOST="host.docker.internal" +# OTEL_EXPORTER_OTLP_ENDPOINT="http://host.docker.internal:4318" \ No newline at end of file diff --git a/apps/docker-provider/.gitignore b/apps/docker-provider/.gitignore new file mode 100644 index 00000000000..5c84119d635 --- /dev/null +++ b/apps/docker-provider/.gitignore @@ -0,0 +1,3 @@ +dist/ +node_modules/ +.env \ No newline at end of file diff --git a/apps/docker-provider/Containerfile b/apps/docker-provider/Containerfile new file mode 100644 index 00000000000..42a7ac23092 --- /dev/null +++ b/apps/docker-provider/Containerfile @@ -0,0 +1,47 @@ +FROM node:20-alpine@sha256:7a91aa397f2e2dfbfcdad2e2d72599f374e0b0172be1d86eeb73f1d33f36a4b2 AS node-20-alpine + +WORKDIR /app + +FROM node-20-alpine AS pruner + +COPY 
--chown=node:node . . +RUN npx -q turbo@1.10.9 prune --scope=docker-provider --docker +RUN find . -name "node_modules" -type d -prune -exec rm -rf '{}' + + +FROM node-20-alpine AS base + +RUN apk add --no-cache dumb-init docker + +COPY --chown=node:node .gitignore .gitignore +COPY --from=pruner --chown=node:node /app/out/json/ . +COPY --from=pruner --chown=node:node /app/out/pnpm-lock.yaml ./pnpm-lock.yaml +COPY --from=pruner --chown=node:node /app/out/pnpm-workspace.yaml ./pnpm-workspace.yaml + +FROM base AS dev-deps +RUN corepack enable +ENV NODE_ENV development + +RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store pnpm fetch --no-frozen-lockfile +RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store pnpm install --ignore-scripts --no-frozen-lockfile + +FROM base AS builder +RUN corepack enable + +COPY --from=pruner --chown=node:node /app/out/full/ . +COPY --from=dev-deps --chown=node:node /app/ . +COPY --chown=node:node turbo.json turbo.json + +RUN pnpm run -r --filter @trigger.dev/core bundle-vendor && pnpm run -r --filter docker-provider build:bundle + +FROM base AS runner + +RUN corepack enable +ENV NODE_ENV production + +COPY --from=builder --chown=node:node /app/apps/docker-provider/dist/index.mjs ./index.mjs + +EXPOSE 8000 + +USER node + +CMD [ "/usr/bin/dumb-init", "--", "/usr/local/bin/node", "./index.mjs" ] diff --git a/apps/docker-provider/README.md b/apps/docker-provider/README.md new file mode 100644 index 00000000000..647db280a5b --- /dev/null +++ b/apps/docker-provider/README.md @@ -0,0 +1,3 @@ +# Docker provider + +The `docker-provider` allows the platform to be orchestrator-agnostic. The platform can perform actions such as `INDEX_TASKS` or `INVOKE_TASK` which the provider translates into Docker actions. 
diff --git a/apps/docker-provider/package.json b/apps/docker-provider/package.json new file mode 100644 index 00000000000..f3e4015ef08 --- /dev/null +++ b/apps/docker-provider/package.json @@ -0,0 +1,27 @@ +{ + "name": "docker-provider", + "private": true, + "version": "0.0.1", + "description": "", + "main": "dist/index.cjs", + "scripts": { + "build": "npm run build:bundle", + "build:bundle": "esbuild src/index.ts --bundle --outfile=dist/index.mjs --platform=node --format=esm --target=esnext --banner:js=\"import { createRequire } from 'module';const require = createRequire(import.meta.url);\"", + "build:image": "docker build -f Containerfile . -t docker-provider", + "dev": "tsx --no-warnings=ExperimentalWarning --require dotenv/config --watch src/index.ts", + "start": "tsx src/index.ts", + "typecheck": "tsc --noEmit" + }, + "keywords": [], + "author": "", + "license": "MIT", + "dependencies": { + "@trigger.dev/core": "workspace:*", + "execa": "^8.0.1" + }, + "devDependencies": { + "dotenv": "^16.4.2", + "esbuild": "^0.19.11", + "tsx": "^4.7.0" + } +} \ No newline at end of file diff --git a/apps/docker-provider/src/index.ts b/apps/docker-provider/src/index.ts new file mode 100644 index 00000000000..a0b0554fb23 --- /dev/null +++ b/apps/docker-provider/src/index.ts @@ -0,0 +1,297 @@ +import { $, type ExecaChildProcess, execa } from "execa"; +import { + ProviderShell, + TaskOperations, + TaskOperationsCreateOptions, + TaskOperationsIndexOptions, + TaskOperationsRestoreOptions, +} from "@trigger.dev/core/v3/apps"; +import { SimpleLogger } from "@trigger.dev/core/v3/apps"; +import { isExecaChildProcess } from "@trigger.dev/core/v3/apps"; +import { testDockerCheckpoint } from "@trigger.dev/core/v3/serverOnly"; +import { setTimeout } from "node:timers/promises"; +import { PostStartCauses, PreStopCauses } from "@trigger.dev/core/v3"; + +const MACHINE_NAME = process.env.MACHINE_NAME || "local"; +const COORDINATOR_PORT = process.env.COORDINATOR_PORT || 8020; +const 
COORDINATOR_HOST = process.env.COORDINATOR_HOST || "127.0.0.1"; +const DOCKER_NETWORK = process.env.DOCKER_NETWORK || "host"; + +const OTEL_EXPORTER_OTLP_ENDPOINT = + process.env.OTEL_EXPORTER_OTLP_ENDPOINT || "http://0.0.0.0:4318"; + +const FORCE_CHECKPOINT_SIMULATION = ["1", "true"].includes( + process.env.FORCE_CHECKPOINT_SIMULATION ?? "false" +); + +const logger = new SimpleLogger(`[${MACHINE_NAME}]`); + +type TaskOperationsInitReturn = { + canCheckpoint: boolean; + willSimulate: boolean; +}; + +class DockerTaskOperations implements TaskOperations { + #initialized = false; + #canCheckpoint = false; + + constructor(private opts = { forceSimulate: false }) {} + + /** + * Probes Docker checkpoint support and reports the resulting mode. + * Once initialized, later calls skip the probe and reuse the stored `#canCheckpoint` value. + */ + async init(): Promise<TaskOperationsInitReturn> { + if (this.#initialized) { + return this.#getInitReturn(this.#canCheckpoint); + } + + logger.log("Initializing task operations"); + + const testCheckpoint = await testDockerCheckpoint(); + + if (testCheckpoint.ok) { + return this.#getInitReturn(true); + } + + logger.error(testCheckpoint.message, testCheckpoint.error); + return this.#getInitReturn(false); + } + + // Stores the probe result, marks the instance initialized, and simulates whenever real checkpoints are unavailable or simulation is forced + #getInitReturn(canCheckpoint: boolean): TaskOperationsInitReturn { + this.#canCheckpoint = canCheckpoint; + + if (canCheckpoint) { + if (!this.#initialized) { + logger.log("Full checkpoint support!"); + } + } + + this.#initialized = true; + + const willSimulate = !canCheckpoint || this.opts.forceSimulate; + + if (willSimulate) { + logger.log("Simulation mode enabled. 
Containers will be paused, not checkpointed.", { + forceSimulate: this.opts.forceSimulate, + }); + } + + return { + canCheckpoint, + willSimulate, + }; + } + + async index(opts: TaskOperationsIndexOptions) { + await this.init(); + + const containerName = this.#getIndexContainerName(opts.shortCode); + + logger.log(`Indexing task ${opts.imageRef}`, { + host: COORDINATOR_HOST, + port: COORDINATOR_PORT, + }); + + logger.debug( + await execa("docker", [ + "run", + `--network=${DOCKER_NETWORK}`, + "--rm", + `--env=INDEX_TASKS=true`, + `--env=TRIGGER_SECRET_KEY=${opts.apiKey}`, + `--env=TRIGGER_API_URL=${opts.apiUrl}`, + `--env=TRIGGER_ENV_ID=${opts.envId}`, + `--env=OTEL_EXPORTER_OTLP_ENDPOINT=${OTEL_EXPORTER_OTLP_ENDPOINT}`, + `--env=POD_NAME=${containerName}`, + `--env=COORDINATOR_HOST=${COORDINATOR_HOST}`, + `--env=COORDINATOR_PORT=${COORDINATOR_PORT}`, + `--name=${containerName}`, + `${opts.imageRef}`, + ]) + ); + } + + async create(opts: TaskOperationsCreateOptions) { + await this.init(); + + const containerName = this.#getRunContainerName(opts.runId, opts.nextAttemptNumber); + + const runArgs = [ + "run", + `--network=${DOCKER_NETWORK}`, + "--detach", + `--env=TRIGGER_ENV_ID=${opts.envId}`, + `--env=TRIGGER_RUN_ID=${opts.runId}`, + `--env=OTEL_EXPORTER_OTLP_ENDPOINT=${OTEL_EXPORTER_OTLP_ENDPOINT}`, + `--env=POD_NAME=${containerName}`, + `--env=COORDINATOR_HOST=${COORDINATOR_HOST}`, + `--env=COORDINATOR_PORT=${COORDINATOR_PORT}`, + `--env=TRIGGER_POD_SCHEDULED_AT_MS=${Date.now()}`, + `--name=${containerName}`, + ]; + + if (process.env.ENFORCE_MACHINE_PRESETS) { + runArgs.push(`--cpus=${opts.machine.cpu}`, `--memory=${opts.machine.memory}G`); + } + + if (opts.dequeuedAt) { + runArgs.push(`--env=TRIGGER_RUN_DEQUEUED_AT_MS=${opts.dequeuedAt}`); + } + + runArgs.push(`${opts.image}`); + + try { + logger.debug(await execa("docker", runArgs)); + } catch (error) { + if (!isExecaChildProcess(error)) { + throw error; + } + + logger.error("Create failed:", { + opts, + 
exitCode: error.exitCode, + escapedCommand: error.escapedCommand, + stdout: error.stdout, + stderr: error.stderr, + }); + } + } + + async restore(opts: TaskOperationsRestoreOptions) { + await this.init(); + + const containerName = this.#getRunContainerName(opts.runId, opts.attemptNumber); + + if (!this.#canCheckpoint || this.opts.forceSimulate) { + logger.log("Simulating restore"); + + const unpause = logger.debug(await $`docker unpause ${containerName}`); + + if (unpause.exitCode !== 0) { + throw new Error("docker unpause command failed"); + } + + await this.#sendPostStart(containerName); + return; + } + + const { exitCode } = logger.debug( + await $`docker start --checkpoint=${opts.checkpointRef} ${containerName}` + ); + + if (exitCode !== 0) { + throw new Error("docker start command failed"); + } + + await this.#sendPostStart(containerName); + } + + async delete(opts: { runId: string }) { + await this.init(); + + const containerName = this.#getRunContainerName(opts.runId); + await this.#sendPreStop(containerName); + + logger.log("noop: delete"); + } + + async get(opts: { runId: string }) { + await this.init(); + + logger.log("noop: get"); + } + + #getIndexContainerName(suffix: string) { + return `task-index-${suffix}`; + } + + #getRunContainerName(suffix: string, attemptNumber?: number) { + return `task-run-${suffix}${attemptNumber && attemptNumber > 1 ? 
`-att${attemptNumber}` : ""}`; + } + + /** Calls the container's postStart hook with cause "restore"; on failure logs the error and throws a new Error. */ + async #sendPostStart(containerName: string): Promise<void> { + try { + const port = await this.#getHttpServerPort(containerName); + logger.debug(await this.#runLifecycleCommand(containerName, port, "postStart", "restore")); + } catch (error) { + logger.error("postStart error", { error }); + throw new Error("postStart command failed"); + } + } + + /** Calls the container's preStop hook with cause "terminate"; on failure logs the error and throws a new Error. */ + async #sendPreStop(containerName: string): Promise<void> { + try { + const port = await this.#getHttpServerPort(containerName); + logger.debug(await this.#runLifecycleCommand(containerName, port, "preStop", "terminate")); + } catch (error) { + logger.error("preStop error", { error }); + throw new Error("preStop command failed"); + } + } + + /** + * Extracts the container's HTTP server port from its `docker logs` output. + * + * @throws Error when no "http server listening on port N" line with a non-zero port is found. + */ + async #getHttpServerPort(containerName: string): Promise<number> { + // We first get the correct port, which is random during dev as we run with host networking and need to avoid clashes + // FIXME: Skip this in prod + const logs = logger.debug(await $`docker logs ${containerName}`); + // The group must be named "port": the value is read back through `groups.port` below + const matches = logs.stdout.match(/http server listening on port (?<port>[0-9]+)/); + + const port = Number(matches?.groups?.port); + + if (!port) { + throw new Error("failed to extract port from logs"); + } + + return port; + } + + /** + * Requests a lifecycle hook inside the container via `docker exec <container> busybox wget`. + * Only "postStart" is retried (up to 6 times, with backoff between attempts); "preStop" fails on the first error. + * + * @throws Error once the request has failed and no retries remain. + */ + async #runLifecycleCommand<THookType extends "postStart" | "preStop">( + containerName: string, + port: number, + type: THookType, + cause: THookType extends "postStart" ? 
PostStartCauses : PreStopCauses, + retryCount = 0 + ): Promise<ExecaChildProcess> { + try { + return await execa("docker", [ + "exec", + containerName, + "busybox", + "wget", + "-q", + "-O-", + `127.0.0.1:${port}/${type}?cause=${cause}`, + ]); + } catch (error: any) { + if (type === "postStart" && retryCount < 6) { + logger.debug(`retriable ${type} error`, { retryCount, message: error?.message }); + await setTimeout(exponentialBackoff(retryCount + 1, 2, 50, 1150, 50)); + + return this.#runLifecycleCommand(containerName, port, type, cause, retryCount + 1); + } + + logger.error(`final ${type} error`, { message: error?.message }); + // retryCount is the number of retries already performed when this attempt failed + throw new Error(`${type} command failed after ${retryCount} retries`); + } + } +} + +const provider = new ProviderShell({ + tasks: new DockerTaskOperations({ forceSimulate: FORCE_CHECKPOINT_SIMULATION }), + type: "docker", +}); + +provider.listen(); + +/** + * Computes a capped exponential backoff delay with additive random jitter. + * + * @returns `min(exponential ** retryCount * minDelay, maxDelay)` plus a random value in `[0, jitter)`. + */ +function exponentialBackoff( + retryCount: number, + exponential: number, + minDelay: number, + maxDelay: number, + jitter: number +): number { + // Calculate the delay using the exponential backoff formula + const delay = Math.min(Math.pow(exponential, retryCount) * minDelay, maxDelay); + + // Calculate the jitter + const jitterValue = Math.random() * jitter; + + // Return the calculated delay with jitter + return delay + jitterValue; +} diff --git a/apps/docker-provider/tsconfig.json b/apps/docker-provider/tsconfig.json new file mode 100644 index 00000000000..f87adfc2d7f --- /dev/null +++ b/apps/docker-provider/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "es2020", + "module": "commonjs", + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "strict": true, + "skipLibCheck": true, + "paths": { + "@trigger.dev/core/v3": ["../../packages/core/src/v3"], + "@trigger.dev/core/v3/*": ["../../packages/core/src/v3/*"] + } + } +} diff --git a/apps/docs/README.md b/apps/docs/README.md deleted file mode 100644 index 4552821692d..00000000000 --- 
a/apps/docs/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# Mintlify Starter Kit - -Click on `Use this template` to quickstarter your documentation setup with Mintlify. The starter kit contains examples including - -- Guide pages -- Navigation -- Customizations -- API Reference pages -- Use of popular components - -### 👩‍💻 Development - -Run these from the root of the repository - -``` -pnpm install --filter docs -``` - -``` -pnpm run dev --filter docs -``` - -Go to http://localhost:3050 - -### 😎 Publishing Changes - -Changes will be deployed to production automatically after pushing to the default branch. - -You can also preview changes using PRs, which generates a preview link of the docs. - -#### Troubleshooting - -- Mintlify dev isn't running - Run `mintlify install` it'll re-install dependencies. -- Mintlify dev is updating really slowly - Run `mintlify clear` to clear the cache. diff --git a/apps/docs/_snippets/whatsapp-to-slack.mdx b/apps/docs/_snippets/whatsapp-to-slack.mdx deleted file mode 100644 index ec02fa5d7aa..00000000000 --- a/apps/docs/_snippets/whatsapp-to-slack.mdx +++ /dev/null @@ -1,204 +0,0 @@ -The following example combines WhatsApp and Slack to create a workflow that allows you to receive WhatsApp messages in Slack, and use a modal to compose a reply. 
- - - -```tsx whatsappToSlack.tsx -/** @jsxImportSource jsx-slack */ -import { Trigger } from "@trigger.dev/sdk"; -import { - events, - sendText, - getMediaUrl, - MessageEventMessage, -} from "@trigger.dev/whatsapp"; -import JSXSlack, { - Actions, - Blocks, - Button, - Section, - Header, - Context, - Image, - Modal, - Input, - Textarea, -} from "jsx-slack"; -import * as slack from "@trigger.dev/slack"; - -const dateFormatter = new Intl.DateTimeFormat("en-US", { - timeStyle: "short", - dateStyle: "short", -}); - -// this trigger listens for WhatsApp messages and sends them to Slack -new Trigger({ - id: "whatsapp-to-slack", - name: "WhatsApp: load messages", - on: events.messageEvent({ - accountId: "", - }), - run: async (event, ctx) => { - //this generates Slack blocks from the WhatsApp message - const messageBody = await createMessageBody(event.message); - - await slack.postMessage("jsx-test", { - channelName: "whatsapp-support", - //text appears in Slack notifications on mobile/desktop - text: "How is your progress today?", - //import and use JSXSlack to make creating rich messages much easier - blocks: JSXSlack( - -
From: {event.message.from}
- At: {dateFormatter.format(event.message.timestamp)} - {messageBody} - - - -
- ), - //pass the WhatsApp message to the next trigger - metadata: { - whatsAppMessage: event.message, - }, - }); - }, -}).listen(); -``` - -```tsx replyButtonInteraction.tsx -//this trigger creates a Slack modal when a user presses the Reply button -new Trigger({ - id: "whatsapp-to-slack-modal", - name: "WhatsApp: show message composer", - on: slack.events.blockActionInteraction({ - blockId: "launch-modal", - }), - run: async (event, ctx) => { - if (!event.trigger_id) { - return; - } - - //get the action (pressing the reply button) and the original WhatsApp message - const action = event.actions[0]; - const whatsAppMessage = - event.message?.metadata?.event_payload.whatsAppMessage; - - //generate Slack blocks from the WhatsApp message - const messageBody = await createMessageBody(whatsAppMessage); - - if (action.action_id === "reply" && action.type === "button") { - //show a reply modal, with the original message and an input field for the reply - await slack.openView( - "Opening view", - event.trigger_id, - JSXSlack( - -
Original message
- {messageBody} -
Your reply
-