From c633618f17c007ad333429dc1772bc1998d15e3c Mon Sep 17 00:00:00 2001 From: Saadi Myftija Date: Thu, 28 May 2026 17:16:29 +0200 Subject: [PATCH 1/5] fix(webapp): make createDeploymentBackgroundWorker idempotent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CLI retries after a severed connection were colliding on the BackgroundWorker `(project, env, version)` unique index and burning the retry budget on 500s, leaving deployments to surface as "Indexing timed out" 8 minutes later. Look up an existing row on entry and return it on contentHash match (or 409 on drift); guard the final BUILDING → DEPLOYING transition with an updateMany so a sibling failure handler can't be silently overwritten. --- ...empotent-background-worker-registration.md | 6 + .../services/createBackgroundWorker.server.ts | 55 ++++--- ...eateDeploymentBackgroundWorkerV4.server.ts | 59 ++++--- .../findOrCreateBackgroundWorker.server.ts | 57 +++++++ ...ckgroundWorkerMetadataForStorage.server.ts | 28 ++++ .../test/findOrCreateBackgroundWorker.test.ts | 145 ++++++++++++++++++ 6 files changed, 300 insertions(+), 50 deletions(-) create mode 100644 .server-changes/idempotent-background-worker-registration.md create mode 100644 apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts create mode 100644 apps/webapp/app/v3/services/stripBackgroundWorkerMetadataForStorage.server.ts create mode 100644 apps/webapp/test/findOrCreateBackgroundWorker.test.ts diff --git a/.server-changes/idempotent-background-worker-registration.md b/.server-changes/idempotent-background-worker-registration.md new file mode 100644 index 00000000000..abadd471542 --- /dev/null +++ b/.server-changes/idempotent-background-worker-registration.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Make `POST /api/v1/deployments/:deploymentId/background-workers` idempotent (for sequential requests only) so client-side retries no longer collide on the `BackgroundWorker` `(project, env, version)` unique index. Helps make deployments more resilient against the class of indexing failures that surfaces in the dashboard as "Indexing timed out", e.g. during transient database issues. diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index ec3f6d077ad..d05a3078516 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -34,31 +34,7 @@ import { tryCatch } from "@trigger.dev/core/v3"; import { engine } from "../runEngine.server"; import { scheduleEngine } from "../scheduleEngine.server"; -/** - * Strip BackgroundWorkerMetadata down to the slice that's actually read after - * storage. Everything else is duplicated to dedicated columns/tables - * (BackgroundWorker.{contentHash,cliVersion,sdkVersion,runtime,runtimeVersion}, - * BackgroundWorkerTask, BackgroundWorkerFile, TaskQueue, Prompt). Today the - * only post-write reader is changeCurrentDeployment.server.ts, which feeds - * tasks[].schedule into syncDeclarativeSchedules. packageVersion, contentHash, - * and tasks[].filePath are kept solely to satisfy BackgroundWorkerMetadata's - * required fields when the column is parsed back. - */ -export function stripBackgroundWorkerMetadataForStorage( - metadata: BackgroundWorkerMetadata -): Prisma.InputJsonValue { - return { - packageVersion: metadata.packageVersion, - contentHash: metadata.contentHash, - tasks: metadata.tasks - .filter((t) => t.schedule) - .map((t) => ({ - id: t.id, - filePath: t.filePath, - schedule: t.schedule, - })), - }; -} +export { stripBackgroundWorkerMetadataForStorage } from "./stripBackgroundWorkerMetadataForStorage.server"; export class CreateBackgroundWorkerService extends BaseService { private readonly _taskMetaCache: TaskMetadataCache; @@ -356,8 +332,13 @@ async function createWorkerTask( prisma: PrismaClientOrTransaction, tasksToBackgroundFiles?: Map ): Promise { + // Hoisted so the P2002 catch branch can return the same entry shape. + let queue: TaskQueue | undefined; + let resolvedTriggerSource: "SCHEDULED" | "AGENT" | "STANDARD" | undefined; + let resolvedTtl: string | null | undefined; + try { - let queue = queues.find((queue) => queue.name === task.queue?.name); + queue = queues.find((queue) => queue.name === task.queue?.name); if (!queue) { // Create a TaskQueue @@ -374,14 +355,14 @@ async function createWorkerTask( ); } - const resolvedTriggerSource = + resolvedTriggerSource = task.triggerSource === "schedule" ? ("SCHEDULED" as const) : task.triggerSource === "agent" ? ("AGENT" as const) : ("STANDARD" as const); - const resolvedTtl = + resolvedTtl = typeof task.ttl === "number" ? stringifyDuration(task.ttl) ?? null : task.ttl ?? null; await prisma.backgroundWorkerTask.create({ @@ -418,10 +399,26 @@ async function createWorkerTask( if (error instanceof Prisma.PrismaClientKnownRequestError) { // The error code for unique constraint violation in Prisma is P2002 if (error.code === "P2002") { - logger.warn("Task already exists", { + // Retry landing after the first attempt's row was already written. + const existing = await prisma.backgroundWorkerTask.findFirst({ + where: { workerId: worker.id, slug: task.id }, + select: { id: true }, + }); + + logger.warn("Attempted to recreate background worker task", { task, worker, }); + + if (existing && queue && resolvedTriggerSource && resolvedTtl !== undefined) { + return { + slug: task.id, + ttl: resolvedTtl, + triggerSource: resolvedTriggerSource, + queueId: queue.id, + queueName: queue.name, + }; + } } else { logger.error("Prisma Error creating background worker task", { error: { diff --git a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts index ff041359bb0..44d860ef5e5 100644 --- a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts +++ b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts @@ -1,6 +1,9 @@ import { CreateBackgroundWorkerRequestBody, logger, tryCatch } from "@trigger.dev/core/v3"; -import { BackgroundWorkerId } from "@trigger.dev/core/v3/isomorphic"; -import type { BackgroundWorker, PrismaClientOrTransaction, WorkerDeployment } from "@trigger.dev/database"; +import type { + BackgroundWorker, + PrismaClientOrTransaction, + WorkerDeployment, +} from "@trigger.dev/database"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { type TaskMetadataCache } from "~/services/taskMetadataCache.server"; import { taskMetadataCacheInstance } from "~/services/taskMetadataCacheInstance.server"; @@ -8,9 +11,9 @@ import { BaseService, ServiceValidationError } from "./baseService.server"; import { createBackgroundFiles, createWorkerResources, - stripBackgroundWorkerMetadataForStorage, syncDeclarativeSchedules, } from "./createBackgroundWorker.server"; +import { findOrCreateBackgroundWorker } from "./createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server"; import { TimeoutDeploymentService } from "./timeoutDeployment.server"; import { env } from "~/env.server"; @@ -50,6 +53,11 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { }); if (!deployment) { + logger.warn("createDeploymentBackgroundWorker: deployment not found", { + deploymentId, + environmentId: environment.id, + projectId: environment.projectId, + }); return; } @@ -70,25 +78,21 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { } if (deployment.status !== "BUILDING") { + logger.warn("createDeploymentBackgroundWorker: deployment not in BUILDING state", { + deploymentId, + deploymentStatus: deployment.status, + environmentId: environment.id, + projectId: environment.projectId, + }); return; } - const backgroundWorker = await this._prisma.backgroundWorker.create({ - data: { - ...BackgroundWorkerId.generate(), - version: deployment.version, - runtimeEnvironmentId: environment.id, - projectId: environment.projectId, - metadata: stripBackgroundWorkerMetadataForStorage(body.metadata), - contentHash: body.metadata.contentHash, - cliVersion: body.metadata.cliPackageVersion, - sdkVersion: body.metadata.packageVersion, - supportsLazyAttempts: body.supportsLazyAttempts, - engine: body.engine, - runtime: body.metadata.runtime, - runtimeVersion: body.metadata.runtimeVersion, - }, - }); + const backgroundWorker = await findOrCreateBackgroundWorker( + environment, + deployment, + body, + this._prisma + ); //upgrade the project to engine "V2" if it's not already if (environment.project.engine === "V1" && body.engine === "V2") { @@ -188,10 +192,11 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { throw serviceError; } - // Link the deployment with the background worker - await this._prisma.workerDeployment.update({ + // Guarded BUILDING → DEPLOYING transition. `updateMany` for optimistic concurrency control + const { count: updatedCount } = await this._prisma.workerDeployment.updateMany({ where: { id: deployment.id, + status: "BUILDING", }, data: { status: "DEPLOYING", @@ -203,6 +208,18 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { }, }); + if (updatedCount === 0) { + logger.warn( + "createDeploymentBackgroundWorker: deployment no longer in BUILDING state, skipping DEPLOYING transition", + { + deploymentId, + environmentId: environment.id, + projectId: environment.projectId, + } + ); + return backgroundWorker; + } + await TimeoutDeploymentService.enqueue( deployment.id, "DEPLOYING", diff --git a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts new file mode 100644 index 00000000000..85c143edf85 --- /dev/null +++ b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts @@ -0,0 +1,57 @@ +import type { CreateBackgroundWorkerRequestBody } from "@trigger.dev/core/v3"; +import { BackgroundWorkerId } from "@trigger.dev/core/v3/isomorphic"; +import type { + BackgroundWorker, + PrismaClientOrTransaction, + WorkerDeployment, +} from "@trigger.dev/database"; +import { prisma as defaultPrisma } from "~/db.server"; +import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { ServiceValidationError } from "../common.server"; +import { stripBackgroundWorkerMetadataForStorage } from "../stripBackgroundWorkerMetadataForStorage.server"; + +/** + * Idempotent on `(project, environment, version)` for sequential calls, not concurrent calls. + */ +export async function findOrCreateBackgroundWorker( + environment: AuthenticatedEnvironment, + deployment: WorkerDeployment, + body: CreateBackgroundWorkerRequestBody, + prisma: PrismaClientOrTransaction = defaultPrisma +): Promise { + const existing = await prisma.backgroundWorker.findFirst({ + where: { + projectId: environment.projectId, + runtimeEnvironmentId: environment.id, + version: deployment.version, + }, + }); + + if (existing && existing.contentHash === body.metadata.contentHash) { + return existing; + } + + if (existing) { + throw new ServiceValidationError( + "A background worker for this deployment version already exists with a different content hash", + 409 + ); + } + + return prisma.backgroundWorker.create({ + data: { + ...BackgroundWorkerId.generate(), + version: deployment.version, + runtimeEnvironmentId: environment.id, + projectId: environment.projectId, + metadata: stripBackgroundWorkerMetadataForStorage(body.metadata), + contentHash: body.metadata.contentHash, + cliVersion: body.metadata.cliPackageVersion, + sdkVersion: body.metadata.packageVersion, + supportsLazyAttempts: body.supportsLazyAttempts, + engine: body.engine, + runtime: body.metadata.runtime, + runtimeVersion: body.metadata.runtimeVersion, + }, + }); +} diff --git a/apps/webapp/app/v3/services/stripBackgroundWorkerMetadataForStorage.server.ts b/apps/webapp/app/v3/services/stripBackgroundWorkerMetadataForStorage.server.ts new file mode 100644 index 00000000000..a740bd2c1d6 --- /dev/null +++ b/apps/webapp/app/v3/services/stripBackgroundWorkerMetadataForStorage.server.ts @@ -0,0 +1,28 @@ +import { BackgroundWorkerMetadata } from "@trigger.dev/core/v3"; +import { Prisma } from "@trigger.dev/database"; + +/** + * Strip BackgroundWorkerMetadata down to the slice that's actually read after + * storage. Everything else is duplicated to dedicated columns/tables + * (BackgroundWorker.{contentHash,cliVersion,sdkVersion,runtime,runtimeVersion}, + * BackgroundWorkerTask, BackgroundWorkerFile, TaskQueue, Prompt). Today the + * only post-write reader is changeCurrentDeployment.server.ts, which feeds + * tasks[].schedule into syncDeclarativeSchedules. packageVersion, contentHash, + * and tasks[].filePath are kept solely to satisfy BackgroundWorkerMetadata's + * required fields when the column is parsed back. + */ +export function stripBackgroundWorkerMetadataForStorage( + metadata: BackgroundWorkerMetadata +): Prisma.InputJsonValue { + return { + packageVersion: metadata.packageVersion, + contentHash: metadata.contentHash, + tasks: metadata.tasks + .filter((t) => t.schedule) + .map((t) => ({ + id: t.id, + filePath: t.filePath, + schedule: t.schedule, + })), + }; +} diff --git a/apps/webapp/test/findOrCreateBackgroundWorker.test.ts b/apps/webapp/test/findOrCreateBackgroundWorker.test.ts new file mode 100644 index 00000000000..c3b46262eae --- /dev/null +++ b/apps/webapp/test/findOrCreateBackgroundWorker.test.ts @@ -0,0 +1,145 @@ +import { containerTest } from "@internal/testcontainers"; +import { CreateBackgroundWorkerRequestBody } from "@trigger.dev/core/v3"; +import { PrismaClient } from "@trigger.dev/database"; +import { describe, expect, vi } from "vitest"; +import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { ServiceValidationError } from "~/v3/services/common.server"; +import { findOrCreateBackgroundWorker } from "~/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server"; + +vi.setConfig({ testTimeout: 30_000 }); + +async function seedDeployment(prisma: PrismaClient, version = "20260528.1") { + const slug = `s${Math.random().toString(36).slice(2, 10)}`; + const organization = await prisma.organization.create({ + data: { title: slug, slug }, + }); + + const project = await prisma.project.create({ + data: { + name: slug, + slug, + organizationId: organization.id, + externalRef: slug, + }, + }); + + const environment = await prisma.runtimeEnvironment.create({ + data: { + slug, + type: "PRODUCTION", + projectId: project.id, + organizationId: organization.id, + apiKey: slug, + pkApiKey: slug, + shortcode: slug, + }, + }); + + const deployment = await prisma.workerDeployment.create({ + data: { + friendlyId: `deployment_${slug}`, + shortCode: slug, + contentHash: "h_initial", + status: "BUILDING", + version, + projectId: project.id, + environmentId: environment.id, + }, + }); + + // The helper only reads `id` and `projectId`; the rest of the type is + // unused here so we cast a partial. + const authEnv = { id: environment.id, projectId: project.id } as AuthenticatedEnvironment; + + return { project, environment, deployment, authEnv }; +} + +function bodyWithHash(contentHash: string): CreateBackgroundWorkerRequestBody { + return { + localOnly: false, + metadata: { + contentHash, + packageVersion: "0.0.0", + cliPackageVersion: "0.0.0", + tasks: [], + queues: [], + sourceFiles: [], + runtime: "node", + runtimeVersion: "21.0.0", + }, + engine: "V2", + supportsLazyAttempts: true, + }; +} + +describe("findOrCreateBackgroundWorker", () => { + containerTest("creates a new BackgroundWorker on the first call", async ({ prisma }) => { + const { authEnv, deployment } = await seedDeployment(prisma); + + const worker = await findOrCreateBackgroundWorker( + authEnv, + deployment, + bodyWithHash("h1"), + prisma + ); + + expect(worker.projectId).toBe(authEnv.projectId); + expect(worker.runtimeEnvironmentId).toBe(authEnv.id); + expect(worker.version).toBe(deployment.version); + expect(worker.contentHash).toBe("h1"); + + const rowCount = await prisma.backgroundWorker.count({ + where: { projectId: authEnv.projectId, version: deployment.version }, + }); + expect(rowCount).toBe(1); + }); + + containerTest( + "returns the existing row on a second call with the same contentHash (no duplicate)", + async ({ prisma }) => { + const { authEnv, deployment } = await seedDeployment(prisma); + + const first = await findOrCreateBackgroundWorker( + authEnv, + deployment, + bodyWithHash("h1"), + prisma + ); + const second = await findOrCreateBackgroundWorker( + authEnv, + deployment, + bodyWithHash("h1"), + prisma + ); + + expect(second.id).toBe(first.id); + + const rowCount = await prisma.backgroundWorker.count({ + where: { projectId: authEnv.projectId, version: deployment.version }, + }); + expect(rowCount).toBe(1); + } + ); + + containerTest( + "throws 409 ServiceValidationError when an existing row has a different contentHash", + async ({ prisma }) => { + const { authEnv, deployment } = await seedDeployment(prisma); + + await findOrCreateBackgroundWorker(authEnv, deployment, bodyWithHash("h1"), prisma); + + await expect( + findOrCreateBackgroundWorker(authEnv, deployment, bodyWithHash("h2"), prisma) + ).rejects.toMatchObject({ + name: "ServiceValidationError", + status: 409, + }); + + // Also assert the constructor so callers can `catch (e instanceof ServiceValidationError)`. + await expect( + findOrCreateBackgroundWorker(authEnv, deployment, bodyWithHash("h2"), prisma) + ).rejects.toBeInstanceOf(ServiceValidationError); + } + ); + +}); From a3d837fa5003cae6fefe9c98ba037599f6f2a2af Mon Sep 17 00:00:00 2001 From: Saadi Myftija Date: Thu, 28 May 2026 17:53:42 +0200 Subject: [PATCH 2/5] fix(webapp): unbreak local typecheck + unit tests - Restore local import of `stripBackgroundWorkerMetadataForStorage` in `createBackgroundWorker.server.ts`; the prior diff turned it into a bare re-export and the existing in-file usage broke typecheck. - Drop the `defaultPrisma` fallback on `findOrCreateBackgroundWorker`; importing `~/db.server` in test runs triggered the singleton's lazy Prisma client to attempt a localhost:5432 connection and surface as an unhandled rejection on shutdown. --- apps/webapp/app/v3/services/createBackgroundWorker.server.ts | 3 ++- .../findOrCreateBackgroundWorker.server.ts | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index d05a3078516..b3052fa65b6 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -34,7 +34,8 @@ import { tryCatch } from "@trigger.dev/core/v3"; import { engine } from "../runEngine.server"; import { scheduleEngine } from "../scheduleEngine.server"; -export { stripBackgroundWorkerMetadataForStorage } from "./stripBackgroundWorkerMetadataForStorage.server"; +import { stripBackgroundWorkerMetadataForStorage } from "./stripBackgroundWorkerMetadataForStorage.server"; +export { stripBackgroundWorkerMetadataForStorage }; export class CreateBackgroundWorkerService extends BaseService { private readonly _taskMetaCache: TaskMetadataCache; diff --git a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts index 85c143edf85..339a557ed9e 100644 --- a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts @@ -5,7 +5,6 @@ import type { PrismaClientOrTransaction, WorkerDeployment, } from "@trigger.dev/database"; -import { prisma as defaultPrisma } from "~/db.server"; import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { ServiceValidationError } from "../common.server"; import { stripBackgroundWorkerMetadataForStorage } from "../stripBackgroundWorkerMetadataForStorage.server"; @@ -17,7 +16,7 @@ export async function findOrCreateBackgroundWorker( environment: AuthenticatedEnvironment, deployment: WorkerDeployment, body: CreateBackgroundWorkerRequestBody, - prisma: PrismaClientOrTransaction = defaultPrisma + prisma: PrismaClientOrTransaction ): Promise { const existing = await prisma.backgroundWorker.findFirst({ where: { From 5cba4853a87d4352cd7de1a5c91f844c866674a7 Mon Sep 17 00:00:00 2001 From: Saadi Myftija Date: Thu, 28 May 2026 21:08:51 +0200 Subject: [PATCH 3/5] fix(webapp): guard `failBackgroundWorkerDeployment` with BUILDING predicate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symmetric with the BUILDING → DEPLOYING `updateMany` guard. Without this, two attempts running side-by-side (idempotency lets the retry proceed past worker creation) can produce: A succeeds and flips to DEPLOYING; B hits a transient file/resource/schedule error and unconditionally flips to FAILED; A's subsequent guarded update finds 0 rows; deployment is stuck in FAILED with a worker registered but never deployed. Now the failure handler only flips status on rows that are still BUILDING. The timeout dequeue is also gated on the actual flip so we don't cancel a sibling attempt's just-enqueued timeout. --- ...eateDeploymentBackgroundWorkerV4.server.ts | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts index 44d860ef5e5..7bf7032fd5b 100644 --- a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts +++ b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts @@ -232,9 +232,14 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { } async #failBackgroundWorkerDeployment(deployment: WorkerDeployment, error: Error) { - await this._prisma.workerDeployment.update({ + // Guarded BUILDING → FAILED transition, symmetric with the BUILDING → DEPLOYING + // transition in `call()`. With idempotent retries, two attempts can run side-by-side; + // without the predicate, one attempt's failure could downgrade the deployment after + // the other already flipped it to DEPLOYING, leaving it stuck in FAILED with a worker. + const { count: updatedCount } = await this._prisma.workerDeployment.updateMany({ where: { id: deployment.id, + status: "BUILDING", }, data: { status: "FAILED", @@ -246,7 +251,20 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { }, }); - await TimeoutDeploymentService.dequeue(deployment.id, this._prisma); + if (updatedCount === 0) { + logger.warn( + "failBackgroundWorkerDeployment: deployment moved out of BUILDING during call, skipping FAILED transition", + { + deploymentId: deployment.id, + originalError: error.message, + } + ); + } else { + // Only dequeue the timeout if we actually flipped to FAILED — otherwise a + // sibling attempt may have just enqueued it as part of a successful + // BUILDING → DEPLOYING transition. + await TimeoutDeploymentService.dequeue(deployment.id, this._prisma); + } throw error; } From d6e4a0f29c200175b00a9b76a906f9cb09295e7a Mon Sep 17 00:00:00 2001 From: Saadi Myftija Date: Thu, 28 May 2026 21:35:05 +0200 Subject: [PATCH 4/5] fix(webapp): tighten idempotent registration error semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address three Devin review comments together: - **Late-retry idempotency**: when a prior attempt fully succeeded (`deployment.workerId` set, status past BUILDING) but the client never saw the response, return the linked worker so the CLI can finalize instead of seeing a 5xx. - **Definitive vs transient failure shapes**: distinguish them by error class so the caller can decide whether to fail-deploy. - contentHash drift → `ServiceValidationError(409)` → caller fails the deployment immediately (no 8-min timeout wait) - concurrent `create()` race → plain `Error` (5xx) → caller propagates, CLI's `wrapZodFetch` retries, next `findFirst` returns the winner - **Catch P2002 with a clearer message**: previously the opaque Prisma error propagated as 500; now the loser of a race surfaces a readable "Concurrent background worker registration detected; please retry" message. Same wire status (5xx), better observability. --- ...eateDeploymentBackgroundWorkerV4.server.ts | 34 ++++++++-- .../findOrCreateBackgroundWorker.server.ts | 65 +++++++++++++------ .../test/findOrCreateBackgroundWorker.test.ts | 38 +++++++++++ 3 files changed, 112 insertions(+), 25 deletions(-) diff --git a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts index 7bf7032fd5b..9383b780d90 100644 --- a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts +++ b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts @@ -77,6 +77,18 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { } } + // Late-retry idempotency: if a worker was registered by a prior fully- + // successful attempt and the deployment already moved past BUILDING, return + // that worker so the CLI can finalize instead of seeing a 5xx. + if (deployment.workerId) { + const linkedWorker = await this._prisma.backgroundWorker.findFirst({ + where: { id: deployment.workerId }, + }); + if (linkedWorker) { + return linkedWorker; + } + } + if (deployment.status !== "BUILDING") { logger.warn("createDeploymentBackgroundWorker: deployment not in BUILDING state", { deploymentId, @@ -87,13 +99,25 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { return; } - const backgroundWorker = await findOrCreateBackgroundWorker( - environment, - deployment, - body, - this._prisma + const [findOrCreateError, backgroundWorker] = await tryCatch( + findOrCreateBackgroundWorker(environment, deployment, body, this._prisma) ); + if (findOrCreateError) { + // Definitive failures (e.g. contentHash drift) surface as + // `ServiceValidationError` — fail the deployment so the operator sees it + // immediately instead of waiting 8 minutes for the timeout. Transient + // races throw a plain `Error` and propagate as 5xx without failing. + if (findOrCreateError instanceof ServiceValidationError) { + await this.#failBackgroundWorkerDeployment(deployment, findOrCreateError); + } + throw findOrCreateError; + } + + if (!backgroundWorker) { + return; + } + //upgrade the project to engine "V2" if it's not already if (environment.project.engine === "V1" && body.engine === "V2") { await this._prisma.project.update({ diff --git a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts index 339a557ed9e..7a2dcb06c34 100644 --- a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server.ts @@ -1,9 +1,10 @@ import type { CreateBackgroundWorkerRequestBody } from "@trigger.dev/core/v3"; import { BackgroundWorkerId } from "@trigger.dev/core/v3/isomorphic"; -import type { - BackgroundWorker, - PrismaClientOrTransaction, - WorkerDeployment, +import { + isUniqueConstraintError, + type BackgroundWorker, + type PrismaClientOrTransaction, + type WorkerDeployment, } from "@trigger.dev/database"; import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { ServiceValidationError } from "../common.server"; @@ -11,6 +12,14 @@ import { stripBackgroundWorkerMetadataForStorage } from "../stripBackgroundWorke /** * Idempotent on `(project, environment, version)` for sequential calls, not concurrent calls. + * + * Failure shapes the caller distinguishes: + * - `ServiceValidationError` (409): definitive — contentHash drift means a different build + * is being pushed under the same deployment version, so the caller should fail the + * deployment instead of waiting for a timeout. + * - Plain `Error`: transient — two attempts raced the `create()` call and the loser caught + * the unique-index violation. The caller should propagate this as 5xx so the CLI's + * retry/backoff hits findFirst on the next attempt and returns the winner's row. */ export async function findOrCreateBackgroundWorker( environment: AuthenticatedEnvironment, @@ -37,20 +46,36 @@ export async function findOrCreateBackgroundWorker( ); } - return prisma.backgroundWorker.create({ - data: { - ...BackgroundWorkerId.generate(), - version: deployment.version, - runtimeEnvironmentId: environment.id, - projectId: environment.projectId, - metadata: stripBackgroundWorkerMetadataForStorage(body.metadata), - contentHash: body.metadata.contentHash, - cliVersion: body.metadata.cliPackageVersion, - sdkVersion: body.metadata.packageVersion, - supportsLazyAttempts: body.supportsLazyAttempts, - engine: body.engine, - runtime: body.metadata.runtime, - runtimeVersion: body.metadata.runtimeVersion, - }, - }); + try { + return await prisma.backgroundWorker.create({ + data: { + ...BackgroundWorkerId.generate(), + version: deployment.version, + runtimeEnvironmentId: environment.id, + projectId: environment.projectId, + metadata: stripBackgroundWorkerMetadataForStorage(body.metadata), + contentHash: body.metadata.contentHash, + cliVersion: body.metadata.cliPackageVersion, + sdkVersion: body.metadata.packageVersion, + supportsLazyAttempts: body.supportsLazyAttempts, + engine: body.engine, + runtime: body.metadata.runtime, + runtimeVersion: body.metadata.runtimeVersion, + }, + }); + } catch (error) { + // Concurrent attempts raced past `findFirst` and both reached `create`. Surface + // a clear, non-Prisma error so the 5xx the caller returns isn't an opaque + // P2002 — the CLI's retry will then hit `findFirst` and find the winner's row. + // Intentionally NOT a ServiceValidationError so the caller doesn't fail-deploy + // on a transient race. + if ( + isUniqueConstraintError(error, ["projectId", "runtimeEnvironmentId", "version"]) + ) { + throw new Error( + "Concurrent background worker registration detected for this deployment version; please retry" + ); + } + throw error; + } } diff --git a/apps/webapp/test/findOrCreateBackgroundWorker.test.ts b/apps/webapp/test/findOrCreateBackgroundWorker.test.ts index c3b46262eae..8b047001b87 100644 --- a/apps/webapp/test/findOrCreateBackgroundWorker.test.ts +++ b/apps/webapp/test/findOrCreateBackgroundWorker.test.ts @@ -142,4 +142,42 @@ describe("findOrCreateBackgroundWorker", () => { } ); + containerTest( + "concurrent create race surfaces as plain Error (not ServiceValidationError)", + async ({ prisma }) => { + // The class distinction matters: the V4 service uses `instanceof ServiceValidationError` + // to decide whether to fail-deploy. A transient race must not fail-deploy. + const { authEnv, deployment } = await seedDeployment(prisma); + + const [first, second] = await Promise.allSettled([ + findOrCreateBackgroundWorker(authEnv, deployment, bodyWithHash("h-race"), prisma), + findOrCreateBackgroundWorker(authEnv, deployment, bodyWithHash("h-race"), prisma), + ]); + + const fulfilled = [first, second].filter((r) => r.status === "fulfilled"); + const rejected = [first, second].filter( + (r): r is PromiseRejectedResult => r.status === "rejected" + ); + + // Exactly one row in the database regardless of who won. + const rowCount = await prisma.backgroundWorker.count({ + where: { projectId: authEnv.projectId, version: deployment.version }, + }); + expect(rowCount).toBe(1); + + // If the schedule produced an actual race (one wins, one loses), the loser + // must surface a non-SVE error. If the schedule serialised them by accident, + // both fulfilled is also acceptable — this test is about the error *class*, + // not the rate of races. + if (rejected.length > 0) { + expect(fulfilled).toHaveLength(1); + for (const r of rejected) { + expect(r.reason).not.toBeInstanceOf(ServiceValidationError); + expect((r.reason as Error).message).toMatch(/concurrent/i); + } + } else { + expect(fulfilled).toHaveLength(2); + } + } + ); }); From 92969e2f63c13477f571a5ca31520f173f2fe581 Mon Sep 17 00:00:00 2001 From: Saadi Myftija Date: Thu, 28 May 2026 21:40:13 +0200 Subject: [PATCH 5/5] fix(webapp): drop dead `!backgroundWorker` guard, comment unreachable throw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both nits surfaced by a local code review of the previous commit: - `findOrCreateBackgroundWorker` returns `Promise` (non-null). After `if (findOrCreateError) throw`, TypeScript narrows `backgroundWorker` to `BackgroundWorker` already, so the guard was dead code. Worse, returning silently there would have been a 200 with no body — strictly worse than the 5xx the rest of the file produces on internal errors. - Annotate that `#failBackgroundWorkerDeployment` throws its own argument, so the outer `throw findOrCreateError` is only reachable on the non-SVE branch. Same control flow, less confusing for future readers. --- .../services/createDeploymentBackgroundWorkerV4.server.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts index 9383b780d90..d240af34118 100644 --- a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts +++ b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts @@ -109,15 +109,13 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { // immediately instead of waiting 8 minutes for the timeout. Transient // races throw a plain `Error` and propagate as 5xx without failing. if (findOrCreateError instanceof ServiceValidationError) { + // `#failBackgroundWorkerDeployment` already throws its argument; the + // outer `throw` covers the non-SVE branch. await this.#failBackgroundWorkerDeployment(deployment, findOrCreateError); } throw findOrCreateError; } - if (!backgroundWorker) { - return; - } - //upgrade the project to engine "V2" if it's not already if (environment.project.engine === "V1" && body.engine === "V2") { await this._prisma.project.update({