-
-
Notifications
You must be signed in to change notification settings - Fork 1.3k
fix(webapp): idempotent DeploymentBackgroundWorker creation
#3772
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| --- | ||
| area: webapp | ||
| type: fix | ||
| --- | ||
|
|
||
| Make `POST /api/v1/deployments/:deploymentId/background-workers` idempotent (for sequential requests only) so client-side retries no longer collide on the `BackgroundWorker` `(project, env, version)` unique index. Helps make deployments more resilient against the class of indexing failures that surfaces in the dashboard as "Indexing timed out", e.g. during transient database issues. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,16 +1,19 @@ | ||
| import { CreateBackgroundWorkerRequestBody, logger, tryCatch } from "@trigger.dev/core/v3"; | ||
| import { BackgroundWorkerId } from "@trigger.dev/core/v3/isomorphic"; | ||
| import type { BackgroundWorker, PrismaClientOrTransaction, WorkerDeployment } from "@trigger.dev/database"; | ||
| import type { | ||
| BackgroundWorker, | ||
| PrismaClientOrTransaction, | ||
| WorkerDeployment, | ||
| } from "@trigger.dev/database"; | ||
| import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; | ||
| import { type TaskMetadataCache } from "~/services/taskMetadataCache.server"; | ||
| import { taskMetadataCacheInstance } from "~/services/taskMetadataCacheInstance.server"; | ||
| import { BaseService, ServiceValidationError } from "./baseService.server"; | ||
| import { | ||
| createBackgroundFiles, | ||
| createWorkerResources, | ||
| stripBackgroundWorkerMetadataForStorage, | ||
| syncDeclarativeSchedules, | ||
| } from "./createBackgroundWorker.server"; | ||
| import { findOrCreateBackgroundWorker } from "./createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server"; | ||
| import { TimeoutDeploymentService } from "./timeoutDeployment.server"; | ||
| import { env } from "~/env.server"; | ||
|
|
||
|
|
@@ -50,6 +53,11 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { | |
| }); | ||
|
|
||
| if (!deployment) { | ||
| logger.warn("createDeploymentBackgroundWorker: deployment not found", { | ||
| deploymentId, | ||
| environmentId: environment.id, | ||
| projectId: environment.projectId, | ||
| }); | ||
| return; | ||
| } | ||
|
|
||
|
|
@@ -70,25 +78,21 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { | |
| } | ||
|
|
||
| if (deployment.status !== "BUILDING") { | ||
| logger.warn("createDeploymentBackgroundWorker: deployment not in BUILDING state", { | ||
| deploymentId, | ||
| deploymentStatus: deployment.status, | ||
| environmentId: environment.id, | ||
| projectId: environment.projectId, | ||
| }); | ||
| return; | ||
| } | ||
|
|
||
| const backgroundWorker = await this._prisma.backgroundWorker.create({ | ||
| data: { | ||
| ...BackgroundWorkerId.generate(), | ||
| version: deployment.version, | ||
| runtimeEnvironmentId: environment.id, | ||
| projectId: environment.projectId, | ||
| metadata: stripBackgroundWorkerMetadataForStorage(body.metadata), | ||
| contentHash: body.metadata.contentHash, | ||
| cliVersion: body.metadata.cliPackageVersion, | ||
| sdkVersion: body.metadata.packageVersion, | ||
| supportsLazyAttempts: body.supportsLazyAttempts, | ||
| engine: body.engine, | ||
| runtime: body.metadata.runtime, | ||
| runtimeVersion: body.metadata.runtimeVersion, | ||
| }, | ||
| }); | ||
| const backgroundWorker = await findOrCreateBackgroundWorker( | ||
| environment, | ||
| deployment, | ||
| body, | ||
| this._prisma | ||
| ); | ||
|
Comment on lines
+90
to
+95
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🚩 409 ServiceValidationError from findOrCreateBackgroundWorker doesn't fail the deployment When Was this helpful? React with 👍 or 👎 to provide feedback. |
||
|
|
||
| //upgrade the project to engine "V2" if it's not already | ||
| if (environment.project.engine === "V1" && body.engine === "V2") { | ||
|
|
@@ -188,10 +192,11 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { | |
| throw serviceError; | ||
| } | ||
|
|
||
| // Link the deployment with the background worker | ||
| await this._prisma.workerDeployment.update({ | ||
| // Guarded BUILDING → DEPLOYING transition. `updateMany` for optimistic concurrency control | ||
| const { count: updatedCount } = await this._prisma.workerDeployment.updateMany({ | ||
| where: { | ||
| id: deployment.id, | ||
| status: "BUILDING", | ||
| }, | ||
| data: { | ||
| status: "DEPLOYING", | ||
|
|
@@ -203,6 +208,18 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { | |
| }, | ||
| }); | ||
|
|
||
| if (updatedCount === 0) { | ||
| logger.warn( | ||
| "createDeploymentBackgroundWorker: deployment no longer in BUILDING state, skipping DEPLOYING transition", | ||
| { | ||
| deploymentId, | ||
| environmentId: environment.id, | ||
| projectId: environment.projectId, | ||
| } | ||
| ); | ||
| return backgroundWorker; | ||
| } | ||
|
|
||
| await TimeoutDeploymentService.enqueue( | ||
| deployment.id, | ||
| "DEPLOYING", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| import type { CreateBackgroundWorkerRequestBody } from "@trigger.dev/core/v3"; | ||
| import { BackgroundWorkerId } from "@trigger.dev/core/v3/isomorphic"; | ||
| import type { | ||
| BackgroundWorker, | ||
| PrismaClientOrTransaction, | ||
| WorkerDeployment, | ||
| } from "@trigger.dev/database"; | ||
| import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; | ||
| import { ServiceValidationError } from "../common.server"; | ||
| import { stripBackgroundWorkerMetadataForStorage } from "../stripBackgroundWorkerMetadataForStorage.server"; | ||
|
|
||
| /** | ||
| * Idempotent on `(project, environment, version)` for sequential calls, not concurrent calls. | ||
| */ | ||
| export async function findOrCreateBackgroundWorker( | ||
| environment: AuthenticatedEnvironment, | ||
| deployment: WorkerDeployment, | ||
| body: CreateBackgroundWorkerRequestBody, | ||
| prisma: PrismaClientOrTransaction | ||
| ): Promise<BackgroundWorker> { | ||
| const existing = await prisma.backgroundWorker.findFirst({ | ||
| where: { | ||
| projectId: environment.projectId, | ||
| runtimeEnvironmentId: environment.id, | ||
| version: deployment.version, | ||
| }, | ||
| }); | ||
|
|
||
| if (existing && existing.contentHash === body.metadata.contentHash) { | ||
| return existing; | ||
| } | ||
|
|
||
| if (existing) { | ||
| throw new ServiceValidationError( | ||
| "A background worker for this deployment version already exists with a different content hash", | ||
| 409 | ||
| ); | ||
| } | ||
|
|
||
| return prisma.backgroundWorker.create({ | ||
| data: { | ||
| ...BackgroundWorkerId.generate(), | ||
| version: deployment.version, | ||
| runtimeEnvironmentId: environment.id, | ||
| projectId: environment.projectId, | ||
| metadata: stripBackgroundWorkerMetadataForStorage(body.metadata), | ||
| contentHash: body.metadata.contentHash, | ||
| cliVersion: body.metadata.cliPackageVersion, | ||
| sdkVersion: body.metadata.packageVersion, | ||
| supportsLazyAttempts: body.supportsLazyAttempts, | ||
| engine: body.engine, | ||
| runtime: body.metadata.runtime, | ||
| runtimeVersion: body.metadata.runtimeVersion, | ||
| }, | ||
| }); | ||
|
Comment on lines
+40
to
+55
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🚩 findOrCreateBackgroundWorker has unhandled P2002 from create on concurrent calls The Was this helpful? React with 👍 or 👎 to provide feedback. |
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| import { BackgroundWorkerMetadata } from "@trigger.dev/core/v3"; | ||
| import { Prisma } from "@trigger.dev/database"; | ||
|
|
||
| /** | ||
| * Strip BackgroundWorkerMetadata down to the slice that's actually read after | ||
| * storage. Everything else is duplicated to dedicated columns/tables | ||
| * (BackgroundWorker.{contentHash,cliVersion,sdkVersion,runtime,runtimeVersion}, | ||
| * BackgroundWorkerTask, BackgroundWorkerFile, TaskQueue, Prompt). Today the | ||
| * only post-write reader is changeCurrentDeployment.server.ts, which feeds | ||
| * tasks[].schedule into syncDeclarativeSchedules. packageVersion, contentHash, | ||
| * and tasks[].filePath are kept solely to satisfy BackgroundWorkerMetadata's | ||
| * required fields when the column is parsed back. | ||
| */ | ||
| export function stripBackgroundWorkerMetadataForStorage( | ||
| metadata: BackgroundWorkerMetadata | ||
| ): Prisma.InputJsonValue { | ||
| return { | ||
| packageVersion: metadata.packageVersion, | ||
| contentHash: metadata.contentHash, | ||
| tasks: metadata.tasks | ||
| .filter((t) => t.schedule) | ||
| .map((t) => ({ | ||
| id: t.id, | ||
| filePath: t.filePath, | ||
| schedule: t.schedule, | ||
| })), | ||
| }; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🔴
#failBackgroundWorkerDeploymentlacks status guard, can regress deployment from DEPLOYING to FAILED on overlapping retriesThe BUILDING→DEPLOYING transition was correctly guarded with
updateMany+status: "BUILDING"(line 196-209), but#failBackgroundWorkerDeploymentstill uses an unconditionalupdate(line 235). Before this PR, a retry would fail fast on P2002 atbackgroundWorker.createand never reach the error handlers. Now,findOrCreateBackgroundWorkerallows retries to proceed past worker creation into file/resource/schedule creation. If a client-side timeout causes the first request (A) and the retry (B) to overlap on the server, and B encounters a transient error increateBackgroundFiles/createWorkerResources/syncDeclarativeSchedules, B calls#failBackgroundWorkerDeploymentwhich unconditionally sets the deployment to FAILED — even if A has already transitioned it to DEPLOYING. A's subsequentupdateManythen finds 0 rows (status is FAILED, not BUILDING) and silently skips the transition, leaving the deployment stuck in FAILED.Concrete scenario timeline
findOrCreateBackgroundWorkerreturns existing workerupdateManysucceeds → deployment now DEPLOYINGcreateBackgroundFiles#failBackgroundWorkerDeployment→ unconditionalupdatesets FAILED(Refers to lines 234-252)
Prompt for agents
Was this helpful? React with 👍 or 👎 to provide feedback.