Skip to content

Commit c8aaea8

Browse files
authored
Improvements: Gracefully shutdown to prevent locked jobs (triggerdotdev#648)
* WIP
* Report ECS task info on startup and shutdown
* Fixed lifecycle name
* Re-add terminus
* Require the build dir when http server is disabled
* Remove unnecessary logs
* Implement graceful shutdown in ZodWorker
* Re-order some code
* Increase the keepAliveTimeout to 65 seconds to prevent LB 502 errors
1 parent d1ecd6b commit c8aaea8

File tree

7 files changed

+266
-534
lines changed

7 files changed

+266
-534
lines changed

apps/webapp/app/env.server.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ const EnvironmentSchema = z.object({
4040
EXECUTION_WORKER_POLL_INTERVAL: z.coerce.number().int().default(1000),
4141
WORKER_ENABLED: z.string().default("true"),
4242
EXECUTION_WORKER_ENABLED: z.string().default("true"),
43+
GRACEFUL_SHUTDOWN_TIMEOUT: z.coerce.number().int().default(60000),
4344
});
4445

4546
export type Environment = z.infer<typeof EnvironmentSchema>;

apps/webapp/app/platform/zodWorker.server.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ export type ZodWorkerOptions<TMessageCatalog extends MessageCatalogSchema> = {
102102
recurringTasks?: ZodRecurringTasks;
103103
cleanup?: ZodWorkerCleanupOptions;
104104
reporter?: ZodWorkerReporter;
105+
shutdownTimeoutInMs?: number;
105106
};
106107

107108
export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
@@ -114,6 +115,8 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
114115
#runner?: GraphileRunner;
115116
#cleanup: ZodWorkerCleanupOptions | undefined;
116117
#reporter?: ZodWorkerReporter;
118+
#shutdownTimeoutInMs?: number;
119+
#shuttingDown = false;
117120

118121
constructor(options: ZodWorkerOptions<TMessageCatalog>) {
119122
this.#name = options.name;
@@ -124,6 +127,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
124127
this.#recurringTasks = options.recurringTasks;
125128
this.#cleanup = options.cleanup;
126129
this.#reporter = options.reporter;
130+
this.#shutdownTimeoutInMs = options.shutdownTimeoutInMs ?? 60000; // default to 60 seconds
127131
}
128132

129133
get graphileWorkerSchema() {
@@ -143,6 +147,7 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
143147

144148
this.#runner = await graphileRun({
145149
...this.#runnerOptions,
150+
noHandleSignals: true,
146151
taskList: this.#createTaskListFromTasks(),
147152
parsedCronItems,
148153
});
@@ -199,9 +204,36 @@ export class ZodWorker<TMessageCatalog extends MessageCatalogSchema> {
199204
this.#logDebug("stop");
200205
});
201206

207+
process.on("SIGTERM", this._handleSignal("SIGTERM").bind(this));
208+
process.on("SIGINT", this._handleSignal("SIGINT").bind(this));
209+
202210
return true;
203211
}
204212

213+
/**
 * Builds the listener that is registered on the process for a shutdown
 * signal (this diff registers it for "SIGTERM" and "SIGINT").
 *
 * The returned handler acts only once per worker instance: it marks the
 * worker as shutting down, optionally arms a timer that force-exits the
 * process, logs the received signal, and then calls stop().
 *
 * @param signal - Name of the signal; used only in the debug log message.
 * @returns A zero-argument handler suitable for process.on(...).
 */
private _handleSignal(signal: string) {
214+
return () => {
215+
// Only the first signal starts a shutdown; any later signal is ignored.
if (this.#shuttingDown) {
216+
return;
217+
}
218+
219+
this.#shuttingDown = true;
220+
221+
// A falsy timeout (0 or undefined) arms no timer, so nothing here forces an exit.
if (this.#shutdownTimeoutInMs) {
222+
// NOTE(review): this timer is not cancelled when stop() settles, and it exits
// with status 0 even though the timeout was reached - confirm both are intended.
setTimeout(() => {
223+
this.#logDebug("Shutdown timeout reached, exiting process");
224+
225+
process.exit(0);
226+
}, this.#shutdownTimeoutInMs);
227+
}
228+
229+
this.#logDebug(`Received ${signal}, shutting down zodWorker...`);
230+
231+
// finally() logs whether stop() resolves or rejects; a rejection is not caught here.
this.stop().finally(() => {
232+
this.#logDebug("zodWorker stopped");
233+
});
234+
};
235+
}
236+
205237
/**
 * Stops the underlying runner, awaiting its stop() call.
 * Resolves without doing anything when no runner has been assigned.
 */
public async stop() {
206238
await this.#runner?.stop();
207239
}

apps/webapp/app/services/worker.server.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import { DeliverEventService } from "./events/deliverEvent.server";
1212
import { InvokeDispatcherService } from "./events/invokeDispatcher.server";
1313
import { integrationAuthRepository } from "./externalApis/integrationAuthRepository.server";
1414
import { IntegrationConnectionCreatedService } from "./externalApis/integrationConnectionCreated.server";
15-
import { logger } from "./logger.server";
1615
import { MissingConnectionCreatedService } from "./runs/missingConnectionCreated.server";
1716
import { PerformRunExecutionV1Service } from "./runs/performRunExecutionV1.server";
1817
import { PerformRunExecutionV2Service } from "./runs/performRunExecutionV2.server";
@@ -148,6 +147,7 @@ function getWorkerQueue() {
148147
schema: env.WORKER_SCHEMA,
149148
maxPoolSize: env.WORKER_CONCURRENCY,
150149
},
150+
shutdownTimeoutInMs: env.GRACEFUL_SHUTDOWN_TIMEOUT,
151151
schema: workerCatalog,
152152
recurringTasks: {
153153
// Run this every 5 minutes
@@ -338,6 +338,7 @@ function getExecutionWorkerQueue() {
338338
schema: env.WORKER_SCHEMA,
339339
maxPoolSize: env.EXECUTION_WORKER_CONCURRENCY,
340340
},
341+
shutdownTimeoutInMs: env.GRACEFUL_SHUTDOWN_TIMEOUT,
341342
schema: executionWorkerCatalog,
342343
tasks: {
343344
performRunExecution: {

apps/webapp/package.json

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,11 @@
5555
"@radix-ui/react-switch": "^1.0.3",
5656
"@radix-ui/react-tabs": "^1.0.3",
5757
"@radix-ui/react-tooltip": "^1.0.5",
58-
"@remix-run/express": "1.19.2-pre.0",
59-
"@remix-run/node": "1.19.2-pre.0",
60-
"@remix-run/react": "1.19.2-pre.0",
61-
"@remix-run/serve": "1.19.2-pre.0",
62-
"@remix-run/server-runtime": "1.19.2-pre.0",
58+
"@remix-run/express": "1.19.2",
59+
"@remix-run/node": "1.19.2",
60+
"@remix-run/react": "1.19.2",
61+
"@remix-run/serve": "1.19.2",
62+
"@remix-run/server-runtime": "1.19.2",
6363
"@team-plain/typescript-sdk": "^2.2.0",
6464
"@trigger.dev/companyicons": "^1.5.14",
6565
"@trigger.dev/core": "workspace:*",
@@ -117,9 +117,9 @@
117117
"zod-validation-error": "^1.5.0"
118118
},
119119
"devDependencies": {
120-
"@remix-run/dev": "1.19.2-pre.0",
121-
"@remix-run/eslint-config": "1.19.2-pre.0",
122-
"@remix-run/testing": "^1.19.2-pre.0",
120+
"@remix-run/dev": "1.19.2",
121+
"@remix-run/eslint-config": "1.19.2",
122+
"@remix-run/testing": "^1.19.2",
123123
"@storybook/addon-backgrounds": "^7.0.7",
124124
"@storybook/addon-docs": "^7.0.12",
125125
"@storybook/addon-essentials": "^7.0.7",

apps/webapp/server.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ if (process.env.HTTP_SERVER_DISABLED !== "true") {
6161
console.log(`✅ app ready: http://localhost:${port}`);
6262
});
6363

64+
server.keepAliveTimeout = 65 * 1000;
65+
6466
// Handle shutdowns gracefully
6567
createTerminus(server, {
6668
signals: ["SIGINT", "SIGTERM"],
@@ -80,6 +82,7 @@ if (process.env.HTTP_SERVER_DISABLED !== "true") {
8082
},
8183
});
8284
} else {
85+
require(BUILD_DIR);
8386
console.log(`✅ app ready (skipping http server)`);
8487
}
8588

docker/scripts/entrypoint.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,6 @@ cp node_modules/@prisma/engines/*.node apps/webapp/prisma/
1414
pnpm --filter webapp db:seed
1515

1616
cd /triggerdotdev/apps/webapp
17-
exec dumb-init pnpm run start:local
17+
# exec dumb-init pnpm run start:local
18+
NODE_PATH='/triggerdotdev/node_modules/.pnpm/node_modules' exec dumb-init node --max-old-space-size=8192 ./build/server.js
19+

0 commit comments

Comments
 (0)