From da151f6f8d46c25c4421b2de0fd37855496b6519 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CThullyoCunha=E2=80=9D?= Date: Thu, 21 May 2026 12:54:17 -0300 Subject: [PATCH] feat(webapp): add RUNTIME_API_ORIGIN to decouple runner traffic from external origin The webapp publishes `API_ORIGIN` to runner pods as `TRIGGER_API_URL`, so runner-to-webapp traffic flows back through whatever URL is configured for external clients. Self-hosting behind a tracing-enabled gateway (Envoy, Istio, kgateway, ...) breaks the parent->child run link in trigger.dev's run-detail tree because the gateway's W3C `traceparent` rewrite on egress overwrites the SDK's `triggerAndWait()` span id. The webapp then writes that gateway-generated span id as the child run's `parentSpanId`, which never reaches the trigger event store, so the child renders as an orphan in the UI. Operators can split the two concerns without sacrificing external auth/callbacks/UI flows that rely on the public `API_ORIGIN`: - Set `RUNTIME_API_ORIGIN=http://<service>.<namespace>:<port>` (k8s) or `http://webapp:3000` (docker) to keep runner->webapp traffic on a cluster-internal hop that bypasses the gateway. - Leave `API_ORIGIN` on the public URL so the dashboard, magic-link emails, waitpoint callbacks, and API `apiUrl` responses keep working for external clients. Scope is intentionally limited to MANAGED (deployed) runs. Dev CLI runs keep the original `API_ORIGIN`/`APP_ORIGIN` chain so a developer running `trigger.dev dev` from outside the cluster does not lose connectivity. `STREAM_ORIGIN` is still honored as a dedicated stream endpoint when `RUNTIME_API_ORIGIN` is unset; when both are set, `RUNTIME_API_ORIGIN` takes precedence for `TRIGGER_STREAM_URL` so the bypass keeps streams on the same internal hop. The new env is optional and falls back to `API_ORIGIN`/`APP_ORIGIN`, so existing deployments are unaffected. 
An empty string is normalized to `undefined` in the zod schema so blank `${RUNTIME_API_ORIGIN:-}` passthroughs from caller environments do not short-circuit the fallback chain. Helm chart and Docker Compose are wired to forward the value to the webapp container. Refs: https://github.com/triggerdotdev/trigger.dev/issues/2821 --- .server-changes/runtime-api-origin.md | 6 ++++ apps/webapp/app/env.server.ts | 15 ++++++++ .../environmentVariablesRepository.server.ts | 34 +++++++++++++++++-- hosting/docker/.env.example | 10 ++++++ hosting/docker/webapp/docker-compose.yml | 1 + hosting/k8s/helm/templates/webapp.yaml | 4 +++ hosting/k8s/helm/values.yaml | 6 ++++ 7 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 .server-changes/runtime-api-origin.md diff --git a/.server-changes/runtime-api-origin.md b/.server-changes/runtime-api-origin.md new file mode 100644 index 00000000000..54edd511634 --- /dev/null +++ b/.server-changes/runtime-api-origin.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Add `RUNTIME_API_ORIGIN` env var to route managed runner traffic through an in-cluster URL, bypassing tracing gateways that rewrite the W3C `traceparent` header and break parent→child run links. diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index 6fb6c4ac283..40dd7e58976 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -127,6 +127,21 @@ const EnvironmentSchema = z LOGIN_RATE_LIMITS_ENABLED: BoolEnv.default(true), APP_ORIGIN: z.string().default("http://localhost:3030"), API_ORIGIN: z.string().optional(), + // Origin that the webapp publishes to MANAGED (deployed) runner pods as + // both `TRIGGER_API_URL` and (ahead of `STREAM_ORIGIN`) `TRIGGER_STREAM_URL`. + // When self-hosting behind a tracing-enabled gateway (Envoy/Istio/etc.) 
+ // that rewrites the W3C `traceparent` on egress, point this at an + // in-cluster service URL so runner-to-webapp traffic stays inside the + // cluster and the parent->child run link in the trace tree is preserved. + // Intentionally NOT used for dev (CLI) task runs, which usually run on a + // developer's machine outside the cluster and would lose connectivity if + // forced onto an in-cluster URL. Empty string is normalized to unset so + // blank `${RUNTIME_API_ORIGIN:-}` passthroughs from caller environments + // don't short-circuit the `??` fallback chain. + RUNTIME_API_ORIGIN: z + .string() + .optional() + .transform((v) => v || undefined), STREAM_ORIGIN: z.string().optional(), ELECTRIC_ORIGIN: z.string().default("http://localhost:3060"), // A comma separated list of electric origins to shard into different electric instances by environmentId diff --git a/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts b/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts index f2ca46d4d3a..cade17d5308 100644 --- a/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts +++ b/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts @@ -941,6 +941,13 @@ function renameVariables(variables: EnvironmentVariable[], renameMap: Record = [ { @@ -1080,6 +1095,12 @@ async function resolveBuiltInDevVariables(runtimeEnvironment: RuntimeEnvironment return [...result, ...commonVariables]; } +/** + * Resolves the OpenTelemetry collector endpoint advertised to dev (CLI) task + * runs. Defaults to the webapp's own `/otel` route under `APP_ORIGIN` so a + * vanilla self-host works without extra wiring; `DEV_OTEL_EXPORTER_OTLP_ENDPOINT` + * can override it to point spans/logs at an external collector. 
+ */ async function resolveOverridableOtelDevVariables( runtimeEnvironment: RuntimeEnvironmentForEnvRepo ) { @@ -1093,6 +1114,15 @@ async function resolveOverridableOtelDevVariables( return result; } +/** + * Resolves built-in environment variables that are injected into managed + * (deployed) task runs. `TRIGGER_API_URL` and `TRIGGER_STREAM_URL` prefer + * `RUNTIME_API_ORIGIN` over `API_ORIGIN`/`STREAM_ORIGIN` so self-hosted + * deployments can keep runner-to-webapp traffic on a cluster-internal hop + * (bypassing tracing-enabled gateways that rewrite the W3C `traceparent` + * header on egress) without affecting the public origins exposed to external + * clients. + */ async function resolveBuiltInProdVariables( runtimeEnvironment: RuntimeEnvironmentForEnvRepo, parentEnvironment?: RuntimeEnvironmentForEnvRepo @@ -1104,11 +1134,11 @@ async function resolveBuiltInProdVariables( }, { key: "TRIGGER_API_URL", - value: env.API_ORIGIN ?? env.APP_ORIGIN, + value: env.RUNTIME_API_ORIGIN ?? env.API_ORIGIN ?? env.APP_ORIGIN, }, { key: "TRIGGER_STREAM_URL", - value: env.STREAM_ORIGIN ?? env.API_ORIGIN ?? env.APP_ORIGIN, + value: env.RUNTIME_API_ORIGIN ?? env.STREAM_ORIGIN ?? env.API_ORIGIN ?? env.APP_ORIGIN, }, { key: "TRIGGER_RUNTIME_WAIT_THRESHOLD_IN_MS", diff --git a/hosting/docker/.env.example b/hosting/docker/.env.example index 4c14201d1ab..1b3941a3623 100644 --- a/hosting/docker/.env.example +++ b/hosting/docker/.env.example @@ -46,6 +46,16 @@ API_ORIGIN=http://localhost:8030 DEV_OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:8030/otel # You may need to set this when testing locally or when using the combined setup # API_ORIGIN=http://webapp:3000 +# Optional: origin advertised to MANAGED (deployed) runner pods as both +# TRIGGER_API_URL and TRIGGER_STREAM_URL (intentional: keeps all managed +# runner traffic on the same bypass hop). 
Dev (CLI) task runs are NOT +# affected -- they keep using API_ORIGIN/APP_ORIGIN so a developer running +# `trigger.dev dev` from outside the cluster doesn't lose connectivity. +# Set this to an in-cluster service URL when running behind a tracing-enabled +# gateway that rewrites the W3C `traceparent` header on egress (e.g. Envoy/ +# Istio with tracing on). If you need streams on a dedicated endpoint (CDN, +# etc.), keep RUNTIME_API_ORIGIN unset and use STREAM_ORIGIN instead. +# RUNTIME_API_ORIGIN=http://webapp:3000 # Webapp - memory management # - This sets the maximum memory allocation for Node.js heap in MiB (e.g. "4096" for 4GB) diff --git a/hosting/docker/webapp/docker-compose.yml b/hosting/docker/webapp/docker-compose.yml index d246babf953..b06cac2603b 100644 --- a/hosting/docker/webapp/docker-compose.yml +++ b/hosting/docker/webapp/docker-compose.yml @@ -43,6 +43,7 @@ services: APP_ORIGIN: ${APP_ORIGIN:-http://localhost:8030} LOGIN_ORIGIN: ${LOGIN_ORIGIN:-http://localhost:8030} API_ORIGIN: ${API_ORIGIN:-http://localhost:8030} + RUNTIME_API_ORIGIN: ${RUNTIME_API_ORIGIN:-} ELECTRIC_ORIGIN: http://electric:3000 DATABASE_URL: ${DATABASE_URL:-postgresql://postgres:postgres@postgres:5432/main?schema=public&sslmode=disable} DIRECT_URL: ${DIRECT_URL:-postgresql://postgres:postgres@postgres:5432/main?schema=public&sslmode=disable} diff --git a/hosting/k8s/helm/templates/webapp.yaml b/hosting/k8s/helm/templates/webapp.yaml index 721e5e60705..8b31e030fa9 100644 --- a/hosting/k8s/helm/templates/webapp.yaml +++ b/hosting/k8s/helm/templates/webapp.yaml @@ -186,6 +186,10 @@ spec: value: {{ .Values.webapp.loginOrigin | quote }} - name: API_ORIGIN value: {{ .Values.webapp.apiOrigin | quote }} + {{- with .Values.webapp.runtimeApiOrigin }} + - name: RUNTIME_API_ORIGIN + value: {{ . | quote }} + {{- end }} - name: ELECTRIC_ORIGIN value: {{ include "trigger-v4.electric.url" . | quote }} {{- if include "trigger-v4.postgres.useSecretUrl" . 
}} diff --git a/hosting/k8s/helm/values.yaml b/hosting/k8s/helm/values.yaml index 062bebf9c7f..a20bce25b5d 100644 --- a/hosting/k8s/helm/values.yaml +++ b/hosting/k8s/helm/values.yaml @@ -68,6 +68,12 @@ webapp: appOrigin: "http://localhost:3040" loginOrigin: "http://localhost:3040" apiOrigin: "http://localhost:3040" + # Origin advertised to managed (deployed) runner pods as TRIGGER_API_URL and + # TRIGGER_STREAM_URL. When unset (default), runners use apiOrigin/appOrigin. + # Set this to an in-cluster service URL to keep runner->webapp traffic inside + # the cluster, bypassing gateways/proxies (e.g. Envoy with tracing enabled) that + # rewrite the W3C `traceparent` header on egress and break the parent->child run link. + runtimeApiOrigin: "" replicaCount: 1