diff --git a/.server-changes/runtime-api-origin.md b/.server-changes/runtime-api-origin.md new file mode 100644 index 00000000000..54edd511634 --- /dev/null +++ b/.server-changes/runtime-api-origin.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Add `RUNTIME_API_ORIGIN` env var to route managed runner traffic through an in-cluster URL, bypassing tracing gateways that rewrite the W3C `traceparent` header and break parent→child run links. diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index 6fb6c4ac283..40dd7e58976 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -127,6 +127,21 @@ const EnvironmentSchema = z LOGIN_RATE_LIMITS_ENABLED: BoolEnv.default(true), APP_ORIGIN: z.string().default("http://localhost:3030"), API_ORIGIN: z.string().optional(), + // Origin that the webapp publishes to MANAGED (deployed) runner pods as + // both `TRIGGER_API_URL` and `TRIGGER_STREAM_URL` (overriding `STREAM_ORIGIN`). + // When self-hosting behind a tracing-enabled gateway (Envoy/Istio/etc.) + // that rewrites the W3C `traceparent` on egress, point this at an + // in-cluster service URL so runner-to-webapp traffic stays inside the + // cluster and the parent->child run link in the trace tree is preserved. + // Intentionally NOT used for dev (CLI) task runs, which usually run on a + // developer's machine outside the cluster and would lose connectivity if + // forced onto an in-cluster URL. Empty string is normalized to unset so + // blank `${RUNTIME_API_ORIGIN:-}` passthroughs from caller environments + // don't short-circuit the `??` fallback chain. 
+ RUNTIME_API_ORIGIN: z + .string() + .optional() + .transform((v) => v || undefined), STREAM_ORIGIN: z.string().optional(), ELECTRIC_ORIGIN: z.string().default("http://localhost:3060"), // A comma separated list of electric origins to shard into different electric instances by environmentId diff --git a/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts b/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts index f2ca46d4d3a..cade17d5308 100644 --- a/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts +++ b/apps/webapp/app/v3/environmentVariables/environmentVariablesRepository.server.ts @@ -941,6 +941,13 @@ function renameVariables(variables: EnvironmentVariable[], renameMap: Record = [ { @@ -1080,6 +1095,12 @@ async function resolveBuiltInDevVariables(runtimeEnvironment: RuntimeEnvironment return [...result, ...commonVariables]; } +/** + * Resolves the OpenTelemetry collector endpoint advertised to dev (CLI) task + * runs. Defaults to the webapp's own `/otel` route under `APP_ORIGIN` so a + * vanilla self-host works without extra wiring; `DEV_OTEL_EXPORTER_OTLP_ENDPOINT` + * can override it to point spans/logs at an external collector. + */ async function resolveOverridableOtelDevVariables( runtimeEnvironment: RuntimeEnvironmentForEnvRepo ) { @@ -1093,6 +1114,15 @@ async function resolveOverridableOtelDevVariables( return result; } +/** + * Resolves built-in environment variables that are injected into managed + * (deployed) task runs. `TRIGGER_API_URL` and `TRIGGER_STREAM_URL` prefer + * `RUNTIME_API_ORIGIN` over `API_ORIGIN`/`STREAM_ORIGIN` so self-hosted + * deployments can keep runner-to-webapp traffic on a cluster-internal hop + * (bypassing tracing-enabled gateways that rewrite the W3C `traceparent` + * header on egress) without affecting the public origins exposed to external + * clients. 
+ */ async function resolveBuiltInProdVariables( runtimeEnvironment: RuntimeEnvironmentForEnvRepo, parentEnvironment?: RuntimeEnvironmentForEnvRepo @@ -1104,11 +1134,11 @@ async function resolveBuiltInProdVariables( }, { key: "TRIGGER_API_URL", - value: env.API_ORIGIN ?? env.APP_ORIGIN, + value: env.RUNTIME_API_ORIGIN ?? env.API_ORIGIN ?? env.APP_ORIGIN, }, { key: "TRIGGER_STREAM_URL", - value: env.STREAM_ORIGIN ?? env.API_ORIGIN ?? env.APP_ORIGIN, + value: env.RUNTIME_API_ORIGIN ?? env.STREAM_ORIGIN ?? env.API_ORIGIN ?? env.APP_ORIGIN, }, { key: "TRIGGER_RUNTIME_WAIT_THRESHOLD_IN_MS", diff --git a/hosting/docker/.env.example b/hosting/docker/.env.example index 4c14201d1ab..1b3941a3623 100644 --- a/hosting/docker/.env.example +++ b/hosting/docker/.env.example @@ -46,6 +46,16 @@ API_ORIGIN=http://localhost:8030 DEV_OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:8030/otel # You may need to set this when testing locally or when using the combined setup # API_ORIGIN=http://webapp:3000 +# Optional: origin advertised to MANAGED (deployed) runner pods as both +# TRIGGER_API_URL and TRIGGER_STREAM_URL (intentional: keeps all managed +# runner traffic on the same bypass hop). Dev (CLI) task runs are NOT +# affected -- they keep using API_ORIGIN/APP_ORIGIN so a developer running +# `trigger.dev dev` from outside the cluster doesn't lose connectivity. +# Set this to an in-cluster service URL when running behind a tracing-enabled +# gateway that rewrites the W3C `traceparent` header on egress (e.g. Envoy/ +# Istio with tracing on). If you need streams on a dedicated endpoint (CDN, +# etc.), keep RUNTIME_API_ORIGIN unset and use STREAM_ORIGIN instead. +# RUNTIME_API_ORIGIN=http://webapp:3000 # Webapp - memory management # - This sets the maximum memory allocation for Node.js heap in MiB (e.g. 
"4096" for 4GB) diff --git a/hosting/docker/webapp/docker-compose.yml b/hosting/docker/webapp/docker-compose.yml index d246babf953..b06cac2603b 100644 --- a/hosting/docker/webapp/docker-compose.yml +++ b/hosting/docker/webapp/docker-compose.yml @@ -43,6 +43,7 @@ services: APP_ORIGIN: ${APP_ORIGIN:-http://localhost:8030} LOGIN_ORIGIN: ${LOGIN_ORIGIN:-http://localhost:8030} API_ORIGIN: ${API_ORIGIN:-http://localhost:8030} + RUNTIME_API_ORIGIN: ${RUNTIME_API_ORIGIN:-} ELECTRIC_ORIGIN: http://electric:3000 DATABASE_URL: ${DATABASE_URL:-postgresql://postgres:postgres@postgres:5432/main?schema=public&sslmode=disable} DIRECT_URL: ${DIRECT_URL:-postgresql://postgres:postgres@postgres:5432/main?schema=public&sslmode=disable} diff --git a/hosting/k8s/helm/templates/webapp.yaml b/hosting/k8s/helm/templates/webapp.yaml index 721e5e60705..8b31e030fa9 100644 --- a/hosting/k8s/helm/templates/webapp.yaml +++ b/hosting/k8s/helm/templates/webapp.yaml @@ -186,6 +186,10 @@ spec: value: {{ .Values.webapp.loginOrigin | quote }} - name: API_ORIGIN value: {{ .Values.webapp.apiOrigin | quote }} + {{- with .Values.webapp.runtimeApiOrigin }} + - name: RUNTIME_API_ORIGIN + value: {{ . | quote }} + {{- end }} - name: ELECTRIC_ORIGIN value: {{ include "trigger-v4.electric.url" . | quote }} {{- if include "trigger-v4.postgres.useSecretUrl" . }} diff --git a/hosting/k8s/helm/values.yaml b/hosting/k8s/helm/values.yaml index 062bebf9c7f..a20bce25b5d 100644 --- a/hosting/k8s/helm/values.yaml +++ b/hosting/k8s/helm/values.yaml @@ -68,6 +68,12 @@ webapp: appOrigin: "http://localhost:3040" loginOrigin: "http://localhost:3040" apiOrigin: "http://localhost:3040" + # Origin advertised to managed (deployed) runner pods as TRIGGER_API_URL and TRIGGER_STREAM_URL. + # When unset (default), runners use apiOrigin/appOrigin. Set this to an + # in-cluster service URL to keep runner->webapp traffic inside the cluster, + # bypassing gateways/proxies (e.g. 
Envoy with tracing enabled) that rewrite + # the W3C `traceparent` header on egress and break the parent->child run link. + runtimeApiOrigin: "" replicaCount: 1