unstract/docker/sample.env at UN-3608-pg-queue-executor-callback · Zipstack/unstract

132 lines (114 loc) · 6.72 KB
# Path to the tool registry config, where public and private tools
# are registered using YAML and JSON files
TOOL_REGISTRY_CONFIG_SRC_PATH="${PWD}/../unstract/tool-registry/tool_registry_config"
# Celery Autoscaling Configuration
# Specify the maximum and minimum number of concurrent pool processes for each Celery worker.
# Format: <max_workers>,<min_workers>
# Hint: Size the max value (max_workers) to your available CPU resources and the level of concurrency you need.
# Always monitor system performance and adjust the max value as needed.
# Worker autoscaling (matches hierarchical configuration below)
WORKER_API_DEPLOYMENT_AUTOSCALE=4,1         # API deployment worker autoscale
WORKER_CALLBACK_AUTOSCALE=4,1               # Callback worker autoscale
WORKER_GENERAL_AUTOSCALE=6,2                # General worker autoscale (enhanced)
WORKER_FILE_PROCESSING_AUTOSCALE=8,2        # File processing unified worker autoscale
WORKER_NOTIFICATION_AUTOSCALE=4,1           # Notification worker autoscale
WORKER_LOG_CONSUMER_AUTOSCALE=2,1           # Log consumer worker autoscale
WORKER_SCHEDULER_AUTOSCALE=2,1              # Scheduler worker autoscale
# Worker-specific configurations
API_DEPLOYMENT_WORKER_NAME=api-deployment-worker
API_DEPLOYMENT_HEALTH_PORT=8080
API_DEPLOYMENT_MAX_CONCURRENT_TASKS=5
CALLBACK_WORKER_NAME=callback-worker
CALLBACK_HEALTH_PORT=8083
CALLBACK_MAX_CONCURRENT_TASKS=3
FILE_PROCESSING_WORKER_NAME=file-processing-worker
FILE_PROCESSING_HEALTH_PORT=8082
FILE_PROCESSING_MAX_CONCURRENT_TASKS=4
GENERAL_WORKER_NAME=general-worker
GENERAL_HEALTH_PORT=8081
GENERAL_MAX_CONCURRENT_TASKS=10
# =============================================================================
# HIERARCHICAL CELERY CONFIGURATION SYSTEM
# =============================================================================
# This system uses a 3-tier hierarchy for all Celery settings (most specific wins):
# 1. {WORKER_TYPE}_{SETTING_NAME} - Worker-specific override (highest priority)
# 2. CELERY_{SETTING_NAME} - Global override (medium priority)
# 3. Code default - Celery standard default (lowest priority)
# Examples:
# - CALLBACK_TASK_TIME_LIMIT=3600 (callback worker only)
# - CELERY_TASK_TIME_LIMIT=300 (all workers)
# - Code provides default if neither is set
# Worker types: API_DEPLOYMENT, GENERAL, FILE_PROCESSING, CALLBACK,
#              NOTIFICATION, LOG_CONSUMER, SCHEDULER
# =============================================================================
# Global Celery Configuration (applies to all workers unless overridden)
CELERY_RESULT_CHORD_RETRY_INTERVAL=3        # Global chord unlock retry interval
CELERY_TASK_TIME_LIMIT=7200                 # Global task timeout (2 hours)
CELERY_TASK_SOFT_TIME_LIMIT=6300            # Global soft timeout (1h 45m)
CELERY_PREFETCH_MULTIPLIER=1                # Global prefetch multiplier
CELERY_MAX_TASKS_PER_CHILD=1000             # Global max tasks per child process
CELERY_TASK_ACKS_LATE=true                  # Global acks late setting
CELERY_TASK_DEFAULT_RETRY_DELAY=60          # Global retry delay (1 minute)
CELERY_TASK_MAX_RETRIES=3                   # Global max retries
# Worker-Specific Configuration Examples
# Callback Worker - Chord settings and timeouts (sample values match the global CELERY_* values above)
CALLBACK_RESULT_CHORD_RETRY_INTERVAL=3      # Callback-specific chord retry interval
CALLBACK_TASK_TIME_LIMIT=7200               # Callback hard timeout (2 hours)
CALLBACK_TASK_SOFT_TIME_LIMIT=6300          # Callback soft timeout (1h 45m)
# File Processing Worker - Thread pool and optimized settings
FILE_PROCESSING_POOL_TYPE=threads           # Use threads instead of prefork
FILE_PROCESSING_CONCURRENCY=4               # Fixed concurrency for file processing
FILE_PROCESSING_TASK_TIME_LIMIT=10800       # File processing timeout (3 hours)
# API Deployment Worker - Autoscaling and timeout configuration
API_DEPLOYMENT_AUTOSCALE=4,1                # Max 4, min 1 workers
API_DEPLOYMENT_TASK_TIME_LIMIT=3600         # API deployment timeout (1 hour)
# General Worker - Enhanced scaling for high-throughput tasks
GENERAL_AUTOSCALE=6,2                       # Max 6, min 2 workers
# Docker Worker-Specific Concurrency Settings (for docker-compose.yaml)
WORKER_API_DEPLOYMENT_CONCURRENCY=4         # API deployment fixed concurrency
WORKER_FILE_PROCESSING_CONCURRENCY=8        # File processing fixed concurrency
WORKER_NOTIFICATION_CONCURRENCY=4           # Notification worker concurrency
WORKER_LOG_CONSUMER_CONCURRENCY=2           # Log consumer worker concurrency
WORKER_SCHEDULER_CONCURRENCY=2              # Scheduler worker concurrency
# Notification Worker - Optimized for quick message processing
NOTIFICATION_AUTOSCALE=4,1                  # Max 4, min 1 workers
NOTIFICATION_TASK_TIME_LIMIT=120            # Quick timeout for notifications
# Scheduler Worker - Conservative settings for scheduled tasks
SCHEDULER_AUTOSCALE=2,1                     # Max 2, min 1 workers
SCHEDULER_TASK_TIME_LIMIT=1800              # Scheduler timeout (30 minutes)
# Log Consumer Worker - Optimized for log processing
LOG_CONSUMER_AUTOSCALE=2,1                  # Max 2, min 1 workers
LOG_CONSUMER_TASK_TIME_LIMIT=600            # Log processing timeout (10 minutes)
# Worker Circuit Breaker Settings
CIRCUIT_BREAKER_FAILURE_THRESHOLD=5
CIRCUIT_BREAKER_RECOVERY_TIMEOUT=60
# Worker Health Check Settings
HEALTH_CHECK_INTERVAL=30
HEALTH_CHECK_TIMEOUT=10
ENABLE_METRICS=true
# =============================================================================
# PG-Queue Services (Postgres-backed transport) — gated, opt-in
# =============================================================================
# The PG consumer/reaper services live behind the `pg-queue` compose profile and
# are OFF by default. Bring them up with:
#   docker compose --profile pg-queue up -d
# They consume from Postgres (not the broker); their worker-type/queue identity
# is set per service in docker-compose.yaml, so nothing here is required to run
# them. The values below are documented for reference / overrides only.
# WORKER_BARRIER_BACKEND        - fan-in barrier substrate; the PG services set
#                                 this to `pg` (default elsewhere: chord/redis).
# WORKER_PG_QUEUE_CONSUMER_WORKER_TYPE - which worker's tasks the consumer loads.
# WORKER_PG_QUEUE_CONSUMER_QUEUE       - comma-separated queues the consumer polls.
# WORKER_PG_QUEUE_CONSUMER_HEALTH_PORT - opt-in consumer liveness port (unset = off).
# WORKER_PG_REAPER_HEALTH_PORT         - opt-in reaper liveness port (unset = off).
WORKER_PG_REAPER_INTERVAL_SECONDS=5   # Reaper sweep interval (seconds)
# NOTE: routing executions to PG is a SEPARATE, later step. Running these
# services does NOT move any traffic — the backend gate PG_QUEUE_TRANSPORT_ENABLED
# (in backend/.env, default off) plus the Flipt flag still decide per-execution
# transport, and both stay off until the rollout ramp.
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

sample.env

Latest commit

History

sample.env

File metadata and controls