diff --git a/.github/workflows/embedding-integration.yml b/.github/workflows/embedding-integration.yml
new file mode 100644
index 0000000000..cae5fb5fba
--- /dev/null
+++ b/.github/workflows/embedding-integration.yml
@@ -0,0 +1,42 @@
+name: Embedding Service Integration Tests
+
+on:
+ push:
+ paths:
+ - 'examples/embedding_service/**'
+ - 'gunicorn/dirty/**'
+ pull_request:
+ paths:
+ - 'examples/embedding_service/**'
+ - 'gunicorn/dirty/**'
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ timeout-minutes: 15
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Build and start service
+ run: |
+ cd examples/embedding_service
+ docker compose up -d --build
+ docker compose logs -f &
+
+      - name: Wait for healthy
+        run: |
+          for i in {1..30}; do
+            if curl -sf http://127.0.0.1:8000/health; then
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "Service did not become healthy in time" >&2
+          exit 1
+
+ - name: Run tests
+ run: |
+ pip install requests numpy
+ python examples/embedding_service/test_embedding.py
+
+ - name: Cleanup
+ if: always()
+ run: |
+ cd examples/embedding_service
+ docker compose down
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 8e1e6a2ddd..b0736303d4 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -13,7 +13,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- toxenv: [lint, docs-lint, pycodestyle]
+ toxenv: [lint, pycodestyle]
python-version: [ "3.12" ]
include:
# for actions that want git env, not tox env
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
deleted file mode 100644
index 0ff5596272..0000000000
--- a/.readthedocs.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-# .readthedocs.yaml
-# Read the Docs configuration file
-# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
-
-# Required
-version: 2
-
-# Set the version of Python and other tools you might need
-build:
- os: ubuntu-22.04
- tools:
- python: "3.11"
-
-# Build documentation in the docs/ directory with Sphinx
-sphinx:
- configuration: docs/source/conf.py
-
-# We recommend specifying your dependencies to enable reproducible builds:
-# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
-# python:
-# install:
-# - requirements: docs/requirements.txt
diff --git a/MANIFEST.in b/MANIFEST.in
index 1423168b5e..e42b54e298 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,7 +1,7 @@
include .gitignore
include LICENSE
include NOTICE
-include README.rst
+include README.md
include THANKS
include requirements_dev.txt
include requirements_test.txt
diff --git a/Makefile b/Makefile
index 2c7d8bc256..17f2f2855c 100644
--- a/Makefile
+++ b/Makefile
@@ -3,12 +3,6 @@ build:
venv/bin/pip install -e .
venv/bin/pip install -r requirements_dev.txt
-test:
- venv/bin/python setup.py test
-
-coverage:
- venv/bin/python setup.py test --cov
-
docs:
mkdocs build
@@ -20,4 +14,4 @@ clean:
@find . -type f -name "*.py[co]" -delete
@find . -type d -name "__pycache__" -delete
-.PHONY: build clean coverage docs docs-serve test
+.PHONY: build clean docs docs-serve
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..eac4c4f756
--- /dev/null
+++ b/README.md
@@ -0,0 +1,59 @@
+# Gunicorn
+
+[![PyPI version](https://img.shields.io/pypi/v/gunicorn.svg?style=flat)](https://pypi.python.org/pypi/gunicorn)
+[![Supported Python versions](https://img.shields.io/pypi/pyversions/gunicorn.svg)](https://pypi.python.org/pypi/gunicorn)
+[![Build Status](https://github.com/benoitc/gunicorn/actions/workflows/tox.yml/badge.svg)](https://github.com/benoitc/gunicorn/actions/workflows/tox.yml)
+
+Gunicorn 'Green Unicorn' is a Python WSGI HTTP Server for UNIX. It's a pre-fork
+worker model ported from Ruby's [Unicorn](https://bogomips.org/unicorn/) project. The Gunicorn server is broadly
+compatible with various web frameworks, simply implemented, light on server
+resource usage, and fairly speedy.
+
+**New in v25**: Per-app worker allocation for dirty arbiters, HTTP/2 support (beta)!
+
+## Quick Start
+
+```bash
+pip install gunicorn
+gunicorn myapp:app --workers 4
+```
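+
+Here `myapp:app` points at a module exposing a WSGI callable named `app`. A
+minimal sketch (the module name is only an illustration):
+
+```python
+# myapp.py -- minimal WSGI application
+def app(environ, start_response):
+    data = b"Hello, world!\n"
+    start_response("200 OK", [
+        ("Content-Type", "text/plain"),
+        ("Content-Length", str(len(data))),
+    ])
+    return [data]
+```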
+
+For ASGI applications (FastAPI, Starlette):
+
+```bash
+gunicorn myapp:app --worker-class asgi
+```
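+
+The ASGI worker expects an ASGI callable instead, for example a FastAPI
+instance or a plain coroutine implementing the ASGI interface. A minimal
+sketch (again, the module name is only an illustration):
+
+```python
+# myapp.py -- minimal ASGI application
+async def app(scope, receive, send):
+    assert scope["type"] == "http"
+    await send({
+        "type": "http.response.start",
+        "status": 200,
+        "headers": [(b"content-type", b"text/plain")],
+    })
+    await send({"type": "http.response.body", "body": b"Hello, world!\n"})
+```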
+
+## Features
+
+- WSGI support for Django, Flask, Pyramid, and any WSGI framework
+- **ASGI support** (beta) for FastAPI, Starlette, Quart
+- **HTTP/2 support** (beta) with multiplexed streams
+- **Dirty Arbiters** for heavy workloads (ML models, long-running tasks); see the sketch below
+- uWSGI binary protocol for nginx integration
+- Multiple worker types: sync, gthread, gevent, eventlet, asgi
+- Graceful worker process management
+- Compatible with Python 3.9+
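+
+A rough sketch of a dirty app, loosely based on `benchmarks/dirty_bench_app.py`
+in this repository; the module, class, and method names here are illustrative,
+not a fixed API:
+
+```python
+# heavy_app.py (hypothetical module)
+from gunicorn.dirty import DirtyApp
+
+
+class HeavyApp(DirtyApp):
+    def init(self):
+        # Load expensive resources once per dirty worker.
+        self.model = None  # placeholder: load your real model here
+
+    def predict(self, text):
+        # Runs in the dirty pool, keeping HTTP workers responsive.
+        return {"length": len(text)}
+```
+
+HTTP workers then call into the pool through the client helper, e.g.
+`get_dirty_client().execute("heavy_app:HeavyApp", "predict", "hello")`, and the
+pool itself is enabled with settings such as `dirty_apps` and `dirty_workers`
+(see `benchmarks/dirty_bench_gunicorn.py` for a complete configuration).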
+
+## Documentation
+
+Full documentation at https://gunicorn.org
+
+- [Quickstart](https://gunicorn.org/quickstart/)
+- [Configuration](https://gunicorn.org/configure/)
+- [Deployment](https://gunicorn.org/deploy/)
+- [Settings Reference](https://gunicorn.org/reference/settings/)
+
+## Community
+
+- Report bugs on [GitHub Issues](https://github.com/benoitc/gunicorn/issues)
+- Chat in [#gunicorn](https://web.libera.chat/?channels=#gunicorn) on [Libera.chat](https://libera.chat/)
+- See [CONTRIBUTING.md](CONTRIBUTING.md) for contribution guidelines
+
+## Sponsors
+
+Gunicorn is maintained thanks to our sponsors. [Become a sponsor](https://github.com/sponsors/benoitc).
+
+## License
+
+Gunicorn is released under the MIT License. See the [LICENSE](https://github.com/benoitc/gunicorn/blob/master/LICENSE) file for details.
diff --git a/README.rst b/README.rst
deleted file mode 100644
index 97558ec278..0000000000
--- a/README.rst
+++ /dev/null
@@ -1,79 +0,0 @@
-Gunicorn
-========
-
-.. image:: https://img.shields.io/pypi/v/gunicorn.svg?style=flat
- :alt: PyPI version
- :target: https://pypi.python.org/pypi/gunicorn
-
-.. image:: https://img.shields.io/pypi/pyversions/gunicorn.svg
- :alt: Supported Python versions
- :target: https://pypi.python.org/pypi/gunicorn
-
-.. image:: https://github.com/benoitc/gunicorn/actions/workflows/tox.yml/badge.svg
- :alt: Build Status
- :target: https://github.com/benoitc/gunicorn/actions/workflows/tox.yml
-
-Gunicorn 'Green Unicorn' is a Python WSGI HTTP Server for UNIX. It's a pre-fork
-worker model ported from Ruby's Unicorn_ project. The Gunicorn server is broadly
-compatible with various web frameworks, simply implemented, light on server
-resource usage, and fairly speedy.
-
-**New in v24**: Native ASGI support (beta) for async frameworks like FastAPI!
-
-Quick Start
------------
-
-.. code-block:: bash
-
- pip install gunicorn
- gunicorn myapp:app --workers 4
-
-For ASGI applications (FastAPI, Starlette):
-
-.. code-block:: bash
-
- gunicorn myapp:app --worker-class asgi
-
-Features
---------
-
-- WSGI support for Django, Flask, Pyramid, and any WSGI framework
-- **ASGI support** (beta) for FastAPI, Starlette, Quart
-- uWSGI binary protocol for nginx integration
-- Multiple worker types: sync, gthread, gevent, eventlet, asgi
-- Graceful worker process management
-- Compatible with Python 3.12+
-
-Documentation
--------------
-
-Full documentation at https://gunicorn.org
-
-- `Quickstart `_
-- `Configuration `_
-- `Deployment `_
-- `Settings Reference `_
-
-Community
----------
-
-- Report bugs on `GitHub Issues `_
-- Chat in `#gunicorn`_ on `Libera.chat`_
-- See `CONTRIBUTING.md `_ for contribution guidelines
-
-Sponsors
---------
-
-Gunicorn is maintained thanks to our sponsors. `Become a sponsor `_.
-
-.. Sponsor logos will appear here
-
-License
--------
-
-Gunicorn is released under the MIT License. See the LICENSE_ file for details.
-
-.. _Unicorn: https://bogomips.org/unicorn/
-.. _`#gunicorn`: https://web.libera.chat/?channels=#gunicorn
-.. _`Libera.chat`: https://libera.chat/
-.. _LICENSE: https://github.com/benoitc/gunicorn/blob/master/LICENSE
diff --git a/SECURITY.md b/SECURITY.md
index 0ab2c51341..b65d3c7b79 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -10,18 +10,16 @@ If you believe you are found a problem in Gunicorn software, examples or documen
## Supported Releases
-At this time, **only the latest release** receives any security attention whatsoever.
-
Please target reports against :white_check_mark: or current master. Please understand that :x: will
not receive further security attention.
-| Version | Status |
+| Version | Status |
| ------- | ------------------ |
-| 23.0.0 | :white_check_mark: |
-| 22.0.0 | :x: |
-| 21.2.0 | :x: |
-| 20.0.0 | :x: |
-| < 20.0 | :x: |
+| 25.0.0 | :white_check_mark: |
+| 24.1.1 | :white_check_mark: |
+| 23.0.0 | :x: |
+| 22.0.0 | :x: |
+| < 22.0 | :x: |
## Python Versions
diff --git a/appveyor.yml b/appveyor.yml
index 1a3017417b..5a64f0debf 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -3,8 +3,6 @@ environment:
matrix:
- TOXENV: lint
PYTHON: "C:\\Python312-x64"
- - TOXENV: docs-lint
- PYTHON: "C:\\Python312-x64"
- TOXENV: pycodestyle
PYTHON: "C:\\Python312-x64"
# Windows cannot even import the module when they unconditionally import, see below.
diff --git a/benchmarks/dirty_bench_app.py b/benchmarks/dirty_bench_app.py
new file mode 100644
index 0000000000..8ac6b96311
--- /dev/null
+++ b/benchmarks/dirty_bench_app.py
@@ -0,0 +1,223 @@
+#
+# This file is part of gunicorn released under the MIT license.
+# See the NOTICE for more information.
+
+"""
+Benchmark DirtyApp for stress testing the dirty arbiter pool.
+
+Provides configurable workloads for testing:
+- Pure sleep (scheduling overhead)
+- CPU-bound work (thread pool utilization)
+- Mixed I/O + CPU (realistic workloads)
+- Payload generation (serialization overhead)
+"""
+
+import time
+
+from gunicorn.dirty import DirtyApp
+
+
+class BenchmarkApp(DirtyApp):
+ """
+ Configurable benchmark app for stress testing.
+
+ Provides various task types to test different aspects of the
+ dirty pool performance.
+ """
+
+ def init(self):
+ """Fast initialization - no heavy resources to load."""
+ self.call_count = 0
+ self.total_sleep_ms = 0
+ self.total_cpu_ms = 0
+
+ def sleep_task(self, duration_ms):
+ """
+ Pure sleep task - tests scheduling overhead.
+
+ This simulates I/O-bound work like waiting for external APIs.
+ The thread is blocked but not consuming CPU.
+
+ Args:
+ duration_ms: Sleep duration in milliseconds
+
+ Returns:
+ dict with sleep duration
+ """
+ self.call_count += 1
+ self.total_sleep_ms += duration_ms
+ time.sleep(duration_ms / 1000.0)
+ return {"slept_ms": duration_ms}
+
+ def cpu_task(self, duration_ms, intensity=1.0):
+ """
+ CPU-bound work - tests thread pool utilization.
+
+ Performs actual computation to simulate CPU-intensive work
+ like model inference or data processing.
+
+ Args:
+ duration_ms: Target duration in milliseconds
+ intensity: Work intensity multiplier (1.0 = normal)
+
+ Returns:
+ dict with computed iterations and actual duration
+ """
+ self.call_count += 1
+ start = time.perf_counter()
+ target_end = start + (duration_ms / 1000.0)
+
+ # Perform CPU work until target duration
+ iterations = 0
+ work_per_iteration = int(1000 * intensity)
+
+ while time.perf_counter() < target_end:
+ # Do some actual computation
+ x = 0.0
+ for i in range(work_per_iteration):
+ x += i * 0.001
+ x = x * 1.001 if x < 1000000 else x * 0.999
+ iterations += 1
+
+ actual_ms = (time.perf_counter() - start) * 1000
+ self.total_cpu_ms += actual_ms
+
+ return {
+ "iterations": iterations,
+ "target_ms": duration_ms,
+ "actual_ms": round(actual_ms, 2),
+ "intensity": intensity
+ }
+
+ def mixed_task(self, sleep_ms, cpu_ms, intensity=1.0):
+ """
+ Mixed I/O + CPU task - simulates realistic workloads.
+
+ First performs I/O (sleep), then does CPU work. This is
+ common in real apps: fetch data, then process it.
+
+ Args:
+ sleep_ms: I/O simulation duration in milliseconds
+ cpu_ms: CPU work duration in milliseconds
+ intensity: CPU work intensity multiplier
+
+ Returns:
+ dict with both sleep and CPU metrics
+ """
+ self.call_count += 1
+
+ # I/O phase (sleep)
+ time.sleep(sleep_ms / 1000.0)
+ self.total_sleep_ms += sleep_ms
+
+ # CPU phase
+ start = time.perf_counter()
+ target_end = start + (cpu_ms / 1000.0)
+
+ iterations = 0
+ work_per_iteration = int(1000 * intensity)
+
+ while time.perf_counter() < target_end:
+ x = 0.0
+ for i in range(work_per_iteration):
+ x += i * 0.001
+ x = x * 1.001 if x < 1000000 else x * 0.999
+ iterations += 1
+
+ actual_cpu_ms = (time.perf_counter() - start) * 1000
+ self.total_cpu_ms += actual_cpu_ms
+
+ return {
+ "sleep_ms": sleep_ms,
+ "cpu_iterations": iterations,
+ "target_cpu_ms": cpu_ms,
+ "actual_cpu_ms": round(actual_cpu_ms, 2),
+ "total_ms": round(sleep_ms + actual_cpu_ms, 2)
+ }
+
+ def payload_task(self, size_bytes, duration_ms=0):
+ """
+ Generate payload of specified size - tests serialization.
+
+ Creates a deterministic payload to test JSON serialization
+ overhead for different response sizes.
+
+ Args:
+ size_bytes: Target payload size in bytes
+ duration_ms: Optional sleep before generating payload
+
+ Returns:
+ dict with 'data' field of specified size
+ """
+ self.call_count += 1
+
+ if duration_ms > 0:
+ time.sleep(duration_ms / 1000.0)
+ self.total_sleep_ms += duration_ms
+
+ # Generate payload - use a pattern that compresses differently
+ # than pure repeated characters for more realistic testing
+ pattern = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+ repeats = (size_bytes // len(pattern)) + 1
+ data = (pattern * repeats)[:size_bytes]
+
+ return {
+ "data": data,
+ "size": len(data)
+ }
+
+ def echo_task(self, payload):
+ """
+ Echo back payload - tests round-trip serialization.
+
+ Useful for testing request/response serialization together.
+
+ Args:
+ payload: Data to echo back
+
+ Returns:
+ dict with echoed payload and its size
+ """
+ self.call_count += 1
+
+ # Calculate size based on type
+ if isinstance(payload, str):
+ size = len(payload)
+ elif isinstance(payload, (dict, list)):
+ import json
+ size = len(json.dumps(payload))
+ else:
+ size = len(str(payload))
+
+ return {
+ "echoed_size": size,
+ "payload": payload
+ }
+
+ def stats(self):
+ """
+ Return accumulated statistics.
+
+ Returns:
+ dict with call counts and totals
+ """
+ return {
+ "call_count": self.call_count,
+ "total_sleep_ms": self.total_sleep_ms,
+ "total_cpu_ms": round(self.total_cpu_ms, 2)
+ }
+
+ def reset_stats(self):
+ """Reset accumulated statistics."""
+ self.call_count = 0
+ self.total_sleep_ms = 0
+ self.total_cpu_ms = 0
+ return {"reset": True}
+
+ def health(self):
+ """Health check endpoint for warmup."""
+ return {"status": "ok"}
+
+ def close(self):
+ """Cleanup on shutdown."""
+ pass
diff --git a/benchmarks/dirty_bench_gunicorn.py b/benchmarks/dirty_bench_gunicorn.py
new file mode 100644
index 0000000000..4d49e75ce3
--- /dev/null
+++ b/benchmarks/dirty_bench_gunicorn.py
@@ -0,0 +1,60 @@
+#
+# This file is part of gunicorn released under the MIT license.
+# See the NOTICE for more information.
+
+"""
+Gunicorn configuration for dirty pool integration benchmarks.
+
+Usage:
+ gunicorn -c benchmarks/dirty_bench_gunicorn.py \
+ benchmarks.dirty_bench_wsgi:app
+"""
+
+# Bind address
+bind = "127.0.0.1:8000"
+
+# HTTP worker configuration
+workers = 4
+worker_class = "gthread"
+threads = 4
+worker_connections = 1000
+
+# Dirty pool configuration
+dirty_apps = ["benchmarks.dirty_bench_app:BenchmarkApp"]
+dirty_workers = 4
+dirty_threads = 1
+dirty_timeout = 300
+dirty_graceful_timeout = 30
+
+# Logging
+accesslog = "-"
+errorlog = "-"
+loglevel = "info"
+
+# Timeouts
+timeout = 120
+graceful_timeout = 30
+keepalive = 2
+
+
+# Lifecycle hooks
+
+def on_dirty_starting(arbiter):
+ """Called when dirty arbiter is starting."""
+ print(f"[dirty] Arbiter starting (pid: {arbiter.pid})")
+
+
+def dirty_post_fork(arbiter, worker):
+ """Called after dirty worker fork."""
+ print(f"[dirty] Worker {worker.pid} forked")
+
+
+def dirty_worker_init(worker):
+ """Called after dirty worker apps are initialized."""
+ print(f"[dirty] Worker {worker.pid} initialized with apps: "
+ f"{list(worker.apps.keys())}")
+
+
+def dirty_worker_exit(arbiter, worker):
+ """Called when dirty worker exits."""
+ print(f"[dirty] Worker {worker.pid} exiting")
diff --git a/benchmarks/dirty_bench_wsgi.py b/benchmarks/dirty_bench_wsgi.py
new file mode 100644
index 0000000000..5324ba7e59
--- /dev/null
+++ b/benchmarks/dirty_bench_wsgi.py
@@ -0,0 +1,167 @@
+#
+# This file is part of gunicorn released under the MIT license.
+# See the NOTICE for more information.
+
+"""
+WSGI app for integration benchmarking of the dirty pool.
+
+This simple WSGI application calls the dirty pool and returns results.
+Use with gunicorn for end-to-end benchmarking that includes HTTP overhead.
+
+Example:
+ gunicorn benchmarks.dirty_bench_wsgi:app \
+ --workers 4 \
+ --dirty-app benchmarks.dirty_bench_app:BenchmarkApp \
+ --dirty-workers 2 \
+ --bind 127.0.0.1:8000
+"""
+
+import json
+from urllib.parse import parse_qs
+
+from gunicorn.dirty import get_dirty_client
+
+
+# Default benchmark app path
+BENCHMARK_APP = "benchmarks.dirty_bench_app:BenchmarkApp"
+
+
+def app(environ, start_response):
+ """
+ WSGI application that calls dirty pool tasks.
+
+ Query parameters:
+ action: Task action to call (default: sleep_task)
+ duration: Duration in ms for sleep/cpu tasks (default: 10)
+ sleep: Sleep duration for mixed_task (default: 50)
+ cpu: CPU duration for mixed_task (default: 50)
+ size: Payload size in bytes for payload_task (default: 100)
+ intensity: CPU intensity for cpu/mixed tasks (default: 1.0)
+ app: Dirty app path (default: benchmarks.dirty_bench_app:BenchmarkApp)
+
+ Endpoints:
+ / - Default sleep_task
+ /sleep - sleep_task with ?duration=N
+ /cpu - cpu_task with ?duration=N&intensity=N
+ /mixed - mixed_task with ?sleep=N&cpu=N
+ /payload - payload_task with ?size=N
+ /echo - echo_task (POST body echoed)
+ /stats - Get accumulated stats
+ /health - Health check
+ """
+ path = environ.get('PATH_INFO', '/')
+ method = environ.get('REQUEST_METHOD', 'GET')
+ query = parse_qs(environ.get('QUERY_STRING', ''))
+
+ # Helper to get query params with defaults
+ def get_param(name, default, type_fn=int):
+ values = query.get(name, [])
+ if values:
+ try:
+ return type_fn(values[0])
+ except (ValueError, TypeError):
+ return default
+ return default
+
+ # Get app path from query or use default
+ app_path = query.get('app', [BENCHMARK_APP])[0]
+
+ try:
+ client = get_dirty_client()
+
+ # Route based on path
+ if path in ('/', '/sleep'):
+ duration = get_param('duration', 10)
+ result = client.execute(app_path, "sleep_task", duration)
+
+ elif path == '/cpu':
+ duration = get_param('duration', 100)
+ intensity = get_param('intensity', 1.0, float)
+ result = client.execute(app_path, "cpu_task", duration, intensity)
+
+ elif path == '/mixed':
+ sleep_ms = get_param('sleep', 50)
+ cpu_ms = get_param('cpu', 50)
+ intensity = get_param('intensity', 1.0, float)
+ result = client.execute(app_path, "mixed_task", sleep_ms, cpu_ms,
+ intensity)
+
+ elif path == '/payload':
+ size = get_param('size', 100)
+ duration = get_param('duration', 0)
+ result = client.execute(app_path, "payload_task", size, duration)
+
+ elif path == '/echo':
+ # Read request body for echo
+ try:
+ content_length = int(environ.get('CONTENT_LENGTH', 0))
+ except (ValueError, TypeError):
+ content_length = 0
+
+ if content_length > 0:
+ body = environ['wsgi.input'].read(content_length)
+ try:
+ payload = json.loads(body.decode('utf-8'))
+ except (json.JSONDecodeError, UnicodeDecodeError):
+ payload = body.decode('utf-8', errors='replace')
+ else:
+ payload = ""
+
+ result = client.execute(app_path, "echo_task", payload)
+
+ elif path == '/stats':
+ result = client.execute(app_path, "stats")
+
+ elif path == '/reset':
+ result = client.execute(app_path, "reset_stats")
+
+ elif path == '/health':
+ result = client.execute(app_path, "health")
+
+ else:
+ # Unknown path - return 404
+ status = '404 Not Found'
+ body = json.dumps({"error": f"Unknown path: {path}"}).encode()
+ headers = [
+ ('Content-Type', 'application/json'),
+ ('Content-Length', str(len(body))),
+ ]
+ start_response(status, headers)
+ return [body]
+
+ # Success response
+ status = '200 OK'
+ body = json.dumps(result).encode()
+ headers = [
+ ('Content-Type', 'application/json'),
+ ('Content-Length', str(len(body))),
+ ]
+ start_response(status, headers)
+ return [body]
+
+ except Exception as e:
+ # Error response
+ status = '500 Internal Server Error'
+ error_msg = {"error": str(e), "type": type(e).__name__}
+ body = json.dumps(error_msg).encode()
+ headers = [
+ ('Content-Type', 'application/json'),
+ ('Content-Length', str(len(body))),
+ ]
+ start_response(status, headers)
+ return [body]
+
+
+# This module only defines the WSGI app; the gunicorn configuration lives in
+# benchmarks/dirty_bench_gunicorn.py and can be overridden on the command line.
+
+# Example gunicorn invocation:
+# gunicorn benchmarks.dirty_bench_wsgi:app \
+#     -c benchmarks/dirty_bench_gunicorn.py \
+#     --dirty-app benchmarks.dirty_bench_app:BenchmarkApp \
+#     --dirty-workers 2
+
+
+def post_fork(server, worker):
+ """Hook called after worker fork."""
+ pass
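+
+
+# A small smoke check, illustrative only: assuming gunicorn is running with the
+# default bind from benchmarks/dirty_bench_gunicorn.py (127.0.0.1:8000),
+# running this module directly hits the /health endpoint once.
+if __name__ == "__main__":
+    import urllib.request
+
+    with urllib.request.urlopen("http://127.0.0.1:8000/health", timeout=5) as resp:
+        print(resp.status, resp.read().decode("utf-8"))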
diff --git a/benchmarks/dirty_benchmark.py b/benchmarks/dirty_benchmark.py
new file mode 100755
index 0000000000..f59fdf18da
--- /dev/null
+++ b/benchmarks/dirty_benchmark.py
@@ -0,0 +1,1061 @@
+#!/usr/bin/env python3
+#
+# This file is part of gunicorn released under the MIT license.
+# See the NOTICE for more information.
+
+"""
+Dirty Pool Benchmark Runner
+
+Stress tests and benchmarks the dirty arbiter pool to find bottlenecks
+and optimization opportunities.
+
+Test Modes:
+- Isolated: Direct client -> arbiter -> worker (no HTTP overhead)
+- Integrated: HTTP workers calling dirty pool (realistic end-to-end)
+
+Usage:
+ # Quick smoke test
+ python benchmarks/dirty_benchmark.py --quick
+
+ # Full isolated suite
+ python benchmarks/dirty_benchmark.py --isolated --output results.json
+
+ # Specific scenario
+ python benchmarks/dirty_benchmark.py \
+ --duration 100 \
+ --concurrency 50 \
+ --workers 4 \
+ --threads 2
+
+ # Payload size tests
+ python benchmarks/dirty_benchmark.py --payload-tests
+
+ # Integration tests (requires gunicorn running)
+ python benchmarks/dirty_benchmark.py --integrated --url http://127.0.0.1:8000
+"""
+
+import argparse
+import json
+import os
+import signal
+import statistics
+import subprocess
+import sys
+import tempfile
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from dataclasses import dataclass, field, asdict
+from pathlib import Path
+
+# Add parent to path for imports
+BENCHMARK_DIR = Path(__file__).parent
+sys.path.insert(0, str(BENCHMARK_DIR.parent))
+
+from gunicorn.dirty.client import DirtyClient
+from gunicorn.dirty.arbiter import DirtyArbiter
+
+
+# Default benchmark app path
+BENCHMARK_APP = "benchmarks.dirty_bench_app:BenchmarkApp"
+
+
+@dataclass
+class LatencyStats:
+ """Latency statistics in milliseconds."""
+ min: float = 0.0
+ max: float = 0.0
+ mean: float = 0.0
+ stddev: float = 0.0
+ p50: float = 0.0
+ p95: float = 0.0
+ p99: float = 0.0
+
+ @classmethod
+ def from_samples(cls, samples: list[float]) -> "LatencyStats":
+ """Calculate statistics from list of latency samples."""
+ if not samples:
+ return cls()
+
+ sorted_samples = sorted(samples)
+ n = len(sorted_samples)
+
+ return cls(
+ min=sorted_samples[0],
+ max=sorted_samples[-1],
+ mean=statistics.mean(sorted_samples),
+ stddev=statistics.stdev(sorted_samples) if n > 1 else 0.0,
+ p50=sorted_samples[int(n * 0.50)],
+ p95=sorted_samples[int(n * 0.95)] if n >= 20 else sorted_samples[-1],
+ p99=sorted_samples[int(n * 0.99)] if n >= 100 else sorted_samples[-1],
+ )
+
+
+@dataclass
+class BenchmarkResult:
+ """Results from a single benchmark run."""
+ scenario: str
+ config: dict
+ total_requests: int = 0
+ successful: int = 0
+ failed: int = 0
+ errors: list[str] = field(default_factory=list)
+ duration_sec: float = 0.0
+ requests_per_sec: float = 0.0
+ latency_ms: LatencyStats = field(default_factory=LatencyStats)
+
+ def to_dict(self) -> dict:
+ """Convert to dictionary for JSON serialization."""
+ d = asdict(self)
+ d['latency_ms'] = asdict(self.latency_ms)
+ return d
+
+
+class MockConfig:
+ """Mock gunicorn config for standalone arbiter testing."""
+
+ def __init__(
+ self,
+ dirty_apps: list[str],
+ dirty_workers: int = 2,
+ dirty_threads: int = 1,
+ dirty_timeout: int = 300,
+ dirty_graceful_timeout: int = 30,
+ ):
+ self.dirty_apps = dirty_apps
+ self.dirty_workers = dirty_workers
+ self.dirty_threads = dirty_threads
+ self.dirty_timeout = dirty_timeout
+ self.dirty_graceful_timeout = dirty_graceful_timeout
+
+ # Other required config
+ self.env = {}
+ self.uid = os.getuid()
+ self.gid = os.getgid()
+ self.initgroups = False
+ self.proc_name = "dirty-benchmark"
+
+ # WorkerTmp requirements
+ self.umask = 0
+ self.worker_tmp_dir = None
+
+ # Hook stubs
+ def on_dirty_starting(self, arbiter):
+ pass
+
+ def dirty_post_fork(self, arbiter, worker):
+ pass
+
+ def dirty_worker_init(self, worker):
+ pass
+
+ def dirty_worker_exit(self, arbiter, worker):
+ pass
+
+
+class MockLogger:
+ """Mock logger for standalone testing."""
+
+ def __init__(self, verbose: bool = False):
+ self.verbose = verbose
+
+ def debug(self, msg, *args):
+ if self.verbose:
+ print(f"[DEBUG] {msg % args if args else msg}")
+
+ def info(self, msg, *args):
+ if self.verbose:
+ print(f"[INFO] {msg % args if args else msg}")
+
+ def warning(self, msg, *args):
+ print(f"[WARN] {msg % args if args else msg}")
+
+ def error(self, msg, *args):
+ print(f"[ERROR] {msg % args if args else msg}")
+
+ def critical(self, msg, *args):
+ print(f"[CRIT] {msg % args if args else msg}")
+
+ def exception(self, msg, *args):
+ print(f"[EXC] {msg % args if args else msg}")
+
+ def reopen_files(self):
+ pass
+
+ def close_on_exec(self):
+ pass
+
+
+class IsolatedBenchmark:
+ """
+ Run benchmarks directly against the dirty pool without HTTP.
+
+ Spawns a standalone dirty arbiter and workers, then runs concurrent
+ clients to measure performance.
+ """
+
+ def __init__(
+ self,
+ dirty_workers: int = 2,
+ dirty_threads: int = 1,
+ dirty_timeout: int = 300,
+ verbose: bool = False,
+ ):
+ self.dirty_workers = dirty_workers
+ self.dirty_threads = dirty_threads
+ self.dirty_timeout = dirty_timeout
+ self.verbose = verbose
+
+ self.arbiter = None
+ self.arbiter_pid = None
+ self.socket_path = None
+ self._tmpdir = None
+
+ def start(self):
+ """Start the dirty arbiter and workers."""
+ # Create temp directory for socket
+ self._tmpdir = tempfile.mkdtemp(prefix="dirty-bench-")
+ self.socket_path = os.path.join(self._tmpdir, "arbiter.sock")
+
+ # Create config and logger
+ cfg = MockConfig(
+ dirty_apps=[BENCHMARK_APP],
+ dirty_workers=self.dirty_workers,
+ dirty_threads=self.dirty_threads,
+ dirty_timeout=self.dirty_timeout,
+ )
+ log = MockLogger(verbose=self.verbose)
+
+ # Fork arbiter process
+ pid = os.fork()
+ if pid == 0:
+ # Child process - run arbiter
+ try:
+ arbiter = DirtyArbiter(cfg, log, socket_path=self.socket_path)
+ arbiter.run()
+ except Exception as e:
+ print(f"Arbiter error: {e}")
+ finally:
+ os._exit(0)
+
+ # Parent process
+ self.arbiter_pid = pid
+
+ # Wait for arbiter socket to be ready
+ for _ in range(50): # 5 seconds max
+ if os.path.exists(self.socket_path):
+ break
+ time.sleep(0.1)
+ else:
+ raise RuntimeError("Arbiter socket not ready")
+
+ # Give workers time to start
+ time.sleep(0.5)
+
+ def stop(self):
+ """Stop the dirty arbiter."""
+ if self.arbiter_pid:
+ try:
+ os.kill(self.arbiter_pid, signal.SIGTERM)
+ os.waitpid(self.arbiter_pid, 0)
+ except (OSError, ChildProcessError):
+ pass
+ self.arbiter_pid = None
+
+ # Cleanup temp directory
+ if self._tmpdir:
+ try:
+ for f in os.listdir(self._tmpdir):
+ os.unlink(os.path.join(self._tmpdir, f))
+ os.rmdir(self._tmpdir)
+ except OSError:
+ pass
+ self._tmpdir = None
+
+ def warmup(self, requests: int = 10):
+ """Warm up the pool with a few requests."""
+ with DirtyClient(self.socket_path, timeout=30.0) as client:
+ for _ in range(requests):
+ client.execute(BENCHMARK_APP, "health")
+
+ def run_benchmark(
+ self,
+ action: str,
+ args: tuple = (),
+ kwargs: dict = None,
+ total_requests: int = 1000,
+ concurrency: int = 10,
+ timeout: float = 30.0,
+ ) -> tuple[list[float], list[str]]:
+ """
+ Run a benchmark with specified parameters.
+
+ Each concurrent worker maintains a persistent connection to the arbiter
+ and makes sequential requests. This simulates how real HTTP workers
+ use the dirty client (one connection per worker thread).
+
+ Args:
+ action: Action to call on the benchmark app
+ args: Positional arguments for the action
+ kwargs: Keyword arguments for the action
+ total_requests: Total number of requests to make
+ concurrency: Number of concurrent clients
+ timeout: Timeout per request in seconds
+
+ Returns:
+ Tuple of (latencies in ms, error messages)
+ """
+ kwargs = kwargs or {}
+ latencies = []
+ errors = []
+ lock = threading.Lock()
+
+ # Calculate requests per worker
+ requests_per_worker = total_requests // concurrency
+ remainder = total_requests % concurrency
+
+ def worker_task(num_requests: int) -> None:
+ """Worker that makes sequential requests on a persistent connection."""
+ worker_latencies = []
+ worker_errors = []
+
+ try:
+ client = DirtyClient(self.socket_path, timeout=timeout)
+ client.connect()
+
+ for _ in range(num_requests):
+ try:
+ start = time.perf_counter()
+ client.execute(BENCHMARK_APP, action, *args, **kwargs)
+ elapsed = (time.perf_counter() - start) * 1000
+ worker_latencies.append(elapsed)
+ except Exception as e:
+ worker_errors.append(str(e))
+ # Reconnect on error
+ try:
+ client.close()
+ client = DirtyClient(self.socket_path, timeout=timeout)
+ client.connect()
+ except Exception:
+ pass
+
+ client.close()
+ except Exception as e:
+ worker_errors.append(f"Connection error: {e}")
+
+ # Add results to shared lists
+ with lock:
+ latencies.extend(worker_latencies)
+ errors.extend(worker_errors)
+
+ # Run concurrent workers
+ with ThreadPoolExecutor(max_workers=concurrency) as executor:
+ futures = []
+ for i in range(concurrency):
+ # Distribute remainder requests among first few workers
+ num = requests_per_worker + (1 if i < remainder else 0)
+ if num > 0:
+ futures.append(executor.submit(worker_task, num))
+
+ # Wait for all workers to complete
+ for future in as_completed(futures):
+ future.result() # Raises any exceptions
+
+ return latencies, errors
+
+
+class IntegratedBenchmark:
+ """
+ Run benchmarks against gunicorn with dirty pool via HTTP.
+
+    Uses wrk or ab for load testing, or falls back to Python's urllib.
+ """
+
+ def __init__(
+ self,
+ url: str = "http://127.0.0.1:8000",
+ verbose: bool = False,
+ ):
+ self.url = url.rstrip('/')
+ self.verbose = verbose
+ self._tool = None
+
+ def check_dependencies(self) -> str | None:
+ """Check for available load testing tools."""
+ for tool in ['wrk', 'ab']:
+ try:
+ subprocess.run([tool, '--version'], capture_output=True,
+ check=False)
+ return tool
+ except FileNotFoundError:
+ continue
+ return None
+
+ def warmup(self, requests: int = 10):
+ """Warm up the server."""
+ import urllib.request
+ for _ in range(requests):
+ try:
+ urllib.request.urlopen(f"{self.url}/health", timeout=5)
+ except Exception:
+ pass
+
+ def run_wrk(
+ self,
+ path: str,
+ duration: int = 10,
+ threads: int = 4,
+ connections: int = 100,
+ ) -> dict:
+ """Run wrk benchmark and parse results."""
+ url = f"{self.url}{path}"
+ cmd = [
+ 'wrk',
+ '-t', str(threads),
+ '-c', str(connections),
+ '-d', f'{duration}s',
+ '--latency',
+ url,
+ ]
+
+ result = subprocess.run(cmd, capture_output=True, text=True,
+ check=False)
+ return self._parse_wrk_output(result.stdout)
+
+ def _parse_wrk_output(self, output: str) -> dict:
+ """Parse wrk output to extract metrics."""
+ metrics = {
+ 'requests_per_sec': 0.0,
+ 'latency_ms': {},
+ 'errors': 0,
+ }
+
+ for line in output.split('\n'):
+ if 'Requests/sec' in line:
+ try:
+ metrics['requests_per_sec'] = float(
+ line.split(':')[1].strip())
+ except (ValueError, IndexError):
+ pass
+ elif 'Latency' in line and 'Distribution' not in line:
+ parts = line.split()
+ if len(parts) >= 2:
+ metrics['latency_ms']['avg'] = self._parse_duration(
+ parts[1])
+ elif '50%' in line:
+ parts = line.split()
+ if len(parts) >= 2:
+ metrics['latency_ms']['p50'] = self._parse_duration(
+ parts[1])
+ elif '99%' in line:
+ parts = line.split()
+ if len(parts) >= 2:
+ metrics['latency_ms']['p99'] = self._parse_duration(
+ parts[1])
+ elif 'Socket errors' in line:
+ # Parse error counts
+ parts = line.split(',')
+ for part in parts:
+ if any(x in part for x in ['connect', 'read', 'write',
+ 'timeout']):
+ try:
+ metrics['errors'] += int(part.split()[-1])
+ except (ValueError, IndexError):
+ pass
+
+ return metrics
+
+ def _parse_duration(self, s: str) -> float:
+ """Parse wrk duration string (e.g., '12.34ms', '1.23s') to ms."""
+ s = s.strip()
+ if s.endswith('us'):
+ return float(s[:-2]) / 1000
+ elif s.endswith('ms'):
+ return float(s[:-2])
+ elif s.endswith('s'):
+ return float(s[:-1]) * 1000
+ else:
+ return float(s)
+
+ def run_python_benchmark(
+ self,
+ path: str,
+ total_requests: int = 1000,
+ concurrency: int = 10,
+ timeout: float = 30.0,
+ ) -> tuple[list[float], list[str]]:
+ """
+ Run benchmark using Python urllib.
+
+ Fallback when wrk/ab not available.
+ """
+ import urllib.request
+ import urllib.error
+
+ url = f"{self.url}{path}"
+ latencies = []
+ errors = []
+
+ def make_request() -> tuple[float | None, str | None]:
+ try:
+ start = time.perf_counter()
+ urllib.request.urlopen(url, timeout=timeout)
+ elapsed = (time.perf_counter() - start) * 1000
+ return elapsed, None
+ except Exception as e:
+ return None, str(e)
+
+ with ThreadPoolExecutor(max_workers=concurrency) as executor:
+ futures = [executor.submit(make_request)
+ for _ in range(total_requests)]
+
+ for future in as_completed(futures):
+ latency, error = future.result()
+ if latency is not None:
+ latencies.append(latency)
+ if error:
+ errors.append(error)
+
+ return latencies, errors
+
+
+def run_isolated_suite(
+ workers: int = 2,
+ threads: int = 1,
+ verbose: bool = False,
+) -> list[BenchmarkResult]:
+ """Run the full isolated benchmark suite."""
+ results = []
+
+ bench = IsolatedBenchmark(
+ dirty_workers=workers,
+ dirty_threads=threads,
+ verbose=verbose,
+ )
+
+ print(f"\nStarting isolated benchmarks (workers={workers}, "
+ f"threads={threads})...")
+
+ try:
+ bench.start()
+ bench.warmup()
+
+ # Define scenarios
+ scenarios = [
+ # Baseline
+ {
+ "name": "baseline_10ms",
+ "action": "sleep_task",
+ "args": (10,),
+ "requests": 1000,
+ "concurrency": 1,
+ "description": "Single request latency (10ms sleep)",
+ },
+ # Throughput
+ {
+ "name": "throughput_10ms",
+ "action": "sleep_task",
+ "args": (10,),
+ "requests": 5000,
+ "concurrency": 100,
+ "description": "Max requests/sec (10ms sleep, 100 clients)",
+ },
+ # CPU Bound
+ {
+ "name": "cpu_bound_100ms",
+ "action": "cpu_task",
+ "args": (100,),
+ "requests": 500,
+ "concurrency": 20,
+ "description": "CPU-bound work (100ms, 20 clients)",
+ },
+ # I/O Bound
+ {
+ "name": "io_bound_500ms",
+ "action": "sleep_task",
+ "args": (500,),
+ "requests": 200,
+ "concurrency": 50,
+ "description": "I/O-bound work (500ms sleep, 50 clients)",
+ },
+ # Mixed
+ {
+ "name": "mixed_50_50",
+ "action": "mixed_task",
+ "args": (50, 50),
+ "requests": 500,
+ "concurrency": 30,
+ "description": "Mixed workload (50ms sleep + 50ms CPU)",
+ },
+ # Overload
+ {
+ "name": "overload_10ms",
+ "action": "sleep_task",
+ "args": (10,),
+ "requests": 2000,
+ "concurrency": 200,
+ "description": "Overload test (10ms, 200 clients)",
+ },
+ ]
+
+ for scenario in scenarios:
+ print(f" Running {scenario['name']}: {scenario['description']}...")
+
+ start_time = time.perf_counter()
+ latencies, errors = bench.run_benchmark(
+ action=scenario["action"],
+ args=scenario.get("args", ()),
+ kwargs=scenario.get("kwargs"),
+ total_requests=scenario["requests"],
+ concurrency=scenario["concurrency"],
+ )
+ duration = time.perf_counter() - start_time
+
+ result = BenchmarkResult(
+ scenario=scenario["name"],
+ config={
+ "dirty_workers": workers,
+ "dirty_threads": threads,
+ "task_action": scenario["action"],
+ "task_args": scenario.get("args", ()),
+ "concurrency": scenario["concurrency"],
+ },
+ total_requests=scenario["requests"],
+ successful=len(latencies),
+ failed=len(errors),
+ errors=errors[:10] if errors else [], # First 10 errors
+ duration_sec=round(duration, 2),
+ requests_per_sec=round(len(latencies) / duration, 1),
+ latency_ms=LatencyStats.from_samples(latencies),
+ )
+ results.append(result)
+
+ print(f" Requests/sec: {result.requests_per_sec:.1f}, "
+ f"p50: {result.latency_ms.p50:.1f}ms, "
+ f"p99: {result.latency_ms.p99:.1f}ms, "
+ f"failed: {result.failed}")
+
+ finally:
+ bench.stop()
+
+ return results
+
+
+def run_payload_suite(
+ workers: int = 2,
+ threads: int = 1,
+ verbose: bool = False,
+) -> list[BenchmarkResult]:
+ """Run payload size benchmark suite."""
+ results = []
+
+ bench = IsolatedBenchmark(
+ dirty_workers=workers,
+ dirty_threads=threads,
+ verbose=verbose,
+ )
+
+ print(f"\nStarting payload benchmarks (workers={workers})...")
+
+ try:
+ bench.start()
+ bench.warmup()
+
+ # Payload sizes to test
+ payload_sizes = [
+ (100, "100B", "Tiny payload"),
+ (1024, "1KB", "Small payload"),
+ (10240, "10KB", "Medium payload"),
+ (102400, "100KB", "Large payload"),
+ (1048576, "1MB", "Very large payload"),
+ ]
+
+ for size, size_label, description in payload_sizes:
+ # Adjust concurrency for larger payloads
+ concurrency = max(5, 100 // (size // 1024 + 1))
+ requests = max(100, 1000 // (size // 1024 + 1))
+
+ print(f" Running payload_{size_label}: {description}...")
+
+ start_time = time.perf_counter()
+ latencies, errors = bench.run_benchmark(
+ action="payload_task",
+ args=(size,),
+ total_requests=requests,
+ concurrency=concurrency,
+ )
+ duration = time.perf_counter() - start_time
+
+ result = BenchmarkResult(
+ scenario=f"payload_{size_label}",
+ config={
+ "dirty_workers": workers,
+ "dirty_threads": threads,
+ "payload_bytes": size,
+ "concurrency": concurrency,
+ },
+ total_requests=requests,
+ successful=len(latencies),
+ failed=len(errors),
+ errors=errors[:5] if errors else [],
+ duration_sec=round(duration, 2),
+ requests_per_sec=round(len(latencies) / duration, 1),
+ latency_ms=LatencyStats.from_samples(latencies),
+ )
+ results.append(result)
+
+ # Calculate throughput in MB/s
+ throughput_mb = (len(latencies) * size) / duration / 1024 / 1024
+
+ print(f" Requests/sec: {result.requests_per_sec:.1f}, "
+ f"p50: {result.latency_ms.p50:.1f}ms, "
+ f"throughput: {throughput_mb:.1f} MB/s")
+
+ finally:
+ bench.stop()
+
+ return results
+
+
+def run_quick_test(verbose: bool = False) -> list[BenchmarkResult]:
+ """Run a quick smoke test."""
+ results = []
+
+ bench = IsolatedBenchmark(dirty_workers=1, dirty_threads=1, verbose=verbose)
+
+ print("\nRunning quick smoke test...")
+
+ try:
+ bench.start()
+ bench.warmup(5)
+
+ # Simple test
+ start_time = time.perf_counter()
+ latencies, errors = bench.run_benchmark(
+ action="sleep_task",
+ args=(10,),
+ total_requests=100,
+ concurrency=10,
+ )
+ duration = time.perf_counter() - start_time
+
+ result = BenchmarkResult(
+ scenario="quick_test",
+ config={"dirty_workers": 1, "dirty_threads": 1},
+ total_requests=100,
+ successful=len(latencies),
+ failed=len(errors),
+ errors=errors[:5] if errors else [],
+ duration_sec=round(duration, 2),
+ requests_per_sec=round(len(latencies) / duration, 1),
+ latency_ms=LatencyStats.from_samples(latencies),
+ )
+ results.append(result)
+
+ print(f" Requests/sec: {result.requests_per_sec:.1f}, "
+ f"p50: {result.latency_ms.p50:.1f}ms, "
+ f"failed: {result.failed}")
+
+ if result.failed == 0:
+ print(" PASS: Quick test successful")
+ else:
+ print(f" WARN: {result.failed} requests failed")
+
+ finally:
+ bench.stop()
+
+ return results
+
+
+def run_config_sweep(verbose: bool = False) -> list[BenchmarkResult]:
+ """
+ Sweep through different configurations to find optimal settings.
+
+ Tests combinations of workers and threads.
+ """
+ results = []
+
+ configs = [
+ (1, 1), # Baseline
+ (2, 1), # 2 workers, 1 thread each
+ (4, 1), # 4 workers, 1 thread each
+ (2, 2), # 2 workers, 2 threads each
+ (2, 4), # 2 workers, 4 threads each
+ (4, 2), # 4 workers, 2 threads each
+ ]
+
+ print("\nRunning configuration sweep...")
+
+ for workers, threads in configs:
+ print(f"\n Testing workers={workers}, threads={threads}...")
+
+ bench = IsolatedBenchmark(
+ dirty_workers=workers,
+ dirty_threads=threads,
+ verbose=verbose,
+ )
+
+ try:
+ bench.start()
+ bench.warmup()
+
+ # Run a standard workload
+ start_time = time.perf_counter()
+ latencies, errors = bench.run_benchmark(
+ action="mixed_task",
+ args=(20, 20), # 20ms sleep + 20ms CPU
+ total_requests=1000,
+ concurrency=50,
+ )
+ duration = time.perf_counter() - start_time
+
+ result = BenchmarkResult(
+ scenario=f"config_w{workers}_t{threads}",
+ config={
+ "dirty_workers": workers,
+ "dirty_threads": threads,
+ "task": "mixed_task(20, 20)",
+ "concurrency": 50,
+ },
+ total_requests=1000,
+ successful=len(latencies),
+ failed=len(errors),
+ errors=errors[:5] if errors else [],
+ duration_sec=round(duration, 2),
+ requests_per_sec=round(len(latencies) / duration, 1),
+ latency_ms=LatencyStats.from_samples(latencies),
+ )
+ results.append(result)
+
+ print(f" Requests/sec: {result.requests_per_sec:.1f}, "
+ f"p50: {result.latency_ms.p50:.1f}ms, "
+ f"p99: {result.latency_ms.p99:.1f}ms")
+
+ finally:
+ bench.stop()
+
+ # Print summary
+ print("\n Configuration Summary:")
+ print(" " + "-" * 60)
+ sorted_results = sorted(results, key=lambda r: -r.requests_per_sec)
+ for r in sorted_results:
+ cfg = r.config
+ print(f" w={cfg['dirty_workers']}, t={cfg['dirty_threads']}: "
+ f"{r.requests_per_sec:.1f} req/s, "
+ f"p99={r.latency_ms.p99:.1f}ms")
+
+ return results
+
+
+def generate_report(results: list[BenchmarkResult], output_path: str = None):
+ """Generate a summary report from benchmark results."""
+ print("\n" + "=" * 70)
+ print("BENCHMARK REPORT")
+ print("=" * 70)
+
+ for result in results:
+ print(f"\n{result.scenario}")
+ print("-" * 40)
+ print(f" Config: {json.dumps(result.config, indent=None)}")
+ print(f" Requests: {result.successful}/{result.total_requests} "
+ f"({result.failed} failed)")
+ print(f" Duration: {result.duration_sec}s")
+ print(f" Throughput: {result.requests_per_sec:.1f} req/s")
+ print(f" Latency (ms):")
+ print(f" min: {result.latency_ms.min:.2f}")
+ print(f" p50: {result.latency_ms.p50:.2f}")
+ print(f" p95: {result.latency_ms.p95:.2f}")
+ print(f" p99: {result.latency_ms.p99:.2f}")
+ print(f" max: {result.latency_ms.max:.2f}")
+ print(f" mean: {result.latency_ms.mean:.2f} "
+ f"(stddev: {result.latency_ms.stddev:.2f})")
+
+ if result.errors:
+ print(f" Errors (first {len(result.errors)}):")
+ for err in result.errors[:3]:
+ print(f" - {err[:80]}")
+
+ if output_path:
+ output_data = {
+ "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
+ "results": [r.to_dict() for r in results],
+ }
+ with open(output_path, 'w') as f:
+ json.dump(output_data, f, indent=2)
+ print(f"\nResults saved to: {output_path}")
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description='Benchmark the gunicorn dirty pool',
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=__doc__,
+ )
+
+ # Mode selection
+ mode_group = parser.add_mutually_exclusive_group()
+ mode_group.add_argument('--quick', action='store_true',
+ help='Run quick smoke test')
+ mode_group.add_argument('--isolated', action='store_true',
+ help='Run isolated benchmark suite')
+ mode_group.add_argument('--payload-tests', action='store_true',
+ help='Run payload size tests')
+ mode_group.add_argument('--config-sweep', action='store_true',
+ help='Sweep through configurations')
+ mode_group.add_argument('--integrated', action='store_true',
+ help='Run integrated HTTP benchmarks')
+
+ # Configuration
+ parser.add_argument('--workers', type=int, default=2,
+ help='Number of dirty workers (default: 2)')
+ parser.add_argument('--threads', type=int, default=1,
+ help='Threads per dirty worker (default: 1)')
+ parser.add_argument('--duration', type=int, default=10,
+ help='Task duration in ms for custom run')
+ parser.add_argument('--concurrency', type=int, default=10,
+ help='Number of concurrent clients')
+ parser.add_argument('--requests', type=int, default=1000,
+ help='Total requests to make')
+
+ # Integration mode options
+ parser.add_argument('--url', default='http://127.0.0.1:8000',
+ help='Server URL for integrated tests')
+
+ # Output
+ parser.add_argument('--output', '-o',
+ help='Output JSON file for results')
+ parser.add_argument('--verbose', '-v', action='store_true',
+ help='Verbose output')
+
+ args = parser.parse_args()
+
+ results = []
+
+ try:
+ if args.quick:
+ results = run_quick_test(verbose=args.verbose)
+ elif args.isolated:
+ results = run_isolated_suite(
+ workers=args.workers,
+ threads=args.threads,
+ verbose=args.verbose,
+ )
+ elif args.payload_tests:
+ results = run_payload_suite(
+ workers=args.workers,
+ threads=args.threads,
+ verbose=args.verbose,
+ )
+ elif args.config_sweep:
+ results = run_config_sweep(verbose=args.verbose)
+ elif args.integrated:
+ bench = IntegratedBenchmark(url=args.url, verbose=args.verbose)
+ tool = bench.check_dependencies()
+
+ if tool == 'wrk':
+ print(f"\nRunning integrated benchmarks with wrk...")
+ bench.warmup()
+
+ # Run basic scenarios
+ scenarios = [
+ ("/sleep?duration=10", "sleep_10ms"),
+ ("/cpu?duration=100", "cpu_100ms"),
+ ("/mixed?sleep=50&cpu=50", "mixed_50_50"),
+ ]
+
+ for path, name in scenarios:
+ print(f" Running {name}...")
+ metrics = bench.run_wrk(path, duration=10, connections=100)
+ print(f" Requests/sec: {metrics.get('requests_per_sec', 'N/A')}")
+
+ print("\nNote: For detailed results, use wrk directly:")
+ print(f" wrk -t4 -c100 -d30s --latency '{args.url}/sleep?duration=10'")
+ else:
+ print("\nUsing Python fallback (install wrk for better results)...")
+ bench.warmup()
+
+                start_time = time.perf_counter()
+                latencies, errors = bench.run_python_benchmark(
+                    "/sleep?duration=10",
+                    total_requests=args.requests,
+                    concurrency=args.concurrency,
+                )
+                duration = time.perf_counter() - start_time
+
+                result = BenchmarkResult(
+                    scenario="integrated_sleep",
+                    config={"url": args.url, "concurrency": args.concurrency},
+                    total_requests=args.requests,
+                    successful=len(latencies),
+                    failed=len(errors),
+                    errors=errors[:5],
+                    duration_sec=round(duration, 2),
+                    requests_per_sec=round(len(latencies) / duration, 1),
+                    latency_ms=LatencyStats.from_samples(latencies),
+                )
+                results.append(result)
+
+ else:
+ # Default: run custom single benchmark
+ print(f"\nRunning custom benchmark: "
+ f"duration={args.duration}ms, concurrency={args.concurrency}")
+
+ bench = IsolatedBenchmark(
+ dirty_workers=args.workers,
+ dirty_threads=args.threads,
+ verbose=args.verbose,
+ )
+
+ try:
+ bench.start()
+ bench.warmup()
+
+ start_time = time.perf_counter()
+ latencies, errors = bench.run_benchmark(
+ action="sleep_task",
+ args=(args.duration,),
+ total_requests=args.requests,
+ concurrency=args.concurrency,
+ )
+ duration = time.perf_counter() - start_time
+
+ result = BenchmarkResult(
+ scenario="custom",
+ config={
+ "dirty_workers": args.workers,
+ "dirty_threads": args.threads,
+ "task_duration_ms": args.duration,
+ "concurrency": args.concurrency,
+ },
+ total_requests=args.requests,
+ successful=len(latencies),
+ failed=len(errors),
+ errors=errors[:10],
+ duration_sec=round(duration, 2),
+ requests_per_sec=round(len(latencies) / duration, 1),
+ latency_ms=LatencyStats.from_samples(latencies),
+ )
+ results.append(result)
+
+ finally:
+ bench.stop()
+
+ # Generate report
+ if results:
+ generate_report(results, args.output)
+
+ except KeyboardInterrupt:
+ print("\nBenchmark interrupted")
+ sys.exit(1)
+ except Exception as e:
+ print(f"\nError: {e}")
+ if args.verbose:
+ import traceback
+ traceback.print_exc()
+ sys.exit(1)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/benchmarks/dirty_streaming.py b/benchmarks/dirty_streaming.py
new file mode 100644
index 0000000000..f5a279187f
--- /dev/null
+++ b/benchmarks/dirty_streaming.py
@@ -0,0 +1,755 @@
+#!/usr/bin/env python
+#
+# This file is part of gunicorn released under the MIT license.
+# See the NOTICE for more information.
+
+"""
+Benchmark suite for dirty worker streaming functionality.
+
+This script benchmarks the streaming performance of dirty workers
+to measure throughput, latency, and memory usage.
+
+Usage:
+ python benchmarks/dirty_streaming.py [OPTIONS]
+
+Options:
+ --quick Run quick benchmarks only
+ --full Run full benchmark suite including stress tests
+"""
+
+import argparse
+import asyncio
+import gc
+import json
+import os
+import struct
+import sys
+import time
+import tracemalloc
+from datetime import datetime
+from unittest import mock
+
+# Add parent directory to path
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from gunicorn.dirty.protocol import (
+ DirtyProtocol,
+ make_request,
+ make_chunk_message,
+ make_end_message,
+ make_response,
+)
+from gunicorn.dirty.worker import DirtyWorker
+from gunicorn.dirty.arbiter import DirtyArbiter
+from gunicorn.dirty.client import (
+ DirtyClient,
+ DirtyStreamIterator,
+ DirtyAsyncStreamIterator,
+)
+from gunicorn.config import Config
+
+
+class MockStreamWriter:
+ """Mock StreamWriter that captures written messages."""
+
+ def __init__(self):
+ self.messages = []
+ self._buffer = b""
+ self.bytes_written = 0
+
+ def write(self, data):
+ self._buffer += data
+ self.bytes_written += len(data)
+
+ async def drain(self):
+ while len(self._buffer) >= DirtyProtocol.HEADER_SIZE:
+ length = struct.unpack(
+ DirtyProtocol.HEADER_FORMAT,
+ self._buffer[:DirtyProtocol.HEADER_SIZE]
+ )[0]
+ total_size = DirtyProtocol.HEADER_SIZE + length
+ if len(self._buffer) >= total_size:
+ msg_data = self._buffer[DirtyProtocol.HEADER_SIZE:total_size]
+ self._buffer = self._buffer[total_size:]
+ self.messages.append(DirtyProtocol.decode(msg_data))
+ else:
+ break
+
+ def close(self):
+ pass
+
+ async def wait_closed(self):
+ pass
+
+
+class MockStreamReader:
+ """Mock StreamReader that yields predefined messages."""
+
+ def __init__(self, messages):
+ self._data = b''
+ for msg in messages:
+ self._data += DirtyProtocol.encode(msg)
+ self._pos = 0
+
+ async def readexactly(self, n):
+ if self._pos + n > len(self._data):
+ raise asyncio.IncompleteReadError(self._data[self._pos:], n)
+ result = self._data[self._pos:self._pos + n]
+ self._pos += n
+ return result
+
+
+class MockLog:
+ """Silent logger for benchmarks."""
+
+ def debug(self, msg, *args):
+ pass
+
+ def info(self, msg, *args):
+ pass
+
+ def warning(self, msg, *args):
+ pass
+
+ def error(self, msg, *args):
+ pass
+
+ def close_on_exec(self):
+ pass
+
+ def reopen_files(self):
+ pass
+
+
+def create_worker():
+ """Create a test worker for benchmarks."""
+ cfg = Config()
+ cfg.set("dirty_timeout", 300)
+ log = MockLog()
+
+ with mock.patch('gunicorn.dirty.worker.WorkerTmp'):
+ worker = DirtyWorker(
+ age=1,
+ ppid=os.getpid(),
+ app_paths=["benchmark:App"],
+ cfg=cfg,
+ log=log,
+ socket_path="/tmp/benchmark.sock"
+ )
+
+ worker.apps = {}
+ worker._executor = None
+ worker.tmp = mock.Mock()
+
+ return worker
+
+
+def create_arbiter():
+ """Create a test arbiter for benchmarks."""
+ cfg = Config()
+ cfg.set("dirty_timeout", 300)
+ log = MockLog()
+
+ arbiter = DirtyArbiter(cfg=cfg, log=log)
+ arbiter.alive = True
+ arbiter.workers = {1234: mock.Mock()}
+ arbiter.worker_sockets = {1234: '/tmp/worker.sock'}
+
+ return arbiter
+
+
+class BenchmarkResults:
+ """Store and display benchmark results."""
+
+ def __init__(self):
+ self.results = []
+
+ def add(self, name, iterations, duration, chunks=None, bytes_total=None,
+ memory_start=None, memory_end=None):
+ throughput = iterations / duration if duration > 0 else 0
+ result = {
+ "name": name,
+ "iterations": iterations,
+ "duration_s": round(duration, 4),
+ "throughput_per_s": round(throughput, 2),
+ }
+ if chunks:
+ result["chunks_per_s"] = round(chunks / duration, 2)
+ if bytes_total:
+ result["mb_per_s"] = round(bytes_total / (1024 * 1024) / duration, 2)
+ if memory_start is not None and memory_end is not None:
+ result["memory_start_mb"] = round(memory_start / (1024 * 1024), 2)
+ result["memory_end_mb"] = round(memory_end / (1024 * 1024), 2)
+ result["memory_delta_mb"] = round((memory_end - memory_start) / (1024 * 1024), 2)
+ self.results.append(result)
+
+ def display(self):
+ print("\n" + "=" * 70)
+ print("BENCHMARK RESULTS")
+ print("=" * 70)
+ for result in self.results:
+ print(f"\n{result['name']}")
+ print("-" * 50)
+ for key, value in result.items():
+ if key != "name":
+ print(f" {key}: {value}")
+ print("\n" + "=" * 70)
+
+ def save_json(self, filepath):
+ with open(filepath, 'w') as f:
+ json.dump({
+ "timestamp": datetime.now().isoformat(),
+ "results": self.results
+ }, f, indent=2)
+ print(f"Results saved to {filepath}")
+
+
+async def benchmark_worker_streaming_throughput(results, chunk_size=1024, num_chunks=1000):
+ """Benchmark worker streaming throughput with various chunk sizes."""
+ worker = create_worker()
+ writer = MockStreamWriter()
+
+ chunk_data = "x" * chunk_size
+
+    async def chunk_gen():
+        for _ in range(num_chunks):
+            yield chunk_data
+
+    async def mock_execute(app_path, action, args, kwargs):
+        return chunk_gen()
+
+ gc.collect()
+ tracemalloc.start()
+ memory_start = tracemalloc.get_traced_memory()[0]
+
+ start = time.perf_counter()
+
+ with mock.patch.object(worker, 'execute', side_effect=mock_execute):
+ request = make_request("bench-1", "benchmark:App", "stream")
+ await worker.handle_request(request, writer)
+
+ duration = time.perf_counter() - start
+ memory_end = tracemalloc.get_traced_memory()[0]
+ tracemalloc.stop()
+
+ total_bytes = chunk_size * num_chunks
+
+ results.add(
+ f"Worker streaming ({chunk_size}B chunks, {num_chunks} chunks)",
+ iterations=1,
+ duration=duration,
+ chunks=num_chunks,
+ bytes_total=total_bytes,
+ memory_start=memory_start,
+ memory_end=memory_end
+ )
+
+
+async def benchmark_arbiter_forwarding(results, num_chunks=1000):
+ """Benchmark arbiter message forwarding throughput."""
+ arbiter = create_arbiter()
+
+ messages = []
+ for i in range(num_chunks):
+ messages.append(make_chunk_message(f"bench-{i}", f"data-{i}"))
+ messages.append(make_end_message(f"bench-{num_chunks}"))
+
+ mock_reader = MockStreamReader(messages)
+
+ async def mock_get_connection(pid):
+ return mock_reader, MockStreamWriter()
+
+ arbiter._get_worker_connection = mock_get_connection
+
+ client_writer = MockStreamWriter()
+
+ gc.collect()
+ start = time.perf_counter()
+
+ request = make_request("bench-forward", "benchmark:App", "stream")
+ await arbiter._execute_on_worker(1234, request, client_writer)
+
+ duration = time.perf_counter() - start
+
+ results.add(
+ f"Arbiter forwarding ({num_chunks} chunks)",
+ iterations=1,
+ duration=duration,
+ chunks=num_chunks,
+ bytes_total=client_writer.bytes_written
+ )
+
+ arbiter._cleanup_sync()
+
+
+async def benchmark_streaming_latency(results, iterations=100):
+ """Benchmark time-to-first-chunk and time-to-last-chunk."""
+ worker = create_worker()
+
+ first_chunk_times = []
+ total_times = []
+
+ for _ in range(iterations):
+ writer = MockStreamWriter()
+
+ async def gen_3_chunks():
+ yield "first"
+ yield "second"
+ yield "third"
+
+ async def mock_execute(app_path, action, args, kwargs):
+ return gen_3_chunks()
+
+ start = time.perf_counter()
+
+ with mock.patch.object(worker, 'execute', side_effect=mock_execute):
+ request = make_request("bench-latency", "benchmark:App", "stream")
+ await worker.handle_request(request, writer)
+
+ # Find time when first chunk was received
+ if writer.messages:
+ first_chunk_times.append(time.perf_counter() - start)
+
+ total_times.append(time.perf_counter() - start)
+
+ avg_first_chunk = sum(first_chunk_times) / len(first_chunk_times) if first_chunk_times else 0
+ avg_total = sum(total_times) / len(total_times)
+
+ print(f"\nLatency Results ({iterations} iterations):")
+ print(f" Avg time-to-first-chunk: {avg_first_chunk * 1000:.3f}ms")
+ print(f" Avg time-to-last-chunk: {avg_total * 1000:.3f}ms")
+
+ results.add(
+ f"Streaming latency ({iterations} iterations)",
+ iterations=iterations,
+ duration=sum(total_times),
+ chunks=iterations * 3
+ )
+
+
+async def benchmark_concurrent_streams(results, num_streams=10, chunks_per_stream=100):
+ """Benchmark multiple concurrent streams."""
+ arbiter = create_arbiter()
+
+ async def run_stream(stream_id):
+ messages = []
+ for i in range(chunks_per_stream):
+ messages.append(make_chunk_message(f"stream-{stream_id}", f"chunk-{i}"))
+ messages.append(make_end_message(f"stream-{stream_id}"))
+
+ mock_reader = MockStreamReader(messages)
+ async def mock_get_connection(pid):
+ return mock_reader, MockStreamWriter()
+
+ arbiter._get_worker_connection = mock_get_connection
+ client_writer = MockStreamWriter()
+
+ request = make_request(f"bench-concurrent-{stream_id}", "benchmark:App", "stream")
+ await arbiter._execute_on_worker(1234, request, client_writer)
+ return len(client_writer.messages)
+
+ gc.collect()
+ start = time.perf_counter()
+
+ # Run streams concurrently
+ tasks = [run_stream(i) for i in range(num_streams)]
+ results_list = await asyncio.gather(*tasks)
+
+ duration = time.perf_counter() - start
+
+ total_chunks = sum(results_list)
+
+ results.add(
+ f"Concurrent streams ({num_streams} streams, {chunks_per_stream} chunks each)",
+ iterations=num_streams,
+ duration=duration,
+ chunks=total_chunks
+ )
+
+ arbiter._cleanup_sync()
+
+
+async def benchmark_memory_stability(results, iterations=10, chunks=1000):
+ """Check memory stability over many iterations."""
+ worker = create_worker()
+
+ gc.collect()
+ tracemalloc.start()
+ memory_samples = [tracemalloc.get_traced_memory()[0]]
+
+ for i in range(iterations):
+ writer = MockStreamWriter()
+
+ async def gen_chunks():
+ for j in range(chunks):
+ yield f"chunk-{j}"
+
+ async def mock_execute(app_path, action, args, kwargs):
+ return gen_chunks()
+
+ with mock.patch.object(worker, 'execute', side_effect=mock_execute):
+ request = make_request(f"bench-mem-{i}", "benchmark:App", "stream")
+ await worker.handle_request(request, writer)
+
+ gc.collect()
+ memory_samples.append(tracemalloc.get_traced_memory()[0])
+
+ tracemalloc.stop()
+
+ memory_start = memory_samples[0]
+ memory_end = memory_samples[-1]
+ memory_max = max(memory_samples)
+
+ print(f"\nMemory stability ({iterations} iterations of {chunks} chunks):")
+ print(f" Start: {memory_start / 1024 / 1024:.2f}MB")
+ print(f" End: {memory_end / 1024 / 1024:.2f}MB")
+ print(f" Max: {memory_max / 1024 / 1024:.2f}MB")
+ print(f" Delta: {(memory_end - memory_start) / 1024 / 1024:.2f}MB")
+
+ results.add(
+ f"Memory stability ({iterations} x {chunks} chunks)",
+ iterations=iterations * chunks,
+ duration=0.001, # Use small non-zero value to avoid division by zero
+ memory_start=memory_start,
+ memory_end=memory_end
+ )
+
+
+class MockClientReader:
+ """Mock async reader that simulates receiving streaming messages."""
+
+ def __init__(self, num_chunks, chunk_data):
+ self.num_chunks = num_chunks
+ self.chunk_data = chunk_data
+ self._chunk_idx = 0
+ self._messages = []
+ self._build_messages()
+ self._pos = 0
+ self._data = b''
+ for msg in self._messages:
+ self._data += DirtyProtocol.encode(msg)
+
+ def _build_messages(self):
+ for i in range(self.num_chunks):
+ self._messages.append(make_chunk_message(f"bench-{i}", self.chunk_data))
+        self._messages.append(make_end_message("bench-end"))
+
+ async def readexactly(self, n):
+ if self._pos + n > len(self._data):
+ raise asyncio.IncompleteReadError(self._data[self._pos:], n)
+ result = self._data[self._pos:self._pos + n]
+ self._pos += n
+ return result
+
+
+class MockClientWriter:
+ """Mock async writer for client connection."""
+
+ def __init__(self):
+ self._buffer = b""
+ self._closed = False
+
+ def write(self, data):
+ self._buffer += data
+
+ async def drain(self):
+ pass
+
+ def close(self):
+ self._closed = True
+
+ async def wait_closed(self):
+ pass
+
+
+async def benchmark_async_client_streaming(results, chunk_size=1024, num_chunks=1000):
+ """
+ Benchmark DirtyAsyncStreamIterator directly.
+
+ Measures async iterator overhead vs raw message reading.
+ """
+ chunk_data = "x" * chunk_size
+
+ # Create mock client with mock reader/writer
+ client = DirtyClient("/tmp/benchmark.sock", timeout=30.0)
+ client._reader = MockClientReader(num_chunks, chunk_data)
+ client._writer = MockClientWriter()
+
+ gc.collect()
+ tracemalloc.start()
+ memory_start = tracemalloc.get_traced_memory()[0]
+
+ start = time.perf_counter()
+
+ # Use the async stream iterator directly
+ iterator = DirtyAsyncStreamIterator(client, "benchmark:App", "stream", (), {})
+ iterator._started = True # Skip the request sending
+ iterator._request_id = "bench-async"
+ iterator._deadline = time.perf_counter() + 300 # 5 min deadline
+ iterator._last_chunk_time = time.perf_counter()
+
+ chunks_received = 0
+ bytes_received = 0
+ async for chunk in iterator:
+ chunks_received += 1
+ bytes_received += len(chunk)
+
+ duration = time.perf_counter() - start
+ memory_end = tracemalloc.get_traced_memory()[0]
+ tracemalloc.stop()
+
+ results.add(
+ f"Async client streaming ({chunk_size}B chunks, {num_chunks} chunks)",
+ iterations=1,
+ duration=duration,
+ chunks=chunks_received,
+ bytes_total=bytes_received,
+ memory_start=memory_start,
+ memory_end=memory_end
+ )
+
+
+async def benchmark_sync_client_streaming(results, chunk_size=1024, num_chunks=1000):
+ """
+ Benchmark DirtyStreamIterator directly (for comparison with async).
+
+ Note: This runs the sync iterator within an async context for comparison.
+ """
+ chunk_data = "x" * chunk_size
+
+ # Build raw message data
+ messages_data = b''
+ for i in range(num_chunks):
+ msg = make_chunk_message(f"bench-{i}", chunk_data)
+ messages_data += DirtyProtocol.encode(msg)
+ messages_data += DirtyProtocol.encode(make_end_message("bench-end"))
+
+ # Create a mock socket-like object
+ class MockSocket:
+ def __init__(self, data):
+ self._data = data
+ self._pos = 0
+ self._timeout = None
+
+ def recv(self, n, flags=0):
+ if self._pos >= len(self._data):
+ return b''
+ result = self._data[self._pos:self._pos + n]
+ self._pos += len(result)
+ return result
+
+ def settimeout(self, timeout):
+ self._timeout = timeout
+
+ # Create mock client
+ client = DirtyClient("/tmp/benchmark.sock", timeout=30.0)
+ client._sock = MockSocket(messages_data)
+
+ gc.collect()
+ tracemalloc.start()
+ memory_start = tracemalloc.get_traced_memory()[0]
+
+ start = time.perf_counter()
+
+ # Use the sync stream iterator
+ iterator = DirtyStreamIterator(client, "benchmark:App", "stream", (), {})
+ iterator._started = True # Skip the request sending
+ iterator._request_id = "bench-sync"
+ iterator._deadline = time.perf_counter() + 300 # 5 min deadline
+ iterator._last_chunk_time = time.perf_counter()
+
+ chunks_received = 0
+ bytes_received = 0
+ for chunk in iterator:
+ chunks_received += 1
+ bytes_received += len(chunk)
+
+ duration = time.perf_counter() - start
+ memory_end = tracemalloc.get_traced_memory()[0]
+ tracemalloc.stop()
+
+ results.add(
+ f"Sync client streaming ({chunk_size}B chunks, {num_chunks} chunks)",
+ iterations=1,
+ duration=duration,
+ chunks=chunks_received,
+ bytes_total=bytes_received,
+ memory_start=memory_start,
+ memory_end=memory_end
+ )
+
+
+async def benchmark_async_vs_sync_client_streaming(results, chunk_size=1024, num_chunks=1000):
+ """
+ Compare stream() vs stream_async() performance with the same workload.
+ """
+ chunk_data = "x" * chunk_size
+
+ # --- Sync test ---
+ messages_data = b''
+ for i in range(num_chunks):
+ msg = make_chunk_message(f"bench-{i}", chunk_data)
+ messages_data += DirtyProtocol.encode(msg)
+ messages_data += DirtyProtocol.encode(make_end_message("bench-end"))
+
+ class MockSocket:
+ def __init__(self, data):
+ self._data = data
+ self._pos = 0
+ self._timeout = None
+
+ def recv(self, n, flags=0):
+ if self._pos >= len(self._data):
+ return b''
+ result = self._data[self._pos:self._pos + n]
+ self._pos += len(result)
+ return result
+
+ def settimeout(self, timeout):
+ self._timeout = timeout
+
+ sync_client = DirtyClient("/tmp/benchmark.sock", timeout=30.0)
+ sync_client._sock = MockSocket(messages_data)
+
+ gc.collect()
+ sync_start = time.perf_counter()
+
+ sync_iter = DirtyStreamIterator(sync_client, "benchmark:App", "stream", (), {})
+ sync_iter._started = True
+ sync_iter._request_id = "bench-sync"
+ sync_iter._deadline = time.perf_counter() + 300 # 5 min deadline
+ sync_iter._last_chunk_time = time.perf_counter()
+
+ sync_chunks = 0
+ for _ in sync_iter:
+ sync_chunks += 1
+
+ sync_duration = time.perf_counter() - sync_start
+
+ # --- Async test ---
+ async_client = DirtyClient("/tmp/benchmark.sock", timeout=30.0)
+ async_client._reader = MockClientReader(num_chunks, chunk_data)
+ async_client._writer = MockClientWriter()
+
+ gc.collect()
+ async_start = time.perf_counter()
+
+ async_iter = DirtyAsyncStreamIterator(async_client, "benchmark:App", "stream", (), {})
+ async_iter._started = True
+ async_iter._request_id = "bench-async"
+ async_iter._deadline = time.perf_counter() + 300 # 5 min deadline
+ async_iter._last_chunk_time = time.perf_counter()
+
+ async_chunks = 0
+ async for _ in async_iter:
+ async_chunks += 1
+
+ async_duration = time.perf_counter() - async_start
+
+ # Report comparison
+ print(f"\nSync vs Async Client Streaming Comparison ({num_chunks} x {chunk_size}B chunks):")
+ print(f" Sync: {sync_duration * 1000:.3f}ms ({sync_chunks} chunks)")
+ print(f" Async: {async_duration * 1000:.3f}ms ({async_chunks} chunks)")
+ if sync_duration > 0:
+ ratio = async_duration / sync_duration
+ print(f" Ratio (async/sync): {ratio:.3f}x")
+
+ results.add(
+ f"Sync client streaming comparison ({chunk_size}B, {num_chunks} chunks)",
+ iterations=1,
+ duration=sync_duration,
+ chunks=sync_chunks,
+ bytes_total=sync_chunks * chunk_size
+ )
+
+ results.add(
+ f"Async client streaming comparison ({chunk_size}B, {num_chunks} chunks)",
+ iterations=1,
+ duration=async_duration,
+ chunks=async_chunks,
+ bytes_total=async_chunks * chunk_size
+ )
+
+
+async def run_quick_benchmarks():
+ """Run quick benchmarks."""
+ results = BenchmarkResults()
+
+ print("Running quick benchmarks...")
+
+ await benchmark_worker_streaming_throughput(results, chunk_size=64, num_chunks=1000)
+ await benchmark_worker_streaming_throughput(results, chunk_size=1024, num_chunks=1000)
+ await benchmark_arbiter_forwarding(results, num_chunks=1000)
+ await benchmark_streaming_latency(results, iterations=50)
+
+ # Async client streaming benchmarks
+ await benchmark_async_client_streaming(results, chunk_size=1024, num_chunks=1000)
+ await benchmark_async_vs_sync_client_streaming(results, chunk_size=1024, num_chunks=1000)
+
+ return results
+
+
+async def run_full_benchmarks():
+ """Run full benchmark suite including stress tests."""
+ results = BenchmarkResults()
+
+ print("Running full benchmark suite...")
+
+ # Throughput tests with different chunk sizes
+ for chunk_size in [1, 64, 1024, 65536]:
+ await benchmark_worker_streaming_throughput(
+ results, chunk_size=chunk_size, num_chunks=1000
+ )
+
+ # Arbiter forwarding
+ await benchmark_arbiter_forwarding(results, num_chunks=10000)
+
+ # Latency
+ await benchmark_streaming_latency(results, iterations=100)
+
+ # Concurrent streams
+ await benchmark_concurrent_streams(results, num_streams=10, chunks_per_stream=100)
+ await benchmark_concurrent_streams(results, num_streams=50, chunks_per_stream=100)
+
+ # Memory stability
+ await benchmark_memory_stability(results, iterations=20, chunks=1000)
+
+ # Async client streaming benchmarks
+ for chunk_size in [64, 1024, 65536]:
+ await benchmark_async_client_streaming(results, chunk_size=chunk_size, num_chunks=1000)
+ await benchmark_sync_client_streaming(results, chunk_size=chunk_size, num_chunks=1000)
+
+ # Comparison benchmark
+ await benchmark_async_vs_sync_client_streaming(results, chunk_size=1024, num_chunks=5000)
+
+ return results
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Dirty streaming benchmarks")
+ parser.add_argument("--quick", action="store_true", help="Run quick benchmarks only")
+ parser.add_argument("--full", action="store_true", help="Run full benchmark suite")
+ parser.add_argument("--output", "-o", help="Output JSON file path")
+ args = parser.parse_args()
+
+ if args.full:
+ results = asyncio.run(run_full_benchmarks())
+ else:
+ results = asyncio.run(run_quick_benchmarks())
+
+ results.display()
+
+ if args.output:
+ results.save_json(args.output)
+ else:
+ # Save to default location
+ output_dir = os.path.dirname(os.path.abspath(__file__))
+ results_dir = os.path.join(output_dir, "results")
+ os.makedirs(results_dir, exist_ok=True)
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ output_file = os.path.join(results_dir, f"streaming_benchmark_{timestamp}.json")
+ results.save_json(output_file)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmarks/results/queue_refactor_results.json b/benchmarks/results/queue_refactor_results.json
new file mode 100644
index 0000000000..c5ca192d1a
--- /dev/null
+++ b/benchmarks/results/queue_refactor_results.json
@@ -0,0 +1,168 @@
+{
+ "timestamp": "2026-01-24T10:56:33",
+ "results": [
+ {
+ "scenario": "baseline_10ms",
+ "config": {
+ "dirty_workers": 4,
+ "dirty_threads": 1,
+ "task_action": "sleep_task",
+ "task_args": [
+ 10
+ ],
+ "concurrency": 1
+ },
+ "total_requests": 1000,
+ "successful": 1000,
+ "failed": 0,
+ "errors": [],
+ "duration_sec": 12.27,
+ "requests_per_sec": 81.5,
+ "latency_ms": {
+ "min": 10.432417009724304,
+ "max": 13.792542013106868,
+ "mean": 12.266892079642275,
+ "stddev": 0.871026700472873,
+ "p50": 12.80679099727422,
+ "p95": 13.078375020995736,
+ "p99": 13.141458010068163
+ }
+ },
+ {
+ "scenario": "throughput_10ms",
+ "config": {
+ "dirty_workers": 4,
+ "dirty_threads": 1,
+ "task_action": "sleep_task",
+ "task_args": [
+ 10
+ ],
+ "concurrency": 100
+ },
+ "total_requests": 5000,
+ "successful": 5000,
+ "failed": 0,
+ "errors": [],
+ "duration_sec": 14.95,
+ "requests_per_sec": 334.4,
+ "latency_ms": {
+ "min": 11.470375000499189,
+ "max": 341.3927500077989,
+ "mean": 294.71728502821645,
+ "stddev": 34.9421432011074,
+ "p50": 305.2922079805285,
+ "p95": 326.4670000062324,
+ "p99": 334.32295799138956
+ }
+ },
+ {
+ "scenario": "cpu_bound_100ms",
+ "config": {
+ "dirty_workers": 4,
+ "dirty_threads": 1,
+ "task_action": "cpu_task",
+ "task_args": [
+ 100
+ ],
+ "concurrency": 20
+ },
+ "total_requests": 500,
+ "successful": 500,
+ "failed": 0,
+ "errors": [],
+ "duration_sec": 12.55,
+ "requests_per_sec": 39.8,
+ "latency_ms": {
+ "min": 100.59350001392886,
+ "max": 502.4004160077311,
+ "mean": 493.9748328983551,
+ "stddev": 48.57073135808595,
+ "p50": 502.01483300770633,
+ "p95": 502.21283300197683,
+ "p99": 502.2801249870099
+ }
+ },
+ {
+ "scenario": "io_bound_500ms",
+ "config": {
+ "dirty_workers": 4,
+ "dirty_threads": 1,
+ "task_action": "sleep_task",
+ "task_args": [
+ 500
+ ],
+ "concurrency": 50
+ },
+ "total_requests": 200,
+ "successful": 200,
+ "failed": 0,
+ "errors": [],
+ "duration_sec": 25.19,
+ "requests_per_sec": 7.9,
+ "latency_ms": {
+ "min": 501.3219590182416,
+ "max": 6563.243499986129,
+ "mean": 5566.4884116455505,
+ "stddev": 1566.1525736181566,
+ "p50": 6052.653749997262,
+ "p95": 6553.810708021047,
+ "p99": 6559.503666008823
+ }
+ },
+ {
+ "scenario": "mixed_50_50",
+ "config": {
+ "dirty_workers": 4,
+ "dirty_threads": 1,
+ "task_action": "mixed_task",
+ "task_args": [
+ 50,
+ 50
+ ],
+ "concurrency": 30
+ },
+ "total_requests": 500,
+ "successful": 500,
+ "failed": 0,
+ "errors": [],
+ "duration_sec": 12.98,
+ "requests_per_sec": 38.5,
+ "latency_ms": {
+ "min": 102.34933299943805,
+ "max": 839.0888340072706,
+ "mean": 756.4045974735054,
+ "stddev": 103.21897997316475,
+ "p50": 762.6495829899795,
+ "p95": 832.905125018442,
+ "p99": 836.0978330019861
+ }
+ },
+ {
+ "scenario": "overload_10ms",
+ "config": {
+ "dirty_workers": 4,
+ "dirty_threads": 1,
+ "task_action": "sleep_task",
+ "task_args": [
+ 10
+ ],
+ "concurrency": 200
+ },
+ "total_requests": 2000,
+ "successful": 2000,
+ "failed": 0,
+ "errors": [],
+ "duration_sec": 5.99,
+ "requests_per_sec": 334.1,
+ "latency_ms": {
+ "min": 10.763874975964427,
+ "max": 625.4918330232613,
+ "mean": 565.1407622727129,
+ "stddev": 104.98938999734894,
+ "p50": 590.0453749927692,
+ "p95": 617.4105420068372,
+ "p99": 621.7636249784846
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 9b628a7ced..3d28647a55 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -10,7 +10,7 @@ RUN useradd --create-home --shell /bin/bash gunicorn
WORKDIR /app
# Install gunicorn from source
-COPY pyproject.toml README.rst LICENSE ./
+COPY pyproject.toml README.md LICENSE ./
COPY gunicorn/ ./gunicorn/
RUN pip install --no-cache-dir .
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000000..f36a2b936a
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,27 @@
+# Generate Documentation
+
+## Requirements
+
+Install the documentation dependencies with:
+
+```bash
+pip install -r requirements_dev.txt
+```
+
+This provides MkDocs with the Material theme and supporting plugins.
+
+## Build static HTML
+
+```bash
+mkdocs build
+```
+
+The rendered site is emitted into the `site/` directory.
+
+## Preview locally
+
+```bash
+mkdocs serve
+```
+
+This serves the documentation at http://127.0.0.1:8000/ with live reload.
diff --git a/docs/README.rst b/docs/README.rst
deleted file mode 100644
index 6ce90ba956..0000000000
--- a/docs/README.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-Generate Documentation
-======================
-
-Requirements
-------------
-
-Install the documentation dependencies with::
-
- pip install -r requirements_dev.txt
-
-This provides MkDocs with the Material theme and supporting plugins.
-
-
-Build static HTML
------------------
-::
-
- mkdocs build
-
-The rendered site is emitted into the ``site/`` directory.
-
-
-Preview locally
----------------
-::
-
- mkdocs serve
-
-This serves the documentation at http://127.0.0.1:8000/ with live reload.
diff --git a/docs/content/2026-news.md b/docs/content/2026-news.md
index 4e4d74a710..5b602d5218 100644
--- a/docs/content/2026-news.md
+++ b/docs/content/2026-news.md
@@ -1,6 +1,86 @@
# Changelog - 2026
+## 25.0.0 - 2026-02-01
+
+### New Features
+
+- **Dirty Arbiters**: Separate process pool for executing long-running, blocking
+ operations (AI model loading, heavy computation) without blocking HTTP workers
+ ([PR #3460](https://github.com/benoitc/gunicorn/pull/3460))
+ - Inspired by Erlang's dirty schedulers
+ - Asyncio-based with Unix socket IPC
+ - Stateful workers that persist loaded resources
+ - New settings: `--dirty-app`, `--dirty-workers`, `--dirty-timeout`,
+ `--dirty-threads`, `--dirty-graceful-timeout`
+ - Lifecycle hooks: `on_dirty_starting`, `dirty_post_fork`,
+ `dirty_worker_init`, `dirty_worker_exit`
+
+- **Per-App Worker Allocation for Dirty Arbiters**: Control how many dirty workers
+ load each app for memory optimization with heavy models
+ ([PR #3473](https://github.com/benoitc/gunicorn/pull/3473))
+ - Set `workers` class attribute on DirtyApp (e.g., `workers = 2`)
+ - Or use config format `module:class:N` (e.g., `myapp:HeavyModel:2`)
+ - Requests automatically routed to workers with the target app
+ - New exception `DirtyNoWorkersAvailableError` for graceful error handling
+ - Example: 8 workers × 10GB model = 80GB → with `workers=2`: 20GB (75% savings)
+
+- **HTTP/2 Support (Beta)**: Native HTTP/2 (RFC 7540) support for improved performance
+ with modern clients ([PR #3468](https://github.com/benoitc/gunicorn/pull/3468))
+ - Multiplexed streams over a single connection
+ - Header compression (HPACK)
+ - Flow control and stream prioritization
+ - Works with gthread, gevent, and ASGI workers
+ - New settings: `--http-protocols`, `--http2-max-concurrent-streams`,
+ `--http2-initial-window-size`, `--http2-max-frame-size`, `--http2-max-header-list-size`
+ - Requires SSL/TLS and h2 library: `pip install gunicorn[http2]`
+ - See [HTTP/2 Guide](guides/http2.md) for details
+ - New example: `examples/http2_gevent/` with Docker and tests
+
+- **HTTP 103 Early Hints**: Support for RFC 8297 Early Hints to enable browsers to
+ preload resources before the final response
+ ([PR #3468](https://github.com/benoitc/gunicorn/pull/3468))
+ - WSGI: `environ['wsgi.early_hints'](headers)` callback
+ - ASGI: `http.response.informational` message type
+ - Works with both HTTP/1.1 and HTTP/2
+
+- **uWSGI Protocol for ASGI Worker**: The ASGI worker now supports receiving requests
+ via the uWSGI binary protocol from nginx
+ ([PR #3467](https://github.com/benoitc/gunicorn/pull/3467))
+
+### Bug Fixes
+
+- Fix HTTP/2 ALPN negotiation for gevent and eventlet workers when
+ `do_handshake_on_connect` is False (the default). The TLS handshake is now
+ explicitly performed before checking `selected_alpn_protocol()`.
+
+- Fix setproctitle initialization with systemd socket activation
+ ([#3465](https://github.com/benoitc/gunicorn/issues/3465))
+
+- Fix `Expect: 100-continue` handling: ignore the header for HTTP/1.0 requests
+ since 100-continue is only valid for HTTP/1.1+
+ ([PR #3463](https://github.com/benoitc/gunicorn/pull/3463))
+
+- Fix missing `_expected_100_continue` attribute in UWSGIRequest
+
+- Disable setproctitle on macOS to prevent segfaults during process title updates
+
+- Publish full exception traceback when the application fails to load
+ ([#3462](https://github.com/benoitc/gunicorn/issues/3462))
+
+### Deprecations
+
+- **Eventlet Worker**: The `eventlet` worker is deprecated and will be removed in
+ Gunicorn 26.0. Eventlet itself is [no longer actively maintained](https://eventlet.readthedocs.io/en/latest/asyncio/migration.html).
+ Please migrate to `gevent`, `gthread`, or another supported worker type.
+
+### Changes
+
+- Remove obsolete Makefile targets
+ ([PR #3471](https://github.com/benoitc/gunicorn/pull/3471))
+
+---
+
## 24.1.1 - 2026-01-24
### Bug Fixes
diff --git a/docs/content/asgi.md b/docs/content/asgi.md
index 8cc51b4b0b..a09a7190f8 100644
--- a/docs/content/asgi.md
+++ b/docs/content/asgi.md
@@ -33,6 +33,7 @@ The ASGI worker provides:
- **Lifespan protocol** for startup/shutdown hooks
- **Optional uvloop** for improved performance
- **SSL/TLS** support
+- **uWSGI protocol** for nginx `uwsgi_pass` integration
## Configuration
@@ -151,7 +152,7 @@ app = Starlette(routes=[
## Production Deployment
-### With Nginx
+### With Nginx (HTTP Proxy)
```nginx
upstream gunicorn {
@@ -181,6 +182,36 @@ server {
}
```
+### With Nginx (uWSGI Protocol)
+
+For better performance, you can use nginx's native uWSGI protocol support:
+
+```bash
+gunicorn myapp:app --worker-class asgi --protocol uwsgi --bind 127.0.0.1:8000
+```
+
+```nginx
+upstream gunicorn {
+ server 127.0.0.1:8000;
+}
+
+server {
+ listen 80;
+ server_name example.com;
+
+ location / {
+ uwsgi_pass gunicorn;
+ include uwsgi_params;
+ }
+}
+```
+
+!!! note
+ WebSocket connections are not supported when using the uWSGI protocol.
+ Use HTTP proxy for WebSocket endpoints.
+
+See [uWSGI Protocol](uwsgi.md) for more details on uWSGI protocol configuration.
+
### Recommended Settings
For production ASGI deployments:
@@ -204,11 +235,14 @@ asgi_lifespan = "auto" # Auto-detect lifespan support
| Feature | Gunicorn ASGI | Uvicorn | Hypercorn |
|---------|---------------|---------|-----------|
| Process management | Built-in | External | Built-in |
-| HTTP/2 | No | No | Yes |
+| HTTP/2 | Yes | No | Yes |
| WebSocket | Yes | Yes | Yes |
| Lifespan | Yes | Yes | Yes |
| uvloop support | Yes | Yes | Yes |
+!!! note
+ HTTP/2 requires SSL/TLS and the h2 library. See [HTTP/2 Support](guides/http2.md) for details.
+
Gunicorn's ASGI worker provides the same process management, logging, and
configuration capabilities you're familiar with from WSGI deployments.
diff --git a/docs/content/assets/stylesheets/home.css b/docs/content/assets/stylesheets/home.css
index 5f1748ef72..bcdd6d1be7 100644
--- a/docs/content/assets/stylesheets/home.css
+++ b/docs/content/assets/stylesheets/home.css
@@ -360,6 +360,62 @@
color: var(--text-muted);
}
+/* ============================================
+ Sponsors
+ ============================================ */
+.sponsors {
+ text-align: center;
+}
+
+.sponsors h2 {
+ font-size: 1.75rem;
+ margin: 0 0 0.5rem 0;
+}
+
+.sponsors p {
+ color: var(--text-muted);
+ margin: 0 0 2rem 0;
+}
+
+.sponsors__logos {
+ display: flex;
+ flex-wrap: wrap;
+ justify-content: center;
+ align-items: center;
+ gap: 2rem;
+ margin-bottom: 2rem;
+ min-height: 60px;
+}
+
+.sponsors__logos img {
+ max-height: 50px;
+ max-width: 150px;
+ filter: grayscale(100%);
+ opacity: 0.7;
+ transition: all 0.15s ease;
+}
+
+.sponsors__logos img:hover {
+ filter: grayscale(0%);
+ opacity: 1;
+}
+
+[data-md-color-scheme="slate"] .sponsors__logos img {
+ filter: grayscale(100%) brightness(1.5);
+}
+
+[data-md-color-scheme="slate"] .sponsors__logos img:hover {
+ filter: grayscale(0%) brightness(1);
+}
+
+.sponsors__placeholder {
+ color: var(--text-muted);
+ font-size: 0.875rem;
+ padding: 1rem 2rem;
+ border: 2px dashed var(--border);
+ border-radius: 8px;
+}
+
/* ============================================
Footer CTA
============================================ */
diff --git a/docs/content/design.md b/docs/content/design.md
index ec2cce27c6..c06c1cbd52 100644
--- a/docs/content/design.md
+++ b/docs/content/design.md
@@ -95,7 +95,12 @@ Choose a worker type based on your application's needs.
gunicorn myapp:app -k gevent --worker-connections 1000
```
-=== "Eventlet"
+=== "Eventlet (Deprecated)"
+
+ !!! warning "Deprecated"
+ The eventlet worker is **deprecated** and will be removed in Gunicorn 26.0.
+ Eventlet itself is [no longer actively maintained](https://eventlet.readthedocs.io/en/latest/asyncio/migration.html).
+ Please migrate to `gevent`, `gthread`, or another supported worker type.
**Greenlet-based** async worker using [Eventlet](http://eventlet.net/).
@@ -127,14 +132,14 @@ Choose a worker type based on your application's needs.
| `gthread` | Thread pool | ✅ | Mixed workloads, moderate concurrency |
| ASGI workers | AsyncIO | ✅ | Modern async frameworks (FastAPI, etc.) |
| `gevent` | Greenlets | ✅ | I/O-bound, WebSockets, streaming |
-| `eventlet` | Greenlets | ✅ | I/O-bound, long-polling |
+| `eventlet` | Greenlets | ✅ | **Deprecated** - use `gevent` instead |
| `tornado` | Tornado IOLoop | ✅ | Native Tornado applications |
!!! tip "Quick Decision Guide"
- **Simple app behind nginx?** → `sync` (default)
- **Need keep-alive or moderate concurrency?** → `gthread`
- - **WebSockets, streaming, long-polling?** → `gevent` or `eventlet`
+ - **WebSockets, streaming, long-polling?** → `gevent` or ASGI worker
- **FastAPI, Starlette, or async framework?** → ASGI worker
## When to Use Async Workers
@@ -200,9 +205,6 @@ gunicorn myapp:app -k gthread --workers 4 --threads 4
# Gevent - high concurrency for I/O-bound apps
gunicorn myapp:app -k gevent --workers 4 --worker-connections 1000
-# Eventlet - alternative async worker
-gunicorn myapp:app -k eventlet --workers 4 --worker-connections 1000
-
# ASGI - FastAPI/Starlette with Uvicorn worker
gunicorn myapp:app -k uvicorn.workers.UvicornWorker --workers 4
```
diff --git a/docs/content/dirty.md b/docs/content/dirty.md
new file mode 100644
index 0000000000..8afede06fc
--- /dev/null
+++ b/docs/content/dirty.md
@@ -0,0 +1,843 @@
+---
+title: Dirty Arbiters
+menu:
+ guides:
+ weight: 10
+---
+
+# Dirty Arbiters
+
+Dirty Arbiters provide a separate process pool for executing long-running, blocking operations (AI model loading, heavy computation) without blocking HTTP workers. This feature is inspired by Erlang's dirty schedulers.
+
+## Overview
+
+Traditional Gunicorn workers are designed to handle HTTP requests quickly. Long-running operations like loading ML models or performing heavy computation can block these workers, reducing the server's ability to handle concurrent requests.
+
+Dirty Arbiters solve this by providing:
+
+- **Separate worker pool** - Completely separate from HTTP workers, can be killed/restarted independently
+- **Stateful workers** - Loaded resources persist in dirty worker memory
+- **Message-passing IPC** - Communication via Unix sockets with JSON serialization
+- **Explicit API** - Clear `execute()` calls (no hidden IPC)
+- **Asyncio-based** - Clean concurrent handling with streaming support
+
+## Design Philosophy
+
+Dirty Arbiters follow several key design principles:
+
+### Separate Process Hierarchy
+
+Unlike threads or in-process pools, Dirty Arbiters use a fully separate process tree:
+
+- **Isolation** - A crash or memory leak in a dirty worker cannot affect HTTP workers
+- **Independent lifecycle** - Dirty workers can be killed/restarted without affecting request handling
+- **Resource accounting** - OS-level memory limits can be applied per-process
+- **Clean shutdown** - Each process tree can be signaled and terminated independently
+
+### Erlang Inspiration
+
+The name and concept come from Erlang's "dirty schedulers" - special schedulers that handle operations that would block normal schedulers. In Erlang, dirty schedulers run NIFs (Native Implemented Functions) that can't yield. Similarly, Gunicorn's Dirty Arbiters handle Python operations that would block HTTP workers.
+
+### Why Asyncio
+
+The Dirty Arbiter uses asyncio for its core loop rather than the main arbiter's select-based approach:
+
+- **Non-blocking IPC** - Can handle many concurrent client connections efficiently
+- **Concurrent request routing** - Multiple requests can be dispatched to workers simultaneously
+- **Streaming support** - Native async generators for streaming responses
+- **Clean signal handling** - Signals integrate cleanly via `loop.add_signal_handler()`
+
+### Stateful Applications
+
+Traditional WSGI apps are request-scoped - they're invoked per-request and don't maintain state between requests. Dirty apps are different:
+
+- **Long-lived** - Apps persist in worker memory for the worker's lifetime
+- **Pre-loaded resources** - Models, connections, and caches stay loaded
+- **Explicit state management** - Apps control their own lifecycle via `init()` and `close()`
+
+This makes dirty apps ideal for ML inference, where loading a model once and reusing it for many requests is essential.
+
+## Architecture
+
+```
+ +-------------------+
+ | Main Arbiter |
+ | (manages both) |
+ +--------+----------+
+ |
+ SIGTERM/SIGHUP/SIGUSR1 (forwarded)
+ |
+ +----------------------+----------------------+
+ | |
+ +-----v-----+ +------v------+
+ | HTTP | | Dirty |
+ | Workers | | Arbiter |
+ +-----------+ +------+------+
+ | |
+ | Unix Socket IPC SIGTERM/SIGHUP
+ | /tmp/gunicorn_dirty_.sock |
+ +------------------>---------------------->---+
+ +-----------+-----------+
+ | | |
+ +-----v---+ +-----v---+ +-----v---+
+ | Dirty | | Dirty | | Dirty |
+ | Worker | | Worker | | Worker |
+ +---------+ +---------+ +---------+
+ ^ | ^ | ^ |
+ | | | | | |
+ Heartbeat (mtime every dirty_timeout/2)
+ | | | | | |
+ +---+--------+---+-------+---+
+ |
+ Workers load apps based on allocation
+ Worker 1: [MLApp, ImageApp, HeavyApp]
+ Worker 2: [MLApp, ImageApp, HeavyApp]
+ Worker 3: [MLApp, ImageApp] (HeavyApp workers=2)
+```
+
+### Process Relationships
+
+| Component | Parent | Communication |
+|-----------|--------|---------------|
+| Main Arbiter | init/systemd | Signals from OS |
+| HTTP Workers | Main Arbiter | Pipes, signals |
+| Dirty Arbiter | Main Arbiter | Signals, exit status |
+| Dirty Workers | Dirty Arbiter | Unix socket, signals, WorkerTmp |
+
+## Configuration
+
+Add these settings to your Gunicorn configuration file or command line:
+
+```python
+# gunicorn.conf.py
+dirty_apps = [
+ "myapp.ml:MLApp",
+ "myapp.images:ImageApp",
+]
+dirty_workers = 2 # Number of dirty workers
+dirty_timeout = 300 # Task timeout in seconds
+dirty_threads = 1 # Threads per worker
+dirty_graceful_timeout = 30 # Shutdown timeout
+```
+
+Or via command line:
+
+```bash
+gunicorn myapp:app \
+ --dirty-app myapp.ml:MLApp \
+ --dirty-app myapp.images:ImageApp \
+ --dirty-workers 2 \
+ --dirty-timeout 300
+```
+
+### Configuration Options
+
+| Setting | Default | Description |
+|---------|---------|-------------|
+| `dirty_apps` | `[]` | List of dirty app import paths |
+| `dirty_workers` | `0` | Number of dirty workers (0 = disabled) |
+| `dirty_timeout` | `300` | Task timeout in seconds |
+| `dirty_threads` | `1` | Threads per dirty worker |
+| `dirty_graceful_timeout` | `30` | Graceful shutdown timeout |
+
+## Per-App Worker Allocation
+
+By default, all dirty workers load all configured apps. For apps that consume significant memory (like large ML models), you can limit how many workers load a specific app.
+
+### Why Per-App Allocation?
+
+Consider a scenario with a 10GB ML model and 8 dirty workers:
+
+- **Default behavior**: 8 workers × 10GB = 80GB RAM
+- **With `workers=2`**: 2 workers × 10GB = 20GB RAM (75% savings)
+
+Requests for the limited app are routed only to workers that have it loaded.
+
+### Configuration Methods
+
+**Method 1: Class Attribute**
+
+Set the `workers` attribute on your DirtyApp class:
+
+```python
+from gunicorn.dirty import DirtyApp
+
+class HeavyModelApp(DirtyApp):
+ workers = 2 # Only 2 workers will load this app
+
+ def init(self):
+ self.model = load_10gb_model()
+
+ def predict(self, data):
+ return self.model.predict(data)
+
+ def close(self):
+ pass
+```
+
+**Method 2: Config Override**
+
+Use the `module:class:N` format in your config:
+
+```python
+# gunicorn.conf.py
+dirty_apps = [
+ "myapp.light:LightApp", # All workers (default)
+ "myapp.heavy:HeavyModelApp:2", # Only 2 workers
+ "myapp.single:SingletonApp:1", # Only 1 worker
+]
+dirty_workers = 4
+```
+
+Config overrides take precedence over class attributes.
+
+### Worker Distribution
+
+When workers spawn, apps are assigned based on their limits:
+
+```
+Example with dirty_workers=4:
+ - LightApp (workers=None): Loaded on workers 1, 2, 3, 4
+ - HeavyModelApp (workers=2): Loaded on workers 1, 2
+ - SingletonApp (workers=1): Loaded on worker 1
+
+Worker 1: [LightApp, HeavyModelApp, SingletonApp]
+Worker 2: [LightApp, HeavyModelApp]
+Worker 3: [LightApp]
+Worker 4: [LightApp]
+```
+
+### Request Routing
+
+Requests are automatically routed to workers that have the target app:
+
+```python
+client = get_dirty_client()
+
+# Goes to any of 4 workers (round-robin)
+client.execute("myapp.light:LightApp", "action")
+
+# Goes to worker 1 or 2 only (round-robin between those)
+client.execute("myapp.heavy:HeavyModelApp", "predict", data)
+
+# Always goes to worker 1
+client.execute("myapp.single:SingletonApp", "process")
+```
+
+### Error Handling
+
+If no workers have the requested app loaded, a `DirtyNoWorkersAvailableError` is raised:
+
+```python
+from gunicorn.dirty import get_dirty_client
+from gunicorn.dirty.errors import DirtyNoWorkersAvailableError
+
+def my_view(request):
+ client = get_dirty_client()
+ try:
+ result = client.execute("myapp.heavy:HeavyModelApp", "predict", data)
+ except DirtyNoWorkersAvailableError as e:
+ # All workers with this app are down or app not configured
+ return {"error": "Service temporarily unavailable", "app": e.app_path}
+```
+
+### Worker Crash Recovery
+
+When a worker crashes, its replacement gets the **same apps** as the dead worker:
+
+```
+Timeline:
+ t=0: Worker 1 crashes (had HeavyModelApp)
+ t=1: Arbiter detects crash, queues respawn
+ t=2: New Worker 5 spawns with same apps as Worker 1
+ t=3: HeavyModelApp still available on Worker 2 during gap
+```
+
+This ensures:
+
+- No memory redistribution on existing workers
+- Predictable replacement behavior
+- The heavy model is only loaded on the new worker
+
+### Best Practices
+
+1. **Set realistic limits** - Don't set `workers=1` unless truly necessary (single point of failure)
+2. **Monitor memory** - Track per-worker memory to tune allocation
+3. **Handle unavailability** - Catch `DirtyNoWorkersAvailableError` gracefully
+4. **Use class attributes for app-specific limits** - Makes the limit part of the app definition
+5. **Use config for deployment-specific overrides** - Different limits for dev vs prod
+
+## Creating a Dirty App
+
+Dirty apps inherit from `DirtyApp` and implement three methods:
+
+```python
+# myapp/dirty.py
+from gunicorn.dirty import DirtyApp
+
+class MLApp(DirtyApp):
+ """Dirty application for ML workloads."""
+
+ def __init__(self):
+ self.models = {}
+
+ def init(self):
+ """Called once at dirty worker startup."""
+ # Pre-load commonly used models
+ self.models['default'] = self._load_model('base-model')
+
+ def __call__(self, action, *args, **kwargs):
+ """Dispatch to action methods."""
+ method = getattr(self, action, None)
+ if method is None:
+ raise ValueError(f"Unknown action: {action}")
+ return method(*args, **kwargs)
+
+ def load_model(self, name):
+ """Load a model into memory."""
+ if name not in self.models:
+ self.models[name] = self._load_model(name)
+ return {"loaded": True, "name": name}
+
+ def inference(self, model_name, input_text):
+ """Run inference on loaded model."""
+ model = self.models.get(model_name)
+ if not model:
+ raise ValueError(f"Model not loaded: {model_name}")
+ return model.predict(input_text)
+
+ def _load_model(self, name):
+ import torch
+ model = torch.load(f"models/{name}.pt")
+ return model
+
+ def close(self):
+ """Cleanup on shutdown."""
+        self.models.clear()
+```
+
+### DirtyApp Interface
+
+| Method/Attribute | Description |
+|------------------|-------------|
+| `workers` | Class attribute. Number of workers to load this app (`None` = all workers). |
+| `init()` | Called once when dirty worker starts, after instantiation. Load resources here. |
+| `__call__(action, *args, **kwargs)` | Handle requests from HTTP workers. |
+| `close()` | Called when dirty worker shuts down. Cleanup resources. |
+
+### Initialization Sequence
+
+When a dirty worker starts, initialization happens in this order:
+
+1. **Fork** - Worker process is forked from dirty arbiter
+2. **`dirty_post_fork(arbiter, worker)`** - Hook called immediately after fork
+3. **App instantiation** - Each dirty app class is instantiated (`__init__`)
+4. **`app.init()`** - Called for each app after instantiation (load models, resources)
+5. **`dirty_worker_init(worker)`** - Hook called after ALL apps are initialized
+6. **Run loop** - Worker starts accepting requests from HTTP workers
+
+This means:
+
+- Use `__init__` for basic setup (initialize empty containers, store config)
+- Use `init()` for heavy loading (ML models, database connections, large files)
+- The `dirty_worker_init` hook fires only after all apps have completed their `init()` calls
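+
+A minimal sketch of this split, using illustrative names (`RecommenderApp`,
+`_load_model()`, and `recommend()` are examples, not part of Gunicorn):
+
+```python
+from gunicorn.dirty import DirtyApp
+
+class RecommenderApp(DirtyApp):
+    def __init__(self):
+        # Step 3: cheap setup only, e.g. empty containers and config values.
+        self.model = None
+
+    def init(self):
+        # Step 4: heavy loading, runs once per worker before any request.
+        self.model = self._load_model()  # e.g. torch.load(...)
+
+    def __call__(self, action, *args, **kwargs):
+        return getattr(self, action)(*args, **kwargs)
+
+    def recommend(self, user_id):
+        return self.model.predict(user_id)
+
+    def _load_model(self):
+        ...  # the expensive load goes here
+
+    def close(self):
+        self.model = None
+```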
+
+## Using from HTTP Workers
+
+### Sync Workers (sync, gthread)
+
+```python
+from gunicorn.dirty import get_dirty_client
+
+def my_view(request):
+ client = get_dirty_client()
+
+ # Load a model
+ client.execute("myapp.ml:MLApp", "load_model", "gpt-4")
+
+ # Run inference
+ result = client.execute(
+ "myapp.ml:MLApp",
+ "inference",
+ "gpt-4",
+ input_text=request.data
+ )
+ return result
+```
+
+### Async Workers (ASGI)
+
+```python
+from gunicorn.dirty import get_dirty_client_async
+
+async def my_view(request):
+ client = await get_dirty_client_async()
+
+ # Non-blocking execution
+ await client.execute_async("myapp.ml:MLApp", "load_model", "gpt-4")
+
+ result = await client.execute_async(
+ "myapp.ml:MLApp",
+ "inference",
+ "gpt-4",
+ input_text=request.data
+ )
+ return result
+```
+
+## Streaming
+
+Dirty Arbiters support streaming responses for use cases like LLM token generation, where data is produced incrementally. This enables real-time delivery of results without waiting for complete execution.
+
+### Streaming with Generators
+
+Any dirty app action that returns a generator (sync or async) automatically streams chunks to the client:
+
+```python
+# myapp/llm.py
+from gunicorn.dirty import DirtyApp
+
+class LLMApp(DirtyApp):
+ def init(self):
+ from transformers import pipeline
+ self.generator = pipeline("text-generation", model="gpt2")
+
+ def generate(self, prompt):
+ """Sync streaming - yields tokens."""
+ for token in self.generator(prompt, stream=True):
+ yield token["generated_text"]
+
+ async def generate_async(self, prompt):
+ """Async streaming - yields tokens."""
+ import openai
+ client = openai.AsyncOpenAI()
+ stream = await client.chat.completions.create(
+ model="gpt-4",
+ messages=[{"role": "user", "content": prompt}],
+ stream=True
+ )
+ async for chunk in stream:
+ if chunk.choices[0].delta.content:
+ yield chunk.choices[0].delta.content
+
+ def close(self):
+ pass
+```
+
+### Client Streaming API
+
+Use `stream()` for sync workers and `stream_async()` for async workers:
+
+**Sync Workers (sync, gthread):**
+
+```python
+from gunicorn.dirty import get_dirty_client
+
+def generate_view(request):
+ client = get_dirty_client()
+
+ def generate_response():
+ for chunk in client.stream("myapp.llm:LLMApp", "generate", request.prompt):
+ yield chunk
+
+ return StreamingResponse(generate_response())
+```
+
+**Async Workers (ASGI):**
+
+```python
+from gunicorn.dirty import get_dirty_client_async
+
+async def generate_view(request):
+ client = await get_dirty_client_async()
+
+ async def generate_response():
+ async for chunk in client.stream_async("myapp.llm:LLMApp", "generate", request.prompt):
+ yield chunk
+
+ return StreamingResponse(generate_response())
+```
+
+### Streaming Protocol
+
+Streaming uses a simple protocol with three message types:
+
+1. **Chunk** (`type: "chunk"`) - Contains partial data
+2. **End** (`type: "end"`) - Signals stream completion
+3. **Error** (`type: "error"`) - Signals error during streaming
+
+Example message flow:
+```
+Client -> Arbiter -> Worker: request
+Worker -> Arbiter -> Client: chunk (data: "Hello")
+Worker -> Arbiter -> Client: chunk (data: " ")
+Worker -> Arbiter -> Client: chunk (data: "World")
+Worker -> Arbiter -> Client: end
+```
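+
+As a rough illustration only, each hop carries small JSON payloads along these
+lines (the field names below are illustrative; the actual wire format is
+internal to `gunicorn.dirty` and may differ):
+
+```python
+chunk_msg = {"type": "chunk", "request_id": "req-42", "data": "Hello"}
+end_msg = {"type": "end", "request_id": "req-42"}
+error_msg = {"type": "error", "request_id": "req-42", "message": "bad prompt"}
+```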
+
+### Error Handling in Streams
+
+Errors during streaming are delivered as error messages:
+
+```python
+from gunicorn.dirty import get_dirty_client
+from gunicorn.dirty.errors import DirtyError
+
+def generate_view(request):
+    client = get_dirty_client()
+    prompt = request.prompt
+
+    try:
+        for chunk in client.stream("myapp.llm:LLMApp", "generate", prompt):
+            yield chunk
+    except DirtyError as e:
+        # Error occurred mid-stream
+        yield f"\n[Error: {e.message}]"
+```
+
+### Best Practices for Streaming
+
+1. **Use async generators for I/O-bound streaming** - e.g., API calls to external services
+2. **Use sync generators for CPU-bound streaming** - e.g., local model inference
+3. **Yield frequently** - Heartbeats are sent during streaming to keep workers alive
+4. **Keep chunks small** - Smaller chunks provide better perceived latency
+5. **Handle client disconnection** - Streams continue even if client disconnects; design accordingly
+
+### Flask Example
+
+```python
+from flask import Flask, Response, request
+from gunicorn.dirty import get_dirty_client
+
+app = Flask(__name__)
+
+@app.route("/chat", methods=["POST"])
+def chat():
+ prompt = request.json.get("prompt")
+ client = get_dirty_client()
+
+ def stream():
+ for token in client.stream("myapp.llm:LLMApp", "generate", prompt):
+ yield f"data: {token}\n\n"
+
+ return Response(stream(), content_type="text/event-stream")
+```
+
+### FastAPI Example
+
+```python
+from fastapi import FastAPI
+from fastapi.responses import StreamingResponse
+from gunicorn.dirty import get_dirty_client_async
+
+app = FastAPI()
+
+@app.post("/chat")
+async def chat(prompt: str):
+ client = await get_dirty_client_async()
+
+ async def stream():
+ async for token in client.stream_async("myapp.llm:LLMApp", "generate", prompt):
+ yield f"data: {token}\n\n"
+
+ return StreamingResponse(stream(), media_type="text/event-stream")
+```
+
+## Lifecycle Hooks
+
+Dirty Arbiters provide hooks for customization:
+
+```python
+# gunicorn.conf.py
+
+def on_dirty_starting(arbiter):
+ """Called just before the dirty arbiter starts."""
+ print("Dirty arbiter starting...")
+
+def dirty_post_fork(arbiter, worker):
+ """Called just after a dirty worker is forked."""
+ print(f"Dirty worker {worker.pid} forked")
+
+def dirty_worker_init(worker):
+ """Called after a dirty worker initializes all apps."""
+ print(f"Dirty worker {worker.pid} initialized")
+
+def dirty_worker_exit(arbiter, worker):
+ """Called when a dirty worker exits."""
+ print(f"Dirty worker {worker.pid} exiting")
+
+# Hook functions are picked up by name from this config module;
+# no extra assignment is required.
+```
+
+## Signal Handling
+
+Dirty Arbiters integrate with the main arbiter's signal handling. Signals are forwarded from the main arbiter to the dirty arbiter, which then propagates them to workers.
+
+### Signal Flow
+
+```
+ Main Arbiter Dirty Arbiter Dirty Workers
+ | | |
+ SIGTERM/SIGHUP/SIGUSR1 ------> signal_handler() |
+ | | |
+ | call_soon_threadsafe() |
+ | | |
+ | handle_signal() |
+ | | |
+ | +------> os.kill(worker, sig) |
+ | |
+```
+
+### Signal Reference
+
+| Signal | At Dirty Arbiter | At Dirty Workers | Notes |
+|--------|-----------------|------------------|-------|
+| `SIGTERM` | Sets `self.alive = False`, waits for graceful shutdown | Exits after completing current request | Graceful shutdown with timeout |
+| `SIGQUIT` | Immediate exit via `sys.exit(0)` | Killed immediately | Fast shutdown, no cleanup |
+| `SIGHUP` | Kills all workers, spawns new ones | Exits immediately | Hot reload of workers |
+| `SIGUSR1` | Reopens log files, forwards to workers | Reopens log files | Log rotation support |
+| `SIGCHLD` | Handled by event loop, triggers reap | N/A | Worker death detection |
+| `SIGINT` | Same as SIGTERM | Same as SIGTERM | Ctrl-C handling |
+
+### Forwarded Signals
+
+The main arbiter forwards these signals to the dirty arbiter process:
+
+- **SIGTERM** - Graceful shutdown of entire process tree
+- **SIGHUP** - Worker reload (main arbiter reloads HTTP workers, dirty arbiter reloads dirty workers)
+- **SIGUSR1** - Log rotation across all processes
+
+### Async Signal Handling
+
+The dirty arbiter uses asyncio's signal integration for safe handling in the event loop:
+
+```python
+# Signals are registered with the event loop
+loop.add_signal_handler(signal.SIGTERM, self.signal_handler, signal.SIGTERM)
+
+def signal_handler(self, sig):
+ # Use call_soon_threadsafe for thread-safe event loop integration
+ self.loop.call_soon_threadsafe(self.handle_signal, sig)
+```
+
+This pattern ensures signals don't interrupt asyncio operations mid-execution, preventing race conditions and partial state updates.
+
+## Liveness and Health Monitoring
+
+Dirty Arbiters implement multiple layers of health monitoring to ensure workers remain responsive and orphaned processes are cleaned up.
+
+### Heartbeat Mechanism
+
+Each dirty worker maintains a "worker tmp" file whose mtime serves as a heartbeat:
+
+```
+Worker Lifecycle:
+ 1. Worker spawns, creates WorkerTmp file
+ 2. Worker touches file every (dirty_timeout / 2) seconds
+ 3. Arbiter checks all worker mtimes every 1 second
+ 4. If mtime > dirty_timeout seconds old, worker is killed
+```
+
+This file-based heartbeat has several advantages:
+
+- **OS-level tracking** - No IPC required, works even if worker is stuck in C code
+- **Crash detection** - Arbiter notices immediately when worker stops updating
+- **Graceful recovery** - Worker killed with SIGKILL, arbiter spawns replacement
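+
+A rough sketch of the worker-side half of this loop (simplified and using
+illustrative names; the real implementation lives inside the dirty worker and
+uses Gunicorn's `WorkerTmp` helper):
+
+```python
+import os
+import time
+
+def heartbeat_loop(tmp_path, dirty_timeout, is_alive):
+    """Touch the worker tmp file so the arbiter sees a fresh mtime."""
+    interval = dirty_timeout / 2
+    while is_alive():
+        os.utime(tmp_path, None)  # updating the mtime is the heartbeat
+        time.sleep(interval)
+```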
+
+### Timeout Detection
+
+The arbiter's monitoring loop checks worker health every second:
+
+```python
+# Pseudocode for worker monitoring
+for worker in self.workers:
+ mtime = worker.tmp.last_update()
+ if time.time() - mtime > self.dirty_timeout:
+ log.warning(f"Worker {worker.pid} timed out, killing")
+ os.kill(worker.pid, signal.SIGKILL)
+```
+
+When a worker is killed:
+
+1. `SIGCHLD` is delivered to the arbiter
+2. Arbiter reaps the worker process
+3. `dirty_worker_exit` hook is called
+4. A new worker is spawned to maintain `dirty_workers` count
+
+### Parent Death Detection
+
+Dirty arbiters monitor their parent process (the main arbiter) to detect orphaning:
+
+```python
+# In the dirty arbiter's main loop
+if os.getppid() != self.parent_pid:
+ log.info("Parent died, shutting down")
+ self.alive = False
+```
+
+This check runs every iteration of the event loop (typically sub-millisecond). When parent death is detected:
+
+1. Arbiter sets `self.alive = False`
+2. All workers are sent SIGTERM
+3. Arbiter waits for graceful shutdown (up to `dirty_graceful_timeout`)
+4. Remaining workers are sent SIGKILL
+5. Arbiter exits
+
+### Orphan Cleanup
+
+To handle edge cases where the dirty arbiter itself crashes, a well-known PID file is used:
+
+**PID file location**: `/tmp/gunicorn_dirty_.pid`
+
+On startup, the dirty arbiter:
+
+1. Checks if PID file exists
+2. If yes, reads the old PID and attempts to kill it (`SIGTERM`)
+3. Waits briefly for cleanup
+4. Writes its own PID to the file
+5. On exit, removes the PID file
+
+This ensures that if a dirty arbiter crashes and the main arbiter restarts it, the old orphaned process is terminated.
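+
+A simplified sketch of that startup sequence (illustrative only; the actual
+logic is internal to the dirty arbiter):
+
+```python
+import os
+import signal
+import time
+
+def claim_pid_file(path):
+    """Kill a stale dirty arbiter, then record our own PID."""
+    if os.path.exists(path):
+        try:
+            old_pid = int(open(path).read().strip())
+            os.kill(old_pid, signal.SIGTERM)  # ask the orphan to exit
+            time.sleep(1)                     # brief grace period
+        except (ValueError, ProcessLookupError):
+            pass                              # empty or stale file
+    with open(path, "w") as f:
+        f.write(str(os.getpid()))
+```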
+
+### Respawn Behavior
+
+| Component | Respawn Trigger | Respawn Behavior |
+|-----------|-----------------|------------------|
+| Dirty Worker | Exit, timeout, or crash | Immediate respawn to maintain `dirty_workers` count |
+| Dirty Arbiter | Exit or crash | Main arbiter respawns if not shutting down |
+
+The dirty arbiter maintains a target worker count and continuously spawns workers until the target is reached:
+
+```python
+while len(self.workers) < self.num_workers:
+ self.spawn_worker()
+```
+
+### Monitoring Recommendations
+
+For production deployments, consider:
+
+1. **Log monitoring** - Watch for "Worker timed out" messages indicating hung workers
+2. **Process monitoring** - Use systemd or supervisord to monitor the main arbiter
+3. **Metrics** - Track respawn frequency to detect unstable workers
+
+```bash
+# Check for recent worker timeouts
+grep "Worker.*timed out" /var/log/gunicorn.log | tail -20
+
+# Monitor process tree
+watch -n 1 'pstree -p $(cat gunicorn.pid)'
+```
+
+## Error Handling
+
+The dirty client raises specific exceptions:
+
+```python
+from gunicorn.dirty.errors import (
+ DirtyError,
+ DirtyTimeoutError,
+ DirtyConnectionError,
+ DirtyAppError,
+ DirtyAppNotFoundError,
+ DirtyNoWorkersAvailableError,
+)
+
+try:
+ result = client.execute("myapp.ml:MLApp", "inference", "model", data)
+except DirtyTimeoutError:
+ # Operation timed out
+ pass
+except DirtyAppNotFoundError:
+ # App not loaded in dirty workers
+ pass
+except DirtyNoWorkersAvailableError as e:
+ # No workers have this app (all crashed or app limited to 0 workers)
+ print(f"No workers for app: {e.app_path}")
+except DirtyAppError as e:
+ # Error during app execution
+ print(f"App error: {e.message}, traceback: {e.traceback}")
+except DirtyConnectionError:
+ # Connection to dirty arbiter failed
+ pass
+```
+
+## Best Practices
+
+1. **Pre-load commonly used resources** in `init()` to avoid cold starts
+2. **Set appropriate timeouts** based on your workload
+3. **Handle errors gracefully** - dirty workers may restart
+4. **Use meaningful action names** for easier debugging
+5. **Keep responses JSON-serializable** - results are passed via IPC
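+
+For the last point, convert framework-specific objects to plain Python types
+before returning them. A small sketch of a dirty app method (assuming a
+NumPy-style result, which is not JSON-serializable as-is):
+
+```python
+def embed(self, text):
+    vector = self.model.encode(text)       # e.g. a numpy.ndarray
+    return {"embedding": vector.tolist()}  # plain lists/dicts travel over IPC
+```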
+
+## Monitoring
+
+Monitor dirty workers using standard process monitoring:
+
+```bash
+# Check dirty arbiter and workers
+ps aux | grep "dirty"
+
+# View logs
+tail -f gunicorn.log | grep dirty
+```
+
+## Example: Image Processing
+
+```python
+# myapp/images.py
+from gunicorn.dirty import DirtyApp
+from PIL import Image
+import io
+
+class ImageApp(DirtyApp):
+ def init(self):
+ # Pre-import heavy libraries
+ import cv2
+ self.cv2 = cv2
+
+ def resize(self, image_data, width, height):
+ """Resize an image."""
+ img = Image.open(io.BytesIO(image_data))
+ resized = img.resize((width, height))
+ buffer = io.BytesIO()
+ resized.save(buffer, format='PNG')
+ return buffer.getvalue()
+
+ def thumbnail(self, image_data, size=128):
+ """Create a thumbnail."""
+ img = Image.open(io.BytesIO(image_data))
+ img.thumbnail((size, size))
+ buffer = io.BytesIO()
+ img.save(buffer, format='JPEG')
+ return buffer.getvalue()
+
+ def close(self):
+ pass
+```
+
+Usage:
+
+```python
+from gunicorn.dirty import get_dirty_client
+
+def upload_image(request):
+ client = get_dirty_client()
+
+ # Create thumbnail in dirty worker
+ thumbnail = client.execute(
+ "myapp.images:ImageApp",
+ "thumbnail",
+ request.files['image'].read(),
+ size=256
+ )
+
+ return save_thumbnail(thumbnail)
+```
+
+## Complete Examples
+
+For full working examples with Docker deployment, see:
+
+- [Embedding Service Example](https://github.com/benoitc/gunicorn/tree/master/examples/embedding_service) - FastAPI-based text embedding API using sentence-transformers with dirty workers for ML model management.
+- [Streaming Chat Example](https://github.com/benoitc/gunicorn/tree/master/examples/streaming_chat) - Simulated LLM chat with token-by-token SSE streaming, demonstrating dirty worker generators and real-time response delivery.
diff --git a/docs/content/guides/http2.md b/docs/content/guides/http2.md
new file mode 100644
index 0000000000..2648017577
--- /dev/null
+++ b/docs/content/guides/http2.md
@@ -0,0 +1,848 @@
+# HTTP/2 Support
+
+!!! warning "Beta Feature"
+ HTTP/2 support is a beta feature introduced in Gunicorn 25.0.0. While it has been tested,
+ the API and behavior may change in future releases. Please report any issues on
+ [GitHub](https://github.com/benoitc/gunicorn/issues).
+
+Gunicorn supports HTTP/2 (RFC 7540) for improved performance with modern clients.
+HTTP/2 provides multiplexed streams, header compression, and other optimizations
+over HTTP/1.1.
+
+## Quick Start
+
+```bash
+# Install gunicorn with HTTP/2 support
+pip install gunicorn[http2]
+
+# Run with HTTP/2 enabled (requires SSL)
+gunicorn myapp:app \
+ --worker-class gthread \
+ --threads 4 \
+ --certfile server.crt \
+ --keyfile server.key \
+ --http-protocols h2,h1
+```
+
+## Requirements
+
+HTTP/2 support requires:
+
+- **SSL/TLS**: HTTP/2 uses ALPN (Application-Layer Protocol Negotiation) which
+ requires an encrypted connection
+- **h2 library**: Install with `pip install gunicorn[http2]` or `pip install h2`
+- **Compatible worker**: gthread, gevent, or ASGI workers
+
+## Configuration
+
+### Enable HTTP/2
+
+Enable HTTP/2 by setting the `--http-protocols` option:
+
+```bash
+gunicorn myapp:app --http-protocols h2,h1
+```
+
+Or in a configuration file:
+
+```python
+# gunicorn.conf.py
+http_protocols = ["h2", "h1"]
+```
+
+The order matters for ALPN negotiation - protocols are tried in order of preference.
+
+| Protocol | Description |
+|----------|-------------|
+| `h2` | HTTP/2 over TLS |
+| `h1` | HTTP/1.1 (fallback) |
+
+!!! note
+ Always include `h1` as a fallback for clients that don't support HTTP/2.
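+
+For context, the protocol list maps to ALPN identifiers offered during the TLS
+handshake. A rough standard-library sketch of that negotiation (simplified, not
+Gunicorn's actual listener code):
+
+```python
+import ssl
+
+ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ctx.load_cert_chain("server.crt", "server.key")
+# Order expresses preference: try HTTP/2 first, fall back to HTTP/1.1.
+ctx.set_alpn_protocols(["h2", "http/1.1"])
+
+# After wrapping a connection and completing the handshake:
+# ssl_sock.selected_alpn_protocol() -> "h2", "http/1.1", or None
+```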
+
+### SSL/TLS Configuration
+
+HTTP/2 requires SSL/TLS. Configure certificates:
+
+```bash
+gunicorn myapp:app \
+ --certfile /path/to/server.crt \
+ --keyfile /path/to/server.key \
+ --http-protocols h2,h1
+```
+
+Or in a configuration file:
+
+```python
+# gunicorn.conf.py
+certfile = "/path/to/server.crt"
+keyfile = "/path/to/server.key"
+http_protocols = ["h2", "h1"]
+```
+
+### HTTP/2 Settings
+
+Fine-tune HTTP/2 behavior with these settings:
+
+| Setting | Default | Description |
+|---------|---------|-------------|
+| `http2_max_concurrent_streams` | 100 | Maximum concurrent streams per connection |
+| `http2_initial_window_size` | 65535 | Initial flow control window size (bytes) |
+| `http2_max_frame_size` | 16384 | Maximum frame size (bytes) |
+| `http2_max_header_list_size` | 65536 | Maximum header list size (bytes) |
+
+Example configuration:
+
+```python
+# gunicorn.conf.py
+http_protocols = ["h2", "h1"]
+http2_max_concurrent_streams = 200
+http2_initial_window_size = 1048576 # 1MB
+```
+
+## Worker Compatibility
+
+Not all workers support HTTP/2:
+
+| Worker | HTTP/2 Support | Notes |
+|--------|----------------|-------|
+| `sync` | No | Single-threaded, cannot multiplex streams |
+| `gthread` | Yes | Recommended for HTTP/2 |
+| `gevent` | Yes | Requires gevent |
+| `eventlet` | Yes | **Deprecated** - will be removed in 26.0 |
+| `asgi` | Yes | For async frameworks |
+| `tornado` | No | Tornado handles its own protocol |
+
+If you use the sync or tornado worker with HTTP/2 enabled, Gunicorn will log a
+warning and fall back to HTTP/1.1.
+
+### Recommended: gthread Worker
+
+For HTTP/2, the gthread worker is recommended:
+
+```bash
+gunicorn myapp:app \
+ --worker-class gthread \
+ --threads 4 \
+ --workers 2 \
+ --http-protocols h2,h1 \
+ --certfile server.crt \
+ --keyfile server.key
+```
+
+## HTTP 103 Early Hints
+
+Gunicorn supports HTTP 103 Early Hints (RFC 8297), allowing servers to send
+resource hints before the final response. This enables browsers to preload
+CSS, JavaScript, and other assets in parallel.
+
+### WSGI Applications
+
+Use the `wsgi.early_hints` callback in your WSGI application:
+
+```python
+def app(environ, start_response):
+ # Send early hints if available
+ if 'wsgi.early_hints' in environ:
+ environ['wsgi.early_hints']([
+            ('Link', '</static/style.css>; rel=preload; as=style'),
+            ('Link', '</static/script.js>; rel=preload; as=script'),
+ ])
+
+ # Continue with the actual response
+ start_response('200 OK', [('Content-Type', 'text/html')])
+ return [b'...']
+```
+
+### ASGI Applications
+
+Use the `http.response.informational` message type:
+
+```python
+async def app(scope, receive, send):
+ # Send early hints
+ await send({
+ "type": "http.response.informational",
+ "status": 103,
+ "headers": [
+            (b"link", b"</static/style.css>; rel=preload; as=style"),
+            (b"link", b"</static/script.js>; rel=preload; as=script"),
+ ],
+ })
+
+ # Send the actual response
+ await send({
+ "type": "http.response.start",
+ "status": 200,
+ "headers": [(b"content-type", b"text/html")],
+ })
+ await send({
+ "type": "http.response.body",
+ "body": b"...",
+ })
+```
+
+!!! note
+    Early hints are only sent to HTTP/1.1 and HTTP/2 clients. For HTTP/1.0
+    clients the callback is silently a no-op, because HTTP/1.0 does not
+    support 1xx informational responses.
+
+## Stream Priority
+
+HTTP/2 allows clients to indicate the relative priority of streams using PRIORITY frames
+(RFC 7540 Section 5.3). Gunicorn tracks stream priorities and exposes them to both
+WSGI and ASGI applications.
+
+### Accessing Priority in WSGI
+
+Priority information is available in the WSGI environ for HTTP/2 requests:
+
+```python
+def app(environ, start_response):
+ # Access stream priority (HTTP/2 only)
+ weight = environ.get('gunicorn.http2.priority_weight')
+ depends_on = environ.get('gunicorn.http2.priority_depends_on')
+
+ if weight is not None:
+ # This is an HTTP/2 request with priority info
+ # Higher weight = client considers this more important
+ print(f"Request priority: weight={weight}, depends_on={depends_on}")
+
+ start_response('200 OK', [('Content-Type', 'text/plain')])
+ return [b'OK']
+```
+
+| Environ Key | Range | Default | Description |
+|-------------|-------|---------|-------------|
+| `gunicorn.http2.priority_weight` | 1-256 | 16 | Higher weight = more resources |
+| `gunicorn.http2.priority_depends_on` | Stream ID | 0 | Parent stream (0 = root) |
+
+### Accessing Priority in ASGI
+
+For ASGI applications, priority is available in the scope's `extensions` dict:
+
+```python
+async def app(scope, receive, send):
+ if scope["type"] == "http":
+ # Check for HTTP/2 priority extension
+ extensions = scope.get("extensions", {})
+ priority = extensions.get("http.response.priority")
+
+ if priority:
+ weight = priority["weight"] # 1-256
+ depends_on = priority["depends_on"] # Parent stream ID
+ print(f"Request priority: weight={weight}, depends_on={depends_on}")
+
+ await send({
+ "type": "http.response.start",
+ "status": 200,
+ "headers": [(b"content-type", b"text/plain")],
+ })
+ await send({
+ "type": "http.response.body",
+ "body": b"OK",
+ })
+```
+
+| Extension Key | Field | Range | Default | Description |
+|---------------|-------|-------|---------|-------------|
+| `http.response.priority` | `weight` | 1-256 | 16 | Higher weight = more resources |
+| `http.response.priority` | `depends_on` | Stream ID | 0 | Parent stream (0 = root) |
+
+!!! note
+ Stream priority is advisory. Applications can use it for scheduling decisions,
+ but Gunicorn does not enforce priority-based request ordering. Priority
+ information is only present for HTTP/2 requests.
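+
+As an illustration of such a scheduling decision (using only the environ keys
+documented above; the payload sizes are made up), a WSGI app might trim its
+response for streams the client marked as low priority:
+
+```python
+def app(environ, start_response):
+    # Default weight per RFC 7540 is 16; the key is absent for HTTP/1.x,
+    # so fall back to the default there as well.
+    weight = environ.get('gunicorn.http2.priority_weight', 16)
+
+    if weight < 8:
+        body = b'{"items": 10}'    # low-priority stream: smaller payload
+    else:
+        body = b'{"items": 100}'   # normal or high priority: full payload
+
+    start_response('200 OK', [('Content-Type', 'application/json')])
+    return [body]
+```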
+
+## Response Trailers
+
+HTTP/2 supports trailing headers (trailers) sent after the response body.
+This is commonly used for gRPC status codes, checksums, and timing information.
+
+### WSGI Applications
+
+For WSGI applications, use the `gunicorn.http2.send_trailers` callback in the environ:
+
+```python
+def app(environ, start_response):
+ # Get trailer callback (HTTP/2 only)
+ send_trailers = environ.get('gunicorn.http2.send_trailers')
+
+ # Announce trailers in response headers
+ headers = [
+ ('Content-Type', 'application/grpc'),
+ ('Trailer', 'grpc-status, grpc-message'),
+ ]
+ start_response('200 OK', headers)
+
+ # Yield response body
+ yield b'response data'
+
+ # Send trailers after body (if available)
+ if send_trailers:
+ send_trailers([
+ ('grpc-status', '0'),
+ ('grpc-message', 'OK'),
+ ])
+```
+
+### ASGI Applications
+
+For ASGI applications, use the `http.response.trailers` extension:
+
+```python
+async def app(scope, receive, send):
+ # Send response with trailers flag
+ await send({
+ "type": "http.response.start",
+ "status": 200,
+ "headers": [
+ (b"content-type", b"application/grpc"),
+ (b"trailer", b"grpc-status, grpc-message"),
+ ],
+ })
+
+ # Send body
+ await send({
+ "type": "http.response.body",
+ "body": b"response data",
+ "more_body": False,
+ })
+
+ # Send trailers (HTTP/2 only)
+ if "http.response.trailers" in scope.get("extensions", {}):
+ await send({
+ "type": "http.response.trailers",
+ "headers": [
+ (b"grpc-status", b"0"),
+ (b"grpc-message", b"OK"),
+ ],
+ })
+```
+
+### Trailer Rules (RFC 7540)
+
+- Trailers MUST NOT include pseudo-headers (`:status`, `:path`, etc.)
+- Announce trailers using the `Trailer` response header
+- Trailers are only delivered over HTTP/2; trailers via HTTP/1.1 chunked encoding are not supported
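+
+A small helper can enforce the first rule above before trailers are handed to
+the send callback. This is a sketch of application-side validation, not part
+of Gunicorn's API:
+
+```python
+def safe_trailers(trailers):
+    """Drop pseudo-headers, which RFC 7540 forbids in trailer blocks."""
+    return [(name, value) for name, value in trailers
+            if not name.startswith(':')]
+
+# The ':status' entry is removed; only the gRPC trailer survives.
+assert safe_trailers([(':status', '200'), ('grpc-status', '0')]) == [('grpc-status', '0')]
+```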
+
+### Common Use Cases
+
+| Use Case | Trailer Headers |
+|----------|-----------------|
+| gRPC | `grpc-status`, `grpc-message` |
+| Checksums | `Content-MD5`, `Digest` |
+| Timing | `Server-Timing` |
+| Signatures | `Signature` |
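+
+As a sketch of the timing use case, the WSGI callback shown earlier can carry
+a `Server-Timing` trailer measured while the body is produced (the metric
+name `app` is arbitrary):
+
+```python
+import time
+
+def app(environ, start_response):
+    send_trailers = environ.get('gunicorn.http2.send_trailers')
+    started = time.perf_counter()
+
+    start_response('200 OK', [
+        ('Content-Type', 'text/plain'),
+        ('Trailer', 'Server-Timing'),
+    ])
+    yield b'response data'
+
+    # Report how long the app spent producing the body (HTTP/2 only).
+    if send_trailers:
+        elapsed_ms = (time.perf_counter() - started) * 1000
+        send_trailers([('Server-Timing', f'app;dur={elapsed_ms:.1f}')])
+```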
+
+## Production Deployment
+
+### With Nginx
+
+Configure nginx to proxy HTTP/2 connections to Gunicorn:
+
+```nginx
+upstream gunicorn {
+ server 127.0.0.1:8443;
+ keepalive 32;
+}
+
+server {
+ listen 443 ssl;
+ http2 on;
+ server_name example.com;
+
+ ssl_certificate /path/to/server.crt;
+ ssl_certificate_key /path/to/server.key;
+ ssl_protocols TLSv1.2 TLSv1.3;
+
+ # Forward 103 Early Hints (requires nginx 1.29+)
+ location / {
+ proxy_pass https://gunicorn;
+ proxy_http_version 1.1;
+ proxy_ssl_verify off;
+
+ early_hints $http2;
+
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+ }
+}
+```
+
+!!! note
+ For nginx to forward 103 Early Hints from upstream, you need nginx 1.29+
+ and the [`early_hints`](https://nginx.org/en/docs/http/ngx_http_core_module.html#early_hints) directive.
+
+### Direct TLS Termination
+
+For simpler deployments, Gunicorn can terminate TLS directly:
+
+```python
+# gunicorn.conf.py
+bind = "0.0.0.0:443"
+worker_class = "gthread"
+threads = 4
+workers = 4
+
+# SSL
+certfile = "/etc/letsencrypt/live/example.com/fullchain.pem"
+keyfile = "/etc/letsencrypt/live/example.com/privkey.pem"
+
+# HTTP/2
+http_protocols = ["h2", "h1"]
+http2_max_concurrent_streams = 100
+```
+
+### Recommended Settings
+
+For production HTTP/2 deployments:
+
+```python
+# gunicorn.conf.py
+worker_class = "gthread"
+workers = 4
+threads = 4
+keepalive = 120 # HTTP/2 connections are long-lived
+
+# SSL/TLS
+certfile = "/path/to/server.crt"
+keyfile = "/path/to/server.key"
+ssl_version = "TLSv1_2" # Minimum TLS 1.2 for HTTP/2
+
+# HTTP/2
+http_protocols = ["h2", "h1"]
+http2_max_concurrent_streams = 100
+http2_initial_window_size = 65535
+```
+
+## Troubleshooting
+
+### HTTP/2 not negotiated
+
+If clients fall back to HTTP/1.1:
+
+1. Verify SSL is configured correctly
+2. Check that `h2` is in `--http-protocols`
+3. Ensure the h2 library is installed: `pip install h2`
+4. Verify ALPN support: `openssl s_client -alpn h2 -connect host:port`
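+
+The ALPN check in step 4 can also be done from Python's standard library. A
+minimal sketch, assuming the server listens on `localhost:8443` with a
+self-signed certificate:
+
+```python
+import socket
+import ssl
+
+ctx = ssl.create_default_context()
+ctx.check_hostname = False
+ctx.verify_mode = ssl.CERT_NONE      # accept self-signed certs while testing
+ctx.set_alpn_protocols(["h2", "http/1.1"])
+
+with socket.create_connection(("localhost", 8443)) as sock:
+    with ctx.wrap_socket(sock, server_hostname="localhost") as tls:
+        # "h2" means HTTP/2 was negotiated; "http/1.1" or None means fallback
+        print("negotiated:", tls.selected_alpn_protocol())
+```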
+
+### Worker doesn't support HTTP/2
+
+If you see "HTTP/2 is not supported by the sync worker":
+
+```bash
+# Switch to gthread worker
+gunicorn myapp:app --worker-class gthread --threads 4
+```
+
+### Connection errors with large requests
+
+Increase flow control window sizes:
+
+```python
+http2_initial_window_size = 1048576 # 1MB
+http2_max_frame_size = 32768 # 32KB
+```
+
+### Too many concurrent streams
+
+If clients report stream limit errors:
+
+```python
+http2_max_concurrent_streams = 200 # Increase from default 100
+```
+
+## Performance Tuning
+
+HTTP/2 performance depends on proper tuning of both Gunicorn and system settings.
+This section covers different tuning profiles and their trade-offs.
+
+### Tuning Profiles
+
+#### Conservative (Default)
+
+Best for: Low to moderate traffic, memory-constrained environments.
+
+```python
+# gunicorn.conf.py - Conservative profile
+workers = 2
+worker_class = "gthread"
+threads = 4
+
+http2_max_concurrent_streams = 100
+http2_initial_window_size = 65535 # 64KB
+http2_max_frame_size = 16384 # 16KB
+```
+
+| Pros | Cons |
+|------|------|
+| Low memory footprint | Limited throughput at high concurrency |
+| Safe defaults per RFC | More round-trips for large transfers |
+| Works on constrained systems | May bottleneck at ~10K req/s |
+
+#### Balanced
+
+Best for: Moderate traffic, general production use.
+
+```python
+# gunicorn.conf.py - Balanced profile
+workers = 4
+worker_class = "gthread"
+threads = 4
+backlog = 2048
+
+http2_max_concurrent_streams = 128
+http2_initial_window_size = 262144 # 256KB
+http2_max_frame_size = 16384 # 16KB
+```
+
+| Pros | Cons |
+|------|------|
+| Good throughput (15K+ req/s) | More memory per connection |
+| Handles traffic spikes | Requires more CPU |
+| Good balance of resources | |
+
+#### High Concurrency
+
+Best for: High traffic APIs, microservices, load testing.
+
+```python
+# gunicorn.conf.py - High concurrency profile
+workers = 4
+worker_class = "gthread"
+threads = 8
+backlog = 2048
+worker_connections = 10000
+
+http2_max_concurrent_streams = 256
+http2_initial_window_size = 1048576 # 1MB
+http2_max_frame_size = 32768 # 32KB
+```
+
+| Pros | Cons |
+|------|------|
+| High throughput (20K+ req/s) | Higher memory usage (~4x conservative) |
+| Handles 1000s of clients | Requires system tuning |
+| Better large transfer performance | May overwhelm downstream services |
+
+### Setting Trade-offs
+
+#### `http2_max_concurrent_streams`
+
+Controls how many simultaneous streams a client can open per connection.
+
+| Value | Memory | Throughput | Use Case |
+|-------|--------|------------|----------|
+| 50-100 | Low | Moderate | APIs with small payloads |
+| 128-256 | Medium | High | General web applications |
+| 500+ | High | Very High | Streaming, real-time apps |
+
+!!! warning
+ Very high values (500+) can lead to resource exhaustion under attack.
+ Use with rate limiting.
+
+#### `http2_initial_window_size`
+
+Flow control window size determines how much data can be sent before waiting for acknowledgment.
+
+| Value | Memory | Latency | Use Case |
+|-------|--------|---------|----------|
+| 65535 (64KB) | Low | Higher for large transfers | Default, memory-constrained |
+| 262144 (256KB) | Medium | Balanced | General use |
+| 1048576 (1MB) | High | Lower for large transfers | Large file transfers, streaming |
+
+!!! note
+ Larger windows improve throughput for large responses but increase memory
+ usage per stream. Calculate: `max_streams × window_size × connections`.
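+
+A back-of-the-envelope worked example for the balanced profile (128 streams,
+256 KB windows, 1,000 connections); the result is a worst-case upper bound,
+not a measurement:
+
+```python
+max_streams = 128
+window_size = 256 * 1024   # 262144 bytes
+connections = 1_000
+
+worst_case = max_streams * window_size * connections
+print(f"worst-case buffered data: {worst_case / 2**30:.1f} GiB")
+# Roughly 31 GiB, and only if every stream's window were fully in use at once.
+```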
+
+#### `http2_max_frame_size`
+
+Maximum size of individual HTTP/2 frames.
+
+| Value | Memory | Efficiency | Use Case |
+|-------|--------|------------|----------|
+| 16384 (16KB) | Low | More frames for large data | Default, RFC minimum |
+| 32768 (32KB) | Medium | Balanced | General use |
+| 65536 (64KB) | Higher | Fewer frames | Large payloads |
+
+### System Tuning (Linux)
+
+For high concurrency (1000+ clients), tune these kernel parameters:
+
+```bash
+# /etc/sysctl.conf or /etc/sysctl.d/99-gunicorn.conf
+
+# Increase socket backlog for burst connections
+net.core.somaxconn = 65535
+net.ipv4.tcp_max_syn_backlog = 65535
+
+# Increase network queue size
+net.core.netdev_max_backlog = 65535
+
+# Expand ephemeral port range
+net.ipv4.ip_local_port_range = 1024 65535
+
+# Allow reuse of TIME_WAIT sockets
+net.ipv4.tcp_tw_reuse = 1
+
+# Increase max open files system-wide
+fs.file-max = 2097152
+```
+
+Apply with: `sudo sysctl -p`
+
+Also increase file descriptor limits:
+
+```bash
+# /etc/security/limits.conf
+* soft nofile 65535
+* hard nofile 65535
+```
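+
+To confirm the limit actually applies to the Gunicorn process (systemd units
+and containers can override it), check from inside a worker with the standard
+library:
+
+```python
+import resource
+
+soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
+print(f"open-file limit: soft={soft}, hard={hard}")
+# Each HTTP/2 connection holds at least one descriptor, so the soft limit
+# caps how many clients a single worker can keep open concurrently.
+```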
+
+### Docker Tuning
+
+For Docker deployments, add these to your container or compose file:
+
+```yaml
+# docker-compose.yml
+services:
+ gunicorn:
+ ulimits:
+ nofile:
+ soft: 65535
+ hard: 65535
+ sysctls:
+ net.core.somaxconn: 65535
+```
+
+A `RUN ulimit -n 65535` line in a Dockerfile only affects that single build
+step, not the running container. To raise the limit when starting the
+container directly, pass it to `docker run`:
+
+```bash
+# Raise the open-file limit for the container at run time
+docker run --ulimit nofile=65535:65535 <your-image>
+```
+
+### Benchmark Results
+
+Reference benchmarks using h2load with 4 Gunicorn workers in Docker (Apple M4 Pro):
+
+| Profile | Clients | Streams | Requests/sec | Latency (mean) |
+|---------|---------|---------|--------------|----------------|
+| Conservative | 100 | 10 | 11,700 | 69ms |
+| Conservative | 1000 | 10 | 12,750 | 441ms |
+| High Concurrency | 100 | 10 | 15,000+ | 50ms |
+| High Concurrency | 1000 | 10 | 21,700 | 253ms |
+| High Concurrency | 2000 | 10 | 12,300 | 243ms |
+
+!!! note
+ Actual performance varies based on hardware, network, and application complexity.
+ Always benchmark your specific workload.
+
+## Testing HTTP/2
+
+### Using curl
+
+```bash
+# Check HTTP/2 support (-k accepts the self-signed test certificate)
+curl -vk --http2 https://localhost:8443/
+
+# Force HTTP/2
+curl -k --http2-prior-knowledge https://localhost:8443/
+```
+
+### Using Python
+
+```python
+import httpx
+
+with httpx.Client(http2=True, verify=False) as client:
+ response = client.get("https://localhost:8443/")
+ print(f"HTTP Version: {response.http_version}")
+```
+
+## Complete Example
+
+A complete HTTP/2 example demonstrating priority and trailers is available in the
+`examples/http2_features/` directory. This includes:
+
+- **http2_app.py**: ASGI application showing priority access and trailer sending
+- **test_http2.py**: Test script verifying HTTP/2 features
+- **Dockerfile** and **docker-compose.yml**: Docker setup for testing
+
+To run the example:
+
+```bash
+cd examples/http2_features
+docker compose up --build
+
+# In another terminal:
+docker compose exec http2-features python /app/http2_features/test_http2.py
+```
+
+The example demonstrates:
+
+1. **Priority access**: Reading `http.response.priority` extension in ASGI scope
+2. **Response trailers**: Sending `http.response.trailers` messages
+3. **Combined features**: Using both priority and trailers in one response
+
+## RFC Compliance
+
+Gunicorn's HTTP/2 implementation is built on the [h2 library](https://github.com/python-hyper/h2)
+and complies with the following specifications:
+
+| Feature | RFC | Status | Notes |
+|---------|-----|--------|-------|
+| HTTP/2 Protocol | [RFC 7540](https://tools.ietf.org/html/rfc7540) | Compliant | Core protocol support |
+| HTTP/2 Semantics | [RFC 9113](https://tools.ietf.org/html/rfc9113) | Compliant | Updated HTTP/2 spec |
+| HPACK Compression | [RFC 7541](https://tools.ietf.org/html/rfc7541) | Compliant | Via h2 library |
+| Stream State Machine | RFC 7540 Section 5.1 | Compliant | Full state transitions |
+| Flow Control | RFC 7540 Section 6.9 | Compliant | Stream and connection level |
+| Stream Priority | RFC 7540 Section 5.3 | Compliant | Weight and dependency tracking |
+| Frame Size Limits | RFC 7540 Section 6.2 | Compliant | Validated 16384-16777215 bytes |
+| Pseudo-Headers | RFC 9113 Section 8.3 | Compliant | All required headers supported |
+| `:authority` Handling | RFC 9113 Section 8.3.1 | Compliant | Takes precedence over Host |
+| Response Trailers | RFC 9110 Section 6.5 | Compliant | Pseudo-headers forbidden |
+| GOAWAY Handling | RFC 7540 Section 6.8 | Compliant | Graceful shutdown |
+| RST_STREAM Handling | RFC 7540 Section 6.4 | Compliant | Stream reset |
+| Early Hints | [RFC 8297](https://tools.ietf.org/html/rfc8297) | Compliant | 103 informational responses |
+| Server Push | RFC 7540 Section 6.6 | Not Implemented | Optional feature, rarely used |
+
+!!! note
+ Server Push (PUSH_PROMISE) is not implemented. This is an optional HTTP/2 feature that is
+ being deprecated in HTTP/3 and is rarely used in practice.
+
+## Security Considerations
+
+HTTP/2 introduces new attack vectors compared to HTTP/1.1. Gunicorn includes several
+protections against known vulnerabilities.
+
+### Built-in Protections
+
+| Attack | Protection | Setting |
+|--------|------------|---------|
+| Stream Multiplexing Abuse | Limit concurrent streams | `http2_max_concurrent_streams` (default: 100) |
+| HPACK Bomb | Header size limits | `http2_max_header_list_size` (default: 65536) |
+| Large Frame Attack | Frame size limits | `http2_max_frame_size` (validated: 16384-16777215) |
+| Resource Exhaustion | Flow control windows | `http2_initial_window_size` (default: 65535) |
+| Slow Read (Slowloris) | Connection timeouts | `timeout` and `keepalive` settings |
+
+### Recommended Security Settings
+
+```python
+# gunicorn.conf.py - Security-hardened HTTP/2 configuration
+
+# Limit concurrent streams to prevent resource exhaustion
+http2_max_concurrent_streams = 100
+
+# Limit header size to prevent HPACK bomb attacks
+http2_max_header_list_size = 65536 # 64KB
+
+# Standard frame size (RFC minimum)
+http2_max_frame_size = 16384
+
+# Reasonable flow control window
+http2_initial_window_size = 65535 # 64KB
+
+# Connection timeouts to prevent slow attacks
+timeout = 30
+keepalive = 120
+graceful_timeout = 30
+
+# Limit request sizes
+limit_request_line = 4094
+limit_request_fields = 100
+limit_request_field_size = 8190
+```
+
+### Additional Recommendations
+
+1. **Use a reverse proxy**: Deploy behind nginx, HAProxy, or a cloud load balancer
+ for additional DDoS protection and rate limiting.
+
+2. **Enable rate limiting**: Use your reverse proxy to limit requests per client.
+
+3. **Monitor connections**: Watch for clients opening many streams or holding
+ connections open without sending data.
+
+4. **Keep dependencies updated**: Regularly update the `h2` library for security fixes.
+
+For more information on HTTP/2 security vulnerabilities, see:
+
+- [Imperva HTTP/2 Vulnerability Report](https://www.imperva.com/docs/Imperva_HII_HTTP2.pdf)
+- [NGINX HTTP/2 Security Advisory](https://www.nginx.com/blog/the-imperva-http2-vulnerability-report-and-nginx/)
+
+## Compliance Testing
+
+### h2spec
+
+[h2spec](https://github.com/summerwind/h2spec) is the standard conformance testing tool
+for HTTP/2 implementations. It tests compliance with RFC 7540 and RFC 7541.
+
+```bash
+# Install h2spec
+# macOS
+brew install h2spec
+
+# Linux (download from releases)
+curl -L https://github.com/summerwind/h2spec/releases/download/v2.6.0/h2spec_linux_amd64.tar.gz | tar xz
+
+# Run against your server
+h2spec -h localhost -p 8443 -t -k
+
+# Options:
+# -t Use TLS
+# -k Skip certificate verification
+# -S Strict mode (test SHOULD requirements)
+# -v Verbose output
+# -j Generate JUnit report
+```
+
+
+Example output:
+
+```
+Generic tests for HTTP/2 server
+ 1. Starting HTTP/2
+ ✓ Sends a client connection preface
+ ...
+
+Hypertext Transfer Protocol Version 2 (HTTP/2)
+ 3. Starting HTTP/2
+ 3.5. HTTP/2 Connection Preface
+ ✓ Sends invalid connection preface
+ ...
+
+94 tests, 94 passed, 0 skipped, 0 failed
+```
+
+### nghttp2 Tools
+
+[nghttp2](https://nghttp2.org/) provides useful debugging tools:
+
+```bash
+# Install nghttp2
+# macOS
+brew install nghttp2
+
+# Linux
+apt-get install nghttp2-client
+
+# Test HTTP/2 connection
+nghttp -v https://localhost:8443/
+
+# Benchmark with h2load
+h2load -n 1000 -c 10 https://localhost:8443/
+```
+
+### Online Testing
+
+For public servers, you can use online tools:
+
+- [KeyCDN HTTP/2 Test](https://tools.keycdn.com/http2-test)
+- [HTTP/2 Check](https://http.dev/2/test)
+
+## See Also
+
+- [Settings Reference](../reference/settings.md#http2_max_concurrent_streams) - All HTTP/2 settings
+- [ASGI Worker](../asgi.md) - ASGI worker with HTTP/2 support
+- [Deploy](../deploy.md) - General deployment guidance
diff --git a/docs/content/index.md b/docs/content/index.md
index 7a9136b599..353a6b128e 100644
--- a/docs/content/index.md
+++ b/docs/content/index.md
@@ -114,6 +114,18 @@ title: Gunicorn - Python WSGI HTTP Server
+
+