77from pathlib import Path
88
99import pytest
10+ import requests
1011from testcontainers .keycloak import KeycloakContainer
1112from testcontainers .minio import MinioContainer
1213from testcontainers .mysql import MySqlContainer
@@ -45,18 +46,39 @@ def _write_keycloak_state(state):
4546
4647def _acquire_keycloak_lock ():
4748 """Acquire file-based lock for Keycloak coordination across workers"""
48- max_wait = 120 # 2 minutes max wait
49+ max_wait = 30 # Reduced to 30 seconds to avoid hanging
4950 wait_time = 0
5051 while wait_time < max_wait :
5152 try :
5253 # Try to create lock file exclusively
5354 with open (KEYCLOAK_LOCK_FILE , "x" ) as f :
5455 f .write (str (os .getpid ()))
56+ logger .info (f"Acquired Keycloak lock (PID: { os .getpid ()} )" )
5557 return True
5658 except FileExistsError :
57- # Another worker has the lock, wait
58- time .sleep (1 )
59- wait_time += 1
59+ # Check if lock is stale (previous process died)
60+ try :
61+ with open (KEYCLOAK_LOCK_FILE , "r" ) as f :
62+ lock_pid = int (f .read ().strip ())
63+ # Check if the process is still running
64+ try :
65+ os .kill (lock_pid , 0 ) # Signal 0 just checks if process exists
66+ except (OSError , ProcessLookupError ):
67+ # Process is dead, remove stale lock
68+ logger .warning (
69+ f"Removing stale Keycloak lock (dead PID: { lock_pid } )"
70+ )
71+ KEYCLOAK_LOCK_FILE .unlink ()
72+ continue
73+ except (ValueError , FileNotFoundError ):
74+ # Lock file is corrupt or missing, try again
75+ continue
76+
77+ # Another worker has the lock, wait briefly
78+ time .sleep (0.5 )
79+ wait_time += 0.5
80+
81+ logger .error (f"Failed to acquire Keycloak lock after { max_wait } s" )
6082 return False
6183
6284
@@ -94,31 +116,95 @@ def _cleanup_keycloak():
94116atexit .register (_cleanup_keycloak )
95117
96118
119+ def _cleanup_stale_keycloak_state ():
120+ """Clean up any stale Keycloak state files from previous runs"""
121+ try :
122+ if KEYCLOAK_STATE_FILE .exists ():
123+ # Check if the state file references a dead process
124+ with open (KEYCLOAK_STATE_FILE , "r" ) as f :
125+ state = json .load (f )
126+
127+ worker_pid = state .get ("worker_pid" )
128+ if worker_pid :
129+ try :
130+ os .kill (worker_pid , 0 ) # Check if process exists
131+ logger .info (
132+ f"Keycloak state file exists with active worker PID { worker_pid } "
133+ )
134+ except (OSError , ProcessLookupError ):
135+ logger .warning (
136+ f"Removing stale Keycloak state file (dead worker PID: { worker_pid } )"
137+ )
138+ KEYCLOAK_STATE_FILE .unlink ()
139+ else :
140+ logger .warning ("Removing Keycloak state file with no worker PID" )
141+ KEYCLOAK_STATE_FILE .unlink ()
142+
143+ if KEYCLOAK_LOCK_FILE .exists ():
144+ logger .warning ("Removing stale Keycloak lock file from previous run" )
145+ KEYCLOAK_LOCK_FILE .unlink ()
146+ except Exception as e :
147+ logger .warning (f"Error cleaning up stale Keycloak state: { e } " )
148+
149+
97150@pytest .fixture (scope = "session" )
98151def start_keycloak_server ():
99152 """Single Keycloak instance shared across ALL pytest workers"""
100153 global _global_keycloak_container
101154
155+ # Clean up any stale state from previous runs
156+ _cleanup_stale_keycloak_state ()
157+
158+ logger .info (f"Worker PID { os .getpid ()} requesting Keycloak instance" )
159+
102160 # First check if Keycloak is already running (without lock)
103161 state = _read_keycloak_state ()
104162 if state .get ("keycloak_started" ) and state .get ("keycloak_url" ):
105163 logger .info (f"Reusing existing Keycloak instance: { state ['keycloak_url' ]} " )
106- return state ["keycloak_url" ]
164+ # Verify the container is actually reachable
165+ try :
166+ response = requests .get (f"{ state ['keycloak_url' ]} /health/ready" , timeout = 5 )
167+ if response .status_code == 200 :
168+ return state ["keycloak_url" ]
169+ else :
170+ logger .warning ("Keycloak health check failed, will restart" )
171+ except Exception :
172+ logger .warning ("Cannot reach existing Keycloak, will restart" )
107173
108174 # Need to start Keycloak - acquire lock for coordination
109- logger .info ("Attempting to acquire Keycloak startup lock..." )
110- if not _acquire_keycloak_lock ():
111- logger .error ("Failed to acquire Keycloak lock within timeout" )
112- raise RuntimeError ("Could not acquire Keycloak startup lock" )
113-
175+ logger .info (
176+ f"Worker PID { os .getpid ()} attempting to acquire Keycloak startup lock..."
177+ )
178+ lock_acquired = False
114179 try :
180+ if not _acquire_keycloak_lock ():
181+ logger .error ("Failed to acquire Keycloak lock, will try fallback" )
182+ # Fallback: wait a bit and check if another worker succeeded
183+ time .sleep (10 )
184+ state = _read_keycloak_state ()
185+ if state .get ("keycloak_started" ) and state .get ("keycloak_url" ):
186+ logger .info (
187+ f"Fallback: Using Keycloak started by another worker: { state ['keycloak_url' ]} "
188+ )
189+ return state ["keycloak_url" ]
190+ else :
191+ raise RuntimeError (
192+ "Could not acquire Keycloak startup lock and no fallback available"
193+ )
194+
195+ lock_acquired = True
196+
115197 # Double-check state after acquiring lock
116198 state = _read_keycloak_state ()
117199 if state .get ("keycloak_started" ) and state .get ("keycloak_url" ):
118- logger .info (f"Another worker started Keycloak: { state ['keycloak_url' ]} " )
200+ logger .info (
201+ f"Another worker started Keycloak while we waited: { state ['keycloak_url' ]} "
202+ )
119203 return state ["keycloak_url" ]
120204
121- logger .info ("Starting single Keycloak instance for all pytest workers" )
205+ logger .info (
206+ f"Worker PID { os .getpid ()} starting single Keycloak instance for all pytest workers"
207+ )
122208
123209 # Create and start the container manually (no context manager)
124210 _global_keycloak_container = KeycloakContainer (
@@ -131,13 +217,18 @@ def start_keycloak_server():
131217 )
132218
133219 # Start the container manually and keep it running
220+ logger .info ("Starting Keycloak container..." )
134221 _global_keycloak_container .start ()
222+ logger .info ("Keycloak container started, setting up permissions..." )
135223
136224 # Add retry logic for health check
137225 max_retries = 3
138226 for attempt in range (max_retries ):
139227 try :
140228 setup_permissions_on_keycloak (_global_keycloak_container .get_client ())
229+ logger .info (
230+ f"Keycloak permissions setup successful on attempt { attempt + 1 } "
231+ )
141232 break
142233 except Exception as e :
143234 if attempt < max_retries - 1 :
@@ -146,6 +237,7 @@ def start_keycloak_server():
146237 )
147238 time .sleep (5 )
148239 else :
240+ logger .error (f"All Keycloak setup attempts failed: { e } " )
149241 raise
150242
151243 # Store the container info in shared file
@@ -159,7 +251,7 @@ def start_keycloak_server():
159251 _write_keycloak_state (state )
160252
161253 logger .info (
162- f"Keycloak started successfully and will be shared across all pytest workers : { keycloak_url } "
254+ f"Keycloak started successfully by PID { os . getpid () } and will be shared: { keycloak_url } "
163255 )
164256
165257 # Return the URL - container stays running for all tests
@@ -170,13 +262,15 @@ def start_keycloak_server():
170262 # Clean up on failure
171263 if _global_keycloak_container :
172264 try :
265+ logger .info ("Cleaning up failed Keycloak container..." )
173266 _global_keycloak_container .stop ()
174267 _global_keycloak_container = None
175- except Exception :
176- pass
268+ except Exception as cleanup_e :
269+ logger . error ( f"Error during cleanup: { cleanup_e } " )
177270 raise
178271 finally :
179- _release_keycloak_lock ()
272+ if lock_acquired :
273+ _release_keycloak_lock ()
180274
181275
182276@pytest .fixture (scope = "session" )
0 commit comments