Skip to content

Commit 3e9f5fd

Browse files
committed
fix: logging
Signed-off-by: ntkathole <nikhilkathole2683@gmail.com>
1 parent 22278a8 commit 3e9f5fd

File tree

2 files changed

+111
-17
lines changed

2 files changed

+111
-17
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ test-python-integration-rbac-remote: ## Run Python remote RBAC integration tests
156156
FEAST_IS_LOCAL_TEST=True \
157157
FEAST_LOCAL_ONLINE_CONTAINER=True \
158158
PYTHONHASHSEED=1 \
159-
python -m pytest --tb=short -v -n 4 --color=yes --integration --durations=10 --timeout=1200 --timeout_method=thread --dist loadscope \
159+
python -m pytest --tb=short -v -n 4 --color=yes --integration --durations=10 --timeout=600 --timeout_method=thread --dist loadscope \
160160
-k "not test_lambda_materialization and not test_snowflake_materialization" \
161161
-m "rbac_remote_integration_test" \
162162
--log-cli-level=INFO -s \

sdk/python/tests/integration/conftest.py

Lines changed: 110 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from pathlib import Path
88

99
import pytest
10+
import requests
1011
from testcontainers.keycloak import KeycloakContainer
1112
from testcontainers.minio import MinioContainer
1213
from testcontainers.mysql import MySqlContainer
@@ -45,18 +46,39 @@ def _write_keycloak_state(state):
4546

4647
def _acquire_keycloak_lock():
4748
"""Acquire file-based lock for Keycloak coordination across workers"""
48-
max_wait = 120 # 2 minutes max wait
49+
max_wait = 30 # Reduced to 30 seconds to avoid hanging
4950
wait_time = 0
5051
while wait_time < max_wait:
5152
try:
5253
# Try to create lock file exclusively
5354
with open(KEYCLOAK_LOCK_FILE, "x") as f:
5455
f.write(str(os.getpid()))
56+
logger.info(f"Acquired Keycloak lock (PID: {os.getpid()})")
5557
return True
5658
except FileExistsError:
57-
# Another worker has the lock, wait
58-
time.sleep(1)
59-
wait_time += 1
59+
# Check if lock is stale (previous process died)
60+
try:
61+
with open(KEYCLOAK_LOCK_FILE, "r") as f:
62+
lock_pid = int(f.read().strip())
63+
# Check if the process is still running
64+
try:
65+
os.kill(lock_pid, 0) # Signal 0 just checks if process exists
66+
except (OSError, ProcessLookupError):
67+
# Process is dead, remove stale lock
68+
logger.warning(
69+
f"Removing stale Keycloak lock (dead PID: {lock_pid})"
70+
)
71+
KEYCLOAK_LOCK_FILE.unlink()
72+
continue
73+
except (ValueError, FileNotFoundError):
74+
# Lock file is corrupt or missing, try again
75+
continue
76+
77+
# Another worker has the lock, wait briefly
78+
time.sleep(0.5)
79+
wait_time += 0.5
80+
81+
logger.error(f"Failed to acquire Keycloak lock after {max_wait}s")
6082
return False
6183

6284

@@ -94,31 +116,95 @@ def _cleanup_keycloak():
94116
atexit.register(_cleanup_keycloak)
95117

96118

119+
def _cleanup_stale_keycloak_state():
120+
"""Clean up any stale Keycloak state files from previous runs"""
121+
try:
122+
if KEYCLOAK_STATE_FILE.exists():
123+
# Check if the state file references a dead process
124+
with open(KEYCLOAK_STATE_FILE, "r") as f:
125+
state = json.load(f)
126+
127+
worker_pid = state.get("worker_pid")
128+
if worker_pid:
129+
try:
130+
os.kill(worker_pid, 0) # Check if process exists
131+
logger.info(
132+
f"Keycloak state file exists with active worker PID {worker_pid}"
133+
)
134+
except (OSError, ProcessLookupError):
135+
logger.warning(
136+
f"Removing stale Keycloak state file (dead worker PID: {worker_pid})"
137+
)
138+
KEYCLOAK_STATE_FILE.unlink()
139+
else:
140+
logger.warning("Removing Keycloak state file with no worker PID")
141+
KEYCLOAK_STATE_FILE.unlink()
142+
143+
if KEYCLOAK_LOCK_FILE.exists():
144+
logger.warning("Removing stale Keycloak lock file from previous run")
145+
KEYCLOAK_LOCK_FILE.unlink()
146+
except Exception as e:
147+
logger.warning(f"Error cleaning up stale Keycloak state: {e}")
148+
149+
97150
@pytest.fixture(scope="session")
98151
def start_keycloak_server():
99152
"""Single Keycloak instance shared across ALL pytest workers"""
100153
global _global_keycloak_container
101154

155+
# Clean up any stale state from previous runs
156+
_cleanup_stale_keycloak_state()
157+
158+
logger.info(f"Worker PID {os.getpid()} requesting Keycloak instance")
159+
102160
# First check if Keycloak is already running (without lock)
103161
state = _read_keycloak_state()
104162
if state.get("keycloak_started") and state.get("keycloak_url"):
105163
logger.info(f"Reusing existing Keycloak instance: {state['keycloak_url']}")
106-
return state["keycloak_url"]
164+
# Verify the container is actually reachable
165+
try:
166+
response = requests.get(f"{state['keycloak_url']}/health/ready", timeout=5)
167+
if response.status_code == 200:
168+
return state["keycloak_url"]
169+
else:
170+
logger.warning("Keycloak health check failed, will restart")
171+
except Exception:
172+
logger.warning("Cannot reach existing Keycloak, will restart")
107173

108174
# Need to start Keycloak - acquire lock for coordination
109-
logger.info("Attempting to acquire Keycloak startup lock...")
110-
if not _acquire_keycloak_lock():
111-
logger.error("Failed to acquire Keycloak lock within timeout")
112-
raise RuntimeError("Could not acquire Keycloak startup lock")
113-
175+
logger.info(
176+
f"Worker PID {os.getpid()} attempting to acquire Keycloak startup lock..."
177+
)
178+
lock_acquired = False
114179
try:
180+
if not _acquire_keycloak_lock():
181+
logger.error("Failed to acquire Keycloak lock, will try fallback")
182+
# Fallback: wait a bit and check if another worker succeeded
183+
time.sleep(10)
184+
state = _read_keycloak_state()
185+
if state.get("keycloak_started") and state.get("keycloak_url"):
186+
logger.info(
187+
f"Fallback: Using Keycloak started by another worker: {state['keycloak_url']}"
188+
)
189+
return state["keycloak_url"]
190+
else:
191+
raise RuntimeError(
192+
"Could not acquire Keycloak startup lock and no fallback available"
193+
)
194+
195+
lock_acquired = True
196+
115197
# Double-check state after acquiring lock
116198
state = _read_keycloak_state()
117199
if state.get("keycloak_started") and state.get("keycloak_url"):
118-
logger.info(f"Another worker started Keycloak: {state['keycloak_url']}")
200+
logger.info(
201+
f"Another worker started Keycloak while we waited: {state['keycloak_url']}"
202+
)
119203
return state["keycloak_url"]
120204

121-
logger.info("Starting single Keycloak instance for all pytest workers")
205+
logger.info(
206+
f"Worker PID {os.getpid()} starting single Keycloak instance for all pytest workers"
207+
)
122208

123209
# Create and start the container manually (no context manager)
124210
_global_keycloak_container = KeycloakContainer(
@@ -131,13 +217,18 @@ def start_keycloak_server():
131217
)
132218

133219
# Start the container manually and keep it running
220+
logger.info("Starting Keycloak container...")
134221
_global_keycloak_container.start()
222+
logger.info("Keycloak container started, setting up permissions...")
135223

136224
# Add retry logic for health check
137225
max_retries = 3
138226
for attempt in range(max_retries):
139227
try:
140228
setup_permissions_on_keycloak(_global_keycloak_container.get_client())
229+
logger.info(
230+
f"Keycloak permissions setup successful on attempt {attempt + 1}"
231+
)
141232
break
142233
except Exception as e:
143234
if attempt < max_retries - 1:
@@ -146,6 +237,7 @@ def start_keycloak_server():
146237
)
147238
time.sleep(5)
148239
else:
240+
logger.error(f"All Keycloak setup attempts failed: {e}")
149241
raise
150242

151243
# Store the container info in shared file
@@ -159,7 +251,7 @@ def start_keycloak_server():
159251
_write_keycloak_state(state)
160252

161253
logger.info(
162-
f"Keycloak started successfully and will be shared across all pytest workers: {keycloak_url}"
254+
f"Keycloak started successfully by PID {os.getpid()} and will be shared: {keycloak_url}"
163255
)
164256

165257
# Return the URL - container stays running for all tests
@@ -170,13 +262,15 @@ def start_keycloak_server():
170262
# Clean up on failure
171263
if _global_keycloak_container:
172264
try:
265+
logger.info("Cleaning up failed Keycloak container...")
173266
_global_keycloak_container.stop()
174267
_global_keycloak_container = None
175-
except Exception:
176-
pass
268+
except Exception as cleanup_e:
269+
logger.error(f"Error during cleanup: {cleanup_e}")
177270
raise
178271
finally:
179-
_release_keycloak_lock()
272+
if lock_acquired:
273+
_release_keycloak_lock()
180274

181275

182276
@pytest.fixture(scope="session")

0 commit comments

Comments
 (0)