feldera
diff --git a/‎docs.feldera.com/docs/tutorials/rest_api/index.md‎
Lines changed: 4 additions & 2 deletions b/‎docs.feldera.com/docs/tutorials/rest_api/index.md‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎python/feldera/pipeline.py‎
Lines changed: 18 additions & 5 deletions b/‎python/feldera/pipeline.py‎
Lines changed: 18 additions & 5 deletions
diff --git a/‎python/feldera/rest/_httprequests.py‎
Lines changed: 4 additions & 4 deletions b/‎python/feldera/rest/_httprequests.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎python/feldera/rest/feldera_client.py‎
Lines changed: 51 additions & 17 deletions b/‎python/feldera/rest/feldera_client.py‎
Lines changed: 51 additions & 17 deletions
diff --git a/‎python/tests/platform/conftest.py‎
Lines changed: 4 additions & 0 deletions b/‎python/tests/platform/conftest.py‎
Lines changed: 4 additions & 0 deletions
@@ -200,13 +200,15 @@ curl -i -X POST http://127.0.0.1:8080/v0/pipelines/supply-chain/start
 
 ... which will return `HTTP/1.1 202 Accepted` when successful.
 
-Check that it has successfully started using:
+The start action is asynchronous (hence the `202 Accepted` response),
+so it takes some time before the pipeline reaches its target state.
+Poll the status periodically until the start has completed:
 
 ```
 curl -s -X GET http://127.0.0.1:8080/v0/pipelines/supply-chain | jq '.deployment_status'
 ```
 
-... which will say 'Running` when the pipeline has started:
+... which will eventually say `Running` once the pipeline has started.
 
 > Note: Connectors are only initialized when a pipeline starts to use them.
 > A pipeline will not start if a connector is unable to connect to its
 
@@ -84,14 +84,13 @@ def wait_for_status(
         :param timeout: Maximum time to wait in seconds. If None, waits forever (default: None)
         :raises TimeoutError: If the expected status is not reached within the timeout
         """
-        start_time = time.time()
-
+        start_time = time.monotonic()
         while True:
             current_status = self.status()
             if current_status == expected_status:
                 return
 
-            if timeout is not None and time.time() - start_time >= timeout:
+            if timeout is not None and time.monotonic() - start_time >= timeout:
                 raise TimeoutError(
                     f"Pipeline did not reach {expected_status.name} status within {timeout} seconds"
                 )
@@ -392,7 +391,7 @@ def wait_for_idle(
         have been processed).
 
         :param idle_interval_s: Idle interval duration (default is 5.0 seconds).
-        :param timeout_s: Timeout waiting for idle (default is 600.0 seconds).
+        :param timeout_s: Timeout waiting for idle (`None` = no timeout is enforced).
         :param poll_interval_s: Polling interval, should be set substantially
             smaller than the idle interval (default is 0.2 seconds).
         :raises ValueError: If idle interval is larger than timeout, poll interval
@@ -512,6 +511,13 @@ def start_paused(
     ):
         """
         Starts the pipeline in the paused state.
+
+        :param bootstrap_policy: The bootstrap policy to use.
+        :param wait: Set True to wait for the pipeline to start. True by default.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to start (defaults to `None` = no timeout is enforced).
+        :param dismiss_error: Set True to dismiss any deployment error before starting;
+            set False to make it fail in that case. True by default.
         """
 
         return self.client.start_pipeline_as_paused(
@@ -531,6 +537,13 @@ def start_standby(
     ):
         """
         Starts the pipeline in the standby state.
+
+        :param bootstrap_policy: The bootstrap policy to use.
+        :param wait: Set True to wait for the pipeline to start. True by default.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to start (defaults to `None` = no timeout is enforced).
+        :param dismiss_error: Set True to dismiss any deployment error before starting;
+            set False to make it fail in that case. True by default.
         """
 
         self.client.start_pipeline_as_standby(
@@ -747,7 +760,7 @@ def checkpoint(self, wait: bool = False, timeout_s: Optional[float] = None) -> i
 
         :param wait: If true, will block until the checkpoint completes.
         :param timeout_s: The maximum time (in seconds) to wait for the
-            checkpoint to complete.
+            checkpoint to complete (defaults to `None` = no timeout is enforced).
 
         :return: The checkpoint sequence number.
 
 
@@ -71,21 +71,21 @@ def _wait_for_health_recovery(self, max_wait_seconds: int = 300) -> bool:
         Returns:
             bool: True if cluster became healthy within timeout, False otherwise
         """
-        start_time = time.time()
+        start_time = time.monotonic()
         check_interval = 5
 
         logging.info(
             f"Waiting for cluster health recovery (max {max_wait_seconds}s)..."
         )
 
-        while time.time() - start_time < max_wait_seconds:
+        while time.monotonic() - start_time < max_wait_seconds:
             if self._check_cluster_health():
-                elapsed = time.time() - start_time
+                elapsed = time.monotonic() - start_time
                 logging.info(f"Instance health recovered after {elapsed:.1f}s")
                 return True
 
             time.sleep(check_interval)
-            elapsed = time.time() - start_time
+            elapsed = time.monotonic() - start_time
             logging.debug(
                 f"Still waiting for health recovery ({elapsed:.1f}s elapsed)..."
             )
 
@@ -16,6 +16,8 @@
 from feldera.rest.feldera_config import FelderaConfig
 from feldera.rest.pipeline import Pipeline
 
+logger = logging.getLogger(__name__)
+
 
 def _validate_no_none_keys_in_map(data):
     def validate_no_none_keys(d: Dict[Any, Any]) -> None:
@@ -93,12 +95,12 @@ def __init__(
             client_version = determine_client_version()
             server_config = self.get_config()
             if client_version != server_config.version:
-                logging.warning(
+                logger.warning(
                     f"Feldera client is on version {client_version} while server is at "
                     f"{server_config.version}. There could be incompatibilities."
                 )
         except Exception as e:
-            logging.error(f"Failed to connect to Feldera API: {e}")
+            logger.error(f"Failed to connect to Feldera API: {e}")
             raise e
 
     @staticmethod
@@ -153,15 +155,38 @@ def pipelines(
 
         return [Pipeline.from_dict(pipeline) for pipeline in resp]
 
-    def __wait_for_compilation(self, name: str):
+    def _wait_for_compilation(
+        self,
+        name: str,
+        expected_program_version: int | None = None,
+        timeout_s: float | None = None,
+        poll_interval_s: float = 1.0,
+    ) -> Pipeline:
+        """Wait for pipeline compilation -- internal use only."""
         wait = ["Pending", "CompilingSql", "SqlCompiled", "CompilingRust"]
-
+        start_time = time.monotonic()
         while True:
+            elapsed = time.monotonic() - start_time
+            if timeout_s is not None and elapsed > timeout_s:
+                raise TimeoutError(
+                    f"Timed out waiting for pipeline '{name}' to compile "
+                    f"(expected program_version >= {expected_program_version})"
+                )
+
             p = self.get_pipeline(name, PipelineFieldSelector.STATUS)
             status = p.program_status
 
             if status == "Success":
-                return self.get_pipeline(name, PipelineFieldSelector.ALL)
+                if expected_program_version is None:
+                    return self.get_pipeline(name, PipelineFieldSelector.ALL)
+
+                current_version = p.program_version or 0
+                if current_version == expected_program_version:
+                    return self.get_pipeline(name, PipelineFieldSelector.ALL)
+                else:
+                    raise RuntimeError(
+                        f"program version ({current_version}) != expected program version ({expected_program_version})"
+                    )
             elif status not in wait:
                 p = self.get_pipeline(name, PipelineFieldSelector.ALL)
 
@@ -189,15 +214,20 @@ def __wait_for_compilation(self, name: str):
 
                 raise RuntimeError(error_message)
 
-            logging.debug("still compiling %s, waiting for 100 more milliseconds", name)
-            time.sleep(0.1)
+            logger.debug(
+                "still compiling %s, waiting for %.1f more seconds",
+                name,
+                poll_interval_s,
+            )
+            time.sleep(poll_interval_s)
 
     def __wait_for_pipeline_state(
         self,
         pipeline_name: str,
         state: str,
         timeout_s: Optional[float] = None,
         start: bool = True,
+        poll_interval_s: float = 0.5,
     ):
         start_time = time.monotonic()
 
@@ -227,20 +257,22 @@ def __wait_for_pipeline_state(
 {resp.deployment_error.get("message", "")}"""
                 )
 
-            logging.debug(
-                "still starting %s, waiting for 100 more milliseconds", pipeline_name
+            logger.debug(
+                "still starting %s, waiting for %.1f more seconds",
+                pipeline_name,
+                poll_interval_s,
             )
-            time.sleep(0.1)
+            time.sleep(poll_interval_s)
 
     def __wait_for_pipeline_state_one_of(
         self,
         pipeline_name: str,
         states: list[str],
         timeout_s: float | None = None,
         start: bool = True,
+        poll_interval_s: float = 0.5,
     ) -> PipelineStatus:
         start_time = time.monotonic()
-        poll_interval_s = 0.1
         states = [state.lower() for state in states]
 
         while True:
@@ -268,8 +300,10 @@ def __wait_for_pipeline_state_one_of(
 Reason: The pipeline is in a STOPPED state due to the following error:
 {resp.deployment_error.get("message", "")}"""
                 )
-            logging.debug(
-                "still starting %s, waiting for 100 more milliseconds", pipeline_name
+            logger.debug(
+                "still starting %s, waiting for %.1f more seconds",
+                pipeline_name,
+                poll_interval_s,
             )
             time.sleep(poll_interval_s)
 
@@ -299,7 +333,7 @@ def create_pipeline(self, pipeline: Pipeline, wait: bool = True) -> Pipeline:
         if not wait:
             return pipeline
 
-        return self.__wait_for_compilation(pipeline.name)
+        return self._wait_for_compilation(pipeline.name)
 
     def create_or_update_pipeline(
         self, pipeline: Pipeline, wait: bool = True
@@ -331,7 +365,7 @@ def create_or_update_pipeline(
         if not wait:
             return pipeline
 
-        return self.__wait_for_compilation(pipeline.name)
+        return self._wait_for_compilation(pipeline.name)
 
     def patch_pipeline(
         self,
@@ -667,7 +701,7 @@ def stop_pipeline(
             if status == "Stopped":
                 return
 
-            logging.debug(
+            logger.debug(
                 "still stopping %s, waiting for 100 more milliseconds",
                 pipeline_name,
             )
@@ -1021,7 +1055,7 @@ def wait_for_token(
                 break
 
             elapsed = time.monotonic() - start
-            logging.debug(
+            logger.debug(
                 f"still waiting for inputs represented by {token} to be processed; elapsed: {elapsed}s"
             )
 
 
@@ -6,6 +6,7 @@
 """
 
 import pytest
+import logging
 
 
 def is_master(config) -> bool:
@@ -15,6 +16,9 @@ def is_master(config) -> bool:
 
 def pytest_configure(config):
     """Configure hook: fetch OIDC token on master node only."""
+    # Keep SDK debug logs enabled in tests without affecting production defaults.
+    logging.getLogger("feldera.rest.feldera_client").setLevel(logging.DEBUG)
+
     if is_master(config):
         # This runs only on the master node (or in single-node mode)
         from feldera.testutils_oidc import setup_token_cache