Skip to content

Commit d652e90

Browse files
committed
py: modify kafka integration test to support multi variant test methods
- Introduces multiple test methods per pipeline variant.
- Generates connector configurations from predefined variant settings.
- Inherits from SharedTestPipeline instead of unittest.TestCase to allow single SQL compilation.
- Separates SQL definitions into different functions instead of a single string.
- Adds helper functions for polling and loopback validation.

Signed-off-by: rivudhk <rivudhkr@gmail.com>
1 parent 0a33ed1 commit d652e90

File tree

1 file changed

+144
-71
lines changed

1 file changed

+144
-71
lines changed

python/tests/workloads/test_kafka_avro.py

Lines changed: 144 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
import unittest
21
from tests import TEST_CLIENT
3-
from feldera import PipelineBuilder
42
import time
53
import os
64
from confluent_kafka.admin import AdminClient
75
import requests
86
import re
7+
import json
8+
9+
from tests.shared_test_pipeline import SharedTestPipeline, sql
910

1011

1112
def env(name: str, default: str) -> str:
@@ -68,14 +69,27 @@ def cleanup_kafka(sql: str, bootstrap_servers: str, registry_url: str):
6869
delete_schema_subjects(registry_url, subjects)
6970

7071

71-
# Set the limit for number of records to generate
72-
LIMIT = 1000000
72+
class Variant:
    """One pipeline variant: identical table/view SQL, distinct connector config.

    All runtime object names (topics, tables, views) are namespaced by the
    variant id so several variants can coexist in a single pipeline.
    """

    def __init__(self, cfg):
        # Required settings — a missing key is a config error, so use [].
        self.id = cfg["id"]
        self.limit = cfg["limit"]
        # Optional settings; None means "fall back to the connector default".
        self.partitions = cfg.get("partitions")
        self.sync = cfg.get("sync")
        self.start_from = cfg.get("start_from")

        # Kafka topics and SQL object names derived from the variant id.
        self.topic1 = f"my_topic_avro_{self.id}"
        self.topic2 = f"my_topic_avro2_{self.id}"
        self.source = f"t_{self.id}"
        self.view = f"v_{self.id}"
        self.loopback = f"loopback_{self.id}"
7488

75-
class TestKafkaAvro(unittest.TestCase):
76-
def test_check_avro(self):
77-
sql = f"""
78-
create table t (
89+
90+
def sql_source_table(v: Variant) -> str:
91+
return f"""
92+
create table {v.source} (
7993
id int,
8094
str varchar,
8195
dec decimal,
@@ -90,19 +104,23 @@ def test_check_avro(self):
90104
'connectors' = '[{{
91105
"transport": {{
92106
"name": "datagen",
93-
"config": {{ "plan": [{{"limit": {LIMIT}}}], "seed": 1 }}
107+
"config": {{ "plan": [{{"limit": {v.limit}}}], "seed": 1 }}
94108
}}
95109
}}]'
96110
);
111+
"""
97112

98-
create view v
113+
114+
def sql_view(v: Variant) -> str:
115+
return f"""
116+
create view {v.view}
99117
with (
100118
'connectors' = '[{{
101119
"transport": {{
102120
"name": "kafka_output",
103121
"config": {{
104122
"bootstrap.servers": "{KAFKA_BOOTSTRAP}",
105-
"topic": "my_topic_avro"
123+
"topic": "{v.topic1}"
106124
}}
107125
}},
108126
"format": {{
@@ -114,12 +132,12 @@ def test_check_avro(self):
114132
}}
115133
}},
116134
{{
117-
"index": "t_index",
135+
"index": "idx_{v.id}",
118136
"transport": {{
119137
"name": "kafka_output",
120138
"config": {{
121139
"bootstrap.servers": "{KAFKA_BOOTSTRAP}",
122-
"topic": "my_topic_avro2"
140+
"topic": "{v.topic2}"
123141
}}
124142
}},
125143
"format": {{
@@ -131,11 +149,31 @@ def test_check_avro(self):
131149
}}
132150
}}]'
133151
)
134-
as select * from t;
152+
as select * from {v.source};
153+
154+
create index idx_{v.id} on {v.view}(id);
155+
"""
135156

136-
create index t_index on v(id);
137157

138-
create table loopback (
158+
def sql_loopback_table(v: Variant) -> str:
159+
# Optional configurations that will use connector defaults if not specified
160+
config = {
161+
"bootstrap.servers": KAFKA_BOOTSTRAP,
162+
"topic": v.topic2,
163+
}
164+
165+
if v.start_from:
166+
config["start_from"] = v.start_from
167+
if v.partitions:
168+
config["partitions"] = v.partitions
169+
if v.sync:
170+
config["synchronize_partitions"] = v.sync
171+
172+
# Convert to SQL config string
173+
config_json = json.dumps(config)
174+
175+
return f"""
176+
create table {v.loopback} (
139177
id int,
140178
str varchar,
141179
dec decimal,
@@ -150,11 +188,7 @@ def test_check_avro(self):
150188
'connectors' = '[{{
151189
"transport": {{
152190
"name": "kafka_input",
153-
"config": {{
154-
"topic": "my_topic_avro2",
155-
"start_from": "earliest",
156-
"bootstrap.servers": "{KAFKA_BOOTSTRAP}"
157-
}}
191+
"config": {config_json}
158192
}},
159193
"format": {{
160194
"name": "avro",
@@ -166,61 +200,100 @@ def test_check_avro(self):
166200
}}]'
167201
);
168202
"""
169-
pipeline = PipelineBuilder(
170-
TEST_CLIENT,
171-
"test_kafka_avro",
172-
sql=sql,
173-
).create_or_replace()
174203

175-
try:
176-
pipeline.start()
177-
178-
# NOTE => total_completed_records counts all rows that are processed through each output as follows:
179-
# 1. Written by the view<v> -> Kafka
180-
# 2. Ingested into loopback table from Kafka
181-
# Thus, expected_records = generated_rows * number_of_outputs (in this case 2)
182-
expected_records = LIMIT * 2
183-
timeout_s = 1800
184-
poll_interval_s = 5
185-
186-
start_time = time.perf_counter()
187-
# Poll `total_completed_records` every `poll_interval_s` seconds until it reaches `expected_records`
188-
while True:
189-
stats = TEST_CLIENT.get_pipeline_stats(pipeline.name)
190-
completed = stats["global_metrics"]["total_completed_records"]
191-
192-
print(f"Processed {completed}/{expected_records} rows so far...")
193-
194-
if completed >= expected_records:
195-
break
196-
197-
# Prevent infinite polling
198-
if time.perf_counter() - start_time > timeout_s:
199-
raise AssertionError(
200-
f"Timeout: only {completed}/{expected_records} rows processed"
201-
)
202-
203-
time.sleep(poll_interval_s)
204-
205-
elapsed = time.perf_counter() - start_time
206-
print(
207-
f"All {completed}/{expected_records} rows processed in {elapsed:.3f}s"
204+
205+
def build_sql(configs) -> str:
    """Generate the pipeline SQL for all variants in *configs*.

    For each variant this emits the source table, the output view, and the
    loopback table, then joins every snippet into one program.
    """
    snippets = []
    for cfg in configs:
        variant = Variant(cfg)
        snippets.extend(
            (
                sql_source_table(variant),
                sql_view(variant),
                sql_loopback_table(variant),
            )
        )
    return "\n".join(snippets)
216+
217+
218+
def wait_for_rows(pipeline, expected_rows, timeout_s=1800, poll_interval_s=5):
    """Block until the pipeline reports `expected_rows` completed records.

    Records are not processed instantaneously, so poll the pipeline's
    `total_completed_records` metric every `poll_interval_s` seconds.
    Returns the final count, or raises AssertionError once `timeout_s`
    seconds elapse without reaching the target.
    """
    deadline = time.perf_counter() + timeout_s
    while True:
        metrics = TEST_CLIENT.get_pipeline_stats(pipeline.name)["global_metrics"]
        completed = metrics["total_completed_records"]
        print(f"Processed {completed}/{expected_rows} rows so far...")
        if completed >= expected_rows:
            return completed
        # Give up instead of polling forever.
        if time.perf_counter() > deadline:
            raise AssertionError(
                f"Timeout: only {completed}/{expected_rows} rows processed"
            )
        time.sleep(poll_interval_s)
209234

210-
# Validation: once finished, the loopback table should contain all generated values
211-
# Validate by comparing the hash of the source table 't' and loopback table
212235

213-
expected_hash = pipeline.query_hash("SELECT * FROM t ORDER BY id, str")
214-
result_hash = pipeline.query_hash("SELECT * FROM loopback ORDER BY id, str")
236+
def validate_loopback(self, variant: Variant):
    """Validate that the loopback table received every generated value.

    Once processing finishes, the loopback table should mirror the variant's
    source table; compare content hashes of both, using the same ordering on
    each side so equal contents produce equal hashes.
    """
    query_template = "SELECT * FROM {} ORDER BY id, str"

    src_tbl_hash = self.pipeline.query_hash(query_template.format(variant.source))
    loopback_tbl_hash = self.pipeline.query_hash(
        query_template.format(variant.loopback)
    )

    assert src_tbl_hash == loopback_tbl_hash, (
        f"Loopback table hash mismatch for variant {variant.id}!\n"
        f"Source table: {variant.source}\n"
        f"Loopback table: {variant.loopback}\n"
        f"Expected hash: {src_tbl_hash}\n"
        f"Got hash: {loopback_tbl_hash}"
    )

    print(f"Loopback table validated successfully for variant {variant.id}")
215256

216-
assert result_hash == expected_hash, (
217-
f"Validation failed: loopback table hash mismatch!\n"
218-
f"Expected: {expected_hash}\nGot: {result_hash}"
219-
)
220-
print("Loopback table validated successfully!")
221257

258+
class TestKafkaAvro(SharedTestPipeline):
    """Kafka/Avro round-trip tests.

    Each test method compiles its own SQL snippet (via the @sql decorator)
    and processes only its own variant.
    """

    # Connector configurations, one per pipeline variant.  Optional keys
    # ("partitions", "sync", "start_from") fall back to connector defaults.
    TEST_CONFIGS = [
        {"id": 0, "limit": 10},
        {"id": 1, "limit": 20},
        # TODO: re-enable this large partition/sync variant when ready.
        # {
        #     "id": 2,
        #     "limit": 1000000,
        #     "partitions": [0],
        #     "sync": True,
        #     "start_from": "earliest",
        # },
    ]

    def _run_variant(self, cfg):
        """Run the full round-trip for one variant config.

        Starts the pipeline, waits until all rows have flowed
        view -> Kafka -> loopback table, validates the loopback contents,
        then stops the pipeline and cleans up Kafka topics and schemas.
        """
        variant = Variant(cfg)

        self.pipeline.start()
        try:
            # Each generated row is counted twice: once when the view writes
            # it to Kafka, and once when the loopback table ingests it.
            expected_rows = variant.limit * 2
            wait_for_rows(self.pipeline, expected_rows)
            validate_loopback(self, variant)
        finally:
            self.pipeline.stop(force=True)
            cleanup_kafka(build_sql([cfg]), KAFKA_BOOTSTRAP, SCHEMA_REGISTRY)

    @sql(build_sql([TEST_CONFIGS[0]]))
    def test_kafka_avro_config_0(self):
        self._run_variant(self.TEST_CONFIGS[0])

    @sql(build_sql([TEST_CONFIGS[1]]))
    def test_kafka_avro_config_1(self):
        self._run_variant(self.TEST_CONFIGS[1])

0 commit comments

Comments
 (0)