[ci] apply automatic fixes

feldera-bot · feldera-bot · commit 9e17a33c08b1 · 2026-03-20T02:20:03.000Z
Signed-off-by: feldera-bot <feldera-bot@feldera.com>
diff --git a/python/felderize/spark/cli.py b/python/felderize/spark/cli.py
@@ -22,13 +22,23 @@ def cli():
 @click.option("--validate", is_flag=True, help="Validate against Feldera instance")
 @click.option("--json-output", is_flag=True, help="Output as JSON")
 @click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
-@click.option("--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt")
+@click.option(
+    "--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt"
+)
 def translate(
-    schema_file: str, query_file: str, validate: bool, json_output: bool, no_docs: bool, verbose: bool
+    schema_file: str,
+    query_file: str,
+    validate: bool,
+    json_output: bool,
+    no_docs: bool,
+    verbose: bool,
 ):
     """Translate a single Spark SQL schema + query pair to Feldera SQL."""
     if not validate:
-        click.echo("Warning: running without validation — output SQL is not verified against the Feldera compiler.", err=True)
+        click.echo(
+            "Warning: running without validation — output SQL is not verified against the Feldera compiler.",
+            err=True,
+        )
     config = Config.from_env()
     schema_sql = Path(schema_file).read_text()
     query_sql = Path(query_file).read_text()
@@ -53,11 +63,18 @@ def translate(
 @click.option("--validate", is_flag=True, help="Validate against Feldera instance")
 @click.option("--json-output", is_flag=True, help="Output as JSON")
 @click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
-@click.option("--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt")
-def translate_file(sql_file: str, validate: bool, json_output: bool, no_docs: bool, verbose: bool):
+@click.option(
+    "--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt"
+)
+def translate_file(
+    sql_file: str, validate: bool, json_output: bool, no_docs: bool, verbose: bool
+):
     """Translate a single combined Spark SQL file (schema + views) to Feldera SQL."""
     if not validate:
-        click.echo("Warning: running without validation — output SQL is not verified against the Feldera compiler.", err=True)
+        click.echo(
+            "Warning: running without validation — output SQL is not verified against the Feldera compiler.",
+            err=True,
+        )
     config = Config.from_env()
     combined_sql = Path(sql_file).read_text()
     schema_sql, query_sql = split_combined_sql(combined_sql)
@@ -148,8 +165,12 @@ def batch(data_dir: str, validate: bool, output_dir: str | None, no_docs: bool):
 )
 @click.option("--json-output", is_flag=True, help="Output as JSON")
 @click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
-@click.option("--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt")
-def example(name: str | None, validate: bool, json_output: bool, no_docs: bool, verbose: bool):
+@click.option(
+    "--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt"
+)
+def example(
+    name: str | None, validate: bool, json_output: bool, no_docs: bool, verbose: bool
+):
     """Run a built-in example translation.
 
     Without NAME, lists available examples. With NAME, translates that example.
@@ -199,7 +220,10 @@ def example(name: str | None, validate: bool, json_output: bool, no_docs: bool,
     click.echo("\nTranslating...\n", err=True)
 
     if not validate:
-        click.echo("Warning: running without validation — output SQL is not verified against the Feldera compiler.", err=True)
+        click.echo(
+            "Warning: running without validation — output SQL is not verified against the Feldera compiler.",
+            err=True,
+        )
     config = Config.from_env()
     result = translate_spark_to_feldera(
         schema_sql,
diff --git a/python/felderize/spark/docs.py b/python/felderize/spark/docs.py
@@ -24,22 +24,22 @@
 # (keywords, operators, syntax forms rather than named functions).
 # Keep these specific — broad patterns like \bDATE\b match almost every query.
 _EXTRA_PATTERNS: dict[str, list[str]] = {
-    "types":       [],                        # always matched — no keywords needed
-    "datetime":    [r"\bINTERVAL\b"],         # DATE/TIMESTAMP covered by index function names
-    "aggregates":  [r"\bGROUP\s+BY\b", r"\bHAVING\b", r"\bOVER\s*\("],
-    "array":       [r"\bEXPLODE\b", r"\bUNNEST\b", r"\bsize\s*\("],
-    "map":         [r"\bMAP\s*<"],            # MAP( covered by index; MAP< is type syntax
-    "json":        [r"\bVARIANT\b"],          # JSON covered by index function names
-    "casts":       [r"::"],                   # CAST covered by index; :: is operator syntax
+    "types": [],  # always matched — no keywords needed
+    "datetime": [r"\bINTERVAL\b"],  # DATE/TIMESTAMP covered by index function names
+    "aggregates": [r"\bGROUP\s+BY\b", r"\bHAVING\b", r"\bOVER\s*\("],
+    "array": [r"\bEXPLODE\b", r"\bUNNEST\b", r"\bsize\s*\("],
+    "map": [r"\bMAP\s*<"],  # MAP( covered by index; MAP< is type syntax
+    "json": [r"\bVARIANT\b"],  # JSON covered by index function names
+    "casts": [r"::"],  # CAST covered by index; :: is operator syntax
     "comparisons": [r"\bCASE\s+WHEN\b"],
 }
 
 # Spark function names that appear in SQL but are not in the Feldera index.
 _SPARK_ALIASES: dict[str, list[str]] = {
-    "json":    [r"\bget_json_object\b", r"\bfrom_json\b", r"\bjson_tuple\b"],
-    "array":   [r"\barray_contains\b", r"\bsort_array\b", r"\barray_distinct\b"],
+    "json": [r"\bget_json_object\b", r"\bfrom_json\b", r"\bjson_tuple\b"],
+    "array": [r"\barray_contains\b", r"\bsort_array\b", r"\barray_distinct\b"],
     "decimal": [r"\bNUMERIC\b"],
-    "float":   [r"\bFLOAT\b"],
+    "float": [r"\bFLOAT\b"],
 }
 
 # Regex to find HTML anchor IDs embedded in doc files: <a id="name">
@@ -76,8 +76,8 @@ def _build_categories_from_index(
         func_upper = func_name.upper()
         for link_m in link_re.finditer(line):
             cat = link_m.group(1)
-            doc_file = link_m.group(2)   # e.g. "string.md"
-            anchor = link_m.group(3)     # e.g. "upper" (may be None)
+            doc_file = link_m.group(2)  # e.g. "string.md"
+            anchor = link_m.group(3)  # e.g. "upper" (may be None)
             if cat in known:
                 keyword = rf"\b{re.escape(func_name)}\b"
                 if keyword not in cats[cat]:
@@ -91,7 +91,10 @@ def _build_categories_from_index(
 def _make_categories() -> tuple[dict[str, list[str]], dict[str, list[tuple[str, str]]]]:
     index_path = (
         Path(__file__).resolve().parents[3]
-        / "docs.feldera.com" / "docs" / "sql" / "function-index.md"
+        / "docs.feldera.com"
+        / "docs"
+        / "sql"
+        / "function-index.md"
     )
     cats, func_anchors = _build_categories_from_index(index_path)
     for source in (_EXTRA_PATTERNS, _SPARK_ALIASES):
@@ -230,7 +233,9 @@ def load_docs(sql: str, docs_dir: Path | None = None) -> str:
     matched by keyword patterns (e.g., GROUP BY) with no specific function match.
     """
     if docs_dir is None:
-        docs_dir = Path(__file__).resolve().parents[3] / "docs.feldera.com" / "docs" / "sql"
+        docs_dir = (
+            Path(__file__).resolve().parents[3] / "docs.feldera.com" / "docs" / "sql"
+        )
 
     if not docs_dir.is_dir():
         return ""
diff --git a/python/felderize/spark/feldera_client.py b/python/felderize/spark/feldera_client.py
@@ -21,7 +21,14 @@ def validate_sql(sql: str, compiler_path: str | Path | None = None) -> list[str]
 
         try:
             result = subprocess.run(
-                [str(compiler), "-i", "--ignoreOrder", "--alltables", "--noRust", f.name],
+                [
+                    str(compiler),
+                    "-i",
+                    "--ignoreOrder",
+                    "--alltables",
+                    "--noRust",
+                    f.name,
+                ],
                 capture_output=True,
                 text=True,
                 timeout=60,
@@ -45,4 +52,6 @@ def validate_sql(sql: str, compiler_path: str | Path | None = None) -> list[str]
     if not errors and stderr.strip():
         errors.append(stderr.strip())
 
-    return errors if errors else [f"Compilation failed with exit code {result.returncode}"]
+    return (
+        errors if errors else [f"Compilation failed with exit code {result.returncode}"]
+    )
diff --git a/python/felderize/spark/translator.py b/python/felderize/spark/translator.py
@@ -114,7 +114,10 @@ def _translate_with_repair(
     full_sql = result.feldera_schema + "\n\n" + result.feldera_query
     for attempt in range(max_retries):
         if verbose:
-            print(f"\n--- SQL submitted to validator (attempt {attempt + 1}) ---", file=sys.stderr)
+            print(
+                f"\n--- SQL submitted to validator (attempt {attempt + 1}) ---",
+                file=sys.stderr,
+            )
             print(full_sql, file=sys.stderr)
             print("---", file=sys.stderr)
         errors = validate_sql(full_sql, config.feldera_compiler or None)
@@ -153,7 +156,10 @@ def _translate_with_repair(
 
     # Final validation after all retries
     if verbose:
-        print(f"\n--- SQL submitted to validator (attempt {max_retries + 1}) ---", file=sys.stderr)
+        print(
+            f"\n--- SQL submitted to validator (attempt {max_retries + 1}) ---",
+            file=sys.stderr,
+        )
         print(full_sql, file=sys.stderr)
         print("---", file=sys.stderr)
     errors = validate_sql(full_sql, config.feldera_compiler or None)
@@ -191,7 +197,11 @@ def split_combined_sql(sql: str) -> tuple[str, str]:
             continue
         # Find first non-comment, non-blank line to identify statement type.
         first_kw = next(
-            (ln.strip() for ln in stripped.splitlines() if ln.strip() and not ln.strip().startswith("--")),
+            (
+                ln.strip()
+                for ln in stripped.splitlines()
+                if ln.strip() and not ln.strip().startswith("--")
+            ),
             "",
         ).upper()
         if not first_kw:
@@ -228,7 +238,14 @@ def translate_spark_to_feldera(
         with_docs=False,
     )
     result = _translate_with_repair(
-        schema_sql, query_sql, config, client, system_prompt_skills, validate, max_retries, verbose,
+        schema_sql,
+        query_sql,
+        config,
+        client,
+        system_prompt_skills,
+        validate,
+        max_retries,
+        verbose,
     )
 
     if result.status != Status.ERROR:
@@ -246,7 +263,14 @@ def translate_spark_to_feldera(
             with_skills=False,
         )
         result = _translate_with_repair(
-            schema_sql, query_sql, config, client, system_prompt_docs, validate, max_retries, verbose,
+            schema_sql,
+            query_sql,
+            config,
+            client,
+            system_prompt_docs,
+            validate,
+            max_retries,
+            verbose,
         )
         if result.status != Status.ERROR:
             result.warnings.append("Resolved with docs-only fallback")