Skip to content

Commit 9e17a33

Browse files
committed
[ci] apply automatic fixes
Signed-off-by: feldera-bot <feldera-bot@feldera.com>
1 parent 7d56201 commit 9e17a33

File tree

4 files changed

+92
-30
lines changed

4 files changed

+92
-30
lines changed

python/felderize/spark/cli.py

Lines changed: 33 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -22,13 +22,23 @@ def cli():
2222
@click.option("--validate", is_flag=True, help="Validate against Feldera instance")
2323
@click.option("--json-output", is_flag=True, help="Output as JSON")
2424
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
25-
@click.option("--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt")
25+
@click.option(
26+
"--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt"
27+
)
2628
def translate(
27-
schema_file: str, query_file: str, validate: bool, json_output: bool, no_docs: bool, verbose: bool
29+
schema_file: str,
30+
query_file: str,
31+
validate: bool,
32+
json_output: bool,
33+
no_docs: bool,
34+
verbose: bool,
2835
):
2936
"""Translate a single Spark SQL schema + query pair to Feldera SQL."""
3037
if not validate:
31-
click.echo("Warning: running without validation — output SQL is not verified against the Feldera compiler.", err=True)
38+
click.echo(
39+
"Warning: running without validation — output SQL is not verified against the Feldera compiler.",
40+
err=True,
41+
)
3242
config = Config.from_env()
3343
schema_sql = Path(schema_file).read_text()
3444
query_sql = Path(query_file).read_text()
@@ -53,11 +63,18 @@ def translate(
5363
@click.option("--validate", is_flag=True, help="Validate against Feldera instance")
5464
@click.option("--json-output", is_flag=True, help="Output as JSON")
5565
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
56-
@click.option("--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt")
57-
def translate_file(sql_file: str, validate: bool, json_output: bool, no_docs: bool, verbose: bool):
66+
@click.option(
67+
"--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt"
68+
)
69+
def translate_file(
70+
sql_file: str, validate: bool, json_output: bool, no_docs: bool, verbose: bool
71+
):
5872
"""Translate a single combined Spark SQL file (schema + views) to Feldera SQL."""
5973
if not validate:
60-
click.echo("Warning: running without validation — output SQL is not verified against the Feldera compiler.", err=True)
74+
click.echo(
75+
"Warning: running without validation — output SQL is not verified against the Feldera compiler.",
76+
err=True,
77+
)
6178
config = Config.from_env()
6279
combined_sql = Path(sql_file).read_text()
6380
schema_sql, query_sql = split_combined_sql(combined_sql)
@@ -148,8 +165,12 @@ def batch(data_dir: str, validate: bool, output_dir: str | None, no_docs: bool):
148165
)
149166
@click.option("--json-output", is_flag=True, help="Output as JSON")
150167
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
151-
@click.option("--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt")
152-
def example(name: str | None, validate: bool, json_output: bool, no_docs: bool, verbose: bool):
168+
@click.option(
169+
"--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt"
170+
)
171+
def example(
172+
name: str | None, validate: bool, json_output: bool, no_docs: bool, verbose: bool
173+
):
153174
"""Run a built-in example translation.
154175
155176
Without NAME, lists available examples. With NAME, translates that example.
@@ -199,7 +220,10 @@ def example(name: str | None, validate: bool, json_output: bool, no_docs: bool,
199220
click.echo("\nTranslating...\n", err=True)
200221

201222
if not validate:
202-
click.echo("Warning: running without validation — output SQL is not verified against the Feldera compiler.", err=True)
223+
click.echo(
224+
"Warning: running without validation — output SQL is not verified against the Feldera compiler.",
225+
err=True,
226+
)
203227
config = Config.from_env()
204228
result = translate_spark_to_feldera(
205229
schema_sql,

python/felderize/spark/docs.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,22 @@
2424
# (keywords, operators, syntax forms rather than named functions).
2525
# Keep these specific — broad patterns like \bDATE\b match almost every query.
2626
_EXTRA_PATTERNS: dict[str, list[str]] = {
27-
"types": [], # always matched — no keywords needed
28-
"datetime": [r"\bINTERVAL\b"], # DATE/TIMESTAMP covered by index function names
29-
"aggregates": [r"\bGROUP\s+BY\b", r"\bHAVING\b", r"\bOVER\s*\("],
30-
"array": [r"\bEXPLODE\b", r"\bUNNEST\b", r"\bsize\s*\("],
31-
"map": [r"\bMAP\s*<"], # MAP( covered by index; MAP< is type syntax
32-
"json": [r"\bVARIANT\b"], # JSON covered by index function names
33-
"casts": [r"::"], # CAST covered by index; :: is operator syntax
27+
"types": [], # always matched — no keywords needed
28+
"datetime": [r"\bINTERVAL\b"], # DATE/TIMESTAMP covered by index function names
29+
"aggregates": [r"\bGROUP\s+BY\b", r"\bHAVING\b", r"\bOVER\s*\("],
30+
"array": [r"\bEXPLODE\b", r"\bUNNEST\b", r"\bsize\s*\("],
31+
"map": [r"\bMAP\s*<"], # MAP( covered by index; MAP< is type syntax
32+
"json": [r"\bVARIANT\b"], # JSON covered by index function names
33+
"casts": [r"::"], # CAST covered by index; :: is operator syntax
3434
"comparisons": [r"\bCASE\s+WHEN\b"],
3535
}
3636

3737
# Spark function names that appear in SQL but are not in the Feldera index.
3838
_SPARK_ALIASES: dict[str, list[str]] = {
39-
"json": [r"\bget_json_object\b", r"\bfrom_json\b", r"\bjson_tuple\b"],
40-
"array": [r"\barray_contains\b", r"\bsort_array\b", r"\barray_distinct\b"],
39+
"json": [r"\bget_json_object\b", r"\bfrom_json\b", r"\bjson_tuple\b"],
40+
"array": [r"\barray_contains\b", r"\bsort_array\b", r"\barray_distinct\b"],
4141
"decimal": [r"\bNUMERIC\b"],
42-
"float": [r"\bFLOAT\b"],
42+
"float": [r"\bFLOAT\b"],
4343
}
4444

4545
# Regex to find HTML anchor IDs embedded in doc files: <a id="name">
@@ -76,8 +76,8 @@ def _build_categories_from_index(
7676
func_upper = func_name.upper()
7777
for link_m in link_re.finditer(line):
7878
cat = link_m.group(1)
79-
doc_file = link_m.group(2) # e.g. "string.md"
80-
anchor = link_m.group(3) # e.g. "upper" (may be None)
79+
doc_file = link_m.group(2) # e.g. "string.md"
80+
anchor = link_m.group(3) # e.g. "upper" (may be None)
8181
if cat in known:
8282
keyword = rf"\b{re.escape(func_name)}\b"
8383
if keyword not in cats[cat]:
@@ -91,7 +91,10 @@ def _build_categories_from_index(
9191
def _make_categories() -> tuple[dict[str, list[str]], dict[str, list[tuple[str, str]]]]:
9292
index_path = (
9393
Path(__file__).resolve().parents[3]
94-
/ "docs.feldera.com" / "docs" / "sql" / "function-index.md"
94+
/ "docs.feldera.com"
95+
/ "docs"
96+
/ "sql"
97+
/ "function-index.md"
9598
)
9699
cats, func_anchors = _build_categories_from_index(index_path)
97100
for source in (_EXTRA_PATTERNS, _SPARK_ALIASES):
@@ -230,7 +233,9 @@ def load_docs(sql: str, docs_dir: Path | None = None) -> str:
230233
matched by keyword patterns (e.g., GROUP BY) with no specific function match.
231234
"""
232235
if docs_dir is None:
233-
docs_dir = Path(__file__).resolve().parents[3] / "docs.feldera.com" / "docs" / "sql"
236+
docs_dir = (
237+
Path(__file__).resolve().parents[3] / "docs.feldera.com" / "docs" / "sql"
238+
)
234239

235240
if not docs_dir.is_dir():
236241
return ""

python/felderize/spark/feldera_client.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,14 @@ def validate_sql(sql: str, compiler_path: str | Path | None = None) -> list[str]
2121

2222
try:
2323
result = subprocess.run(
24-
[str(compiler), "-i", "--ignoreOrder", "--alltables", "--noRust", f.name],
24+
[
25+
str(compiler),
26+
"-i",
27+
"--ignoreOrder",
28+
"--alltables",
29+
"--noRust",
30+
f.name,
31+
],
2532
capture_output=True,
2633
text=True,
2734
timeout=60,
@@ -45,4 +52,6 @@ def validate_sql(sql: str, compiler_path: str | Path | None = None) -> list[str]
4552
if not errors and stderr.strip():
4653
errors.append(stderr.strip())
4754

48-
return errors if errors else [f"Compilation failed with exit code {result.returncode}"]
55+
return (
56+
errors if errors else [f"Compilation failed with exit code {result.returncode}"]
57+
)

python/felderize/spark/translator.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,10 @@ def _translate_with_repair(
114114
full_sql = result.feldera_schema + "\n\n" + result.feldera_query
115115
for attempt in range(max_retries):
116116
if verbose:
117-
print(f"\n--- SQL submitted to validator (attempt {attempt + 1}) ---", file=sys.stderr)
117+
print(
118+
f"\n--- SQL submitted to validator (attempt {attempt + 1}) ---",
119+
file=sys.stderr,
120+
)
118121
print(full_sql, file=sys.stderr)
119122
print("---", file=sys.stderr)
120123
errors = validate_sql(full_sql, config.feldera_compiler or None)
@@ -153,7 +156,10 @@ def _translate_with_repair(
153156

154157
# Final validation after all retries
155158
if verbose:
156-
print(f"\n--- SQL submitted to validator (attempt {max_retries + 1}) ---", file=sys.stderr)
159+
print(
160+
f"\n--- SQL submitted to validator (attempt {max_retries + 1}) ---",
161+
file=sys.stderr,
162+
)
157163
print(full_sql, file=sys.stderr)
158164
print("---", file=sys.stderr)
159165
errors = validate_sql(full_sql, config.feldera_compiler or None)
@@ -191,7 +197,11 @@ def split_combined_sql(sql: str) -> tuple[str, str]:
191197
continue
192198
# Find first non-comment, non-blank line to identify statement type.
193199
first_kw = next(
194-
(ln.strip() for ln in stripped.splitlines() if ln.strip() and not ln.strip().startswith("--")),
200+
(
201+
ln.strip()
202+
for ln in stripped.splitlines()
203+
if ln.strip() and not ln.strip().startswith("--")
204+
),
195205
"",
196206
).upper()
197207
if not first_kw:
@@ -228,7 +238,14 @@ def translate_spark_to_feldera(
228238
with_docs=False,
229239
)
230240
result = _translate_with_repair(
231-
schema_sql, query_sql, config, client, system_prompt_skills, validate, max_retries, verbose,
241+
schema_sql,
242+
query_sql,
243+
config,
244+
client,
245+
system_prompt_skills,
246+
validate,
247+
max_retries,
248+
verbose,
232249
)
233250

234251
if result.status != Status.ERROR:
@@ -246,7 +263,14 @@ def translate_spark_to_feldera(
246263
with_skills=False,
247264
)
248265
result = _translate_with_repair(
249-
schema_sql, query_sql, config, client, system_prompt_docs, validate, max_retries, verbose,
266+
schema_sql,
267+
query_sql,
268+
config,
269+
client,
270+
system_prompt_docs,
271+
validate,
272+
max_retries,
273+
verbose,
250274
)
251275
if result.status != Status.ERROR:
252276
result.warnings.append("Resolved with docs-only fallback")

0 commit comments

Comments
 (0)