Skip to content

Commit bb1b24c

Browse files
committed
[ci] apply automatic fixes
Signed-off-by: feldera-bot <feldera-bot@feldera.com>
1 parent 046f795 commit bb1b24c

File tree

4 files changed

+130
-39
lines changed

4 files changed

+130
-39
lines changed

python/felderize/spark/cli.py

Lines changed: 24 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,14 +22,20 @@ def cli():
2222
@click.option("--validate", is_flag=True, help="Validate against Feldera instance")
2323
@click.option("--json-output", is_flag=True, help="Output as JSON")
2424
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
25-
def translate(schema_file: str, query_file: str, validate: bool, json_output: bool, no_docs: bool):
25+
def translate(
26+
schema_file: str, query_file: str, validate: bool, json_output: bool, no_docs: bool
27+
):
2628
"""Translate a single Spark SQL schema + query pair to Feldera SQL."""
2729
config = Config.from_env()
2830
schema_sql = Path(schema_file).read_text()
2931
query_sql = Path(query_file).read_text()
3032

3133
result = translate_spark_to_feldera(
32-
schema_sql, query_sql, config, validate=validate, include_docs=not no_docs,
34+
schema_sql,
35+
query_sql,
36+
config,
37+
validate=validate,
38+
include_docs=not no_docs,
3339
)
3440

3541
if json_output:
@@ -70,7 +76,11 @@ def batch(data_dir: str, validate: bool, output_dir: str | None, no_docs: bool):
7076

7177
click.echo(f"Translating {name}...", err=True)
7278
result = translate_spark_to_feldera(
73-
schema_sql, query_sql, config, validate=validate, include_docs=not no_docs,
79+
schema_sql,
80+
query_sql,
81+
config,
82+
validate=validate,
83+
include_docs=not no_docs,
7484
)
7585
results[name] = result.to_dict()
7686

@@ -98,7 +108,11 @@ def batch(data_dir: str, validate: bool, output_dir: str | None, no_docs: bool):
98108

99109
@cli.command()
100110
@click.argument("name", required=False)
101-
@click.option("--validate/--no-validate", default=True, help="Validate against Feldera instance (default: on)")
111+
@click.option(
112+
"--validate/--no-validate",
113+
default=True,
114+
help="Validate against Feldera instance (default: on)",
115+
)
102116
@click.option("--json-output", is_flag=True, help="Output as JSON")
103117
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
104118
def example(name: str | None, validate: bool, json_output: bool, no_docs: bool):
@@ -124,7 +138,7 @@ def example(name: str | None, validate: bool, json_output: bool, no_docs: bool):
124138
for ex_name, (sf, qf) in pairs.items():
125139
schema_preview = sf.read_text().strip().split("\n")[0]
126140
click.echo(f" {ex_name:20s} {schema_preview}")
127-
click.echo(f"\nRun one with: felderize example <name>")
141+
click.echo("\nRun one with: felderize example <name>")
128142
return
129143

130144
if name not in pairs:
@@ -143,7 +157,11 @@ def example(name: str | None, validate: bool, json_output: bool, no_docs: bool):
143157

144158
config = Config.from_env()
145159
result = translate_spark_to_feldera(
146-
schema_sql, query_sql, config, validate=validate, include_docs=not no_docs,
160+
schema_sql,
161+
query_sql,
162+
config,
163+
validate=validate,
164+
include_docs=not no_docs,
147165
)
148166

149167
if json_output:

python/felderize/spark/docs.py

Lines changed: 68 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,40 +10,88 @@
1010
_CATEGORIES: dict[str, list[str]] = {
1111
"types": [], # Always matched
1212
"string": [
13-
r"\bUPPER\b", r"\bLOWER\b", r"\bTRIM\b", r"\bCONCAT\b",
14-
r"\bSUBSTRING\b", r"\bREPLACE\b", r"\bLIKE\b", r"\bREGEXP\b",
15-
r"\bLENGTH\b", r"\bINITCAP\b", r"\bREVERSE\b", r"\bREPEAT\b",
16-
r"\bSPLIT\b", r"\bLPAD\b", r"\bRPAD\b",
13+
r"\bUPPER\b",
14+
r"\bLOWER\b",
15+
r"\bTRIM\b",
16+
r"\bCONCAT\b",
17+
r"\bSUBSTRING\b",
18+
r"\bREPLACE\b",
19+
r"\bLIKE\b",
20+
r"\bREGEXP\b",
21+
r"\bLENGTH\b",
22+
r"\bINITCAP\b",
23+
r"\bREVERSE\b",
24+
r"\bREPEAT\b",
25+
r"\bSPLIT\b",
26+
r"\bLPAD\b",
27+
r"\bRPAD\b",
1728
],
1829
"datetime": [
19-
r"\bDATE\b", r"\bTIMESTAMP\b", r"\bINTERVAL\b", r"\bYEAR\b",
20-
r"\bMONTH\b", r"\bDAY\b", r"\bHOUR\b", r"\bEXTRACT\b",
21-
r"\bDATE_ADD\b", r"\bDATE_SUB\b", r"\bDATEDIFF\b",
22-
r"\bDATE_TRUNC\b", r"\bCURRENT_DATE\b", r"\bCURRENT_TIMESTAMP\b",
30+
r"\bDATE\b",
31+
r"\bTIMESTAMP\b",
32+
r"\bINTERVAL\b",
33+
r"\bYEAR\b",
34+
r"\bMONTH\b",
35+
r"\bDAY\b",
36+
r"\bHOUR\b",
37+
r"\bEXTRACT\b",
38+
r"\bDATE_ADD\b",
39+
r"\bDATE_SUB\b",
40+
r"\bDATEDIFF\b",
41+
r"\bDATE_TRUNC\b",
42+
r"\bCURRENT_DATE\b",
43+
r"\bCURRENT_TIMESTAMP\b",
2344
],
2445
"json": [
25-
r"\bJSON\b", r"\bPARSE_JSON\b", r"\bVARIANT\b",
26-
r"\bget_json_object\b", r"\bfrom_json\b", r"\bjson_tuple\b",
46+
r"\bJSON\b",
47+
r"\bPARSE_JSON\b",
48+
r"\bVARIANT\b",
49+
r"\bget_json_object\b",
50+
r"\bfrom_json\b",
51+
r"\bjson_tuple\b",
2752
r"\bTO_JSON\b",
2853
],
2954
"aggregates": [
30-
r"\bCOUNT\b", r"\bSUM\b", r"\bAVG\b", r"\bGROUP\s+BY\b",
31-
r"\bHAVING\b", r"\bOVER\s*\(", r"\bROW_NUMBER\b", r"\bRANK\b",
32-
r"\bLAG\b", r"\bLEAD\b", r"\bWINDOW\b",
55+
r"\bCOUNT\b",
56+
r"\bSUM\b",
57+
r"\bAVG\b",
58+
r"\bGROUP\s+BY\b",
59+
r"\bHAVING\b",
60+
r"\bOVER\s*\(",
61+
r"\bROW_NUMBER\b",
62+
r"\bRANK\b",
63+
r"\bLAG\b",
64+
r"\bLEAD\b",
65+
r"\bWINDOW\b",
3366
],
3467
"array": [
35-
r"\bARRAY\b", r"\bEXPLODE\b", r"\bUNNEST\b",
36-
r"\barray_contains\b", r"\bsort_array\b", r"\barray_distinct\b",
37-
r"\bCARDINALITY\b", r"\bsize\s*\(",
68+
r"\bARRAY\b",
69+
r"\bEXPLODE\b",
70+
r"\bUNNEST\b",
71+
r"\barray_contains\b",
72+
r"\bsort_array\b",
73+
r"\barray_distinct\b",
74+
r"\bCARDINALITY\b",
75+
r"\bsize\s*\(",
3876
],
3977
"map": [r"\bMAP\s*<", r"\bMAP\s*\(", r"\bmap_keys\b", r"\bmap_values\b"],
4078
"decimal": [
41-
r"\bDECIMAL\b", r"\bNUMERIC\b", r"\bROUND\b", r"\bCEIL\b",
42-
r"\bFLOOR\b", r"\bTRUNCATE\b",
79+
r"\bDECIMAL\b",
80+
r"\bNUMERIC\b",
81+
r"\bROUND\b",
82+
r"\bCEIL\b",
83+
r"\bFLOOR\b",
84+
r"\bTRUNCATE\b",
4385
],
4486
"float": [
45-
r"\bFLOAT\b", r"\bDOUBLE\b", r"\bPOWER\b", r"\bSQRT\b",
46-
r"\bLOG\b", r"\bLN\b", r"\bSIN\b", r"\bCOS\b",
87+
r"\bFLOAT\b",
88+
r"\bDOUBLE\b",
89+
r"\bPOWER\b",
90+
r"\bSQRT\b",
91+
r"\bLOG\b",
92+
r"\bLN\b",
93+
r"\bSIN\b",
94+
r"\bCOS\b",
4795
],
4896
"casts": [r"\bCAST\s*\(", r"::"],
4997
"comparisons": [r"\bBETWEEN\b", r"\bCASE\s+WHEN\b", r"\bCOALESCE\b", r"\bNULLIF\b"],

python/felderize/spark/skills.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import os
43
from pathlib import Path
54

65
import yaml
@@ -77,8 +76,7 @@ def build_system_prompt(
7776
prompt += (
7877
"\n\n## Validated Translation Examples\n\n"
7978
"These examples were validated against the Feldera compiler. "
80-
"Follow the same patterns.\n\n"
81-
+ examples_text
79+
"Follow the same patterns.\n\n" + examples_text
8280
)
8381
if with_docs:
8482
docs_text = load_docs(spark_sql, docs_dir)

python/felderize/spark/translator.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -127,14 +127,22 @@ def _translate_once(
127127
raw = client.translate(system_prompt, repair_prompt)
128128
try:
129129
data = _parse_response(raw)
130-
result.feldera_schema = _as_str(data.get("feldera_schema", result.feldera_schema))
131-
result.feldera_query = _as_str(data.get("feldera_query", result.feldera_query))
130+
result.feldera_schema = _as_str(
131+
data.get("feldera_schema", result.feldera_schema)
132+
)
133+
result.feldera_query = _as_str(
134+
data.get("feldera_query", result.feldera_query)
135+
)
132136
result.unsupported = _as_list(data.get("unsupported", result.unsupported))
133137
result.warnings = _as_list(data.get("warnings", result.warnings))
134-
result.explanations = _as_list(data.get("explanations", result.explanations))
138+
result.explanations = _as_list(
139+
data.get("explanations", result.explanations)
140+
)
135141
full_sql = result.feldera_schema + "\n\n" + result.feldera_query
136142
except (json.JSONDecodeError, KeyError):
137-
result.warnings.append(f"Repair attempt {attempt + 1} produced invalid JSON")
143+
result.warnings.append(
144+
f"Repair attempt {attempt + 1} produced invalid JSON"
145+
)
138146

139147
# Final validation after all retries
140148
errors = validate_sql(full_sql, config.feldera_compiler or None)
@@ -143,7 +151,9 @@ def _translate_once(
143151
result.status = Status.UNSUPPORTED if result.unsupported else Status.SUCCESS
144152
else:
145153
result.status = Status.ERROR
146-
result.warnings.extend([f"Still failing after {max_retries} repairs: {e}" for e in errors])
154+
result.warnings.extend(
155+
[f"Still failing after {max_retries} repairs: {e}" for e in errors]
156+
)
147157

148158
return result
149159

@@ -164,21 +174,38 @@ def translate_spark_to_feldera(
164174

165175
# First pass: skills + examples only (no docs)
166176
system_prompt = build_system_prompt(
167-
skills_dir, docs_dir=docs_dir_path, spark_sql=combined_sql, with_docs=False,
177+
skills_dir,
178+
docs_dir=docs_dir_path,
179+
spark_sql=combined_sql,
180+
with_docs=False,
168181
)
169182
result = _translate_once(
170-
schema_sql, query_sql, config, client, system_prompt, validate, max_retries,
183+
schema_sql,
184+
query_sql,
185+
config,
186+
client,
187+
system_prompt,
188+
validate,
189+
max_retries,
171190
)
172191

173192
# If first pass failed and docs are enabled, retry with docs
174193
if result.status == Status.ERROR and include_docs:
175194
print("Retrying with Feldera docs...", file=sys.stderr)
176195
system_prompt_with_docs = build_system_prompt(
177-
skills_dir, docs_dir=docs_dir_path, spark_sql=combined_sql, with_docs=True,
196+
skills_dir,
197+
docs_dir=docs_dir_path,
198+
spark_sql=combined_sql,
199+
with_docs=True,
178200
)
179201
result = _translate_once(
180-
schema_sql, query_sql, config, client, system_prompt_with_docs,
181-
validate, max_retries,
202+
schema_sql,
203+
query_sql,
204+
config,
205+
client,
206+
system_prompt_with_docs,
207+
validate,
208+
max_retries,
182209
)
183210
if result.status != Status.ERROR:
184211
result.warnings.append("Resolved with docs fallback")

0 commit comments

Comments
 (0)