Skip to content

Commit 8fa1210

Browse files
wilmaontherun and claude committed
Clean up code quality: fix imports, types, and consistency issues
- docs.py: replace module-level _FUNC_ANCHORS with per-dir _get_cats_and_anchors() cache
- llm.py: move imports to top level, add unreachable guard
- translator.py: move sqlparse import to top level, fix LLMClient type annotation, remove double-strip
- feldera_client.py: keep f.name usage inside with block
- skills.py: remove redundant intermediate sort
- cli.py: remove untested batch command, fix Status import, add missing --compiler/--model to all commands
- pyproject.toml: remove unused httpx dependency
- README.md: update to reflect removed batch command and full options list
- spark_skills.md: add rewrite rules and unsupported constructs from test investigation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 6655f7c commit 8fa1210

File tree

9 files changed

+302
-163
lines changed

9 files changed

+302
-163
lines changed

python/felderize/README.md

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -99,17 +99,13 @@ felderize translate-file path/to/combined.sql --validate
9999
> **Note:** Running without `--validate` prints a warning — the output SQL has not been verified against the Feldera compiler.
100100
101101
Both commands accept:
102-
- `--verbose` to log the SQL submitted to the validator at each repair attempt
102+
- `--validate` to validate output against the Feldera compiler (opt-in; `example` validates by default, use `--no-validate` to skip)
103103
- `--compiler PATH` to specify the path to the Feldera compiler binary (overrides `FELDERA_COMPILER` env var)
104-
- `--model` to specify the LLM model (overrides `FELDERIZE_MODEL` env var)
105-
106-
### Batch translation
107-
108-
```bash
109-
felderize batch path/to/data_dir/ --output-dir results/
110-
```
111-
112-
Each subdirectory should contain `*_schema.sql` and `*_query.sql` files.
104+
- `--model MODEL` to specify the LLM model (overrides `FELDERIZE_MODEL` env var)
105+
- `--no-docs` to disable Feldera SQL reference docs in the prompt
106+
- `--force-docs` to include docs on the first pass instead of only as a fallback
107+
- `--verbose` to log the SQL submitted to the validator at each repair attempt
108+
- `--json-output` to output results as JSON
113109

114110
## Configuration
115111

python/felderize/pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ requires-python = ">=3.10"
1010
dependencies = [
1111
"anthropic>=0.39.0",
1212
"sqlparse>=0.5.0",
13-
"httpx>=0.27.0",
1413
"click>=8.1.0",
1514
"pyyaml>=6.0",
1615
"python-dotenv>=1.0.0",

python/felderize/spark/cli.py

Lines changed: 9 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import click
88

99
from felderize.config import Config
10-
from felderize.models import TranslationResult
10+
from felderize.models import Status, TranslationResult
1111
from felderize.translator import split_combined_sql, translate_spark_to_feldera
1212

1313

@@ -24,6 +24,7 @@ def cli():
2424
@click.option("--model", help="LLM model to use (overrides FELDERIZE_MODEL env var)")
2525
@click.option("--json-output", is_flag=True, help="Output as JSON")
2626
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
27+
@click.option("--force-docs", is_flag=True, help="Include docs on the first pass instead of only as fallback")
2728
@click.option(
2829
"--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt"
2930
)
@@ -35,9 +36,10 @@ def translate(
3536
model: str | None,
3637
json_output: bool,
3738
no_docs: bool,
39+
force_docs: bool,
3840
verbose: bool,
3941
):
40-
"""Translate a single Spark SQL schema + query pair to Feldera SQL."""
42+
"""Translate a single Spark SQL schema + query/views pair to Feldera SQL."""
4143
if not validate:
4244
click.echo(
4345
"Warning: running without validation — output SQL is not verified against the Feldera compiler.",
@@ -57,6 +59,7 @@ def translate(
5759
config,
5860
validate=validate,
5961
include_docs=not no_docs,
62+
force_docs=force_docs,
6063
verbose=verbose,
6164
)
6265

@@ -73,6 +76,7 @@ def translate(
7376
@click.option("--model", help="LLM model to use (overrides FELDERIZE_MODEL env var)")
7477
@click.option("--json-output", is_flag=True, help="Output as JSON")
7578
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
79+
@click.option("--force-docs", is_flag=True, help="Include docs on the first pass instead of only as fallback")
7680
@click.option(
7781
"--verbose", is_flag=True, help="Log SQL submitted to validator at each attempt"
7882
)
@@ -83,6 +87,7 @@ def translate_file(
8387
model: str | None,
8488
json_output: bool,
8589
no_docs: bool,
90+
force_docs: bool,
8691
verbose: bool,
8792
):
8893
"""Translate a single combined Spark SQL file (schema + views) to Feldera SQL."""
@@ -105,6 +110,7 @@ def translate_file(
105110
config,
106111
validate=validate,
107112
include_docs=not no_docs,
113+
force_docs=force_docs,
108114
verbose=verbose,
109115
)
110116

@@ -114,65 +120,6 @@ def translate_file(
114120
_print_result(result)
115121

116122

117-
@cli.command()
118-
@click.argument("data_dir", type=click.Path(exists=True))
119-
@click.option("--validate", is_flag=True, help="Validate against Feldera instance")
120-
@click.option("--output-dir", type=click.Path(), help="Write results to directory")
121-
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
122-
def batch(data_dir: str, validate: bool, output_dir: str | None, no_docs: bool):
123-
"""Translate all Spark SQL pairs in a directory."""
124-
config = Config.from_env()
125-
data_path = Path(data_dir)
126-
results: dict[str, dict] = {}
127-
128-
# Find all benchmark directories
129-
dirs = sorted(d for d in data_path.iterdir() if d.is_dir())
130-
131-
if not dirs:
132-
click.echo("No benchmark directories found.", err=True)
133-
sys.exit(1)
134-
135-
for bm_dir in dirs:
136-
name = bm_dir.name
137-
schema_files = list(bm_dir.glob("*_schema.sql"))
138-
query_files = list(bm_dir.glob("*_query.sql"))
139-
140-
if not schema_files or not query_files:
141-
click.echo(f"Skipping {name}: missing schema or query file", err=True)
142-
continue
143-
144-
schema_sql = schema_files[0].read_text()
145-
query_sql = query_files[0].read_text()
146-
147-
click.echo(f"Translating {name}...", err=True)
148-
result = translate_spark_to_feldera(
149-
schema_sql,
150-
query_sql,
151-
config,
152-
validate=validate,
153-
include_docs=not no_docs,
154-
)
155-
results[name] = result.to_dict()
156-
157-
if output_dir:
158-
out_path = Path(output_dir)
159-
out_path.mkdir(parents=True, exist_ok=True)
160-
(out_path / f"{name}.sql").write_text(
161-
result.feldera_schema + "\n\n" + result.feldera_query
162-
)
163-
(out_path / f"{name}.json").write_text(
164-
json.dumps(result.to_dict(), indent=2)
165-
)
166-
167-
# Summary
168-
total = len(results)
169-
success = sum(1 for r in results.values() if r["status"] == "success")
170-
click.echo(f"\nResults: {success}/{total} successful", err=True)
171-
172-
# Print full results as JSON to stdout
173-
click.echo(json.dumps(results, indent=2))
174-
175-
176123
_EXAMPLES_DIR = Path(__file__).resolve().parent / "data" / "demo"
177124

178125

@@ -208,7 +155,7 @@ def example(
208155
felderize example # list available examples
209156
felderize example simple # translate the 'simple' example
210157
"""
211-
# Discover available examples: schema+query pairs and combined files
158+
# Discover available examples: schema+views pairs and combined files
212159
pairs: dict[str, tuple[Path, Path] | Path] = {}
213160
for schema_file in sorted(_EXAMPLES_DIR.glob("*_schema.sql")):
214161
example_name = schema_file.name.replace("_schema.sql", "")
@@ -282,8 +229,6 @@ def example(
282229

283230
def _print_result(result: TranslationResult):
284231
"""Pretty-print a translation result."""
285-
from felderize.models import Status
286-
287232
if result.status == Status.ERROR:
288233
click.echo("-- Translation Failed --", err=True)
289234
click.echo(

0 commit comments

Comments
 (0)