Skip to content

Commit 6bd2d52

Browse files
wilmaontherun and claude
committed
Add --model option, remove OpenAI support and hardcoded compiler path
- Added --model CLI option to translate, translate-file, and example commands
- Model and compiler path now read exclusively from .env / CLI flags
- Removed OpenAI provider support (untested)
- Removed hardcoded default compiler path
- Updated README for consistency

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 9975f33 commit 6bd2d52

File tree

4 files changed

+19
-38
lines changed

4 files changed

+19
-38
lines changed

python/felderize/README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ pip install -e .
1313

1414
> **Note:** `pip install -e .` is required before running `felderize`. It registers the package and CLI command.
1515
16-
Create a `.env` file with your API key and optionally the compiler path:
16+
Create a `.env` file:
1717

1818
```bash
1919
ANTHROPIC_API_KEY=your-key-here
2020
FELDERA_COMPILER=/path/to/sql-to-dbsp # default: sql-to-dbsp-compiler/SQL-compiler/sql-to-dbsp inside the Feldera repo
21+
FELDERIZE_MODEL=claude-sonnet-4-5
2122
```
2223

2324
The `FELDERA_COMPILER` path is required for validation. Without it, translation still works but output SQL is not verified. You can also pass it per-command with `--compiler PATH`.
@@ -100,6 +101,7 @@ felderize translate-file path/to/combined.sql --validate
100101
Both commands accept:
101102
- `--verbose` to log the SQL submitted to the validator at each repair attempt
102103
- `--compiler PATH` to specify the path to the Feldera compiler binary (overrides `FELDERA_COMPILER` env var)
104+
- `--model` to specify the LLM model (overrides `FELDERIZE_MODEL` env var)
103105

104106
### Batch translation
105107

@@ -116,10 +118,8 @@ Environment variables (set in `.env`):
116118
| Variable | Description | Default |
117119
|---|---|---|
118120
| `ANTHROPIC_API_KEY` | Anthropic API key | (required) |
119-
| `FELDERIZE_LLM_PROVIDER` | `anthropic` or `openai` | `anthropic` |
120-
| `FELDERIZE_MODEL` | LLM model to use | `claude-sonnet-4-20250514` |
121-
| `OPENAI_API_KEY` | OpenAI API key (if using openai provider) ||
122-
| `FELDERA_COMPILER` | Path to sql-to-dbsp compiler (can also be set with `--compiler`) | `<repo-root>/sql-to-dbsp-compiler/SQL-compiler/sql-to-dbsp` |
121+
| `FELDERIZE_MODEL` | LLM model to use (can also be set with `--model`) | (required, set in `.env`) |
122+
| `FELDERA_COMPILER` | Path to sql-to-dbsp compiler (can also be set with `--compiler`) | (required for validation) |
123123

124124
## How it works
125125

python/felderize/spark/cli.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def cli():
2121
@click.argument("query_file", type=click.Path(exists=True))
2222
@click.option("--validate", is_flag=True, help="Validate against Feldera instance")
2323
@click.option("--compiler", type=click.Path(), help="Path to Feldera compiler binary")
24+
@click.option("--model", help="LLM model to use (overrides FELDERIZE_MODEL env var)")
2425
@click.option("--json-output", is_flag=True, help="Output as JSON")
2526
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
2627
@click.option(
@@ -31,6 +32,7 @@ def translate(
3132
query_file: str,
3233
validate: bool,
3334
compiler: str | None,
35+
model: str | None,
3436
json_output: bool,
3537
no_docs: bool,
3638
verbose: bool,
@@ -44,6 +46,8 @@ def translate(
4446
config = Config.from_env()
4547
if compiler:
4648
config.feldera_compiler = compiler
49+
if model:
50+
config.model = model
4751
schema_sql = Path(schema_file).read_text()
4852
query_sql = Path(query_file).read_text()
4953

@@ -66,6 +70,7 @@ def translate(
6670
@click.argument("sql_file", type=click.Path(exists=True))
6771
@click.option("--validate", is_flag=True, help="Validate against Feldera instance")
6872
@click.option("--compiler", type=click.Path(), help="Path to Feldera compiler binary")
73+
@click.option("--model", help="LLM model to use (overrides FELDERIZE_MODEL env var)")
6974
@click.option("--json-output", is_flag=True, help="Output as JSON")
7075
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
7176
@click.option(
@@ -75,6 +80,7 @@ def translate_file(
7580
sql_file: str,
7681
validate: bool,
7782
compiler: str | None,
83+
model: str | None,
7884
json_output: bool,
7985
no_docs: bool,
8086
verbose: bool,
@@ -88,6 +94,8 @@ def translate_file(
8894
config = Config.from_env()
8995
if compiler:
9096
config.feldera_compiler = compiler
97+
if model:
98+
config.model = model
9199
combined_sql = Path(sql_file).read_text()
92100
schema_sql, query_sql = split_combined_sql(combined_sql)
93101

@@ -176,6 +184,7 @@ def batch(data_dir: str, validate: bool, output_dir: str | None, no_docs: bool):
176184
help="Validate against Feldera instance (default: on)",
177185
)
178186
@click.option("--compiler", type=click.Path(), help="Path to Feldera compiler binary")
187+
@click.option("--model", help="LLM model to use (overrides FELDERIZE_MODEL env var)")
179188
@click.option("--json-output", is_flag=True, help="Output as JSON")
180189
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
181190
@click.option(
@@ -185,6 +194,7 @@ def example(
185194
name: str | None,
186195
validate: bool,
187196
compiler: str | None,
197+
model: str | None,
188198
json_output: bool,
189199
no_docs: bool,
190200
verbose: bool,
@@ -245,6 +255,8 @@ def example(
245255
config = Config.from_env()
246256
if compiler:
247257
config.feldera_compiler = compiler
258+
if model:
259+
config.model = model
248260
result = translate_spark_to_feldera(
249261
schema_sql,
250262
query_sql,

python/felderize/spark/config.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
@dataclass
1111
class Config:
12-
llm_provider: str = "anthropic"
1312
model: str = ""
1413
api_key: str = ""
1514
feldera_compiler: str = ""
@@ -20,18 +19,8 @@ def from_env(cls) -> Config:
2019
env_path = Path(__file__).resolve().parent.parent / ".env"
2120
load_dotenv(env_path)
2221

23-
provider = os.environ.get("FELDERIZE_LLM_PROVIDER", "anthropic")
24-
25-
if provider == "openai":
26-
default_model = "gpt-4o"
27-
api_key = os.environ.get("OPENAI_API_KEY", "")
28-
else:
29-
default_model = "claude-sonnet-4-20250514"
30-
api_key = os.environ.get("ANTHROPIC_API_KEY", "")
31-
3222
return cls(
33-
llm_provider=provider,
34-
model=os.environ.get("FELDERIZE_MODEL", default_model),
35-
api_key=api_key,
23+
model=os.environ.get("FELDERIZE_MODEL", ""),
24+
api_key=os.environ.get("ANTHROPIC_API_KEY", ""),
3625
feldera_compiler=os.environ.get("FELDERA_COMPILER", ""),
3726
)

python/felderize/spark/llm.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,25 +28,5 @@ def translate(self, system_prompt: str, user_prompt: str) -> str:
2828
return response.content[0].text
2929

3030

31-
class OpenAIClient(LLMClient):
32-
def __init__(self, config: Config):
33-
import openai
34-
35-
self.client = openai.OpenAI(api_key=config.api_key)
36-
self.model = config.model
37-
38-
def translate(self, system_prompt: str, user_prompt: str) -> str:
39-
response = self.client.chat.completions.create(
40-
model=self.model,
41-
messages=[
42-
{"role": "system", "content": system_prompt},
43-
{"role": "user", "content": user_prompt},
44-
],
45-
)
46-
return response.choices[0].message.content or ""
47-
48-
4931
def create_client(config: Config) -> LLMClient:
50-
if config.llm_provider == "openai":
51-
return OpenAIClient(config)
5232
return AnthropicClient(config)

0 commit comments

Comments (0)