Skip to content

Commit 6bd2d52

Browse files
wilmaontherun and claude
committed
Add --model option, remove OpenAI support and hardcoded compiler path
- Added --model CLI option to translate, translate-file, and example commands
- Model and compiler path now read exclusively from .env / CLI flags
- Removed OpenAI provider support (untested)
- Removed hardcoded default compiler path
- Updated README for consistency

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 9975f33 commit 6bd2d52

File tree

4 files changed

+19
-38
lines changed

4 files changed

+19
-38
lines changed

python/felderize/README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ pip install -e .
1313

1414
> **Note:** `pip install -e .` is required before running `felderize`. It registers the package and CLI command.
1515
16-
Create a `.env` file with your API key and optionally the compiler path:
16+
Create a `.env` file:
1717

1818
```bash
1919
ANTHROPIC_API_KEY=your-key-here
2020
FELDERA_COMPILER=/path/to/sql-to-dbsp # default: sql-to-dbsp-compiler/SQL-compiler/sql-to-dbsp inside the Feldera repo
21+
FELDERIZE_MODEL=claude-sonnet-4-5
2122
```
2223

2324
The `FELDERA_COMPILER` path is required for validation. Without it, translation still works but output SQL is not verified. You can also pass it per-command with `--compiler PATH`.
@@ -100,6 +101,7 @@ felderize translate-file path/to/combined.sql --validate
100101
Both commands accept:
101102
- `--verbose` to log the SQL submitted to the validator at each repair attempt
102103
- `--compiler PATH` to specify the path to the Feldera compiler binary (overrides `FELDERA_COMPILER` env var)
104+
- `--model` to specify the LLM model (overrides `FELDERIZE_MODEL` env var)
103105

104106
### Batch translation
105107

@@ -116,10 +118,8 @@ Environment variables (set in `.env`):
116118
| Variable | Description | Default |
117119
|---|---|---|
118120
| `ANTHROPIC_API_KEY` | Anthropic API key | (required) |
119-
| `FELDERIZE_LLM_PROVIDER` | `anthropic` or `openai` | `anthropic` |
120-
| `FELDERIZE_MODEL` | LLM model to use | `claude-sonnet-4-20250514` |
121-
| `OPENAI_API_KEY` | OpenAI API key (if using openai provider) ||
122-
| `FELDERA_COMPILER` | Path to sql-to-dbsp compiler (can also be set with `--compiler`) | `<repo-root>/sql-to-dbsp-compiler/SQL-compiler/sql-to-dbsp` |
121+
| `FELDERIZE_MODEL` | LLM model to use (can also be set with `--model`) | (required, set in `.env`) |
122+
| `FELDERA_COMPILER` | Path to sql-to-dbsp compiler (can also be set with `--compiler`) | (required for validation) |
123123

124124
## How it works
125125

python/felderize/spark/cli.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def cli():
2121
@click.argument("query_file", type=click.Path(exists=True))
2222
@click.option("--validate", is_flag=True, help="Validate against Feldera instance")
2323
@click.option("--compiler", type=click.Path(), help="Path to Feldera compiler binary")
24+
@click.option("--model", help="LLM model to use (overrides FELDERIZE_MODEL env var)")
2425
@click.option("--json-output", is_flag=True, help="Output as JSON")
2526
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
2627
@click.option(
@@ -31,6 +32,7 @@ def translate(
3132
query_file: str,
3233
validate: bool,
3334
compiler: str | None,
35+
model: str | None,
3436
json_output: bool,
3537
no_docs: bool,
3638
verbose: bool,
@@ -44,6 +46,8 @@ def translate(
4446
config = Config.from_env()
4547
if compiler:
4648
config.feldera_compiler = compiler
49+
if model:
50+
config.model = model
4751
schema_sql = Path(schema_file).read_text()
4852
query_sql = Path(query_file).read_text()
4953

@@ -66,6 +70,7 @@ def translate(
6670
@click.argument("sql_file", type=click.Path(exists=True))
6771
@click.option("--validate", is_flag=True, help="Validate against Feldera instance")
6872
@click.option("--compiler", type=click.Path(), help="Path to Feldera compiler binary")
73+
@click.option("--model", help="LLM model to use (overrides FELDERIZE_MODEL env var)")
6974
@click.option("--json-output", is_flag=True, help="Output as JSON")
7075
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
7176
@click.option(
@@ -75,6 +80,7 @@ def translate_file(
7580
sql_file: str,
7681
validate: bool,
7782
compiler: str | None,
83+
model: str | None,
7884
json_output: bool,
7985
no_docs: bool,
8086
verbose: bool,
@@ -88,6 +94,8 @@ def translate_file(
8894
config = Config.from_env()
8995
if compiler:
9096
config.feldera_compiler = compiler
97+
if model:
98+
config.model = model
9199
combined_sql = Path(sql_file).read_text()
92100
schema_sql, query_sql = split_combined_sql(combined_sql)
93101

@@ -176,6 +184,7 @@ def batch(data_dir: str, validate: bool, output_dir: str | None, no_docs: bool):
176184
help="Validate against Feldera instance (default: on)",
177185
)
178186
@click.option("--compiler", type=click.Path(), help="Path to Feldera compiler binary")
187+
@click.option("--model", help="LLM model to use (overrides FELDERIZE_MODEL env var)")
179188
@click.option("--json-output", is_flag=True, help="Output as JSON")
180189
@click.option("--no-docs", is_flag=True, help="Disable Feldera doc inclusion in prompt")
181190
@click.option(
@@ -185,6 +194,7 @@ def example(
185194
name: str | None,
186195
validate: bool,
187196
compiler: str | None,
197+
model: str | None,
188198
json_output: bool,
189199
no_docs: bool,
190200
verbose: bool,
@@ -245,6 +255,8 @@ def example(
245255
config = Config.from_env()
246256
if compiler:
247257
config.feldera_compiler = compiler
258+
if model:
259+
config.model = model
248260
result = translate_spark_to_feldera(
249261
schema_sql,
250262
query_sql,

python/felderize/spark/config.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
@dataclass
1111
class Config:
12-
llm_provider: str = "anthropic"
1312
model: str = ""
1413
api_key: str = ""
1514
feldera_compiler: str = ""
@@ -20,18 +19,8 @@ def from_env(cls) -> Config:
2019
env_path = Path(__file__).resolve().parent.parent / ".env"
2120
load_dotenv(env_path)
2221

23-
provider = os.environ.get("FELDERIZE_LLM_PROVIDER", "anthropic")
24-
25-
if provider == "openai":
26-
default_model = "gpt-4o"
27-
api_key = os.environ.get("OPENAI_API_KEY", "")
28-
else:
29-
default_model = "claude-sonnet-4-20250514"
30-
api_key = os.environ.get("ANTHROPIC_API_KEY", "")
31-
3222
return cls(
33-
llm_provider=provider,
34-
model=os.environ.get("FELDERIZE_MODEL", default_model),
35-
api_key=api_key,
23+
model=os.environ.get("FELDERIZE_MODEL", ""),
24+
api_key=os.environ.get("ANTHROPIC_API_KEY", ""),
3625
feldera_compiler=os.environ.get("FELDERA_COMPILER", ""),
3726
)

python/felderize/spark/llm.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,25 +28,5 @@ def translate(self, system_prompt: str, user_prompt: str) -> str:
2828
return response.content[0].text
2929

3030

31-
class OpenAIClient(LLMClient):
32-
def __init__(self, config: Config):
33-
import openai
34-
35-
self.client = openai.OpenAI(api_key=config.api_key)
36-
self.model = config.model
37-
38-
def translate(self, system_prompt: str, user_prompt: str) -> str:
39-
response = self.client.chat.completions.create(
40-
model=self.model,
41-
messages=[
42-
{"role": "system", "content": system_prompt},
43-
{"role": "user", "content": user_prompt},
44-
],
45-
)
46-
return response.choices[0].message.content or ""
47-
48-
4931
def create_client(config: Config) -> LLMClient:
50-
if config.llm_provider == "openai":
51-
return OpenAIClient(config)
5232
return AnthropicClient(config)

0 commit comments

Comments (0)