From 81827f01ae8645adc4639e13a5d52881543e8a93 Mon Sep 17 00:00:00 2001 From: Nghi Bui Date: Fri, 3 Apr 2026 23:05:08 -0700 Subject: [PATCH 1/2] Add Azure OpenAI support (#49) Add azure-openai as a new provider option, using the AzureOpenAI client from the openai package. Users can configure via --provider azure-openai with --azure-deployment and --api-version options. Co-Authored-By: Claude Opus 4.6 (1M context) --- codewiki/cli/commands/config.py | 54 +++++++++++++++++++++++++++++---- codewiki/cli/config_manager.py | 12 ++++++-- codewiki/cli/models/config.py | 12 +++++++- codewiki/src/be/llm_services.py | 37 +++++++++++++++++++++- codewiki/src/config.py | 12 ++++++-- 5 files changed, 115 insertions(+), 12 deletions(-) diff --git a/codewiki/cli/commands/config.py b/codewiki/cli/commands/config.py index 5ef6d99c..63df5609 100644 --- a/codewiki/cli/commands/config.py +++ b/codewiki/cli/commands/config.py @@ -85,7 +85,7 @@ def config_group(): ) @click.option( "--provider", - type=click.Choice(['openai-compatible', 'anthropic', 'bedrock'], case_sensitive=False), + type=click.Choice(['openai-compatible', 'anthropic', 'bedrock', 'azure-openai'], case_sensitive=False), help="LLM provider type (default: openai-compatible)" ) @click.option( @@ -93,6 +93,16 @@ def config_group(): type=str, help="AWS region for Bedrock provider (default: us-east-1)" ) +@click.option( + "--api-version", + type=str, + help="Azure OpenAI API version (default: 2024-12-01-preview)" +) +@click.option( + "--azure-deployment", + type=str, + help="Azure OpenAI deployment name" +) def config_set( api_key: Optional[str], base_url: Optional[str], @@ -104,7 +114,9 @@ def config_set( max_token_per_leaf_module: Optional[int], max_depth: Optional[int], provider: Optional[str] = None, - aws_region: Optional[str] = None + aws_region: Optional[str] = None, + api_version: Optional[str] = None, + azure_deployment: Optional[str] = None ): """ Set configuration values for CodeWiki. @@ -139,7 +151,7 @@ def config_set( """ try: # Check if at least one option is provided - if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module, max_depth, provider, aws_region]): + if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module, max_depth, provider, aws_region, api_version, azure_deployment]): click.echo("No options provided. Use --help for usage information.") sys.exit(EXIT_CONFIG_ERROR) @@ -187,6 +199,12 @@ def config_set( if aws_region is not None: validated_data['aws_region'] = aws_region + if api_version is not None: + validated_data['api_version'] = api_version + + if azure_deployment is not None: + validated_data['azure_deployment'] = azure_deployment + # Create config manager and save manager = ConfigManager() manager.load() # Load existing config if present @@ -202,7 +220,9 @@ def config_set( max_token_per_leaf_module=validated_data.get('max_token_per_leaf_module'), max_depth=validated_data.get('max_depth'), provider=validated_data.get('provider'), - aws_region=validated_data.get('aws_region') + aws_region=validated_data.get('aws_region'), + api_version=validated_data.get('api_version'), + azure_deployment=validated_data.get('azure_deployment') ) # Display success messages @@ -256,7 +276,13 @@ def config_set( if aws_region: click.secho(f"✓ AWS Region: {aws_region}", fg="green") - + + if api_version: + click.secho(f"✓ API Version: {api_version}", fg="green") + + if azure_deployment: + click.secho(f"✓ Azure Deployment: {azure_deployment}", fg="green") + click.echo("\n" + click.style("Configuration updated successfully.", fg="green", bold=True)) except ConfigurationError as e: @@ -342,6 +368,12 @@ def config_show(output_json: bool): click.echo(f" Main Model: {config.main_model or 'Not set'}") click.echo(f" Cluster Model: {config.cluster_model or 'Not set'}") click.echo(f" Fallback Model: {config.fallback_model or 'Not set'}") + click.echo(f" Provider: {config.provider}") + if config.provider == "bedrock": + click.echo(f" AWS Region: {config.aws_region}") + elif config.provider == "azure-openai": + click.echo(f" API Version: {config.api_version}") + click.echo(f" Azure Deployment: {config.azure_deployment or 'Not set'}") else: click.secho(" Not configured", fg="yellow") @@ -523,7 +555,17 @@ def config_validate(quick: bool, verbose: bool): try: base_url_lower = (config.base_url or "").lower() - if "api.anthropic.com" in base_url_lower: + provider = getattr(config, 'provider', 'openai-compatible') + if provider == "azure-openai" or ".openai.azure.com" in base_url_lower: + # Use Azure OpenAI SDK + from openai import AzureOpenAI + client = AzureOpenAI( + api_key=api_key, + api_version=config.api_version, + azure_endpoint=config.base_url, + ) + client.models.list() + elif "api.anthropic.com" in base_url_lower: # Use Anthropic SDK for native Anthropic endpoints import anthropic client = anthropic.Anthropic(api_key=api_key) diff --git a/codewiki/cli/config_manager.py b/codewiki/cli/config_manager.py index a652e405..a87df025 100644 --- a/codewiki/cli/config_manager.py +++ b/codewiki/cli/config_manager.py @@ -133,7 +133,9 @@ def save( max_token_per_leaf_module: Optional[int] = None, max_depth: Optional[int] = None, provider: Optional[str] = None, - aws_region: Optional[str] = None + aws_region: Optional[str] = None, + api_version: Optional[str] = None, + azure_deployment: Optional[str] = None ): """ Save configuration to file and keyring. @@ -149,8 +151,10 @@ def save( max_token_per_module: Maximum tokens per module for clustering max_token_per_leaf_module: Maximum tokens per leaf module max_depth: Maximum depth for hierarchical decomposition - provider: LLM provider type (openai-compatible, anthropic, bedrock) + provider: LLM provider type (openai-compatible, anthropic, bedrock, azure-openai) aws_region: AWS region for Bedrock provider + api_version: Azure OpenAI API version + azure_deployment: Azure OpenAI deployment name """ # Ensure config directory exists try: @@ -196,6 +200,10 @@ def save( self._config.provider = provider if aws_region is not None: self._config.aws_region = aws_region + if api_version is not None: + self._config.api_version = api_version + if azure_deployment is not None: + self._config.azure_deployment = azure_deployment # Validate configuration (only if base fields are set) if self._config.base_url and self._config.main_model and self._config.cluster_model: diff --git a/codewiki/cli/models/config.py b/codewiki/cli/models/config.py index 8bfe9152..3f9e0499 100644 --- a/codewiki/cli/models/config.py +++ b/codewiki/cli/models/config.py @@ -113,8 +113,10 @@ class Configuration: cluster_model: Model for module clustering fallback_model: Fallback model for documentation generation default_output: Default output directory - provider: LLM provider type (openai-compatible, anthropic, bedrock) + provider: LLM provider type (openai-compatible, anthropic, bedrock, azure-openai) aws_region: AWS region for Bedrock provider + api_version: Azure OpenAI API version + azure_deployment: Azure OpenAI deployment name max_tokens: Maximum tokens for LLM response (default: 32768) max_token_per_module: Maximum tokens per module for clustering (default: 36369) max_token_per_leaf_module: Maximum tokens per leaf module (default: 16000) @@ -128,6 +130,8 @@ class Configuration: default_output: str = "docs" provider: str = "openai-compatible" aws_region: str = "us-east-1" + api_version: str = "2024-12-01-preview" + azure_deployment: str = "" max_tokens: int = 32768 max_token_per_module: int = 36369 max_token_per_leaf_module: int = 16000 @@ -155,6 +159,8 @@ def to_dict(self) -> dict: 'default_output': self.default_output, 'provider': self.provider, 'aws_region': self.aws_region, + 'api_version': self.api_version, + 'azure_deployment': self.azure_deployment, 'max_tokens': self.max_tokens, 'max_token_per_module': self.max_token_per_module, 'max_token_per_leaf_module': self.max_token_per_leaf_module, @@ -187,6 +193,8 @@ def from_dict(cls, data: dict) -> 'Configuration': default_output=data.get('default_output', 'docs'), provider=data.get('provider', 'openai-compatible'), aws_region=data.get('aws_region', 'us-east-1'), + api_version=data.get('api_version', '2024-12-01-preview'), + azure_deployment=data.get('azure_deployment', ''), max_tokens=data.get('max_tokens', 32768), max_token_per_module=data.get('max_token_per_module', 36369), max_token_per_leaf_module=data.get('max_token_per_leaf_module', 16000), @@ -243,6 +251,8 @@ def to_backend_config(self, repo_path: str, output_dir: str, api_key: str, runti fallback_model=self.fallback_model, provider=self.provider, aws_region=self.aws_region, + api_version=self.api_version, + azure_deployment=self.azure_deployment, max_tokens=self.max_tokens, max_token_per_module=self.max_token_per_module, max_token_per_leaf_module=self.max_token_per_leaf_module, diff --git a/codewiki/src/be/llm_services.py b/codewiki/src/be/llm_services.py index 0b03a54c..db3437a7 100644 --- a/codewiki/src/be/llm_services.py +++ b/codewiki/src/be/llm_services.py @@ -4,7 +4,7 @@ Includes a compatibility layer for OpenAI-compatible API proxies that may return slightly non-standard responses (e.g. choices[].index = None). -Supports multiple providers: openai-compatible, anthropic, bedrock. +Supports multiple providers: openai-compatible, anthropic, bedrock, azure-openai. """ import logging from openai.types import chat @@ -174,6 +174,9 @@ def call_llm( if provider in ("bedrock", "anthropic"): return _call_llm_via_litellm(prompt, config, model, temperature) + if provider == "azure-openai": + return _call_llm_via_azure(prompt, config, model, temperature) + # Default: OpenAI-compatible client = create_openai_client(config) @@ -225,3 +228,35 @@ def _call_llm_via_litellm( api_key=config.llm_api_key if config.provider != "bedrock" else None, ) return response.choices[0].message.content + + +def _call_llm_via_azure( + prompt: str, + config: Config, + model: str, + temperature: float = 0.0 +) -> str: + """ + Call LLM via Azure OpenAI. + + Uses the AzureOpenAI client from the openai package with + azure_endpoint, api_version, and deployment name. + """ + from openai import AzureOpenAI + + client = AzureOpenAI( + api_key=config.llm_api_key, + api_version=config.api_version, + azure_endpoint=config.llm_base_url, + ) + + deployment = config.azure_deployment or model + logger.debug("Calling Azure OpenAI deployment %s (api_version=%s)", deployment, config.api_version) + + response = client.chat.completions.create( + model=deployment, + messages=[{"role": "user", "content": prompt}], + temperature=temperature, + max_tokens=config.max_tokens, + ) + return response.choices[0].message.content diff --git a/codewiki/src/config.py b/codewiki/src/config.py index 42757788..120ac2bd 100644 --- a/codewiki/src/config.py +++ b/codewiki/src/config.py @@ -58,8 +58,10 @@ class Config: cluster_model: str fallback_model: str = FALLBACK_MODEL_1 # Provider configuration - provider: str = "openai-compatible" # openai-compatible, anthropic, bedrock + provider: str = "openai-compatible" # openai-compatible, anthropic, bedrock, azure-openai aws_region: str = "us-east-1" + api_version: str = "2024-12-01-preview" # Azure OpenAI API version + azure_deployment: str = "" # Azure OpenAI deployment name # Max token settings max_tokens: int = DEFAULT_MAX_TOKENS max_token_per_module: int = DEFAULT_MAX_TOKEN_PER_MODULE @@ -160,6 +162,8 @@ def from_cli( fallback_model: str = FALLBACK_MODEL_1, provider: str = "openai-compatible", aws_region: str = "us-east-1", + api_version: str = "2024-12-01-preview", + azure_deployment: str = "", max_tokens: int = DEFAULT_MAX_TOKENS, max_token_per_module: int = DEFAULT_MAX_TOKEN_PER_MODULE, max_token_per_leaf_module: int = DEFAULT_MAX_TOKEN_PER_LEAF_MODULE, @@ -177,8 +181,10 @@ def from_cli( main_model: Primary model cluster_model: Clustering model fallback_model: Fallback model - provider: LLM provider type (openai-compatible, anthropic, bedrock) + provider: LLM provider type (openai-compatible, anthropic, bedrock, azure-openai) aws_region: AWS region for Bedrock provider + api_version: Azure OpenAI API version + azure_deployment: Azure OpenAI deployment name max_tokens: Maximum tokens for LLM response max_token_per_module: Maximum tokens per module for clustering max_token_per_leaf_module: Maximum tokens per leaf module @@ -204,6 +210,8 @@ def from_cli( fallback_model=fallback_model, provider=provider, aws_region=aws_region, + api_version=api_version, + azure_deployment=azure_deployment, max_tokens=max_tokens, max_token_per_module=max_token_per_module, max_token_per_leaf_module=max_token_per_leaf_module, From 738e0c4d0e7e3674915372e67e7dc0779d0bf3e8 Mon Sep 17 00:00:00 2001 From: Nghi Bui Date: Fri, 3 Apr 2026 23:09:11 -0700 Subject: [PATCH 2/2] Update README with Azure OpenAI, Bedrock, incremental updates, and MCP server Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 951812bb..60b82e65 100644 --- a/README.md +++ b/README.md @@ -42,15 +42,32 @@ codewiki --version ### 2. Configure Your Environment -CodeWiki supports multiple models via an OpenAI-compatible SDK layer. +CodeWiki supports multiple LLM providers: **OpenAI-compatible**, **Anthropic**, **AWS Bedrock**, and **Azure OpenAI**. ```bash +# Anthropic codewiki config set \ --api-key YOUR_API_KEY \ --base-url https://api.anthropic.com \ --main-model claude-sonnet-4 \ --cluster-model claude-sonnet-4 \ --fallback-model glm-4p5 + +# Azure OpenAI +codewiki config set \ + --provider azure-openai \ + --api-key YOUR_AZURE_KEY \ + --base-url https://YOUR_RESOURCE.openai.azure.com \ + --azure-deployment YOUR_DEPLOYMENT \ + --main-model gpt-4o \ + --cluster-model gpt-4o + +# AWS Bedrock +codewiki config set \ + --provider bedrock \ + --aws-region us-east-1 \ + --main-model anthropic.claude-sonnet-4-v2:0 \ + --cluster-model anthropic.claude-sonnet-4-v2:0 ``` ### 3. Generate Documentation @@ -138,6 +155,9 @@ codewiki generate --verbose # Full-featured generation codewiki generate --create-branch --github-pages --verbose + +# Incremental update (only regenerate changed modules since last run) +codewiki generate --update ``` ### Customization Options @@ -235,7 +255,7 @@ codewiki generate --max-tokens 16384 --max-token-per-module 40000 --max-depth 3 ### Configuration Storage -- **API keys**: Securely stored in system keychain (macOS Keychain, Windows Credential Manager, Linux Secret Service) +- **API keys**: Securely stored in system keychain (macOS Keychain, Windows Credential Manager, Linux Secret Service). Falls back to `~/.codewiki/credentials.json` in headless/container environments. Set `CODEWIKI_NO_KEYRING=1` to force file-based storage. - **Settings & Agent Instructions**: `~/.codewiki/config.json` --- @@ -331,7 +351,7 @@ CodeWiki employs a three-stage process for comprehensive documentation generatio - **Python 3.12+** - **Node.js** (for Mermaid diagram validation) -- **LLM API access** (Anthropic Claude, OpenAI, etc.) +- **LLM API access** (Anthropic Claude, OpenAI, Azure OpenAI, AWS Bedrock) - **Git** (for branch creation features) --- @@ -339,6 +359,7 @@ CodeWiki employs a three-stage process for comprehensive documentation generatio ## Additional Resources ### Documentation & Guides +- **[MCP Server](codewiki/mcp/)** - Model Context Protocol server for IDE integrations - **[Docker Deployment](docker/DOCKER_README.md)** - Containerized deployment instructions - **[Development Guide](DEVELOPMENT.md)** - Project structure, architecture, and contributing guidelines - **[CodeWikiBench](https://github.com/FSoft-AI4Code/CodeWikiBench)** - Repository-level documentation benchmark