From e9387e4360fba9cb06888897e526ffa12e4d645d Mon Sep 17 00:00:00 2001 From: Pascal Pothmann Date: Fri, 27 Feb 2026 11:02:03 +0100 Subject: [PATCH 1/9] Add Kotlin support to CodeWiki documentation and analysis tools --- README.md | 4 +- codewiki/cli/utils/repo_validator.py | 2 + codewiki/cli/utils/validation.py | 1 + .../analysis/analysis_service.py | 3 +- .../analysis/call_graph_analyzer.py | 25 + .../dependency_analyzer/analyzers/kotlin.py | 505 ++++++++++++++++++ .../src/be/dependency_analyzer/ast_parser.py | 2 +- .../be/dependency_analyzer/utils/patterns.py | 6 +- codewiki/src/be/prompt_template.py | 2 + pyproject.toml | 1 + requirements.txt | 1 + 11 files changed, 546 insertions(+), 6 deletions(-) create mode 100644 codewiki/src/be/dependency_analyzer/analyzers/kotlin.py diff --git a/README.md b/README.md index ce047407..951812bb 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ codewiki generate --github-pages --create-branch ## What is CodeWiki? -CodeWiki is an open-source framework for **automated repository-level documentation** across seven programming languages. It generates holistic, architecture-aware documentation that captures not only individual functions but also their cross-file, cross-module, and system-level interactions. +CodeWiki is an open-source framework for **automated repository-level documentation** across eight programming languages. It generates holistic, architecture-aware documentation that captures not only individual functions but also their cross-file, cross-module, and system-level interactions. ### Key Innovations @@ -88,7 +88,7 @@ CodeWiki is an open-source framework for **automated repository-level documentat ### Supported Languages -**🐍 Python** • **☕ Java** • **🟨 JavaScript** • **🔷 TypeScript** • **⚙️ C** • **🔧 C++** • **🪟 C#** +**🐍 Python** • **☕ Java** • **🟨 JavaScript** • **🔷 TypeScript** • **⚙️ C** • **🔧 C++** • **🪟 C#** • **🎯 Kotlin** --- diff --git a/codewiki/cli/utils/repo_validator.py b/codewiki/cli/utils/repo_validator.py index 3e17d031..608e6037 100644 --- a/codewiki/cli/utils/repo_validator.py +++ b/codewiki/cli/utils/repo_validator.py @@ -30,6 +30,8 @@ '.php', # PHP '.phtml', # PHP templates '.inc', # PHP includes + '.kt', # Kotlin + '.kts', # Kotlin Scripts } diff --git a/codewiki/cli/utils/validation.py b/codewiki/cli/utils/validation.py index 12cb5454..9711ba33 100644 --- a/codewiki/cli/utils/validation.py +++ b/codewiki/cli/utils/validation.py @@ -172,6 +172,7 @@ def detect_supported_languages(directory: Path) -> List[Tuple[str, int]]: 'C++': ['.cpp', '.hpp', '.cc', '.hh', '.cxx', '.hxx'], 'C#': ['.cs'], 'PHP': ['.php', '.phtml', '.inc'], + 'Kotlin': ['.kt', '.kts'], } # Directories to exclude from counting diff --git a/codewiki/src/be/dependency_analyzer/analysis/analysis_service.py b/codewiki/src/be/dependency_analyzer/analysis/analysis_service.py index aa3ba471..c9cf5bb6 100644 --- a/codewiki/src/be/dependency_analyzer/analysis/analysis_service.py +++ b/codewiki/src/be/dependency_analyzer/analysis/analysis_service.py @@ -310,6 +310,7 @@ def _filter_supported_languages(self, code_files: List[Dict]) -> List[Dict]: "php", "go", "rust", + "kotlin", } return [ @@ -320,7 +321,7 @@ def _filter_supported_languages(self, code_files: List[Dict]) -> List[Dict]: def _get_supported_languages(self) -> List[str]: """Get list of currently supported languages for analysis.""" - return ["python", "javascript", "typescript", "java", "csharp", "c", "cpp", "php"] + return ["python", "javascript", "typescript", "java", "csharp", "c", "cpp", 
"php", "kotlin"] def _cleanup_repository(self, temp_dir: str): """Clean up cloned repository.""" diff --git a/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py b/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py index 7175cd9b..da825fd4 100644 --- a/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py +++ b/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py @@ -126,6 +126,8 @@ def _analyze_code_file(self, repo_dir: str, file_info: Dict): self._analyze_typescript_file(file_path, content, repo_dir) elif language == "java": self._analyze_java_file(file_path, content, repo_dir) + elif language == "kotlin": + self._analyze_kotlin_file(file_path, content, repo_dir) elif language == "csharp": self._analyze_csharp_file(file_path, content, repo_dir) elif language == "c": @@ -280,6 +282,27 @@ def _analyze_java_file(self, file_path: str, content: str, repo_dir: str): except Exception as e: logger.error(f"Failed to analyze Java file {file_path}: {e}", exc_info=True) + def _analyze_kotlin_file(self, file_path: str, content: str, repo_dir: str): + """ + Analyze Kotlin file using tree-sitter based analyzer. + + Args: + file_path: Relative path to the Kotlin file + content: File content string + repo_dir: Repository base directory + """ + from codewiki.src.be.dependency_analyzer.analyzers.kotlin import analyze_kotlin_file + + try: + functions, relationships = analyze_kotlin_file(file_path, content, repo_path=repo_dir) + for func in functions: + func_id = func.id if func.id else f"{file_path}:{func.name}" + self.functions[func_id] = func + + self.call_relationships.extend(relationships) + except Exception as e: + logger.error(f"Failed to analyze Kotlin file {file_path}: {e}", exc_info=True) + def _analyze_csharp_file(self, file_path: str, content: str, repo_dir: str): """ Analyze C# file using tree-sitter based analyzer. 
@@ -408,6 +431,8 @@ def _generate_visualization_data(self) -> Dict: node_classes.append("lang-c") elif file_ext in [".cpp", ".cc", ".cxx", ".hpp", ".hxx"]: node_classes.append("lang-cpp") + elif file_ext in [".kt", ".kts"]: + node_classes.append("lang-kotlin") elif file_ext in [".php", ".phtml", ".inc"]: node_classes.append("lang-php") diff --git a/codewiki/src/be/dependency_analyzer/analyzers/kotlin.py b/codewiki/src/be/dependency_analyzer/analyzers/kotlin.py new file mode 100644 index 00000000..d56f220c --- /dev/null +++ b/codewiki/src/be/dependency_analyzer/analyzers/kotlin.py @@ -0,0 +1,505 @@ +import logging +from typing import List, Optional, Tuple +from pathlib import Path +import sys +import os + +from tree_sitter import Parser, Language +import tree_sitter_kotlin +from codewiki.src.be.dependency_analyzer.models.core import Node, CallRelationship + +logger = logging.getLogger(__name__) + +class TreeSitterKotlinAnalyzer: + def __init__(self, file_path: str, content: str, repo_path: Optional[str] = None): + self.file_path = Path(file_path) + self.content = content + self.repo_path = repo_path or "" + self.nodes: List[Node] = [] + self.call_relationships: List[CallRelationship] = [] + self._analyze() + + def _get_module_path(self) -> str: + if self.repo_path: + try: + rel_path = os.path.relpath(str(self.file_path), self.repo_path) + except ValueError: + rel_path = str(self.file_path) + else: + rel_path = str(self.file_path) + + for ext in ['.kt', '.kts']: + if rel_path.endswith(ext): + rel_path = rel_path[:-len(ext)] + break + return rel_path.replace('/', '.').replace('\\', '.') + + def _get_relative_path(self) -> str: + """Get relative path from repo root.""" + if self.repo_path: + try: + return os.path.relpath(str(self.file_path), self.repo_path) + except ValueError: + return str(self.file_path) + else: + return str(self.file_path) + + def _get_component_id(self, name: str, parent_class: Optional[str] = None) -> str: + module_path = self._get_module_path() + if parent_class: + return f"{module_path}.{parent_class}.{name}" + else: + return f"{module_path}.{name}" + + def _analyze(self): + try: + language_capsule = tree_sitter_kotlin.language() + kotlin_language = Language(language_capsule) + parser = Parser(kotlin_language) + tree = parser.parse(bytes(self.content, "utf8")) + root = tree.root_node + lines = self.content.splitlines() + + top_level_nodes = {} + + self._extract_nodes(root, top_level_nodes, lines) + self._extract_relationships(root, top_level_nodes) + except Exception as e: + logger.error(f"Error parsing Kotlin file {self.file_path}: {e}") + + def _extract_nodes(self, node, top_level_nodes, lines): + node_type = None + node_name = None + + if node.type == "class_declaration": + is_interface = any(c.type == "interface" for c in node.children) + + if is_interface: + node_type = "interface" + else: + modifiers = self._get_class_modifiers(node) + if "abstract" in modifiers: + node_type = "abstract class" + elif "data" in modifiers: + node_type = "data class" + elif "enum" in modifiers: + node_type = "enum class" + elif "annotation" in modifiers: + node_type = "annotation class" + else: + node_type = "class" + + name_node = next((c for c in node.children if c.type == "identifier"), None) + node_name = name_node.text.decode() if name_node else None + + elif node.type == "object_declaration": + node_type = "object" + name_node = next((c for c in node.children if c.type == "identifier"), None) + node_name = name_node.text.decode() if name_node else None + + elif node.type == 
"function_declaration": + name_node = next((c for c in node.children if c.type == "identifier"), None) + if name_node: + method_name = name_node.text.decode() + containing_class = self._find_containing_class_name(node) + if containing_class: + node_type = "method" + node_name = f"{containing_class}.{method_name}" + else: + node_type = "function" + node_name = method_name + + if node_type and node_name: + component_id = self._get_component_id(node_name) + relative_path = self._get_relative_path() + + # Extract docstring if present + docstring = "" + if node.prev_sibling and hasattr(node.prev_sibling, "type"): + if node.prev_sibling.type in ("line_comment", "block_comment"): + docstring = node.prev_sibling.text.decode().strip() + + # Safely extract code lines + start_line_idx = node.start_point[0] + end_line_idx = node.end_point[0] + 1 + code_snippet = "\n".join(lines[start_line_idx:end_line_idx]) if start_line_idx < len(lines) else "" + + node_obj = Node( + id=component_id, + name=node_name, + component_type=node_type, + file_path=str(self.file_path), + relative_path=relative_path, + source_code=code_snippet, + start_line=node.start_point[0]+1, + end_line=node.end_point[0]+1, + has_docstring=bool(docstring), + docstring=docstring, + parameters=None, + node_type=node_type, + base_classes=None, + class_name=None, + display_name=f"{node_type} {node_name}", + component_id=component_id + ) + self.nodes.append(node_obj) + top_level_nodes[node_name] = node_obj + + for child in node.children: + self._extract_nodes(child, top_level_nodes, lines) + + def _get_class_modifiers(self, class_node) -> set: + """Extract class modifiers (abstract, data, enum, annotation, etc.).""" + modifiers = set() + modifiers_node = next((c for c in class_node.children if c.type == "modifiers"), None) + if modifiers_node: + for mod in modifiers_node.children: + if mod.type in ("class_modifier", "inheritance_modifier", "visibility_modifier"): + for inner in mod.children: + modifiers.add(inner.type) + return modifiers + + def _extract_relationships(self, node, top_level_nodes): + # 1. Inheritance and Interface Implementation via delegation_specifiers + if node.type == "class_declaration": + class_name = self._get_identifier_name(node) + delegation_specifiers = next( + (c for c in node.children if c.type == "delegation_specifiers"), None + ) + if delegation_specifiers and class_name: + for spec in delegation_specifiers.children: + if spec.type == "delegation_specifier": + for child in spec.children: + type_name = None + if child.type == "constructor_invocation": + user_type = next( + (c for c in child.children if c.type == "user_type"), None + ) + if user_type: + type_name = self._get_type_name(user_type) + elif child.type == "user_type": + type_name = self._get_type_name(child) + + if type_name and not self._is_primitive_type(type_name): + caller_id = self._get_component_id(class_name) + callee_id = self._get_component_id(type_name) + self.call_relationships.append(CallRelationship( + caller=caller_id, + callee=callee_id, + call_line=node.start_point[0]+1, + is_resolved=False + )) + + # 2. 
Property Type Use (field types) + if node.type == "property_declaration": + containing_class = self._find_containing_class(node, top_level_nodes) + var_decl = next((c for c in node.children if c.type == "variable_declaration"), None) + if containing_class and var_decl: + type_node = next( + (c for c in var_decl.children if c.type == "user_type"), None + ) + if type_node: + prop_type_name = self._get_type_name(type_node) + if prop_type_name and not self._is_primitive_type(prop_type_name): + self.call_relationships.append(CallRelationship( + caller=containing_class, + callee=prop_type_name, + call_line=node.start_point[0]+1, + is_resolved=False + )) + + # 3. Constructor parameter type use + if node.type == "class_parameter": + containing_class_node = node.parent + while containing_class_node and containing_class_node.type != "class_declaration": + containing_class_node = containing_class_node.parent + if containing_class_node: + class_name = self._get_identifier_name(containing_class_node) + if class_name and class_name in top_level_nodes: + type_node = next( + (c for c in node.children if c.type == "user_type"), None + ) + if type_node: + param_type = self._get_type_name(type_node) + if param_type and not self._is_primitive_type(param_type): + caller_id = self._get_component_id(class_name) + self.call_relationships.append(CallRelationship( + caller=caller_id, + callee=param_type, + call_line=node.start_point[0]+1, + is_resolved=False + )) + + # 4. Method Calls / Function invocations + if node.type == "call_expression": + caller_id = self._find_containing_method(node) or self._find_containing_class(node, top_level_nodes) + + target_expr = next( + (c for c in node.children if c.type in ["identifier", "navigation_expression"]), None + ) + + if target_expr and caller_id: + if target_expr.type == "identifier": + callee_name = target_expr.text.decode() + if callee_name and callee_name[0].isupper() and not self._is_primitive_type(callee_name): + callee_id = self._get_component_id(callee_name) + self.call_relationships.append(CallRelationship( + caller=caller_id, + callee=callee_id, + call_line=node.start_point[0]+1, + is_resolved=False + )) + elif callee_name and not self._is_primitive_type(callee_name): + self.call_relationships.append(CallRelationship( + caller=caller_id, + callee=callee_name, + call_line=node.start_point[0]+1, + is_resolved=False + )) + + elif target_expr.type == "navigation_expression": + children = list(target_expr.children) + object_node = next( + (c for c in children if c.type == "identifier"), None + ) + method_node = None + identifiers = [c for c in children if c.type == "identifier"] + if len(identifiers) >= 2: + object_node = identifiers[0] + method_node = identifiers[-1] + elif len(identifiers) == 1: + method_node = identifiers[0] + nav_child = next( + (c for c in children if c.type == "navigation_expression"), None + ) + if nav_child: + object_node = self._get_root_identifier(nav_child) + else: + object_node = None + + if object_node and method_node: + object_name = object_node.text.decode() if hasattr(object_node, 'text') else str(object_node) + method_name = method_node.text.decode() + + target_type = None + if object_name in top_level_nodes: + target_type = object_name + else: + target_type = self._find_variable_type(node, object_name, top_level_nodes) + + if target_type and not self._is_primitive_type(target_type): + callee_id = self._get_component_id(target_type) + self.call_relationships.append(CallRelationship( + caller=caller_id, + callee=callee_id, + 
call_line=node.start_point[0]+1, + is_resolved=False + )) + elif method_node and not object_node: + callee_name = method_node.text.decode() + self.call_relationships.append(CallRelationship( + caller=caller_id, + callee=callee_name, + call_line=node.start_point[0]+1, + is_resolved=False + )) + + for child in node.children: + self._extract_relationships(child, top_level_nodes) + + def _is_primitive_type(self, type_name: str) -> bool: + """Check if type is a Kotlin primitive or common built-in type.""" + primitives = { + "Boolean", "Byte", "Char", "Double", "Float", "Int", "Long", "Short", + "String", "Unit", "Nothing", "Any", + "List", "Set", "Map", "Collection", "Iterable", "Sequence", + "MutableList", "MutableSet", "MutableMap", "MutableCollection", + "Array", "IntArray", "LongArray", "FloatArray", "DoubleArray", + "BooleanArray", "ByteArray", "CharArray", "ShortArray", + "Pair", "Triple", + } + return type_name in primitives + + def _get_identifier_name(self, node): + """Get identifier name from a node.""" + name_node = next((c for c in node.children if c.type == "identifier"), None) + return name_node.text.decode() if name_node else None + + def _get_type_name(self, node) -> Optional[str]: + """Get the primary type name from a type node, stripping generics.""" + if node.type == "user_type": + id_node = next((c for c in node.children if c.type == "identifier"), None) + return id_node.text.decode() if id_node else None + elif node.type == "nullable_type": + inner = next((c for c in node.children if c.type == "user_type"), None) + if inner: + return self._get_type_name(inner) + elif node.type == "identifier": + return node.text.decode() + return None + + def _get_root_identifier(self, nav_node): + """Get the root identifier from a chain of navigation_expressions.""" + first_child = nav_node.children[0] if nav_node.children else None + if first_child: + if first_child.type == "identifier": + return first_child + elif first_child.type == "navigation_expression": + return self._get_root_identifier(first_child) + return None + + def _find_containing_class_name(self, node): + """Walk up to find the containing class/object/interface name.""" + current = node.parent + while current: + if current.type in ("class_declaration", "object_declaration"): + name_node = next((c for c in current.children if c.type == "identifier"), None) + if name_node: + return name_node.text.decode() + current = current.parent + return None + + def _find_containing_class(self, node, top_level_nodes): + """Find the component ID of the containing class.""" + class_name = self._find_containing_class_name(node) + if class_name and class_name in top_level_nodes: + return self._get_component_id(class_name) + return None + + def _find_containing_method(self, node): + """Find the component ID of the containing function/method.""" + current = node.parent + while current: + if current.type == "function_declaration": + method_name = self._get_identifier_name(current) + class_name = self._find_containing_class_name(current) + if method_name: + if class_name: + return self._get_component_id(f"{class_name}.{method_name}") + return self._get_component_id(method_name) + current = current.parent + return None + + def _find_variable_type(self, node, variable_name: str, top_level_nodes) -> Optional[str]: + """ + Try to resolve the type of a variable by searching local declarations, + function parameters, constructor parameters, and class properties. 
+ """ + func_node = node.parent + while func_node and func_node.type != "function_declaration": + func_node = func_node.parent + + if func_node: + params_node = next( + (c for c in func_node.children if c.type == "function_value_parameters"), None + ) + if params_node: + for param in params_node.children: + if param.type == "parameter": + param_name_node = next( + (c for c in param.children if c.type == "identifier"), None + ) + if param_name_node and param_name_node.text.decode() == variable_name: + type_node = next( + (c for c in param.children if c.type in ("user_type", "nullable_type")), None + ) + if type_node: + return self._get_type_name(type_node) + + body_node = next( + (c for c in func_node.children if c.type == "function_body"), None + ) + if body_node: + block = next((c for c in body_node.children if c.type == "block"), None) + if block: + result = self._search_variable_declaration(block, variable_name) + if result: + return result + + class_node = node.parent + while class_node and class_node.type not in ("class_declaration", "object_declaration"): + class_node = class_node.parent + + if class_node: + primary_ctor = next( + (c for c in class_node.children if c.type == "primary_constructor"), None + ) + if primary_ctor: + class_params = next( + (c for c in primary_ctor.children if c.type == "class_parameters"), None + ) + if class_params: + for param in class_params.children: + if param.type == "class_parameter": + param_name = next( + (c for c in param.children if c.type == "identifier"), None + ) + if param_name and param_name.text.decode() == variable_name: + type_node = next( + (c for c in param.children if c.type in ("user_type", "nullable_type")), None + ) + if type_node: + return self._get_type_name(type_node) + + class_body = next( + (c for c in class_node.children if c.type in ("class_body", "enum_class_body")), None + ) + if class_body: + for body_child in class_body.children: + if body_child.type == "property_declaration": + var_decl = next( + (c for c in body_child.children if c.type == "variable_declaration"), None + ) + if var_decl: + prop_name = next( + (c for c in var_decl.children if c.type == "identifier"), None + ) + if prop_name and prop_name.text.decode() == variable_name: + type_node = next( + (c for c in var_decl.children if c.type in ("user_type", "nullable_type")), None + ) + if type_node: + return self._get_type_name(type_node) + + return None + + def _search_variable_declaration(self, block_node, variable_name: str) -> Optional[str]: + """Search for a local variable declaration with explicit type annotation in a block.""" + for child in block_node.children: + if child.type == "property_declaration": + var_decl = next( + (c for c in child.children if c.type == "variable_declaration"), None + ) + if var_decl: + name_node = next( + (c for c in var_decl.children if c.type == "identifier"), None + ) + if name_node and name_node.text.decode() == variable_name: + type_node = next( + (c for c in var_decl.children if c.type in ("user_type", "nullable_type")), None + ) + if type_node: + return self._get_type_name(type_node) + + init_expr = next( + (c for c in child.children if c.type == "call_expression"), None + ) + if init_expr: + call_id = next( + (c for c in init_expr.children if c.type == "identifier"), None + ) + if call_id: + inferred = call_id.text.decode() + if inferred and inferred[0].isupper(): + return inferred + + elif child.type == "block": + result = self._search_variable_declaration(child, variable_name) + if result: + return result + + return 
None + +def analyze_kotlin_file(file_path: str, content: str, repo_path: Optional[str] = None) -> Tuple[List[Node], List[CallRelationship]]: + analyzer = TreeSitterKotlinAnalyzer(file_path, content, repo_path) + return analyzer.nodes, analyzer.call_relationships diff --git a/codewiki/src/be/dependency_analyzer/ast_parser.py b/codewiki/src/be/dependency_analyzer/ast_parser.py index 3323ed7a..81ac0bdc 100644 --- a/codewiki/src/be/dependency_analyzer/ast_parser.py +++ b/codewiki/src/be/dependency_analyzer/ast_parser.py @@ -135,7 +135,7 @@ def _determine_component_type(self, func_dict: Dict) -> str: def _file_to_module_path(self, file_path: str) -> str: path = file_path - extensions = ['.py', '.js', '.ts', '.java', '.cs', '.cpp', '.hpp', '.h', '.c', '.tsx', '.jsx', '.cc', '.mjs', '.cxx', '.cc', '.cjs'] + extensions = ['.py', '.js', '.ts', '.java', '.cs', '.cpp', '.hpp', '.h', '.c', '.tsx', '.jsx', '.cc', '.mjs', '.cxx', '.cc', '.cjs', '.kt', '.kts'] for ext in extensions: if path.endswith(ext): path = path[:-len(ext)] diff --git a/codewiki/src/be/dependency_analyzer/utils/patterns.py b/codewiki/src/be/dependency_analyzer/utils/patterns.py index 9fb003f7..1680ed4f 100644 --- a/codewiki/src/be/dependency_analyzer/utils/patterns.py +++ b/codewiki/src/be/dependency_analyzer/utils/patterns.py @@ -5,7 +5,7 @@ and function definitions across multiple programming languages. """ -from typing import List, Dict +from typing import List, Dict, Optional DEFAULT_IGNORE_PATTERNS = { ".github", @@ -156,6 +156,7 @@ "*.rb", "*.swift", "*.kt", + "*.kts", "*.scala", "*.clj", "*.hs", @@ -407,6 +408,7 @@ "c": ["void {name}", "int {name}", "{name}("], "cpp": ["void {name}", "int {name}", "{name}("], "php": ["function {name}", "public function {name}", "private function {name}", "protected function {name}"], + "kotlin": ["fun {name}", "private fun {name}", "public fun {name}", "internal fun {name}", "protected fun {name}"], "general": ["{name}("], # Fallback pattern } @@ -533,7 +535,7 @@ def has_high_connectivity_potential(filename: str, filepath: str) -> bool: return False -def is_critical_function(func_name: str, code_snippet: str = None) -> bool: +def is_critical_function(func_name: str, code_snippet: Optional[str] = None) -> bool: """ Check if a function is critical based on name and code patterns. 
diff --git a/codewiki/src/be/prompt_template.py b/codewiki/src/be/prompt_template.py index d37d5b61..f6da5f8b 100644 --- a/codewiki/src/be/prompt_template.py +++ b/codewiki/src/be/prompt_template.py @@ -235,6 +235,8 @@ ".cjs": "javascript", ".jsx": "javascript", ".cs": "csharp", + ".kt": "kotlin", + ".kts": "kotlin", ".php": "php", ".phtml": "php", ".inc": "php" diff --git a/pyproject.toml b/pyproject.toml index 00c3e01d..8360f6e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "tree-sitter-cpp>=0.23.4", "tree-sitter-c-sharp>=0.23.1", "tree-sitter-php>=0.23.0", + "tree-sitter-kotlin>=1.1.0", "openai>=1.107.0", "litellm>=1.77.0", "pydantic>=2.11.7", diff --git a/requirements.txt b/requirements.txt index bed6e91a..e2dce481 100644 --- a/requirements.txt +++ b/requirements.txt @@ -147,6 +147,7 @@ tree-sitter-cpp==0.23.4 tree-sitter-embedded-template==0.23.2 tree-sitter-java==0.23.5 tree-sitter-javascript==0.21.4 +tree-sitter-kotlin==1.1.0 tree-sitter-language-pack==0.8.0 tree-sitter-python==0.23.6 tree-sitter-typescript==0.21.2 From 28b15fbae02d4a7fe0e8dcea40d25af01cf869ee Mon Sep 17 00:00:00 2001 From: dalyzhou Date: Sun, 15 Mar 2026 21:31:35 +0800 Subject: [PATCH 2/9] fix: add missing runtime dependencies in pyproject.toml Several packages (colorama, fastapi, uvicorn, python-multipart, logfire) are imported in the source code but not declared in pyproject.toml, causing ModuleNotFoundError when installed via pip install. --- pyproject.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8360f6e2..c6eb08a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,12 @@ dependencies = [ "psutil>=7.0.0", "PyYAML>=6.0.2", "mermaid-parser-py>=0.0.2", - "mermaid-py>=0.8.0" + "mermaid-py>=0.8.0", + "fastapi>=0.116.0", + "uvicorn>=0.35.0", + "python-multipart>=0.0.20", + "colorama>=0.4.6", + "logfire>=4.1.0" ] [external] From 8ac96473c7d15a86269fd2c22e1daca02f0b9fab Mon Sep 17 00:00:00 2001 From: dalyzhou Date: Sun, 15 Mar 2026 22:11:28 +0800 Subject: [PATCH 3/9] fix: exclude node_modules from dependency analysis and add progress logging node_modules was missing from DEFAULT_IGNORE_PATTERNS, causing the dependency analyzer to parse all files in node_modules (225k+ files instead of ~600). Also added per-file progress logging and timeout protection to improve observability during long analysis runs. 
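For reference, the per-file timeout added here follows the standard SIGALRM pattern; a minimal sketch of the idea is below (illustrative only — it uses the built-in TimeoutError rather than the module-level one defined in this patch, and the real implementation additionally degrades gracefully on platforms without SIGALRM, such as Windows):

    import signal
    from contextlib import contextmanager

    @contextmanager
    def timeout(seconds):
        def handler(signum, frame):
            raise TimeoutError(f"parsing exceeded {seconds}s")
        old = signal.signal(signal.SIGALRM, handler)  # install alarm handler
        signal.alarm(seconds)                         # schedule SIGALRM
        try:
            yield
        finally:
            signal.alarm(0)                           # cancel any pending alarm
            signal.signal(signal.SIGALRM, old)        # restore previous handler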
--- .../analysis/call_graph_analyzer.py | 118 +++++++++++++----- .../be/dependency_analyzer/utils/patterns.py | 7 +- 2 files changed, 91 insertions(+), 34 deletions(-) diff --git a/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py b/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py index da825fd4..272ca0b6 100644 --- a/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py +++ b/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py @@ -9,7 +9,10 @@ from typing import Dict, List import logging import traceback +import time +import signal from pathlib import Path +from contextlib import contextmanager from codewiki.src.be.dependency_analyzer.models.core import Node, CallRelationship from codewiki.src.be.dependency_analyzer.utils.patterns import CODE_EXTENSIONS from codewiki.src.be.dependency_analyzer.utils.security import safe_open_text @@ -17,6 +20,33 @@ logger = logging.getLogger(__name__) +class TimeoutError(Exception): + """Raised when file parsing exceeds timeout.""" + pass + + +@contextmanager +def timeout(seconds): + """Context manager for timeout on file parsing.""" + def signal_handler(signum, frame): + raise TimeoutError(f"File parsing exceeded {seconds}s timeout") + + # Only use signal on Unix systems (not Windows) + try: + old_handler = signal.signal(signal.SIGALRM, signal_handler) + signal.alarm(seconds) + yield + except AttributeError: + # Windows doesn't support SIGALRM, skip timeout + yield + finally: + try: + signal.alarm(0) + signal.signal(signal.SIGALRM, old_handler) + except (AttributeError, ValueError): + pass + + class CallGraphAnalyzer: def __init__(self): """Initialize the call graph analyzer.""" @@ -35,17 +65,35 @@ def analyze_code_files(self, code_files: List[Dict], base_dir: str) -> Dict: 4. 
Returns all nodes and relationships """ logger.debug(f"Starting analysis of {len(code_files)} files") + logger.info(f"📊 Parsing {len(code_files)} source files (this may take a few minutes)...") self.functions = {} self.call_relationships = [] files_analyzed = 0 - for file_info in code_files: - logger.debug(f"Analyzing: {file_info['path']}") - self._analyze_code_file(base_dir, file_info) - files_analyzed += 1 - logger.debug( - f"Analysis complete: {files_analyzed} files analyzed, {len(self.functions)} functions, {len(self.call_relationships)} relationships" + files_failed = 0 + start_time = time.time() + + for idx, file_info in enumerate(code_files, 1): + file_path = file_info['path'] + try: + # Log progress every file with elapsed time + if idx % max(1, len(code_files) // 10) == 0 or idx <= 5: + elapsed = time.time() - start_time + rate = idx / elapsed if elapsed > 0 else 0 + remaining = (len(code_files) - idx) / rate if rate > 0 else 0 + logger.info(f" [{idx}/{len(code_files)}] {file_path} ({elapsed:.1f}s elapsed, ~{remaining:.1f}s remaining)") + + self._analyze_code_file(base_dir, file_info) + files_analyzed += 1 + except Exception as e: + files_failed += 1 + logger.warning(f" ⚠️ [{idx}/{len(code_files)}] Failed to analyze {file_path}: {str(e)[:100]}") + + elapsed_time = time.time() - start_time + logger.info( + f"✓ Analysis complete: {files_analyzed}/{len(code_files)} files analyzed, " + f"{files_failed} failed, {len(self.functions)} functions, {len(self.call_relationships)} relationships ({elapsed_time:.1f}s)" ) logger.debug("Resolving call relationships") @@ -116,34 +164,38 @@ def _analyze_code_file(self, repo_dir: str, file_info: Dict): file_path = base / file_info["path"] try: - content = safe_open_text(base, file_path) - language = file_info["language"] - if language == "python": - self._analyze_python_file(file_path, content, repo_dir) - elif language == "javascript": - self._analyze_javascript_file(file_path, content, repo_dir) - elif language == "typescript": - self._analyze_typescript_file(file_path, content, repo_dir) - elif language == "java": - self._analyze_java_file(file_path, content, repo_dir) - elif language == "kotlin": - self._analyze_kotlin_file(file_path, content, repo_dir) - elif language == "csharp": - self._analyze_csharp_file(file_path, content, repo_dir) - elif language == "c": - self._analyze_c_file(file_path, content, repo_dir) - elif language == "cpp": - self._analyze_cpp_file(file_path, content, repo_dir) - elif language == "php": - self._analyze_php_file(file_path, content, repo_dir) - # else: - # logger.warning( - # f"Unsupported language for call graph analysis: {language} for file {file_path}" - # ) - + # Add timeout protection (30 seconds per file max) + with timeout(30): + content = safe_open_text(base, file_path) + language = file_info["language"] + if language == "python": + self._analyze_python_file(file_path, content, repo_dir) + elif language == "javascript": + self._analyze_javascript_file(file_path, content, repo_dir) + elif language == "typescript": + self._analyze_typescript_file(file_path, content, repo_dir) + elif language == "java": + self._analyze_java_file(file_path, content, repo_dir) + elif language == "kotlin": + self._analyze_kotlin_file(file_path, content, repo_dir) + elif language == "csharp": + self._analyze_csharp_file(file_path, content, repo_dir) + elif language == "c": + self._analyze_c_file(file_path, content, repo_dir) + elif language == "cpp": + self._analyze_cpp_file(file_path, content, repo_dir) + elif language == "php": 
+ self._analyze_php_file(file_path, content, repo_dir) + # else: + # logger.warning( + # f"Unsupported language for call graph analysis: {language} for file {file_path}" + # ) + + except TimeoutError as e: + logger.warning(f"⏱️ Timeout analyzing {file_path}: {str(e)}") except Exception as e: - logger.error(f"⚠️ Error analyzing {file_path}: {str(e)}") - logger.error(f"Traceback: {traceback.format_exc()}") + logger.debug(f"Error analyzing {file_path}: {str(e)}") + logger.debug(f"Traceback: {traceback.format_exc()}") def _analyze_python_file(self, file_path: str, content: str, base_dir: str): """ diff --git a/codewiki/src/be/dependency_analyzer/utils/patterns.py b/codewiki/src/be/dependency_analyzer/utils/patterns.py index 1680ed4f..36440d3c 100644 --- a/codewiki/src/be/dependency_analyzer/utils/patterns.py +++ b/codewiki/src/be/dependency_analyzer/utils/patterns.py @@ -29,12 +29,17 @@ ".hypothesis", "poetry.lock", "Pipfile.lock", - # JavaScript/FileSystemNode + # JavaScript/Node.js (CRITICAL: node_modules must be excluded) + "node_modules/", + "node_modules", "package-lock.json", "yarn.lock", ".npm", ".yarn", ".pnpm-store", + ".next/", + ".nuxt/", + ".turbo/", "bun.lock", "bun.lockb", # Java From 584805c5cb61869865ec97cb4f0d666b4e296840 Mon Sep 17 00:00:00 2001 From: dalyzhou Date: Sun, 15 Mar 2026 22:50:06 +0800 Subject: [PATCH 4/9] fix: handle non-standard responses from OpenAI-compatible API proxies Some OpenAI-compatible proxies (Azure, vLLM, internal proxies, etc.) return choices[].index as null instead of an integer, causing pydantic validation to fail. Add a CompatibleOpenAIModel subclass that patches these fields before validation. --- codewiki/src/be/llm_services.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/codewiki/src/be/llm_services.py b/codewiki/src/be/llm_services.py index 0de98438..90a693ec 100644 --- a/codewiki/src/be/llm_services.py +++ b/codewiki/src/be/llm_services.py @@ -1,6 +1,11 @@ """ LLM service factory for creating configured LLM clients. + +Includes a compatibility layer for OpenAI-compatible API proxies that may +return slightly non-standard responses (e.g. choices[].index = None). """ +import logging +from openai.types import chat from pydantic_ai.models.openai import OpenAIModel from pydantic_ai.providers.openai import OpenAIProvider from pydantic_ai.models.openai import OpenAIModelSettings @@ -9,10 +14,29 @@ from codewiki.src.config import Config +logger = logging.getLogger(__name__) + + +class CompatibleOpenAIModel(OpenAIModel): + """OpenAIModel subclass that patches non-standard API proxy responses. + + Some OpenAI-compatible proxies return responses with fields like + choices[].index set to None instead of an integer. This subclass + fixes those fields before pydantic validation runs. + """ + + def _validate_completion(self, response: chat.ChatCompletion) -> chat.ChatCompletion: + # Patch choices[].index: None -> sequential integer (0, 1, 2, ...) 
+ if response.choices: + for i, choice in enumerate(response.choices): + if choice.index is None: + choice.index = i + return super()._validate_completion(response) + -def create_main_model(config: Config) -> OpenAIModel: +def create_main_model(config: Config) -> CompatibleOpenAIModel: """Create the main LLM model from configuration.""" - return OpenAIModel( + return CompatibleOpenAIModel( model_name=config.main_model, provider=OpenAIProvider( base_url=config.llm_base_url, @@ -25,9 +49,9 @@ def create_main_model(config: Config) -> OpenAIModel: ) -def create_fallback_model(config: Config) -> OpenAIModel: +def create_fallback_model(config: Config) -> CompatibleOpenAIModel: """Create the fallback LLM model from configuration.""" - return OpenAIModel( + return CompatibleOpenAIModel( model_name=config.fallback_model, provider=OpenAIProvider( base_url=config.llm_base_url, From 36c93c23db8c43ad8de676bc5a8dba276f734e45 Mon Sep 17 00:00:00 2001 From: Nghi Bui Date: Tue, 17 Mar 2026 03:39:02 +0700 Subject: [PATCH 5/9] Fix issues #44, #34, #17, #43: OpenAI compat, Anthropic validation, keyring fallback, verbose logging - #44: Use max_completion_tokens for newer OpenAI models (o1, o3, gpt-4o) that reject the deprecated max_tokens parameter - #34: Detect Anthropic API URLs and use the anthropic SDK for connectivity tests instead of forcing OpenAI client on all providers - #17: Add file-based fallback (credentials.json) when system keyring is unavailable (headless containers, RHEL). Support CODEWIKI_NO_KEYRING=1 env var to force file-based storage - #43: Add file-level and module-level verbose logging during dependency analysis, clustering, and doc generation phases Co-Authored-By: Claude Opus 4.6 (1M context) --- codewiki/cli/adapters/doc_generator.py | 25 ++++- codewiki/cli/commands/config.py | 27 +++++- codewiki/cli/config_manager.py | 121 ++++++++++++++++++------- codewiki/src/be/llm_services.py | 63 ++++++++++--- 4 files changed, 180 insertions(+), 56 deletions(-) diff --git a/codewiki/cli/adapters/doc_generator.py b/codewiki/cli/adapters/doc_generator.py index 826b60ca..78256f2e 100644 --- a/codewiki/cli/adapters/doc_generator.py +++ b/codewiki/cli/adapters/doc_generator.py @@ -186,9 +186,14 @@ async def _run_backend_generation(self, backend_config: BackendConfig): components, leaf_nodes = doc_generator.graph_builder.build_dependency_graph() self.job.statistics.total_files_analyzed = len(components) self.job.statistics.leaf_nodes = len(leaf_nodes) - + if self.verbose: - self.progress_tracker.update_stage(1.0, f"Found {len(leaf_nodes)} leaf nodes") + self.progress_tracker.update_stage(0.8, f"Analyzed {len(components)} files, found {len(leaf_nodes)} leaf nodes") + # Log individual files analyzed + for comp_name in sorted(components.keys())[:20]: + self.progress_tracker.update_stage(0.9, f" File: {comp_name}") + if len(components) > 20: + self.progress_tracker.update_stage(0.9, f" ... 
and {len(components) - 20} more files") except Exception as e: raise APIError(f"Dependency analysis failed: {e}") @@ -212,15 +217,22 @@ async def _run_backend_generation(self, backend_config: BackendConfig): try: if os.path.exists(first_module_tree_path): module_tree = file_manager.load_json(first_module_tree_path) + if self.verbose: + self.progress_tracker.update_stage(0.5, "Loaded cached module tree") else: + if self.verbose: + self.progress_tracker.update_stage(0.3, f"Clustering {len(leaf_nodes)} leaf nodes with LLM...") module_tree = cluster_modules(leaf_nodes, components, backend_config) file_manager.save_json(module_tree, first_module_tree_path) - + file_manager.save_json(module_tree, module_tree_path) self.job.module_count = len(module_tree) - + if self.verbose: self.progress_tracker.update_stage(1.0, f"Created {len(module_tree)} modules") + for mod_name in sorted(module_tree.keys()): + file_count = len(module_tree[mod_name]) if isinstance(module_tree[mod_name], list) else "?" + self.progress_tracker.update_stage(1.0, f" Module: {mod_name} ({file_count} files)") except Exception as e: raise APIError(f"Module clustering failed: {e}") @@ -232,9 +244,12 @@ async def _run_backend_generation(self, backend_config: BackendConfig): self.progress_tracker.update_stage(0.1, "Generating module documentation...") try: + if self.verbose: + self.progress_tracker.update_stage(0.2, f"Generating documentation for {self.job.module_count} modules...") + # Run the actual documentation generation await doc_generator.generate_module_documentation(components, leaf_nodes) - + if self.verbose: self.progress_tracker.update_stage(0.9, "Creating repository overview...") diff --git a/codewiki/cli/commands/config.py b/codewiki/cli/commands/config.py index f776273a..090b2abc 100644 --- a/codewiki/cli/commands/config.py +++ b/codewiki/cli/commands/config.py @@ -490,13 +490,32 @@ def config_validate(quick: bool, verbose: bool): # Step 5: API connectivity test (unless --quick) if not quick: + if verbose: + click.echo() + click.echo("[5/5] Testing API connectivity...") + click.echo(f" URL: {config.base_url}") + try: - from openai import OpenAI - client = OpenAI(api_key=api_key, base_url=config.base_url) - response = client.models.list() - click.secho("✓ API connectivity test successful", fg="green") + base_url_lower = (config.base_url or "").lower() + if "api.anthropic.com" in base_url_lower: + # Use Anthropic SDK for native Anthropic endpoints + import anthropic + client = anthropic.Anthropic(api_key=api_key) + client.models.list(limit=1) + else: + # Use OpenAI SDK for OpenAI-compatible endpoints + from openai import OpenAI + client = OpenAI(api_key=api_key, base_url=config.base_url) + client.models.list() + + if verbose: + click.secho(" ✓ API responded successfully", fg="green") + else: + click.secho("✓ API connectivity test successful", fg="green") except Exception as e: click.secho("✗ API connectivity test failed", fg="red") + if verbose: + click.echo(f" Error: {e}") sys.exit(EXIT_CONFIG_ERROR) # Success diff --git a/codewiki/cli/config_manager.py b/codewiki/cli/config_manager.py index f1f86b2f..5a5c5e88 100644 --- a/codewiki/cli/config_manager.py +++ b/codewiki/cli/config_manager.py @@ -1,8 +1,14 @@ """ Configuration manager with keyring integration for secure credential storage. + +Supports fallback to file-based storage when system keyring is unavailable +(e.g. headless containers, RHEL without Secret Service). Set the environment +variable CODEWIKI_NO_KEYRING=1 to force file-based storage. 
""" import json +import os +import logging from pathlib import Path from typing import Optional import keyring @@ -12,6 +18,7 @@ from codewiki.cli.utils.errors import ConfigurationError, FileSystemError from codewiki.cli.utils.fs import ensure_directory, safe_write, safe_read +logger = logging.getLogger(__name__) # Keyring configuration KEYRING_SERVICE = "codewiki" @@ -20,33 +27,63 @@ # Configuration file location CONFIG_DIR = Path.home() / ".codewiki" CONFIG_FILE = CONFIG_DIR / "config.json" +CREDENTIALS_FILE = CONFIG_DIR / "credentials.json" CONFIG_VERSION = "1.0" class ConfigManager: """ Manages CodeWiki configuration with secure keyring storage for API keys. - + Storage: - - API key: System keychain via keyring (macOS Keychain, Windows Credential Manager, + - API key: System keychain via keyring (macOS Keychain, Windows Credential Manager, Linux Secret Service) + - Fallback: ~/.codewiki/credentials.json when keyring is unavailable - Other settings: ~/.codewiki/config.json + + Set CODEWIKI_NO_KEYRING=1 to skip keyring and use file-based storage. """ - + def __init__(self): """Initialize the configuration manager.""" self._api_key: Optional[str] = None self._config: Optional[Configuration] = None + self._force_no_keyring = os.environ.get("CODEWIKI_NO_KEYRING", "").strip() in ("1", "true", "yes") self._keyring_available = self._check_keyring_available() - + def _check_keyring_available(self) -> bool: """Check if system keyring is available.""" + if self._force_no_keyring: + logger.debug("Keyring disabled via CODEWIKI_NO_KEYRING") + return False try: # Try to get/set a test value keyring.get_password(KEYRING_SERVICE, "__test__") return True - except KeyringError: + except (KeyringError, Exception): return False + + def _load_api_key_from_file(self) -> Optional[str]: + """Load API key from fallback credentials file.""" + if not CREDENTIALS_FILE.exists(): + return None + try: + content = safe_read(CREDENTIALS_FILE) + data = json.loads(content) + return data.get("api_key") + except (json.JSONDecodeError, FileSystemError): + return None + + def _save_api_key_to_file(self, api_key: str): + """Save API key to fallback credentials file (plaintext).""" + ensure_directory(CONFIG_DIR) + data = {"api_key": api_key} + safe_write(CREDENTIALS_FILE, json.dumps(data, indent=2)) + # Restrict file permissions (owner read/write only) + try: + CREDENTIALS_FILE.chmod(0o600) + except OSError: + pass def load(self) -> bool: """ @@ -70,12 +107,14 @@ def load(self) -> bool: self._config = Configuration.from_dict(data) - # Load API key from keyring - try: - self._api_key = keyring.get_password(KEYRING_SERVICE, KEYRING_API_KEY_ACCOUNT) - except KeyringError: - # Keyring unavailable, API key will be None - pass + # Load API key from keyring, falling back to file + if self._keyring_available: + try: + self._api_key = keyring.get_password(KEYRING_SERVICE, KEYRING_API_KEY_ACCOUNT) + except (KeyringError, Exception): + pass + if self._api_key is None: + self._api_key = self._load_api_key_from_file() return True except (json.JSONDecodeError, FileSystemError) as e: @@ -154,17 +193,23 @@ def save( if self._config.base_url and self._config.main_model and self._config.cluster_model: self._config.validate() - # Save API key to keyring + # Save API key to keyring, falling back to file if api_key is not None: self._api_key = api_key - try: - keyring.set_password(KEYRING_SERVICE, KEYRING_API_KEY_ACCOUNT, api_key) - except KeyringError as e: - # Fallback: warn about keyring unavailability - raise ConfigurationError( - f"System 
keychain unavailable: {e}\n" - f"Please ensure your system keychain is properly configured." - ) + if self._keyring_available: + try: + keyring.set_password(KEYRING_SERVICE, KEYRING_API_KEY_ACCOUNT, api_key) + except (KeyringError, Exception): + # Keyring failed at runtime — fall back to file + self._keyring_available = False + self._save_api_key_to_file(api_key) + logger.warning( + "System keychain unavailable. API key stored in %s " + "(plaintext). Set CODEWIKI_NO_KEYRING=1 to suppress this warning.", + CREDENTIALS_FILE + ) + else: + self._save_api_key_to_file(api_key) # Save non-sensitive config to JSON config_data = { @@ -179,17 +224,20 @@ def save( def get_api_key(self) -> Optional[str]: """ - Get API key from keyring. - + Get API key from keyring or fallback file. + Returns: API key or None if not set """ if self._api_key is None: - try: - self._api_key = keyring.get_password(KEYRING_SERVICE, KEYRING_API_KEY_ACCOUNT) - except KeyringError: - pass - + if self._keyring_available: + try: + self._api_key = keyring.get_password(KEYRING_SERVICE, KEYRING_API_KEY_ACCOUNT) + except (KeyringError, Exception): + pass + if self._api_key is None: + self._api_key = self._load_api_key_from_file() + return self._api_key def get_config(self) -> Optional[Configuration]: @@ -219,12 +267,19 @@ def is_configured(self) -> bool: return self._config.is_complete() def delete_api_key(self): - """Delete API key from keyring.""" - try: - keyring.delete_password(KEYRING_SERVICE, KEYRING_API_KEY_ACCOUNT) - self._api_key = None - except KeyringError: - pass + """Delete API key from keyring and fallback file.""" + if self._keyring_available: + try: + keyring.delete_password(KEYRING_SERVICE, KEYRING_API_KEY_ACCOUNT) + except (KeyringError, Exception): + pass + # Also remove fallback credentials file + if CREDENTIALS_FILE.exists(): + try: + CREDENTIALS_FILE.unlink() + except OSError: + pass + self._api_key = None def clear(self): """Clear all configuration (file and keyring).""" diff --git a/codewiki/src/be/llm_services.py b/codewiki/src/be/llm_services.py index 90a693ec..08af907f 100644 --- a/codewiki/src/be/llm_services.py +++ b/codewiki/src/be/llm_services.py @@ -6,6 +6,7 @@ """ import logging from openai.types import chat + from pydantic_ai.models.openai import OpenAIModel from pydantic_ai.providers.openai import OpenAIProvider from pydantic_ai.models.openai import OpenAIModelSettings @@ -17,9 +18,40 @@ logger = logging.getLogger(__name__) +def _should_use_max_completion_tokens(model_name: str, base_url: str) -> bool: + """ + Determine whether to use max_completion_tokens instead of max_tokens. + + Newer OpenAI models (o1, o3, gpt-4o, etc.) require max_completion_tokens. + Anthropic and other providers still use max_tokens. 
+ """ + model_lower = model_name.lower() + # OpenAI models that require max_completion_tokens + new_openai_patterns = ("o1", "o3", "gpt-4o", "gpt-4-turbo") + if any(pattern in model_lower for pattern in new_openai_patterns): + return True + # If base_url points to OpenAI directly, newer models may need it + if base_url and "api.openai.com" in base_url: + return True + return False + + +def _build_model_settings(config: Config, model_name: str) -> OpenAIModelSettings: + """Build model settings with the correct token parameter.""" + if _should_use_max_completion_tokens(model_name, config.llm_base_url): + return OpenAIModelSettings( + temperature=0.0, + max_completion_tokens=config.max_tokens + ) + return OpenAIModelSettings( + temperature=0.0, + max_tokens=config.max_tokens + ) + + class CompatibleOpenAIModel(OpenAIModel): """OpenAIModel subclass that patches non-standard API proxy responses. - + Some OpenAI-compatible proxies return responses with fields like choices[].index set to None instead of an integer. This subclass fixes those fields before pydantic validation runs. @@ -42,10 +74,7 @@ def create_main_model(config: Config) -> CompatibleOpenAIModel: base_url=config.llm_base_url, api_key=config.llm_api_key ), - settings=OpenAIModelSettings( - temperature=0.0, - max_tokens=config.max_tokens - ) + settings=_build_model_settings(config, config.main_model) ) @@ -57,10 +86,7 @@ def create_fallback_model(config: Config) -> CompatibleOpenAIModel: base_url=config.llm_base_url, api_key=config.llm_api_key ), - settings=OpenAIModelSettings( - temperature=0.0, - max_tokens=config.max_tokens - ) + settings=_build_model_settings(config, config.fallback_model) ) @@ -87,24 +113,33 @@ def call_llm( ) -> str: """ Call LLM with the given prompt. - + Args: prompt: The prompt to send config: Configuration containing LLM settings model: Model name (defaults to config.main_model) temperature: Temperature setting - + Returns: LLM response text """ if model is None: model = config.main_model - + client = create_openai_client(config) + + # Use the correct token parameter based on model/provider + token_kwargs = {} + if _should_use_max_completion_tokens(model, config.llm_base_url): + token_kwargs["max_completion_tokens"] = config.max_tokens + logger.debug("Using max_completion_tokens=%d for model %s", config.max_tokens, model) + else: + token_kwargs["max_tokens"] = config.max_tokens + response = client.chat.completions.create( model=model, messages=[{"role": "user", "content": prompt}], temperature=temperature, - max_tokens=config.max_tokens + **token_kwargs ) - return response.choices[0].message.content \ No newline at end of file + return response.choices[0].message.content From 086969a19c8f1d4a338bdea9430a0ff63c2eece9 Mon Sep 17 00:00:00 2001 From: Nghi Bui Date: Tue, 17 Mar 2026 03:51:11 +0700 Subject: [PATCH 6/9] Add AWS Bedrock support (#40), MCP server (#9), and incremental updates (#15) - #40 Bedrock: Add --provider (openai-compatible|anthropic|bedrock) and --aws-region flags. Uses litellm to translate API calls for Bedrock and Anthropic providers. Passes provider/region through full config pipeline. - #9 MCP server: Add `codewiki mcp` command that starts an MCP stdio server exposing three tools: generate_docs, analyze_repo, and get_module_tree. Uses the mcp SDK (already in requirements). Compatible with Claude, Cursor, and other MCP clients. 
- #15 Incremental updates: Add `codewiki generate --update` flag that detects changed files since last generation (via commit_id in metadata.json + git diff), invalidates affected module docs, and only regenerates what changed. Co-Authored-By: Claude Opus 4.6 (1M context) --- codewiki/cli/adapters/doc_generator.py | 2 + codewiki/cli/commands/config.py | 36 ++- codewiki/cli/commands/generate.py | 157 ++++++++++++- codewiki/cli/config_manager.py | 16 +- codewiki/cli/main.py | 22 ++ codewiki/cli/models/config.py | 12 +- codewiki/mcp/__init__.py | 6 + codewiki/mcp/server.py | 310 +++++++++++++++++++++++++ codewiki/src/be/llm_services.py | 82 +++++++ codewiki/src/config.py | 15 +- 10 files changed, 643 insertions(+), 15 deletions(-) create mode 100644 codewiki/mcp/__init__.py create mode 100644 codewiki/mcp/server.py diff --git a/codewiki/cli/adapters/doc_generator.py b/codewiki/cli/adapters/doc_generator.py index 78256f2e..7551f156 100644 --- a/codewiki/cli/adapters/doc_generator.py +++ b/codewiki/cli/adapters/doc_generator.py @@ -137,6 +137,8 @@ def generate(self) -> DocumentationJob: main_model=self.config.get('main_model'), cluster_model=self.config.get('cluster_model'), fallback_model=self.config.get('fallback_model'), + provider=self.config.get('provider', 'openai-compatible'), + aws_region=self.config.get('aws_region', 'us-east-1'), max_tokens=self.config.get('max_tokens', 32768), max_token_per_module=self.config.get('max_token_per_module', 36369), max_token_per_leaf_module=self.config.get('max_token_per_leaf_module', 16000), diff --git a/codewiki/cli/commands/config.py b/codewiki/cli/commands/config.py index 090b2abc..5ef6d99c 100644 --- a/codewiki/cli/commands/config.py +++ b/codewiki/cli/commands/config.py @@ -83,6 +83,16 @@ def config_group(): type=int, help="Maximum depth for hierarchical decomposition (default: 2)" ) +@click.option( + "--provider", + type=click.Choice(['openai-compatible', 'anthropic', 'bedrock'], case_sensitive=False), + help="LLM provider type (default: openai-compatible)" +) +@click.option( + "--aws-region", + type=str, + help="AWS region for Bedrock provider (default: us-east-1)" +) def config_set( api_key: Optional[str], base_url: Optional[str], @@ -92,7 +102,9 @@ def config_set( max_tokens: Optional[int], max_token_per_module: Optional[int], max_token_per_leaf_module: Optional[int], - max_depth: Optional[int] + max_depth: Optional[int], + provider: Optional[str] = None, + aws_region: Optional[str] = None ): """ Set configuration values for CodeWiki. @@ -127,7 +139,7 @@ def config_set( """ try: # Check if at least one option is provided - if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module, max_depth]): + if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module, max_depth, provider, aws_region]): click.echo("No options provided. 
Use --help for usage information.") sys.exit(EXIT_CONFIG_ERROR) @@ -168,11 +180,17 @@ def config_set( if max_depth < 1: raise ConfigurationError("max_depth must be a positive integer") validated_data['max_depth'] = max_depth - + + if provider is not None: + validated_data['provider'] = provider + + if aws_region is not None: + validated_data['aws_region'] = aws_region + # Create config manager and save manager = ConfigManager() manager.load() # Load existing config if present - + manager.save( api_key=validated_data.get('api_key'), base_url=validated_data.get('base_url'), @@ -182,7 +200,9 @@ def config_set( max_tokens=validated_data.get('max_tokens'), max_token_per_module=validated_data.get('max_token_per_module'), max_token_per_leaf_module=validated_data.get('max_token_per_leaf_module'), - max_depth=validated_data.get('max_depth') + max_depth=validated_data.get('max_depth'), + provider=validated_data.get('provider'), + aws_region=validated_data.get('aws_region') ) # Display success messages @@ -230,6 +250,12 @@ def config_set( if max_depth: click.secho(f"✓ Max depth: {max_depth}", fg="green") + + if provider: + click.secho(f"✓ Provider: {provider}", fg="green") + + if aws_region: + click.secho(f"✓ AWS Region: {aws_region}", fg="green") click.echo("\n" + click.style("Configuration updated successfully.", fg="green", bold=True)) diff --git a/codewiki/cli/commands/generate.py b/codewiki/cli/commands/generate.py index 8512f736..b7caea2d 100644 --- a/codewiki/cli/commands/generate.py +++ b/codewiki/cli/commands/generate.py @@ -39,6 +39,139 @@ def parse_patterns(patterns_str: str) -> List[str]: return [p.strip() for p in patterns_str.split(',') if p.strip()] +def _detect_changed_files( + repo_path: Path, + output_dir: Path, + logger, + verbose: bool +) -> Optional[List[str]]: + """ + Detect files changed since the last documentation generation. + + Reads the commit_id from metadata.json and compares with current HEAD + using git diff. Returns list of changed file paths, or None if unable + to determine (e.g., no metadata, not a git repo). + """ + import json + + metadata_path = output_dir / "metadata.json" + if not metadata_path.exists(): + if verbose: + logger.debug("No metadata.json found — cannot detect changes, running full generation.") + return None + + try: + metadata = json.loads(metadata_path.read_text()) + prev_commit = metadata.get("generation_info", {}).get("commit_id") + if not prev_commit: + if verbose: + logger.debug("No commit_id in metadata — running full generation.") + return None + except (json.JSONDecodeError, OSError): + return None + + # Get current HEAD commit + try: + import git + repo = git.Repo(repo_path, search_parent_directories=True) + current_commit = repo.head.commit.hexsha + except Exception: + if verbose: + logger.debug("Cannot access git repo — running full generation.") + return None + + if prev_commit == current_commit: + if verbose: + logger.debug(f"HEAD is still at {current_commit[:8]} — no changes.") + return [] + + # Get changed files between previous and current commit + try: + diff_index = repo.commit(prev_commit).diff(current_commit) + changed = [] + for diff in diff_index: + if diff.a_path: + changed.append(diff.a_path) + if diff.b_path and diff.b_path != diff.a_path: + changed.append(diff.b_path) + + if verbose: + logger.debug(f"Changes between {prev_commit[:8]} and {current_commit[:8]}:") + for f in changed[:10]: + logger.debug(f" {f}") + if len(changed) > 10: + logger.debug(f" ... 
and {len(changed) - 10} more") + + return changed + except Exception as e: + if verbose: + logger.debug(f"Git diff failed: {e} — running full generation.") + return None + + +def _invalidate_affected_modules( + output_dir: Path, + changed_files: List[str], + logger, + verbose: bool +): + """ + Remove cached module documentation for modules that contain changed files. + + Reads module_tree.json to find which modules contain changed files, + then deletes their .md files so they get regenerated. + """ + import json + + module_tree_path = output_dir / "module_tree.json" + if not module_tree_path.exists(): + return + + try: + module_tree = json.loads(module_tree_path.read_text()) + except (json.JSONDecodeError, OSError): + return + + changed_set = set(changed_files) + modules_to_invalidate = set() + + def _find_affected(tree, parent_names=None): + if parent_names is None: + parent_names = [] + for mod_name, mod_info in tree.items(): + components = mod_info.get("components", []) + # Check if any component path overlaps with changed files + for comp in components: + # Component IDs may be class names, check if they match any changed file path + if any(changed_file in comp or comp in changed_file for changed_file in changed_set): + modules_to_invalidate.add(mod_name) + # Also invalidate parent modules + for parent in parent_names: + modules_to_invalidate.add(parent) + break + + children = mod_info.get("children", {}) + if isinstance(children, dict) and children: + _find_affected(children, parent_names + [mod_name]) + + _find_affected(module_tree) + + # Also remove overview.md since it depends on child docs + if modules_to_invalidate: + modules_to_invalidate.add("overview") + + # Delete affected module docs + for mod_name in modules_to_invalidate: + doc_path = output_dir / f"{mod_name}.md" + if doc_path.exists(): + doc_path.unlink() + if verbose: + logger.debug(f"Invalidated: {doc_path.name}") + + if verbose: + logger.debug(f"Invalidated {len(modules_to_invalidate)} modules for regeneration.") + + @click.command(name="generate") @click.option( "--output", @@ -126,6 +259,11 @@ def parse_patterns(patterns_str: str) -> List[str]: default=None, help="Maximum depth for hierarchical decomposition (overrides config)", ) +@click.option( + "--update", + is_flag=True, + help="Incremental update: only regenerate modules affected by changes since last generation", +) @click.pass_context def generate_command( ctx, @@ -142,7 +280,8 @@ def generate_command( max_tokens: Optional[int], max_token_per_module: Optional[int], max_token_per_leaf_module: Optional[int], - max_depth: Optional[int] + max_depth: Optional[int], + update: bool = False ): """ Generate comprehensive documentation for a code repository. @@ -246,8 +385,20 @@ def generate_command( logger.success(f"Output directory: {output_dir}") + # Incremental update: detect changed files and selectively regenerate + changed_files = None + if update and output_dir.exists(): + changed_files = _detect_changed_files(repo_path, output_dir, logger, verbose) + if changed_files is not None and len(changed_files) == 0: + logger.success("No changes detected since last generation. 
Documentation is up to date.") + sys.exit(EXIT_SUCCESS) + if changed_files is not None: + logger.info(f" Detected {len(changed_files)} changed files — regenerating affected modules.") + # Remove cached module docs for affected files so they get regenerated + _invalidate_affected_modules(output_dir, changed_files, logger, verbose) + # Check for existing documentation - if output_dir.exists() and list(output_dir.glob("*.md")): + if not update and output_dir.exists() and list(output_dir.glob("*.md")): if not click.confirm( f"\n{output_dir} already contains documentation. Overwrite?", default=True @@ -352,6 +503,8 @@ def generate_command( 'fallback_model': config.fallback_model, 'base_url': config.base_url, 'api_key': api_key, + 'provider': getattr(config, 'provider', 'openai-compatible'), + 'aws_region': getattr(config, 'aws_region', 'us-east-1'), 'agent_instructions': agent_instructions_dict, # Max token settings (runtime overrides take precedence) 'max_tokens': max_tokens if max_tokens is not None else config.max_tokens, diff --git a/codewiki/cli/config_manager.py b/codewiki/cli/config_manager.py index 5a5c5e88..a652e405 100644 --- a/codewiki/cli/config_manager.py +++ b/codewiki/cli/config_manager.py @@ -121,7 +121,7 @@ def load(self) -> bool: raise ConfigurationError(f"Failed to load configuration: {e}") def save( - self, + self, api_key: Optional[str] = None, base_url: Optional[str] = None, main_model: Optional[str] = None, @@ -131,11 +131,13 @@ def save( max_tokens: Optional[int] = None, max_token_per_module: Optional[int] = None, max_token_per_leaf_module: Optional[int] = None, - max_depth: Optional[int] = None + max_depth: Optional[int] = None, + provider: Optional[str] = None, + aws_region: Optional[str] = None ): """ Save configuration to file and keyring. - + Args: api_key: API key (stored in keyring) base_url: LLM API base URL @@ -147,6 +149,8 @@ def save( max_token_per_module: Maximum tokens per module for clustering max_token_per_leaf_module: Maximum tokens per leaf module max_depth: Maximum depth for hierarchical decomposition + provider: LLM provider type (openai-compatible, anthropic, bedrock) + aws_region: AWS region for Bedrock provider """ # Ensure config directory exists try: @@ -188,7 +192,11 @@ def save( self._config.max_token_per_leaf_module = max_token_per_leaf_module if max_depth is not None: self._config.max_depth = max_depth - + if provider is not None: + self._config.provider = provider + if aws_region is not None: + self._config.aws_region = aws_region + # Validate configuration (only if base fields are set) if self._config.base_url and self._config.main_model and self._config.cluster_model: self._config.validate() diff --git a/codewiki/cli/main.py b/codewiki/cli/main.py index 44b7f751..23ebc319 100644 --- a/codewiki/cli/main.py +++ b/codewiki/cli/main.py @@ -39,6 +39,28 @@ def version(): cli.add_command(generate_command, name="generate") +@cli.command(name="mcp") +def mcp_command(): + """Start CodeWiki as an MCP (Model Context Protocol) server. + + Exposes documentation generation tools via MCP stdio transport. + Configure in your MCP client (Claude, Cursor, etc.) 
as: + + \b + { + "mcpServers": { + "codewiki": { + "command": "codewiki", + "args": ["mcp"] + } + } + } + """ + import asyncio + from codewiki.mcp.server import main as mcp_main + asyncio.run(mcp_main()) + + def main(): """Entry point for the CLI.""" try: diff --git a/codewiki/cli/models/config.py b/codewiki/cli/models/config.py index 585b4272..8bfe9152 100644 --- a/codewiki/cli/models/config.py +++ b/codewiki/cli/models/config.py @@ -106,13 +106,15 @@ def get_prompt_addition(self) -> str: class Configuration: """ CodeWiki configuration data model. - + Attributes: base_url: LLM API base URL main_model: Primary model for documentation generation cluster_model: Model for module clustering fallback_model: Fallback model for documentation generation default_output: Default output directory + provider: LLM provider type (openai-compatible, anthropic, bedrock) + aws_region: AWS region for Bedrock provider max_tokens: Maximum tokens for LLM response (default: 32768) max_token_per_module: Maximum tokens per module for clustering (default: 36369) max_token_per_leaf_module: Maximum tokens per leaf module (default: 16000) @@ -124,6 +126,8 @@ class Configuration: cluster_model: str fallback_model: str = "glm-4p5" default_output: str = "docs" + provider: str = "openai-compatible" + aws_region: str = "us-east-1" max_tokens: int = 32768 max_token_per_module: int = 36369 max_token_per_leaf_module: int = 16000 @@ -149,6 +153,8 @@ def to_dict(self) -> dict: 'main_model': self.main_model, 'cluster_model': self.cluster_model, 'default_output': self.default_output, + 'provider': self.provider, + 'aws_region': self.aws_region, 'max_tokens': self.max_tokens, 'max_token_per_module': self.max_token_per_module, 'max_token_per_leaf_module': self.max_token_per_leaf_module, @@ -179,6 +185,8 @@ def from_dict(cls, data: dict) -> 'Configuration': cluster_model=data.get('cluster_model', ''), fallback_model=data.get('fallback_model', 'glm-4p5'), default_output=data.get('default_output', 'docs'), + provider=data.get('provider', 'openai-compatible'), + aws_region=data.get('aws_region', 'us-east-1'), max_tokens=data.get('max_tokens', 32768), max_token_per_module=data.get('max_token_per_module', 36369), max_token_per_leaf_module=data.get('max_token_per_leaf_module', 16000), @@ -233,6 +241,8 @@ def to_backend_config(self, repo_path: str, output_dir: str, api_key: str, runti main_model=self.main_model, cluster_model=self.cluster_model, fallback_model=self.fallback_model, + provider=self.provider, + aws_region=self.aws_region, max_tokens=self.max_tokens, max_token_per_module=self.max_token_per_module, max_token_per_leaf_module=self.max_token_per_leaf_module, diff --git a/codewiki/mcp/__init__.py b/codewiki/mcp/__init__.py new file mode 100644 index 00000000..27c9847d --- /dev/null +++ b/codewiki/mcp/__init__.py @@ -0,0 +1,6 @@ +""" +CodeWiki MCP (Model Context Protocol) server. + +Exposes CodeWiki documentation generation capabilities as MCP tools +that can be called by Claude, Cursor, and other MCP-compatible clients. +""" diff --git a/codewiki/mcp/server.py b/codewiki/mcp/server.py new file mode 100644 index 00000000..600da22c --- /dev/null +++ b/codewiki/mcp/server.py @@ -0,0 +1,310 @@ +""" +CodeWiki MCP Server. 
+ +Exposes documentation generation as MCP tools: + - generate_docs: Generate full documentation for a repository + - analyze_repo: Analyze repository structure and dependencies + - get_module_tree: Get the module clustering for a repository + +Usage: + # Run as standalone MCP server (stdio transport) + python -m codewiki.mcp.server + + # Or register in your MCP client config: + { + "mcpServers": { + "codewiki": { + "command": "python", + "args": ["-m", "codewiki.mcp.server"] + } + } + } +""" + +import asyncio +import json +import logging +import os +from pathlib import Path +from typing import Any + +from mcp.server import Server +from mcp.server.stdio import stdio_server +from mcp.types import ( + TextContent, + Tool, +) + +logger = logging.getLogger(__name__) + +# Create the MCP server +server = Server("codewiki") + + +def _load_config(): + """Load CodeWiki configuration from ~/.codewiki/config.json + keyring.""" + from codewiki.cli.config_manager import ConfigManager + manager = ConfigManager() + if not manager.load(): + raise RuntimeError( + "CodeWiki not configured. Run 'codewiki config set' first." + ) + return manager + + +@server.list_tools() +async def list_tools() -> list[Tool]: + """List available CodeWiki MCP tools.""" + return [ + Tool( + name="generate_docs", + description=( + "Generate comprehensive AI-powered documentation for a code repository. " + "Analyzes dependencies, clusters modules, and generates markdown documentation." + ), + inputSchema={ + "type": "object", + "properties": { + "repo_path": { + "type": "string", + "description": "Absolute path to the repository to document", + }, + "output_dir": { + "type": "string", + "description": "Output directory for generated docs (default: ./docs)", + "default": "docs", + }, + "doc_type": { + "type": "string", + "enum": ["api", "architecture", "user-guide", "developer"], + "description": "Type of documentation to generate", + }, + "include_patterns": { + "type": "string", + "description": "Comma-separated file patterns to include (e.g., '*.py,*.js')", + }, + "exclude_patterns": { + "type": "string", + "description": "Comma-separated patterns to exclude (e.g., '*test*,*spec*')", + }, + }, + "required": ["repo_path"], + }, + ), + Tool( + name="analyze_repo", + description=( + "Analyze a repository's structure, dependencies, and component hierarchy " + "without generating full documentation. Returns file counts, languages, " + "and dependency information." + ), + inputSchema={ + "type": "object", + "properties": { + "repo_path": { + "type": "string", + "description": "Absolute path to the repository to analyze", + }, + }, + "required": ["repo_path"], + }, + ), + Tool( + name="get_module_tree", + description=( + "Get the module clustering tree for a repository. " + "Shows how source files are grouped into logical modules." 
+ ), + inputSchema={ + "type": "object", + "properties": { + "repo_path": { + "type": "string", + "description": "Absolute path to the repository", + }, + "output_dir": { + "type": "string", + "description": "Directory containing generated docs (default: ./docs)", + "default": "docs", + }, + }, + "required": ["repo_path"], + }, + ), + ] + + +@server.call_tool() +async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]: + """Handle MCP tool calls.""" + try: + if name == "generate_docs": + return await _handle_generate_docs(arguments) + elif name == "analyze_repo": + return await _handle_analyze_repo(arguments) + elif name == "get_module_tree": + return await _handle_get_module_tree(arguments) + else: + return [TextContent(type="text", text=f"Unknown tool: {name}")] + except Exception as e: + logger.error("Tool %s failed: %s", name, e, exc_info=True) + return [TextContent(type="text", text=f"Error: {e}")] + + +async def _handle_generate_docs(arguments: dict[str, Any]) -> list[TextContent]: + """Handle generate_docs tool call.""" + repo_path = Path(arguments["repo_path"]).expanduser().resolve() + output_dir = Path(arguments.get("output_dir", "docs")).expanduser().resolve() + + if not repo_path.exists(): + return [TextContent(type="text", text=f"Repository not found: {repo_path}")] + + # Load config + manager = _load_config() + config = manager.get_config() + api_key = manager.get_api_key() + + if not api_key: + return [TextContent(type="text", text="API key not configured. Run 'codewiki config set --api-key '")] + + # Build agent instructions from arguments + agent_instructions = {} + if arguments.get("doc_type"): + agent_instructions["doc_type"] = arguments["doc_type"] + if arguments.get("include_patterns"): + agent_instructions["include_patterns"] = [p.strip() for p in arguments["include_patterns"].split(",")] + if arguments.get("exclude_patterns"): + agent_instructions["exclude_patterns"] = [p.strip() for p in arguments["exclude_patterns"].split(",")] + + from codewiki.src.config import Config as BackendConfig, set_cli_context + set_cli_context(True) + + backend_config = BackendConfig.from_cli( + repo_path=str(repo_path), + output_dir=str(output_dir), + llm_base_url=config.base_url, + llm_api_key=api_key, + main_model=config.main_model, + cluster_model=config.cluster_model, + fallback_model=config.fallback_model, + provider=getattr(config, "provider", "openai-compatible"), + aws_region=getattr(config, "aws_region", "us-east-1"), + max_tokens=config.max_tokens, + agent_instructions=agent_instructions or None, + ) + + from codewiki.src.be.documentation_generator import DocumentationGenerator + doc_gen = DocumentationGenerator(backend_config) + + # Run generation + await doc_gen.run() + + # Collect results + generated_files = [] + for f in output_dir.iterdir(): + if f.suffix in (".md", ".json", ".html"): + generated_files.append(f.name) + + result = { + "status": "success", + "output_dir": str(output_dir), + "files_generated": sorted(generated_files), + "file_count": len(generated_files), + } + return [TextContent(type="text", text=json.dumps(result, indent=2))] + + +async def _handle_analyze_repo(arguments: dict[str, Any]) -> list[TextContent]: + """Handle analyze_repo tool call — lightweight dependency analysis only.""" + repo_path = Path(arguments["repo_path"]).expanduser().resolve() + + if not repo_path.exists(): + return [TextContent(type="text", text=f"Repository not found: {repo_path}")] + + manager = _load_config() + config = manager.get_config() + api_key = 
manager.get_api_key() + + from codewiki.src.config import Config as BackendConfig, set_cli_context + set_cli_context(True) + + # Create a minimal backend config (no LLM calls needed for analysis) + backend_config = BackendConfig.from_cli( + repo_path=str(repo_path), + output_dir=str(repo_path / ".codewiki_temp"), + llm_base_url=config.base_url or "http://localhost", + llm_api_key=api_key or "not-needed", + main_model=config.main_model or "unused", + cluster_model=config.cluster_model or "unused", + fallback_model=config.fallback_model or "unused", + ) + + from codewiki.src.be.dependency_analyzer import DependencyGraphBuilder + graph_builder = DependencyGraphBuilder(backend_config) + components, leaf_nodes = graph_builder.build_dependency_graph() + + # Aggregate statistics + languages = {} + files = set() + for comp in components.values(): + lang = getattr(comp, "language", "unknown") + languages[lang] = languages.get(lang, 0) + 1 + files.add(getattr(comp, "relative_path", "")) + + result = { + "status": "success", + "repo_path": str(repo_path), + "total_components": len(components), + "total_files": len(files), + "leaf_nodes": len(leaf_nodes), + "languages": languages, + "sample_components": sorted(list(components.keys()))[:20], + } + return [TextContent(type="text", text=json.dumps(result, indent=2))] + + +async def _handle_get_module_tree(arguments: dict[str, Any]) -> list[TextContent]: + """Handle get_module_tree tool call — returns existing module tree.""" + repo_path = Path(arguments["repo_path"]).expanduser().resolve() + output_dir = Path(arguments.get("output_dir", "docs")).expanduser().resolve() + + module_tree_path = output_dir / "module_tree.json" + if not module_tree_path.exists(): + return [TextContent( + type="text", + text=f"Module tree not found at {module_tree_path}. Run 'codewiki generate' first." + )] + + module_tree = json.loads(module_tree_path.read_text()) + + def _summarize_tree(tree, depth=0): + """Create a readable summary of the module tree.""" + lines = [] + for name, info in tree.items(): + indent = " " * depth + comp_count = len(info.get("components", [])) + children = info.get("children", {}) + child_count = len(children) if isinstance(children, dict) else 0 + lines.append(f"{indent}- {name} ({comp_count} components, {child_count} children)") + if isinstance(children, dict) and children: + lines.extend(_summarize_tree(children, depth + 1)) + return lines + + summary = "\n".join(_summarize_tree(module_tree)) + result = { + "status": "success", + "module_tree_path": str(module_tree_path), + "total_modules": len(module_tree), + "tree_summary": summary, + } + return [TextContent(type="text", text=json.dumps(result, indent=2))] + + +async def main(): + """Run the MCP server with stdio transport.""" + async with stdio_server() as (read_stream, write_stream): + await server.run(read_stream, write_stream, server.create_initialization_options()) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/codewiki/src/be/llm_services.py b/codewiki/src/be/llm_services.py index 08af907f..0b03a54c 100644 --- a/codewiki/src/be/llm_services.py +++ b/codewiki/src/be/llm_services.py @@ -3,6 +3,8 @@ Includes a compatibility layer for OpenAI-compatible API proxies that may return slightly non-standard responses (e.g. choices[].index = None). + +Supports multiple providers: openai-compatible, anthropic, bedrock. 
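+
+Illustrative sketch (assuming a fully populated Config instance named `config`):
+
+    config.provider = "bedrock"        # or "anthropic"; routed through litellm
+    config.aws_region = "us-east-1"    # only consulted by the bedrock provider
+    summary = call_llm("Summarize the auth module", config)
+
+The default "openai-compatible" provider keeps the existing OpenAI client path.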
""" import logging from openai.types import chat @@ -49,6 +51,22 @@ def _build_model_settings(config: Config, model_name: str) -> OpenAIModelSetting ) +def _get_litellm_model_name(model_name: str, provider: str) -> str: + """ + Get the litellm-compatible model name for a given provider. + + For Bedrock, prefixes the model name with 'bedrock/' if not already prefixed. + For Anthropic, prefixes with 'anthropic/' if not already prefixed. + """ + if provider == "bedrock": + if not model_name.startswith("bedrock/"): + return f"bedrock/{model_name}" + elif provider == "anthropic": + if not model_name.startswith("anthropic/"): + return f"anthropic/{model_name}" + return model_name + + class CompatibleOpenAIModel(OpenAIModel): """OpenAIModel subclass that patches non-standard API proxy responses. @@ -66,6 +84,28 @@ def _validate_completion(self, response: chat.ChatCompletion) -> chat.ChatComple return super()._validate_completion(response) +def _create_litellm_openai_client(config: Config) -> OpenAI: + """ + Create an OpenAI-compatible client backed by litellm's proxy. + + litellm translates OpenAI API calls to Bedrock, Anthropic, etc. + """ + import litellm + # Configure litellm for the provider + if config.provider == "bedrock": + import os + os.environ.setdefault("AWS_DEFAULT_REGION", config.aws_region) + os.environ.setdefault("AWS_REGION_NAME", config.aws_region) + + # litellm exposes an OpenAI-compatible Router we can use, + # but the simplest path is to use litellm.completion() directly. + # For pydantic-ai integration, we create a proxy client. + return OpenAI( + api_key=config.llm_api_key or "not-needed-for-bedrock", + base_url=config.llm_base_url or "https://api.openai.com/v1", + ) + + def create_main_model(config: Config) -> CompatibleOpenAIModel: """Create the main LLM model from configuration.""" return CompatibleOpenAIModel( @@ -114,6 +154,9 @@ def call_llm( """ Call LLM with the given prompt. + Supports openai-compatible, anthropic, and bedrock providers. + For bedrock/anthropic, uses litellm to translate the API call. + Args: prompt: The prompt to send config: Configuration containing LLM settings @@ -126,6 +169,12 @@ def call_llm( if model is None: model = config.main_model + provider = getattr(config, "provider", "openai-compatible") + + if provider in ("bedrock", "anthropic"): + return _call_llm_via_litellm(prompt, config, model, temperature) + + # Default: OpenAI-compatible client = create_openai_client(config) # Use the correct token parameter based on model/provider @@ -143,3 +192,36 @@ def call_llm( **token_kwargs ) return response.choices[0].message.content + + +def _call_llm_via_litellm( + prompt: str, + config: Config, + model: str, + temperature: float = 0.0 +) -> str: + """ + Call LLM via litellm for Bedrock/Anthropic providers. + + litellm handles the provider-specific API translation automatically. 
+ """ + import litellm + import os + + litellm_model = _get_litellm_model_name(model, config.provider) + + if config.provider == "bedrock": + os.environ.setdefault("AWS_DEFAULT_REGION", config.aws_region) + os.environ.setdefault("AWS_REGION_NAME", config.aws_region) + logger.debug("Calling Bedrock model %s in region %s", litellm_model, config.aws_region) + elif config.provider == "anthropic": + logger.debug("Calling Anthropic model %s via litellm", litellm_model) + + response = litellm.completion( + model=litellm_model, + messages=[{"role": "user", "content": prompt}], + temperature=temperature, + max_tokens=config.max_tokens, + api_key=config.llm_api_key if config.provider != "bedrock" else None, + ) + return response.choices[0].message.content diff --git a/codewiki/src/config.py b/codewiki/src/config.py index 420d1ea5..42757788 100644 --- a/codewiki/src/config.py +++ b/codewiki/src/config.py @@ -57,6 +57,9 @@ class Config: main_model: str cluster_model: str fallback_model: str = FALLBACK_MODEL_1 + # Provider configuration + provider: str = "openai-compatible" # openai-compatible, anthropic, bedrock + aws_region: str = "us-east-1" # Max token settings max_tokens: int = DEFAULT_MAX_TOKENS max_token_per_module: int = DEFAULT_MAX_TOKEN_PER_MODULE @@ -155,6 +158,8 @@ def from_cli( main_model: str, cluster_model: str, fallback_model: str = FALLBACK_MODEL_1, + provider: str = "openai-compatible", + aws_region: str = "us-east-1", max_tokens: int = DEFAULT_MAX_TOKENS, max_token_per_module: int = DEFAULT_MAX_TOKEN_PER_MODULE, max_token_per_leaf_module: int = DEFAULT_MAX_TOKEN_PER_LEAF_MODULE, @@ -163,7 +168,7 @@ def from_cli( ) -> 'Config': """ Create configuration for CLI context. - + Args: repo_path: Repository path output_dir: Output directory for generated docs @@ -172,18 +177,20 @@ def from_cli( main_model: Primary model cluster_model: Clustering model fallback_model: Fallback model + provider: LLM provider type (openai-compatible, anthropic, bedrock) + aws_region: AWS region for Bedrock provider max_tokens: Maximum tokens for LLM response max_token_per_module: Maximum tokens per module for clustering max_token_per_leaf_module: Maximum tokens per leaf module max_depth: Maximum depth for hierarchical decomposition agent_instructions: Custom agent instructions dict - + Returns: Config instance """ repo_name = os.path.basename(os.path.normpath(repo_path)) base_output_dir = os.path.join(output_dir, "temp") - + return cls( repo_path=repo_path, output_dir=base_output_dir, @@ -195,6 +202,8 @@ def from_cli( main_model=main_model, cluster_model=cluster_model, fallback_model=fallback_model, + provider=provider, + aws_region=aws_region, max_tokens=max_tokens, max_token_per_module=max_token_per_module, max_token_per_leaf_module=max_token_per_leaf_module, From f3fe1c669e27c2c6daaf839070165be421eaae86 Mon Sep 17 00:00:00 2001 From: anhnh2002 Date: Tue, 24 Mar 2026 17:03:24 +0700 Subject: [PATCH 7/9] re-format component id --- .../generate_sub_module_documentations.py | 15 +++++--- .../be/agent_tools/read_code_components.py | 2 +- .../analysis/call_graph_analyzer.py | 13 +++++-- .../src/be/dependency_analyzer/analyzers/c.py | 4 +-- .../be/dependency_analyzer/analyzers/cpp.py | 6 ++-- .../dependency_analyzer/analyzers/csharp.py | 4 +-- .../be/dependency_analyzer/analyzers/java.py | 6 ++-- .../analyzers/javascript.py | 36 +++++++++---------- .../dependency_analyzer/analyzers/kotlin.py | 6 ++-- .../be/dependency_analyzer/analyzers/php.py | 17 +++------ .../dependency_analyzer/analyzers/python.py | 26 
+++++++------- .../analyzers/typescript.py | 11 +++--- .../src/be/dependency_analyzer/ast_parser.py | 8 +++-- codewiki/src/be/documentation_generator.py | 3 ++ codewiki/src/be/prompt_template.py | 36 ++++++++++++++++--- 15 files changed, 117 insertions(+), 76 deletions(-) diff --git a/codewiki/src/be/agent_tools/generate_sub_module_documentations.py b/codewiki/src/be/agent_tools/generate_sub_module_documentations.py index a40b3f42..46580a20 100644 --- a/codewiki/src/be/agent_tools/generate_sub_module_documentations.py +++ b/codewiki/src/be/agent_tools/generate_sub_module_documentations.py @@ -1,4 +1,5 @@ from pydantic_ai import RunContext, Tool, Agent +from typing import Dict, List from codewiki.src.be.agent_tools.deps import CodeWikiDeps from codewiki.src.be.agent_tools.read_code_components import read_code_components_tool @@ -15,12 +16,14 @@ async def generate_sub_module_documentation( ctx: RunContext[CodeWikiDeps], - sub_module_specs: dict[str, list[str]] + sub_module_specs: Dict[str, List[str]] ) -> str: - """Generate detailed description of a given sub-module specs to the sub-agents + """Delegate documentation generation of sub-modules to sub-agents. Each sub-module will be documented separately. Args: - sub_module_specs: The specs of the sub-modules to generate documentation for. E.g. {"sub_module_1": ["core_component_1.1", "core_component_1.2"], "sub_module_2": ["core_component_2.1", "core_component_2.2"], ...} + sub_module_specs: A dictionary mapping sub-module names to their core component IDs. + Example: {"authentication": ["auth_handler.py::AuthHandler", "auth_middleware.py::verify_token"], "database": ["db_client.py::DBClient", "models.py::UserModel"]} + Each key is a descriptive sub-module name, and the value is a list of component IDs from the current module's core components that belong to that sub-module. """ deps = ctx.deps @@ -89,4 +92,8 @@ async def generate_sub_module_documentation( return f"Generate successfully. Documentations: {', '.join([key + '.md' for key in sub_module_specs.keys()])} are saved in the working directory." -generate_sub_module_documentation_tool = Tool(function=generate_sub_module_documentation, name="generate_sub_module_documentation", description="Generate detailed description of a given sub-module specs to the sub-agents", takes_ctx=True) \ No newline at end of file +generate_sub_module_documentation_tool = Tool( + function=generate_sub_module_documentation, + name="generate_sub_module_documentation", + takes_ctx=True +) \ No newline at end of file diff --git a/codewiki/src/be/agent_tools/read_code_components.py b/codewiki/src/be/agent_tools/read_code_components.py index 0125cbb2..93c13446 100644 --- a/codewiki/src/be/agent_tools/read_code_components.py +++ b/codewiki/src/be/agent_tools/read_code_components.py @@ -6,7 +6,7 @@ async def read_code_components(ctx: RunContext[CodeWikiDeps], component_ids: lis """Read the code of a given component id Args: - component_ids: The ids of the components to read, e.g. ["sweagent.types.AgentRunResult", "sweagent.types.AgentRunResult"] where sweagent.types part is the path to the component and AgentRunResult is the name of the component + component_ids: The ids of the components to read, e.g. 
["sweagent/types.py::AgentRunResult", "auth/middleware.py::verify_token"] where the part before :: is the file path and the part after :: is the component name """ results = [] diff --git a/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py b/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py index 272ca0b6..8df9e02e 100644 --- a/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py +++ b/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py @@ -412,7 +412,11 @@ def _resolve_call_relationships(self): func_lookup[func_info.name] = func_id if func_info.component_id: func_lookup[func_info.component_id] = func_id - method_name = func_info.component_id.split(".")[-1] + # Extract short name: handle both new (path::Name) and legacy (path.Name) formats + if "::" in func_info.component_id: + method_name = func_info.component_id.split("::")[-1] + else: + method_name = func_info.component_id.split(".")[-1] if method_name not in func_lookup: func_lookup[method_name] = func_id @@ -424,13 +428,16 @@ def _resolve_call_relationships(self): relationship.callee = func_lookup[callee_name] relationship.is_resolved = True resolved_count += 1 - elif "." in callee_name: + elif "::" in callee_name or "." in callee_name: if callee_name in func_lookup: relationship.callee = func_lookup[callee_name] relationship.is_resolved = True resolved_count += 1 else: - method_name = callee_name.split(".")[-1] + if "::" in callee_name: + method_name = callee_name.split("::")[-1] + else: + method_name = callee_name.split(".")[-1] if method_name in func_lookup: relationship.callee = func_lookup[method_name] relationship.is_resolved = True diff --git a/codewiki/src/be/dependency_analyzer/analyzers/c.py b/codewiki/src/be/dependency_analyzer/analyzers/c.py index 9332a6f8..1a491961 100644 --- a/codewiki/src/be/dependency_analyzer/analyzers/c.py +++ b/codewiki/src/be/dependency_analyzer/analyzers/c.py @@ -44,8 +44,8 @@ def _get_relative_path(self) -> str: return str(self.file_path) def _get_component_id(self, name: str) -> str: - module_path = self._get_module_path() - return f"{module_path}.{name}" if module_path else name + rel_path = self._get_relative_path() + return f"{rel_path}::{name}" def _analyze(self): language_capsule = tree_sitter_c.language() diff --git a/codewiki/src/be/dependency_analyzer/analyzers/cpp.py b/codewiki/src/be/dependency_analyzer/analyzers/cpp.py index dd89d1b3..bb98a9e1 100644 --- a/codewiki/src/be/dependency_analyzer/analyzers/cpp.py +++ b/codewiki/src/be/dependency_analyzer/analyzers/cpp.py @@ -44,10 +44,10 @@ def _get_relative_path(self) -> str: return str(self.file_path) def _get_component_id(self, name: str, parent_class: str = None) -> str: - module_path = self._get_module_path() + rel_path = self._get_relative_path() if parent_class: - return f"{module_path}.{parent_class}.{name}" if module_path else f"{parent_class}.{name}" - return f"{module_path}.{name}" if module_path else name + return f"{rel_path}::{parent_class}.{name}" + return f"{rel_path}::{name}" def _analyze(self): language_capsule = tree_sitter_cpp.language() diff --git a/codewiki/src/be/dependency_analyzer/analyzers/csharp.py b/codewiki/src/be/dependency_analyzer/analyzers/csharp.py index 50500aa4..636e2063 100644 --- a/codewiki/src/be/dependency_analyzer/analyzers/csharp.py +++ b/codewiki/src/be/dependency_analyzer/analyzers/csharp.py @@ -44,8 +44,8 @@ def _get_relative_path(self) -> str: return str(self.file_path) def _get_component_id(self, name: str) -> str: - 
module_path = self._get_module_path() - return f"{module_path}.{name}" if module_path else name + rel_path = self._get_relative_path() + return f"{rel_path}::{name}" def _analyze(self): language_capsule = tree_sitter_c_sharp.language() diff --git a/codewiki/src/be/dependency_analyzer/analyzers/java.py b/codewiki/src/be/dependency_analyzer/analyzers/java.py index 26f586a1..71065645 100644 --- a/codewiki/src/be/dependency_analyzer/analyzers/java.py +++ b/codewiki/src/be/dependency_analyzer/analyzers/java.py @@ -45,11 +45,11 @@ def _get_relative_path(self) -> str: return str(self.file_path) def _get_component_id(self, name: str, parent_class: str = None) -> str: - module_path = self._get_module_path() + rel_path = self._get_relative_path() if parent_class: - return f"{module_path}.{parent_class}.{name}" + return f"{rel_path}::{parent_class}.{name}" else: - return f"{module_path}.{name}" + return f"{rel_path}::{name}" def _analyze(self): language_capsule = tree_sitter_java.language() diff --git a/codewiki/src/be/dependency_analyzer/analyzers/javascript.py b/codewiki/src/be/dependency_analyzer/analyzers/javascript.py index 7b94e167..a1312695 100644 --- a/codewiki/src/be/dependency_analyzer/analyzers/javascript.py +++ b/codewiki/src/be/dependency_analyzer/analyzers/javascript.py @@ -94,14 +94,14 @@ def _get_relative_path(self) -> str: return str(self.file_path) def _get_component_id(self, name: str, class_name: str = None, is_method: bool = False) -> str: - module_path = self._get_module_path() - + relative_path = self._get_relative_path() + if is_method and class_name: - return f"{module_path}.{class_name}.{name}" - elif class_name and not is_method: - return f"{module_path}.{name}" - else: - return f"{module_path}.{name}" + return f"{relative_path}::{class_name}.{name}" + elif class_name and not is_method: + return f"{relative_path}::{name}" + else: + return f"{relative_path}::{name}" def _find_containing_class(self, node) -> Optional[str]: parent = node.parent @@ -167,7 +167,7 @@ def _extract_methods_from_class(self, class_node, class_name: str) -> None: if child.type == "method_definition": method_name = self._get_method_name(child) if method_name: - method_key = f"{self._get_module_path()}.{class_name}.{method_name}" + method_key = f"{self._get_relative_path()}::{class_name}.{method_name}" method_node = self._create_method_node(child, method_name, class_name) if method_node: self.top_level_nodes[method_key] = method_node @@ -175,7 +175,7 @@ def _extract_methods_from_class(self, class_node, class_name: str) -> None: # Handle arrow function properties field_name = self._get_field_name(child) if field_name and self._is_arrow_function_field(child): - method_key = f"{self._get_module_path()}.{class_name}.{field_name}" + method_key = f"{self._get_relative_path()}::{class_name}.{field_name}" method_node = self._create_method_node(child, field_name, class_name) if method_node: self.top_level_nodes[method_key] = method_node @@ -435,7 +435,7 @@ def _traverse_for_calls(self, node, current_top_level) -> None: if child.type in ["identifier", "type_identifier"]: base_class = self._get_node_text(child) caller_id = self._get_component_id(current_top_level) - callee_id = f"{self._get_module_path()}.{base_class}" + callee_id = f"{self._get_relative_path()}::{base_class}" inheritance_rel = CallRelationship( caller=caller_id, callee=callee_id, @@ -476,8 +476,8 @@ def _traverse_for_calls(self, node, current_top_level) -> None: callee_name = self._extract_callee_name(node) if callee_name: call_info = 
CallRelationship( - caller=f"{self._get_module_path()}.{current_top_level}", - callee=f"{self._get_module_path()}.{callee_name}", + caller=f"{self._get_relative_path()}::{current_top_level}", + callee=f"{self._get_relative_path()}::{callee_name}", call_line=node.start_point[0] + 1, is_resolved=False ) @@ -498,8 +498,8 @@ def _extract_call_from_node(self, node, caller_name: str) -> Optional[CallRelati call_text = self._get_node_text(node) is_method_call = "this." in call_text or "super." in call_text - caller_id = f"{self._get_module_path()}.{caller_name}" - + caller_id = f"{self._get_relative_path()}::{caller_name}" + if is_method_call: current_class = None for node_key, node_obj in self.top_level_nodes.items(): @@ -508,11 +508,11 @@ def _extract_call_from_node(self, node, caller_name: str) -> Optional[CallRelati break if current_class: - method_key = f"{self._get_module_path()}.{current_class}.{callee_name}" + method_key = f"{self._get_relative_path()}::{current_class}.{callee_name}" if method_key in self.top_level_nodes: return None - callee_id = f"{self._get_module_path()}.{callee_name}" + callee_id = f"{self._get_relative_path()}::{callee_name}" if callee_name in self.top_level_nodes: return CallRelationship( caller=caller_id, @@ -570,8 +570,8 @@ def _parse_jsdoc_types(self, comment_text: str, caller_name: str, line_number: i for base_type in base_types: if base_type and not self._is_builtin_type_js(base_type): - caller_id = f"{self._get_module_path()}.{caller_name}" - callee_id = f"{self._get_module_path()}.{base_type}" + caller_id = f"{self._get_relative_path()}::{caller_name}" + callee_id = f"{self._get_relative_path()}::{base_type}" type_rel = CallRelationship( caller=caller_id, diff --git a/codewiki/src/be/dependency_analyzer/analyzers/kotlin.py b/codewiki/src/be/dependency_analyzer/analyzers/kotlin.py index d56f220c..7ef5be9a 100644 --- a/codewiki/src/be/dependency_analyzer/analyzers/kotlin.py +++ b/codewiki/src/be/dependency_analyzer/analyzers/kotlin.py @@ -45,11 +45,11 @@ def _get_relative_path(self) -> str: return str(self.file_path) def _get_component_id(self, name: str, parent_class: Optional[str] = None) -> str: - module_path = self._get_module_path() + rel_path = self._get_relative_path() if parent_class: - return f"{module_path}.{parent_class}.{name}" + return f"{rel_path}::{parent_class}.{name}" else: - return f"{module_path}.{name}" + return f"{rel_path}::{name}" def _analyze(self): try: diff --git a/codewiki/src/be/dependency_analyzer/analyzers/php.py b/codewiki/src/be/dependency_analyzer/analyzers/php.py index 2029ec82..8a5696fd 100644 --- a/codewiki/src/be/dependency_analyzer/analyzers/php.py +++ b/codewiki/src/be/dependency_analyzer/analyzers/php.py @@ -148,17 +148,10 @@ def _get_relative_path(self) -> str: def _get_component_id(self, name: str, parent_class: str = None) -> str: """Generate component ID for a node.""" - # Use namespace if available - if self.namespace_resolver.current_namespace: - ns_prefix = self.namespace_resolver.current_namespace.replace("\\", ".") - if parent_class: - return f"{ns_prefix}.{parent_class}.{name}" - return f"{ns_prefix}.{name}" - - module_path = self._get_module_path() + rel_path = self._get_relative_path() if parent_class: - return f"{module_path}.{parent_class}.{name}" - return f"{module_path}.{name}" + return f"{rel_path}::{parent_class}.{name}" + return f"{rel_path}::{name}" def _analyze(self): """Parse and analyze the PHP file.""" @@ -442,7 +435,7 @@ def _add_use_relationships(self, node): if name_node: fqn = 
name_node.text.decode().replace("\\", ".") # Add relationship from file to imported class - file_id = self._get_module_path() + file_id = self._get_relative_path() self.call_relationships.append(CallRelationship( caller=file_id, callee=fqn, @@ -458,7 +451,7 @@ def _add_use_relationships(self, node): name_node = self._find_child_by_type(group_child, "namespace_name") if name_node: fqn = f"{prefix}\\{name_node.text.decode()}" if prefix else name_node.text.decode() - file_id = self._get_module_path() + file_id = self._get_relative_path() self.call_relationships.append(CallRelationship( caller=file_id, callee=fqn.replace("\\", "."), diff --git a/codewiki/src/be/dependency_analyzer/analyzers/python.py b/codewiki/src/be/dependency_analyzer/analyzers/python.py index deda7935..e865729e 100644 --- a/codewiki/src/be/dependency_analyzer/analyzers/python.py +++ b/codewiki/src/be/dependency_analyzer/analyzers/python.py @@ -53,12 +53,12 @@ def _get_module_path(self) -> str: return str(self.file_path).replace('/', '.').replace('\\', '.') def _get_component_id(self, name: str) -> str: - """Generate dot-separated component ID.""" - module_path = self._get_module_path() + """Generate component ID in relative_path::name format.""" + rel_path = self._get_relative_path() if self.current_class_name: - return f"{module_path}.{self.current_class_name}.{name}" + return f"{rel_path}::{self.current_class_name}.{name}" else: - return f"{module_path}.{name}" + return f"{rel_path}::{name}" def generic_visit(self, node): """Override generic_visit to continue AST traversal.""" @@ -70,9 +70,9 @@ def visit_ClassDef(self, node: ast.ClassDef): base_classes = [self._extract_base_class_name(base) for base in node.bases] base_classes = [name for name in base_classes if name is not None] - component_id = f"{self._get_module_path()}.{node.name}" + component_id = f"{self._get_relative_path()}::{node.name}" relative_path = self._get_relative_path() - + class_node = Node( id=component_id, name=node.name, @@ -98,7 +98,7 @@ def visit_ClassDef(self, node: ast.ClassDef): if base_name in self.top_level_nodes: self.call_relationships.append(CallRelationship( caller=component_id, - callee=f"{self._get_module_path()}.{base_name}", + callee=f"{self._get_relative_path()}::{base_name}", call_line=node.lineno, is_resolved=True )) @@ -126,9 +126,9 @@ def _process_function_node(self, node: ast.FunctionDef | ast.AsyncFunctionDef): """Process function definition - only add to nodes if it's top-level.""" if not self.current_class_name: - component_id = f"{self._get_module_path()}.{node.name}" + component_id = f"{self._get_relative_path()}::{node.name}" relative_path = self._get_relative_path() - + func_node = Node( id=component_id, name=node.name, @@ -175,12 +175,12 @@ def visit_Call(self, node: ast.Call): call_name = self._get_call_name(node.func) if call_name: if self.current_class_name: - caller_id = f"{self._get_module_path()}.{self.current_class_name}" + caller_id = f"{self._get_relative_path()}::{self.current_class_name}" else: - caller_id = f"{self._get_module_path()}.{self.current_function_name}" - + caller_id = f"{self._get_relative_path()}::{self.current_function_name}" + if call_name in self.top_level_nodes: - callee_id = f"{self._get_module_path()}.{call_name}" + callee_id = f"{self._get_relative_path()}::{call_name}" else: callee_id = call_name diff --git a/codewiki/src/be/dependency_analyzer/analyzers/typescript.py b/codewiki/src/be/dependency_analyzer/analyzers/typescript.py index 68abc86d..0119bc9c 100644 --- 
a/codewiki/src/be/dependency_analyzer/analyzers/typescript.py +++ b/codewiki/src/be/dependency_analyzer/analyzers/typescript.py @@ -608,8 +608,8 @@ def _extract_parameter_dependencies(self, formal_params, caller_name: str) -> No if type_id: dependency_name = self._get_node_text(type_id) if dependency_name and dependency_name != caller_name: - caller_id = f"{self._get_module_path()}.{caller_name}" - callee_id = f"{self._get_module_path()}.{dependency_name}" + caller_id = f"{self._get_relative_path()}::{caller_name}" + callee_id = f"{self._get_relative_path()}::{dependency_name}" relationship = CallRelationship( caller=caller_id, @@ -648,8 +648,7 @@ def _get_relative_path(self) -> str: return str(self.file_path) def _get_component_id(self, name: str) -> str: - module_path = self._get_module_path() - return f"{module_path}.{name}" + return f"{self._get_relative_path()}::{name}" def _extract_inheritance(self, node) -> List[str]: """Extract inheritance/implementation relationships.""" @@ -920,8 +919,8 @@ def _resolve_to_top_level(self, entity_name: str, all_entities: dict) -> Optiona return entity_name if entity_name in self.top_level_nodes else None def _add_relationship(self, caller_name: str, callee_name: str, call_line: int) -> None: - caller_id = f"{self._get_module_path()}.{caller_name}" - callee_id = f"{self._get_module_path()}.{callee_name}" + caller_id = f"{self._get_relative_path()}::{caller_name}" + callee_id = f"{self._get_relative_path()}::{callee_name}" relationship = CallRelationship( caller=caller_id, diff --git a/codewiki/src/be/dependency_analyzer/ast_parser.py b/codewiki/src/be/dependency_analyzer/ast_parser.py index 81ac0bdc..4c50ea59 100644 --- a/codewiki/src/be/dependency_analyzer/ast_parser.py +++ b/codewiki/src/be/dependency_analyzer/ast_parser.py @@ -97,8 +97,12 @@ def _build_components_from_analysis(self, call_graph_result: Dict): if legacy_id and legacy_id != component_id: component_id_mapping[legacy_id] = component_id - if "." in component_id: - module_parts = component_id.split(".")[:-1] + if "::" in component_id: + file_path_part = component_id.split("::")[0] + if file_path_part: + self.modules.add(file_path_part) + elif "." 
in component_id: + module_parts = component_id.split(".")[:-1] module_path = ".".join(module_parts) if module_path: self.modules.add(module_path) diff --git a/codewiki/src/be/documentation_generator.py b/codewiki/src/be/documentation_generator.py index 261be616..234712cd 100644 --- a/codewiki/src/be/documentation_generator.py +++ b/codewiki/src/be/documentation_generator.py @@ -143,6 +143,9 @@ async def generate_module_documentation(self, components: Dict[str, Any], leaf_n if len(module_tree) > 0: for module_path, module_name in processing_order: try: + # Reload module tree to get latest hierarchical structure from sub-agent modifications + module_tree = file_manager.load_json(module_tree_path) + # Get the module info from the tree module_info = module_tree for path_part in module_path: diff --git a/codewiki/src/be/prompt_template.py b/codewiki/src/be/prompt_template.py index f6da5f8b..f374315f 100644 --- a/codewiki/src/be/prompt_template.py +++ b/codewiki/src/be/prompt_template.py @@ -265,12 +265,26 @@ def _format_module_tree(module_tree: dict[str, any], indent: int = 0): lines.append(f"{' ' * indent}{key} (current module)") else: lines.append(f"{' ' * indent}{key}") - - lines.append(f"{' ' * (indent + 1)} Core components: {', '.join(value['components'])}") + + # Group components by file + from collections import defaultdict + by_file = defaultdict(list) + for c in value['components']: + if "::" in c: + fpath, name = c.split("::", 1) + by_file[fpath].append(name) + else: + by_file[""].append(c) + for fpath, names in by_file.items(): + if fpath: + lines.append(f"{' ' * (indent + 1)} {fpath}: {', '.join(names)}") + else: + lines.append(f"{' ' * (indent + 1)} {', '.join(names)}") + if isinstance(value["children"], dict) and len(value["children"]) > 0: lines.append(f"{' ' * (indent + 1)} Children:") _format_module_tree(value["children"], indent + 2) - + _format_module_tree(module_tree, 0) formatted_module_tree = "\n".join(lines) @@ -326,7 +340,21 @@ def _format_module_tree(module_tree: dict[str, any], indent: int = 0): else: lines.append(f"{' ' * indent}{key}") - lines.append(f"{' ' * (indent + 1)} Core components: {', '.join(value['components'])}") + # Group components by file + from collections import defaultdict + by_file = defaultdict(list) + for c in value['components']: + if "::" in c: + fpath, name = c.split("::", 1) + by_file[fpath].append(name) + else: + by_file[""].append(c) + for fpath, names in by_file.items(): + if fpath: + lines.append(f"{' ' * (indent + 1)} {fpath}: {', '.join(names)}") + else: + lines.append(f"{' ' * (indent + 1)} {', '.join(names)}") + if ("children" in value) and isinstance(value["children"], dict) and len(value["children"]) > 0: lines.append(f"{' ' * (indent + 1)} Children:") _format_module_tree(value["children"], indent + 2) From 81827f01ae8645adc4639e13a5d52881543e8a93 Mon Sep 17 00:00:00 2001 From: Nghi Bui Date: Fri, 3 Apr 2026 23:05:08 -0700 Subject: [PATCH 8/9] Add Azure OpenAI support (#49) Add azure-openai as a new provider option, using the AzureOpenAI client from the openai package. Users can configure via --provider azure-openai with --azure-deployment and --api-version options. 
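For example, a user might point CodeWiki at an Azure deployment like this (illustrative placeholder values for the key, resource URL, and deployment name):

    codewiki config set \
      --provider azure-openai \
      --api-key YOUR_AZURE_KEY \
      --base-url https://YOUR_RESOURCE.openai.azure.com \
      --azure-deployment YOUR_DEPLOYMENT \
      --api-version 2024-12-01-preview \
      --main-model gpt-4o \
      --cluster-model gpt-4o

When --api-version is omitted, the provider falls back to the 2024-12-01-preview default.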
Co-Authored-By: Claude Opus 4.6 (1M context) --- codewiki/cli/commands/config.py | 54 +++++++++++++++++++++++++++++---- codewiki/cli/config_manager.py | 12 ++++++-- codewiki/cli/models/config.py | 12 +++++++- codewiki/src/be/llm_services.py | 37 +++++++++++++++++++++- codewiki/src/config.py | 12 ++++++-- 5 files changed, 115 insertions(+), 12 deletions(-) diff --git a/codewiki/cli/commands/config.py b/codewiki/cli/commands/config.py index 5ef6d99c..63df5609 100644 --- a/codewiki/cli/commands/config.py +++ b/codewiki/cli/commands/config.py @@ -85,7 +85,7 @@ def config_group(): ) @click.option( "--provider", - type=click.Choice(['openai-compatible', 'anthropic', 'bedrock'], case_sensitive=False), + type=click.Choice(['openai-compatible', 'anthropic', 'bedrock', 'azure-openai'], case_sensitive=False), help="LLM provider type (default: openai-compatible)" ) @click.option( @@ -93,6 +93,16 @@ def config_group(): type=str, help="AWS region for Bedrock provider (default: us-east-1)" ) +@click.option( + "--api-version", + type=str, + help="Azure OpenAI API version (default: 2024-12-01-preview)" +) +@click.option( + "--azure-deployment", + type=str, + help="Azure OpenAI deployment name" +) def config_set( api_key: Optional[str], base_url: Optional[str], @@ -104,7 +114,9 @@ def config_set( max_token_per_leaf_module: Optional[int], max_depth: Optional[int], provider: Optional[str] = None, - aws_region: Optional[str] = None + aws_region: Optional[str] = None, + api_version: Optional[str] = None, + azure_deployment: Optional[str] = None ): """ Set configuration values for CodeWiki. @@ -139,7 +151,7 @@ def config_set( """ try: # Check if at least one option is provided - if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module, max_depth, provider, aws_region]): + if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module, max_depth, provider, aws_region, api_version, azure_deployment]): click.echo("No options provided. 
Use --help for usage information.") sys.exit(EXIT_CONFIG_ERROR) @@ -187,6 +199,12 @@ def config_set( if aws_region is not None: validated_data['aws_region'] = aws_region + if api_version is not None: + validated_data['api_version'] = api_version + + if azure_deployment is not None: + validated_data['azure_deployment'] = azure_deployment + # Create config manager and save manager = ConfigManager() manager.load() # Load existing config if present @@ -202,7 +220,9 @@ def config_set( max_token_per_leaf_module=validated_data.get('max_token_per_leaf_module'), max_depth=validated_data.get('max_depth'), provider=validated_data.get('provider'), - aws_region=validated_data.get('aws_region') + aws_region=validated_data.get('aws_region'), + api_version=validated_data.get('api_version'), + azure_deployment=validated_data.get('azure_deployment') ) # Display success messages @@ -256,7 +276,13 @@ def config_set( if aws_region: click.secho(f"✓ AWS Region: {aws_region}", fg="green") - + + if api_version: + click.secho(f"✓ API Version: {api_version}", fg="green") + + if azure_deployment: + click.secho(f"✓ Azure Deployment: {azure_deployment}", fg="green") + click.echo("\n" + click.style("Configuration updated successfully.", fg="green", bold=True)) except ConfigurationError as e: @@ -342,6 +368,12 @@ def config_show(output_json: bool): click.echo(f" Main Model: {config.main_model or 'Not set'}") click.echo(f" Cluster Model: {config.cluster_model or 'Not set'}") click.echo(f" Fallback Model: {config.fallback_model or 'Not set'}") + click.echo(f" Provider: {config.provider}") + if config.provider == "bedrock": + click.echo(f" AWS Region: {config.aws_region}") + elif config.provider == "azure-openai": + click.echo(f" API Version: {config.api_version}") + click.echo(f" Azure Deployment: {config.azure_deployment or 'Not set'}") else: click.secho(" Not configured", fg="yellow") @@ -523,7 +555,17 @@ def config_validate(quick: bool, verbose: bool): try: base_url_lower = (config.base_url or "").lower() - if "api.anthropic.com" in base_url_lower: + provider = getattr(config, 'provider', 'openai-compatible') + if provider == "azure-openai" or ".openai.azure.com" in base_url_lower: + # Use Azure OpenAI SDK + from openai import AzureOpenAI + client = AzureOpenAI( + api_key=api_key, + api_version=config.api_version, + azure_endpoint=config.base_url, + ) + client.models.list() + elif "api.anthropic.com" in base_url_lower: # Use Anthropic SDK for native Anthropic endpoints import anthropic client = anthropic.Anthropic(api_key=api_key) diff --git a/codewiki/cli/config_manager.py b/codewiki/cli/config_manager.py index a652e405..a87df025 100644 --- a/codewiki/cli/config_manager.py +++ b/codewiki/cli/config_manager.py @@ -133,7 +133,9 @@ def save( max_token_per_leaf_module: Optional[int] = None, max_depth: Optional[int] = None, provider: Optional[str] = None, - aws_region: Optional[str] = None + aws_region: Optional[str] = None, + api_version: Optional[str] = None, + azure_deployment: Optional[str] = None ): """ Save configuration to file and keyring. 
@@ -149,8 +151,10 @@ def save( max_token_per_module: Maximum tokens per module for clustering max_token_per_leaf_module: Maximum tokens per leaf module max_depth: Maximum depth for hierarchical decomposition - provider: LLM provider type (openai-compatible, anthropic, bedrock) + provider: LLM provider type (openai-compatible, anthropic, bedrock, azure-openai) aws_region: AWS region for Bedrock provider + api_version: Azure OpenAI API version + azure_deployment: Azure OpenAI deployment name """ # Ensure config directory exists try: @@ -196,6 +200,10 @@ def save( self._config.provider = provider if aws_region is not None: self._config.aws_region = aws_region + if api_version is not None: + self._config.api_version = api_version + if azure_deployment is not None: + self._config.azure_deployment = azure_deployment # Validate configuration (only if base fields are set) if self._config.base_url and self._config.main_model and self._config.cluster_model: diff --git a/codewiki/cli/models/config.py b/codewiki/cli/models/config.py index 8bfe9152..3f9e0499 100644 --- a/codewiki/cli/models/config.py +++ b/codewiki/cli/models/config.py @@ -113,8 +113,10 @@ class Configuration: cluster_model: Model for module clustering fallback_model: Fallback model for documentation generation default_output: Default output directory - provider: LLM provider type (openai-compatible, anthropic, bedrock) + provider: LLM provider type (openai-compatible, anthropic, bedrock, azure-openai) aws_region: AWS region for Bedrock provider + api_version: Azure OpenAI API version + azure_deployment: Azure OpenAI deployment name max_tokens: Maximum tokens for LLM response (default: 32768) max_token_per_module: Maximum tokens per module for clustering (default: 36369) max_token_per_leaf_module: Maximum tokens per leaf module (default: 16000) @@ -128,6 +130,8 @@ class Configuration: default_output: str = "docs" provider: str = "openai-compatible" aws_region: str = "us-east-1" + api_version: str = "2024-12-01-preview" + azure_deployment: str = "" max_tokens: int = 32768 max_token_per_module: int = 36369 max_token_per_leaf_module: int = 16000 @@ -155,6 +159,8 @@ def to_dict(self) -> dict: 'default_output': self.default_output, 'provider': self.provider, 'aws_region': self.aws_region, + 'api_version': self.api_version, + 'azure_deployment': self.azure_deployment, 'max_tokens': self.max_tokens, 'max_token_per_module': self.max_token_per_module, 'max_token_per_leaf_module': self.max_token_per_leaf_module, @@ -187,6 +193,8 @@ def from_dict(cls, data: dict) -> 'Configuration': default_output=data.get('default_output', 'docs'), provider=data.get('provider', 'openai-compatible'), aws_region=data.get('aws_region', 'us-east-1'), + api_version=data.get('api_version', '2024-12-01-preview'), + azure_deployment=data.get('azure_deployment', ''), max_tokens=data.get('max_tokens', 32768), max_token_per_module=data.get('max_token_per_module', 36369), max_token_per_leaf_module=data.get('max_token_per_leaf_module', 16000), @@ -243,6 +251,8 @@ def to_backend_config(self, repo_path: str, output_dir: str, api_key: str, runti fallback_model=self.fallback_model, provider=self.provider, aws_region=self.aws_region, + api_version=self.api_version, + azure_deployment=self.azure_deployment, max_tokens=self.max_tokens, max_token_per_module=self.max_token_per_module, max_token_per_leaf_module=self.max_token_per_leaf_module, diff --git a/codewiki/src/be/llm_services.py b/codewiki/src/be/llm_services.py index 0b03a54c..db3437a7 100644 --- 
+++ b/codewiki/src/be/llm_services.py
@@ -4,7 +4,7 @@
 Includes a compatibility layer for OpenAI-compatible API proxies that may
 return slightly non-standard responses (e.g. choices[].index = None).
 
-Supports multiple providers: openai-compatible, anthropic, bedrock.
+Supports multiple providers: openai-compatible, anthropic, bedrock, azure-openai.
 """
 import logging
 from openai.types import chat
@@ -174,6 +174,9 @@ def call_llm(
     if provider in ("bedrock", "anthropic"):
         return _call_llm_via_litellm(prompt, config, model, temperature)
 
+    if provider == "azure-openai":
+        return _call_llm_via_azure(prompt, config, model, temperature)
+
     # Default: OpenAI-compatible
     client = create_openai_client(config)
 
@@ -225,3 +228,35 @@ def _call_llm_via_litellm(
         api_key=config.llm_api_key if config.provider != "bedrock" else None,
     )
     return response.choices[0].message.content
+
+
+def _call_llm_via_azure(
+    prompt: str,
+    config: Config,
+    model: str,
+    temperature: float = 0.0
+) -> str:
+    """
+    Call LLM via Azure OpenAI.
+
+    Uses the AzureOpenAI client from the openai package with
+    azure_endpoint, api_version, and deployment name.
+    """
+    from openai import AzureOpenAI
+
+    client = AzureOpenAI(
+        api_key=config.llm_api_key,
+        api_version=config.api_version,
+        azure_endpoint=config.llm_base_url,
+    )
+
+    deployment = config.azure_deployment or model
+    logger.debug("Calling Azure OpenAI deployment %s (api_version=%s)", deployment, config.api_version)
+
+    response = client.chat.completions.create(
+        model=deployment,
+        messages=[{"role": "user", "content": prompt}],
+        temperature=temperature,
+        max_tokens=config.max_tokens,
+    )
+    return response.choices[0].message.content
diff --git a/codewiki/src/config.py b/codewiki/src/config.py
index 42757788..120ac2bd 100644
--- a/codewiki/src/config.py
+++ b/codewiki/src/config.py
@@ -58,8 +58,10 @@ class Config:
     cluster_model: str
     fallback_model: str = FALLBACK_MODEL_1
     # Provider configuration
-    provider: str = "openai-compatible"  # openai-compatible, anthropic, bedrock
+    provider: str = "openai-compatible"  # openai-compatible, anthropic, bedrock, azure-openai
     aws_region: str = "us-east-1"
+    api_version: str = "2024-12-01-preview"  # Azure OpenAI API version
+    azure_deployment: str = ""  # Azure OpenAI deployment name
     # Max token settings
     max_tokens: int = DEFAULT_MAX_TOKENS
     max_token_per_module: int = DEFAULT_MAX_TOKEN_PER_MODULE
@@ -160,6 +162,8 @@ def from_cli(
         fallback_model: str = FALLBACK_MODEL_1,
         provider: str = "openai-compatible",
         aws_region: str = "us-east-1",
+        api_version: str = "2024-12-01-preview",
+        azure_deployment: str = "",
         max_tokens: int = DEFAULT_MAX_TOKENS,
         max_token_per_module: int = DEFAULT_MAX_TOKEN_PER_MODULE,
         max_token_per_leaf_module: int = DEFAULT_MAX_TOKEN_PER_LEAF_MODULE,
@@ -177,8 +181,10 @@ def from_cli(
             main_model: Primary model
             cluster_model: Clustering model
             fallback_model: Fallback model
-            provider: LLM provider type (openai-compatible, anthropic, bedrock)
+            provider: LLM provider type (openai-compatible, anthropic, bedrock, azure-openai)
             aws_region: AWS region for Bedrock provider
+            api_version: Azure OpenAI API version
+            azure_deployment: Azure OpenAI deployment name
             max_tokens: Maximum tokens for LLM response
             max_token_per_module: Maximum tokens per module for clustering
             max_token_per_leaf_module: Maximum tokens per leaf module
@@ -204,6 +210,8 @@ def from_cli(
             fallback_model=fallback_model,
             provider=provider,
             aws_region=aws_region,
+            api_version=api_version,
+            azure_deployment=azure_deployment,
             max_tokens=max_tokens,
             max_token_per_module=max_token_per_module,
             max_token_per_leaf_module=max_token_per_leaf_module,

From 738e0c4d0e7e3674915372e67e7dc0779d0bf3e8 Mon Sep 17 00:00:00 2001
From: Nghi Bui
Date: Fri, 3 Apr 2026 23:09:11 -0700
Subject: [PATCH 9/9] Update README with Azure OpenAI, Bedrock, incremental updates, and MCP server

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 README.md | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 951812bb..60b82e65 100644
--- a/README.md
+++ b/README.md
@@ -42,15 +42,32 @@ codewiki --version
 
 ### 2. Configure Your Environment
 
-CodeWiki supports multiple models via an OpenAI-compatible SDK layer.
+CodeWiki supports multiple LLM providers: **OpenAI-compatible**, **Anthropic**, **AWS Bedrock**, and **Azure OpenAI**.
 
 ```bash
+# Anthropic
 codewiki config set \
   --api-key YOUR_API_KEY \
   --base-url https://api.anthropic.com \
   --main-model claude-sonnet-4 \
   --cluster-model claude-sonnet-4 \
   --fallback-model glm-4p5
+
+# Azure OpenAI
+codewiki config set \
+  --provider azure-openai \
+  --api-key YOUR_AZURE_KEY \
+  --base-url https://YOUR_RESOURCE.openai.azure.com \
+  --azure-deployment YOUR_DEPLOYMENT \
+  --main-model gpt-4o \
+  --cluster-model gpt-4o
+
+# AWS Bedrock
+codewiki config set \
+  --provider bedrock \
+  --aws-region us-east-1 \
+  --main-model anthropic.claude-sonnet-4-v2:0 \
+  --cluster-model anthropic.claude-sonnet-4-v2:0
 ```
 
 ### 3. Generate Documentation
@@ -138,6 +155,9 @@ codewiki generate --verbose
 
 # Full-featured generation
 codewiki generate --create-branch --github-pages --verbose
+
+# Incremental update (only regenerate changed modules since last run)
+codewiki generate --update
 ```
 
 ### Customization Options
@@ -235,7 +255,7 @@ codewiki generate --max-tokens 16384 --max-token-per-module 40000 --max-depth 3
 
 ### Configuration Storage
 
-- **API keys**: Securely stored in system keychain (macOS Keychain, Windows Credential Manager, Linux Secret Service)
+- **API keys**: Securely stored in system keychain (macOS Keychain, Windows Credential Manager, Linux Secret Service). Falls back to `~/.codewiki/credentials.json` in headless/container environments. Set `CODEWIKI_NO_KEYRING=1` to force file-based storage.
 - **Settings & Agent Instructions**: `~/.codewiki/config.json`
 
 ---
@@ -331,7 +351,7 @@ CodeWiki employs a three-stage process for comprehensive documentation generatio
 
 - **Python 3.12+**
 - **Node.js** (for Mermaid diagram validation)
-- **LLM API access** (Anthropic Claude, OpenAI, etc.)
+- **LLM API access** (Anthropic Claude, OpenAI, Azure OpenAI, AWS Bedrock)
 - **Git** (for branch creation features)
 
 ---
@@ -339,6 +359,7 @@ CodeWiki employs a three-stage process for comprehensive documentation generatio
 ## Additional Resources
 
 ### Documentation & Guides
+- **[MCP Server](codewiki/mcp/)** - Model Context Protocol server for IDE integrations
 - **[Docker Deployment](docker/DOCKER_README.md)** - Containerized deployment instructions
 - **[Development Guide](DEVELOPMENT.md)** - Project structure, architecture, and contributing guidelines
 - **[CodeWikiBench](https://github.com/FSoft-AI4Code/CodeWikiBench)** - Repository-level documentation benchmark
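To make the README's Azure OpenAI settings concrete: they feed the `_call_llm_via_azure` helper added earlier in this series, which resolves the deployment name before issuing a chat completion. The sketch below shows roughly what that request looks like, assuming the `openai` v1+ SDK; the key, resource, and deployment names are placeholders.

```python
# Rough illustration of the request CodeWiki sends for provider "azure-openai".
# Key, resource, and deployment values are placeholders.
from openai import AzureOpenAI

client = AzureOpenAI(
    api_key="YOUR_AZURE_KEY",                                  # --api-key
    api_version="2024-12-01-preview",                          # api_version field (patch default)
    azure_endpoint="https://YOUR_RESOURCE.openai.azure.com",   # --base-url
)

response = client.chat.completions.create(
    model="YOUR_DEPLOYMENT",  # --azure-deployment; falls back to the main model name when unset
    messages=[{"role": "user", "content": "Summarize the dependency analyzer module."}],
    temperature=0.0,
    max_tokens=1024,
)
print(response.choices[0].message.content)
```

Azure routes on the deployment name rather than the underlying model id, which is why the series threads `azure_deployment` through the CLI options, the `Configuration` model, and the backend `Config`.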