-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
85 lines (71 loc) · 3.2 KB
/
main.py
File metadata and controls
85 lines (71 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import dotenv
import os
import argparse
from flow import create_schema_flow
dotenv.load_dotenv()
# File patterns used when -i/--include is not given on the command line:
# common source-code and database-related files.
# NOTE(review): presumably matched glob/fnmatch-style by the file crawler --
# the matching code is not in this file, confirm there.
DEFAULT_INCLUDE_PATTERNS = {
    # Python sources, stubs and Cython
    "*.py",
    "*.pyi",
    "*.pyx",
    # JavaScript / TypeScript
    "*.js",
    "*.jsx",
    "*.ts",
    "*.tsx",
    # Other compiled languages
    "*.go",
    "*.java",
    "*.c",
    "*.cc",
    "*.cpp",
    "*.h",
    # SQL scripts
    "*.sql",
    "*.ddl",
    "*.dml",
    # Documentation, build and configuration files
    "*.md",
    "*.rst",
    "*Dockerfile",
    "*Makefile",
    "*.yaml",
    "*.yml",
    # Any file whose name hints at persistence-layer code
    "*migration*",
    "*schema*",
    "*model*",
    "*entity*",
    "*dao*",
    "*repository*",
}
# File patterns skipped when -e/--exclude is not given on the command line:
# mostly test, build, tooling and other non-source directories.
# NOTE(review): presumably matched glob/fnmatch-style by the file crawler --
# the matching code is not in this file, confirm there.
DEFAULT_EXCLUDE_PATTERNS = {
    # Static assets and scratch data
    "assets/*",
    "data/*",
    "images/*",
    "public/*",
    "static/*",
    "temp/*",
    # Documentation, examples and tests
    "*docs/*",
    "*examples/*",
    "*test*",
    "*tests/*",
    # Virtual environments and vendored dependencies
    "*venv/*",
    "*.venv/*",
    "*node_modules/*",
    # Build output
    "*dist/*",
    "*build/*",
    "*obj/*",
    "*bin/*",
    # Old, experimental or miscellaneous code
    "v1/*",
    "*experimental/*",
    "*deprecated/*",
    "*misc/*",
    "*legacy/*",
    # VCS / editor / framework metadata
    ".git/*",
    ".github/*",
    ".next/*",
    ".vscode/*",
    # Log files
    "*.log",
}
def main():
    """Parse the command line and run the schema-generation flow.

    Exactly one of ``--repo`` or ``--dir`` must be supplied. The parsed
    options are collected into a ``shared`` dict which is passed to the flow
    returned by ``create_schema_flow()``.
    """
    parser = argparse.ArgumentParser(
        description="Generate a database schema for a GitHub codebase or local directory."
    )

    # The code comes from either a remote repository or a local directory,
    # never both, and one of them is mandatory.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--repo", help="URL of the public GitHub repository.")
    source.add_argument("--dir", help="Path to local directory.")

    parser.add_argument(
        "-n", "--name",
        help="Project name (optional, derived from repo/directory if omitted).",
    )
    parser.add_argument(
        "-t", "--token",
        help="GitHub personal access token (optional, reads from GITHUB_TOKEN env var if not provided).",
    )
    parser.add_argument(
        "-o", "--output",
        default="output",
        help="Base directory for output (default: ./output).",
    )
    parser.add_argument(
        "-i", "--include",
        nargs="+",
        help="Include file patterns (e.g. '*.py' '*.sql'). Defaults to common code and database files if not specified.",
    )
    parser.add_argument(
        "-e", "--exclude",
        nargs="+",
        help="Exclude file patterns (e.g. 'tests/*' 'docs/*'). Defaults to test/build directories if not specified.",
    )
    parser.add_argument(
        "-s", "--max-size",
        type=int,
        default=100000,
        help="Maximum file size in bytes (default: 100000, about 100KB).",
    )
    parser.add_argument(
        "--no-cache",
        action="store_true",
        help="Disable LLM response caching (default: caching enabled)",
    )

    args = parser.parse_args()

    # A token is only looked up for repository sources; the command-line
    # value takes precedence over the GITHUB_TOKEN environment variable.
    if args.repo:
        github_token = args.token or os.environ.get('GITHUB_TOKEN')
        if not github_token:
            print("Warning: No GitHub token provided. You might hit rate limits for public repositories.")
    else:
        github_token = None

    # Fall back to the module-level defaults when no patterns were given.
    include_patterns = DEFAULT_INCLUDE_PATTERNS
    if args.include:
        include_patterns = set(args.include)

    exclude_patterns = DEFAULT_EXCLUDE_PATTERNS
    if args.exclude:
        exclude_patterns = set(args.exclude)

    use_cache = not args.no_cache

    shared = {
        # Inputs taken from the command line
        "repo_url": args.repo,
        "local_dir": args.dir,
        "project_name": args.name,
        "github_token": github_token,
        "output_dir": args.output,
        "include_patterns": include_patterns,
        "exclude_patterns": exclude_patterns,
        "max_file_size": args.max_size,
        "use_cache": use_cache,
        # Initially empty slots -- presumably filled in by the flow's nodes
        # (not visible in this file; confirm in flow.py).
        "files": [],
        "tables": [],
        "schema": [],
        "final_output_dir": None,
    }

    print(f"Starting database schema generation for: {args.repo or args.dir}")
    print(f"LLM caching: {'Disabled' if args.no_cache else 'Enabled'}")

    create_schema_flow().run(shared)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()