forked from dyad-sh/dyad
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcodebase.ts
More file actions
415 lines (358 loc) · 11.8 KB
/
codebase.ts
File metadata and controls
415 lines (358 loc) · 11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
import fs from "node:fs";
import fsAsync from "node:fs/promises";
import path from "node:path";
import { isIgnored } from "isomorphic-git";
import log from "electron-log";
import { IS_TEST_BUILD } from "../ipc/utils/test_utils";
// Logger for this module, obtained from electron-log's scope API under the
// "utils/codebase" label.
const logger = log.scope("utils/codebase");
/** Extensions (lowercase, leading dot) of files eligible for extraction. */
const ALLOWED_EXTENSIONS: string[] = [
  ".ts",
  ".tsx",
  ".js",
  ".jsx",
  ".mjs",
  ".cjs",
  ".mts",
  ".cts",
  ".css",
  ".html",
  ".md",
];

/** Directory names that are always excluded from the walk. */
const EXCLUDED_DIRS: string[] = ["node_modules", ".git", "dist", "build"];

/** File names that are included no matter what their extension is. */
const ALWAYS_INCLUDE_FILES: string[] = ["package.json"];

/** Largest file size, in bytes, that is still included (100KB). */
const MAX_FILE_SIZE = 100 * 1024;

/** Largest number of entries allowed in fileContentCache before eviction. */
const MAX_FILE_CACHE_SIZE = 500;

/** A cached file body together with the modification time it was read at. */
interface FileCache {
  content: string;
  mtime: number;
}

/** File path -> cached content and mtime. */
const fileContentCache: Map<string, FileCache> = new Map();

/** Cache key -> cached git-ignore decision. */
const gitIgnoreCache: Map<string, boolean> = new Map();

/** .gitignore file path -> modification time last observed for it. */
const gitIgnoreMtimes: Map<string, number> = new Map();
/**
 * Record the current modification time of one .gitignore file and report
 * whether it differs from what was recorded before.
 *
 * @param gitIgnorePath - Absolute path of the .gitignore file to inspect
 * @returns true when the file is new, has a different mtime than last seen,
 *   or was previously seen and can no longer be stat'ed; false otherwise
 */
async function refreshGitIgnoreMtime(gitIgnorePath: string): Promise<boolean> {
  try {
    const { mtimeMs } = await fsAsync.stat(gitIgnorePath);
    if (gitIgnoreMtimes.get(gitIgnorePath) === mtimeMs) {
      return false;
    }
    gitIgnoreMtimes.set(gitIgnorePath, mtimeMs);
    return true;
  } catch {
    // A missing .gitignore is fine. If we had recorded one earlier, though,
    // it has been removed and every cached decision may now be stale:
    // Map.delete returns true exactly in that case.
    return gitIgnoreMtimes.delete(gitIgnorePath);
  }
}

/**
 * Check if a path should be ignored based on git ignore rules.
 *
 * Decisions are memoized in gitIgnoreCache. Before the cache is consulted,
 * the .gitignore in baseDir and in every directory between baseDir and the
 * file is stat'ed; if any of them changed (or disappeared) the whole cache is
 * cleared.
 *
 * @param filePath - Path of the file or directory to test
 * @param baseDir - Repository root that filePath is resolved against
 * @returns true if git ignore rules exclude the path; false otherwise, and
 *   also false when the check itself fails (the error is logged)
 */
async function isGitIgnored(
  filePath: string,
  baseDir: string,
): Promise<boolean> {
  try {
    const relativeParts = path.relative(baseDir, filePath).split(path.sep);

    // Git consults .gitignore files on the way from the repo root down to the
    // file, so list the root one plus one per intermediate directory.
    const gitIgnorePaths = [path.join(baseDir, ".gitignore")];
    let currentDir = baseDir;
    for (const part of relativeParts.slice(0, -1)) {
      currentDir = path.join(currentDir, part);
      gitIgnorePaths.push(path.join(currentDir, ".gitignore"));
    }

    let shouldClearCache = false;
    for (const gitIgnorePath of gitIgnorePaths) {
      if (await refreshGitIgnoreMtime(gitIgnorePath)) {
        shouldClearCache = true;
      }
    }
    if (shouldClearCache) {
      gitIgnoreCache.clear();
    }

    const cacheKey = `${baseDir}:${filePath}`;
    const cached = gitIgnoreCache.get(cacheKey);
    if (cached !== undefined) {
      return cached;
    }

    const result = await isIgnored({
      fs,
      dir: baseDir,
      // isomorphic-git works with forward-slash paths; normalize
      // Windows-style separators like the rest of this file does.
      filepath: relativeParts.join("/"),
    });
    gitIgnoreCache.set(cacheKey, result);
    return result;
  } catch (error) {
    logger.error(`Error checking if path is git ignored: ${filePath}`, error);
    return false;
  }
}
/**
 * Return the (cleaned) contents of a file, reusing the cached copy when the
 * file's modification time matches the one stored with it.
 *
 * @param filePath - Path of the file to read
 * @returns The cleaned file contents, or null if the file could not be
 *   stat'ed or read (the error is logged)
 */
async function readFileWithCache(filePath: string): Promise<string | null> {
  try {
    const { mtimeMs } = await fsAsync.stat(filePath);

    // Cache hit only counts when the stored mtime is exactly the current one.
    const cached = fileContentCache.get(filePath);
    if (cached !== undefined && cached.mtime === mtimeMs) {
      return cached.content;
    }

    const cleaned = cleanContent({
      content: await fsAsync.readFile(filePath, "utf-8"),
      filePath,
    });
    fileContentCache.set(filePath, { content: cleaned, mtime: mtimeMs });

    // Once the cache outgrows its limit, drop a quarter of the limit's worth
    // of entries, starting from the front of the Map's iteration order.
    if (fileContentCache.size > MAX_FILE_CACHE_SIZE) {
      const evictCount = Math.ceil(MAX_FILE_CACHE_SIZE * 0.25);
      Array.from(fileContentCache.keys())
        .slice(0, evictCount)
        .forEach((staleKey) => {
          fileContentCache.delete(staleKey);
        });
    }

    return cleaned;
  } catch (error) {
    logger.error(`Error reading file: ${filePath}`, error);
    return null;
  }
}
/**
 * Reduce file content before it is cached and shown to the model.
 *
 * Only package.json is touched: it is re-serialized with nothing but its
 * dependencies and devDependencies fields, because the rest is unnecessary
 * for LLM context and fields like packageManager cause diffs in e2e test
 * snapshots. Any other file is returned unchanged.
 *
 * @returns The cleaned content, or the original content if package.json
 *   could not be parsed (the error is logged)
 */
function cleanContent({
  content,
  filePath,
}: {
  content: string;
  filePath: string;
}): string {
  if (path.basename(filePath) !== "package.json") {
    return content;
  }

  try {
    const parsed = JSON.parse(content);
    const trimmed = {
      dependencies: parsed.dependencies,
      devDependencies: parsed.devDependencies,
    };
    return JSON.stringify(trimmed, null, 2);
  } catch (error) {
    logger.error(`Error cleaning package.json: ${filePath}`, error);
    return content;
  }
}
/**
 * Recursively walk a directory and collect all relevant files.
 *
 * A file is relevant when its extension is in ALLOWED_EXTENSIONS or its name
 * is in ALWAYS_INCLUDE_FILES, it is not git-ignored, and it is no larger than
 * MAX_FILE_SIZE. Directories named in EXCLUDED_DIRS and git-ignored
 * directories are not descended into.
 *
 * Entries of one directory are processed concurrently, but the returned list
 * is assembled in readdir order so the result does not depend on which I/O
 * call happens to finish first.
 *
 * @param dir - Directory to walk
 * @param baseDir - Repository root used for git ignore checks
 * @returns Full paths of the collected files; empty if dir is inaccessible
 *   or cannot be read
 */
async function collectFiles(dir: string, baseDir: string): Promise<string[]> {
  // Check if directory exists
  try {
    await fsAsync.access(dir);
  } catch {
    // Directory doesn't exist or is not accessible
    return [];
  }

  try {
    const entries = await fsAsync.readdir(dir, { withFileTypes: true });

    const perEntry = await Promise.all(
      entries.map(async (entry): Promise<string[]> => {
        const fullPath = path.join(dir, entry.name);

        if (entry.isDirectory()) {
          if (EXCLUDED_DIRS.includes(entry.name)) {
            return [];
          }
          if (await isGitIgnored(fullPath, baseDir)) {
            return [];
          }
          return collectFiles(fullPath, baseDir);
        }

        if (!entry.isFile()) {
          return [];
        }

        // Apply the cheap name-based filter first so that files which can
        // never be included cost neither a git-ignore check nor a stat.
        const ext = path.extname(entry.name).toLowerCase();
        const isCandidate =
          ALLOWED_EXTENSIONS.includes(ext) ||
          ALWAYS_INCLUDE_FILES.includes(entry.name);
        if (!isCandidate) {
          return [];
        }

        if (await isGitIgnored(fullPath, baseDir)) {
          return [];
        }

        // Skip files that are too large
        try {
          const stats = await fsAsync.stat(fullPath);
          if (stats.size > MAX_FILE_SIZE) {
            return [];
          }
        } catch (error) {
          logger.error(`Error checking file size: ${fullPath}`, error);
          return [];
        }

        return [fullPath];
      }),
    );

    // Flatten with a plain loop: spreading a very large list into push()
    // can exceed the engine's argument limit.
    const files: string[] = [];
    for (const group of perEntry) {
      for (const file of group) {
        files.push(file);
      }
    }
    return files;
  } catch (error) {
    logger.error(`Error reading directory ${dir}:`, error);
    return [];
  }
}
// Path fragments that mark large configuration files or generated code.
// They are written with "/" (not path.join) because callers pass paths that
// have already been normalized to use "/".
const OMITTED_PATH_FRAGMENTS = [
  "src/components/ui",
  "eslint.config",
  "tsconfig.json",
  "package-lock.json",
  // These should already be excluded based on file type, but
  // just in case, we'll redact the contents here.
  ".env",
];

/**
 * Tell whether a file's contents should be left out of the extract, so that
 * only its path is included.
 *
 * @param relativePath - "/"-separated path relative to the app root
 * @returns true if the path contains any of the omitted fragments
 */
function isOmittedFile(relativePath: string): boolean {
  return OMITTED_PATH_FRAGMENTS.some((fragment) =>
    relativePath.includes(fragment),
  );
}

// Placeholder emitted in place of an omitted file's contents.
const OMITTED_FILE_CONTENT = "// Contents omitted for brevity";
/**
 * Format a file for inclusion in the codebase extract.
 *
 * The result is a <dyad-file> element whose path attribute is the file's
 * path relative to baseDir with "/" separators, and whose body is one of:
 * the omission placeholder (for paths matched by isOmittedFile), the file's
 * cached/cleaned contents, or an error comment when the file can't be read.
 *
 * @param filePath - Path of the file to format
 * @param baseDir - Root directory the reported path is made relative to
 * @returns The formatted element, terminated by a newline
 */
async function formatFile(filePath: string, baseDir: string): Promise<string> {
  // Computed once, outside the try, so that every branch - including the
  // error branch - reports the same normalized path.
  const relativePath = path
    .relative(baseDir, filePath)
    // Why? Normalize Windows-style paths which causes lots of weird issues (e.g. Git commit)
    .split(path.sep)
    .join("/");

  // Single place that knows the wrapper layout.
  const wrap = (body: string): string => `<dyad-file path="${relativePath}">
${body}
</dyad-file>
`;

  try {
    if (isOmittedFile(relativePath)) {
      return wrap(OMITTED_FILE_CONTENT);
    }

    const content = await readFileWithCache(filePath);
    return wrap(content === null ? "// Error reading file" : content);
  } catch (error) {
    logger.error(`Error reading file: ${filePath}`, error);
    return wrap(`// Error reading file: ${error}`);
  }
}
/**
 * Extract and format codebase files as a string to be included in prompts.
 *
 * Files are collected from appPath, ordered by sortFilesByModificationTime,
 * and processed concurrently. Both formattedOutput and files follow that same
 * order, because each is assembled from the ordered results of the concurrent
 * work rather than from the order in which reads complete.
 *
 * @param appPath - Path to the codebase to extract
 * @returns Object containing formatted output and individual files. Files
 *   whose contents could not be read are left out of `files`. If appPath is
 *   not accessible, formattedOutput carries an error line and files is empty.
 */
export async function extractCodebase(appPath: string): Promise<{
  formattedOutput: string;
  files: { path: string; content: string }[];
}> {
  try {
    await fsAsync.access(appPath);
  } catch {
    return {
      formattedOutput: `# Error: Directory ${appPath} does not exist or is not accessible`,
      files: [],
    };
  }

  const startTime = Date.now();

  // Collect all relevant files
  const files = await collectFiles(appPath, appPath);

  // Sort files by modification time (oldest first)
  // This is important for cache-ability.
  const sortedFiles = await sortFilesByModificationTime(files);

  // Format each file and fetch its content concurrently. Promise.all keeps
  // the results in sortedFiles order regardless of completion order.
  const perFile = await Promise.all(
    sortedFiles.map(async (file) => {
      const formatted = await formatFile(file, appPath);
      const relativePath = path
        .relative(appPath, file)
        // Why? Normalize Windows-style paths which causes lots of weird issues (e.g. Git commit)
        .split(path.sep)
        .join("/");
      const content = isOmittedFile(relativePath)
        ? OMITTED_FILE_CONTENT
        : await readFileWithCache(file);
      return { formatted, relativePath, content };
    }),
  );

  const formattedOutput = perFile.map((entry) => entry.formatted).join("");

  const filesArray: { path: string; content: string }[] = [];
  for (const { relativePath, content } of perFile) {
    // null means the file could not be read; it is skipped here.
    if (content !== null) {
      filesArray.push({ path: relativePath, content });
    }
  }

  const endTime = Date.now();
  logger.log("extractCodebase: time taken", endTime - startTime);

  if (IS_TEST_BUILD) {
    // Test builds use path order instead of modification-time order so that
    // snapshots stay stable across platforms.
    filesArray.sort((a, b) => a.path.localeCompare(b.path));
  }

  return {
    formattedOutput,
    files: filesArray,
  };
}
/**
 * Sort files by their modification timestamp (oldest first).
 *
 * Files with identical timestamps are ordered by path, so the result does
 * not depend on the order of the input list. A file that cannot be stat'ed is
 * logged and treated as modified "now". In test builds the files are ordered
 * by path only.
 *
 * @param files - File paths to sort
 * @returns A new array with the same paths in sorted order
 */
async function sortFilesByModificationTime(files: string[]): Promise<string[]> {
  // Get stats for all files
  const fileStats = await Promise.all(
    files.map(async (file) => {
      try {
        const stats = await fsAsync.stat(file);
        return { file, mtime: stats.mtimeMs };
      } catch (error) {
        // If there's an error getting stats, use current time as fallback
        logger.error(`Error getting file stats for ${file}:`, error);
        return { file, mtime: Date.now() };
      }
    }),
  );

  const byPath = (a: { file: string }, b: { file: string }): number =>
    a.file.localeCompare(b.file);

  if (IS_TEST_BUILD) {
    // Test builds sort purely by path to keep ordering stable across
    // platforms, although ideally we'd like to sort by modification time
    // which is important for cache-ability.
    return fileStats.sort(byPath).map((item) => item.file);
  }

  // Sort by modification time (oldest first); equal timestamps fall back to
  // path order so ties are resolved the same way on every run.
  return fileStats
    .sort((a, b) => a.mtime - b.mtime || byPath(a, b))
    .map((item) => item.file);
}