Skip to content

Commit ce220a5

Browse files
committed
Resolve merge conflict with main (gitignore + symlink rename)
Keeps the PR's visitedDirs rename and main's gitIgnoredDirs addition.
2 parents 0f2eda8 + 62a6cf3 commit ce220a5

4 files changed

Lines changed: 158 additions & 3 deletions

File tree

__tests__/extraction.test.ts

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ import * as fs from 'fs';
99
import * as path from 'path';
1010
import * as os from 'os';
1111
import { CodeGraph } from '../src';
12-
import { extractFromSource } from '../src/extraction';
12+
import { extractFromSource, scanDirectory, shouldIncludeFile } from '../src/extraction';
1313
import { detectLanguage, isLanguageSupported, getSupportedLanguages } from '../src/extraction/grammars';
14+
import { normalizePath } from '../src/utils';
15+
import { DEFAULT_CONFIG } from '../src/types';
1416

1517
// Create a temporary directory for each test
1618
function createTempDir(): string {
@@ -1981,3 +1983,106 @@ export function multiply(a: number, b: number): number {
19811983
cg.close();
19821984
});
19831985
});
1986+
1987+
// Unit tests for normalizePath: backslashes become forward slashes and
// every other character is passed through untouched.
describe('Path Normalization', () => {
  it('should convert backslashes to forward slashes', () => {
    const windowsStyleCases: ReadonlyArray<readonly [string, string]> = [
      ['gui\\node_modules\\foo', 'gui/node_modules/foo'],
      ['src\\components\\Button.tsx', 'src/components/Button.tsx'],
    ];

    for (const [input, expected] of windowsStyleCases) {
      expect(normalizePath(input)).toBe(expected);
    }
  });

  it('should leave forward-slash paths unchanged', () => {
    const alreadyNormalized = 'src/components/Button.tsx';
    expect(normalizePath(alreadyNormalized)).toBe(alreadyNormalized);
  });

  it('should handle empty string', () => {
    const result = normalizePath('');
    expect(result).toBe('');
  });
});
2001+
2002+
// Integration tests for scanDirectory: each case builds a small tree inside a
// fresh temp directory, scans it with the default config, and checks which
// relative paths come back.
describe('Directory Exclusion', () => {
  let tempDir: string;

  // Create <tempDir>/<segments...> (including parents) and write one file into it.
  const seedFile = (segments: string[], fileName: string, contents: string): void => {
    const targetDir = path.join(tempDir, ...segments);
    fs.mkdirSync(targetDir, { recursive: true });
    fs.writeFileSync(path.join(targetDir, fileName), contents);
  };

  // Scan the temp directory with DEFAULT_CONFIG re-rooted at it.
  const scanTempDir = () => {
    const config = { ...DEFAULT_CONFIG, rootDir: tempDir };
    return scanDirectory(tempDir, config);
  };

  beforeEach(() => {
    tempDir = createTempDir();
  });

  afterEach(() => {
    cleanupTempDir(tempDir);
  });

  it('should exclude node_modules directories', () => {
    // Layout: src/index.ts + node_modules/pkg/index.js
    seedFile(['src'], 'index.ts', 'export const x = 1;');
    seedFile(['node_modules', 'pkg'], 'index.js', 'module.exports = {};');

    const scanned = scanTempDir();

    expect(scanned).toContain('src/index.ts');
    expect(scanned.some((file) => file.includes('node_modules'))).toBe(false);
  });

  it('should exclude nested node_modules directories', () => {
    // Layout: packages/app/src/index.ts + packages/app/node_modules/pkg/index.js
    seedFile(['packages', 'app', 'src'], 'index.ts', 'export const x = 1;');
    seedFile(['packages', 'app', 'node_modules', 'pkg'], 'index.js', 'module.exports = {};');

    const scanned = scanTempDir();

    expect(scanned).toContain('packages/app/src/index.ts');
    expect(scanned.some((file) => file.includes('node_modules'))).toBe(false);
  });

  it('should exclude .git directories', () => {
    // A parseable .ts file inside .git must still be skipped.
    seedFile(['src'], 'index.ts', 'export const x = 1;');
    seedFile(['.git', 'objects'], 'pack.ts', 'export const y = 2;');

    const scanned = scanTempDir();

    expect(scanned).toContain('src/index.ts');
    expect(scanned.some((file) => file.includes('.git'))).toBe(false);
  });

  it('should return forward-slash paths on all platforms', () => {
    seedFile(['src', 'components'], 'Button.tsx', 'export function Button() {}');

    const scanned = scanTempDir();

    expect(scanned.length).toBe(1);
    const [onlyFile] = scanned;
    expect(onlyFile).toBe('src/components/Button.tsx');
    expect(onlyFile).not.toContain('\\');
  });

  it('should respect .codegraphignore marker', () => {
    // vendor/ carries the marker file, so nothing under it may be reported.
    seedFile(['src'], 'index.ts', 'export const x = 1;');
    seedFile(['vendor'], 'lib.ts', 'export const y = 2;');
    seedFile(['vendor'], '.codegraphignore', '');

    const scanned = scanTempDir();

    expect(scanned).toContain('src/index.ts');
    expect(scanned.some((file) => file.includes('vendor'))).toBe(false);
  });
});

src/config.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import * as fs from 'fs';
88
import * as path from 'path';
99
import { CodeGraphConfig, DEFAULT_CONFIG, Language, NodeKind } from './types';
10+
import { normalizePath } from './utils';
1011

1112
/**
1213
* Configuration filename
@@ -240,6 +241,9 @@ export function addCustomPattern(
240241
* Check if a file path matches the include/exclude patterns
241242
*/
242243
export function shouldIncludeFile(filePath: string, config: CodeGraphConfig): boolean {
244+
// Normalize to forward slashes so Windows backslash paths match glob patterns
245+
filePath = normalizePath(filePath);
246+
243247
// Simple glob matching (for now, just check if any pattern matches)
244248
// A full implementation would use a proper glob library
245249

src/extraction/index.ts

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import * as fs from 'fs';
88
import * as fsp from 'fs/promises';
99
import * as path from 'path';
1010
import * as crypto from 'crypto';
11+
import { execFileSync } from 'child_process';
1112
import {
1213
Language,
1314
FileRecord,
@@ -20,7 +21,7 @@ import { extractFromSource } from './tree-sitter';
2021
import { detectLanguage, isLanguageSupported } from './grammars';
2122
import { logDebug, logWarn } from '../errors';
2223
import { captureException } from '../sentry';
23-
import { validatePathWithinRoot } from '../utils';
24+
import { validatePathWithinRoot, normalizePath } from '../utils';
2425

2526
/**
2627
* Number of files to read in parallel during indexing.
@@ -74,6 +75,9 @@ export function hashContent(content: string): string {
7475
* Check if a path matches any glob pattern (simplified)
7576
*/
7677
function matchesGlob(filePath: string, pattern: string): boolean {
78+
// Normalize to forward slashes so Windows backslash paths match glob patterns
79+
filePath = normalizePath(filePath);
80+
7781
// Convert glob to regex using placeholders to avoid conflicts
7882
let regexStr = pattern;
7983

@@ -120,6 +124,31 @@ export function shouldIncludeFile(
120124
return false;
121125
}
122126

127+
/**
 * Collect the directories that git reports as ignored beneath `rootDir`.
 *
 * Runs `git ls-files -oi --exclude-standard --directory -z` with `rootDir` as
 * the working directory and keeps only the entries that end in `/` (git's
 * marker for a directory entry in `--directory` mode).
 *
 * @param rootDir Directory to run git in; reported paths are relative to it.
 * @returns Set of relative directory paths passed through `normalizePath`,
 *          without the trailing slash. Empty if git is unavailable, `rootDir`
 *          is not inside a repository, the command times out, or it fails in
 *          any other way.
 */
function getGitIgnoredDirectories(rootDir: string): Set<string> {
  const ignoredDirs = new Set<string>();

  let output: string;
  try {
    output = execFileSync(
      'git',
      // -z: NUL-terminated, verbatim path names. Without it git C-quotes any
      // path containing non-ASCII or special characters (e.g. "caf\303\251/"),
      // which then no longer ends in '/' and would be silently dropped.
      ['ls-files', '-oi', '--exclude-standard', '--directory', '-z'],
      {
        cwd: rootDir,
        encoding: 'utf-8',
        timeout: 10000,
        // Node's default is 1 MiB; a repository with many ignored files can
        // exceed that, and the resulting error would disable gitignore
        // filtering without any visible symptom.
        maxBuffer: 32 * 1024 * 1024,
        stdio: ['pipe', 'pipe', 'pipe'],
      }
    );
  } catch {
    // Best effort: the caller falls back to its own exclusion rules.
    return ignoredDirs;
  }

  // Entries are NUL-separated; do not trim, since leading or trailing
  // whitespace is a legal part of a directory name.
  for (const entry of output.split('\0')) {
    if (entry.endsWith('/')) {
      ignoredDirs.add(normalizePath(entry.slice(0, -1)));
    }
  }
  return ignoredDirs;
}
151+
123152
/**
124153
* Marker file name that indicates a directory (and all children) should be skipped
125154
*/
@@ -137,6 +166,7 @@ export function scanDirectory(
137166
let count = 0;
138167
// Track visited real paths to detect symlink cycles
139168
const visitedDirs = new Set<string>();
169+
const gitIgnoredDirs = getGitIgnoredDirectories(rootDir);
140170

141171
function walk(dir: string): void {
142172
// Resolve real path to detect symlink cycles
@@ -172,14 +202,18 @@ export function scanDirectory(
172202

173203
for (const entry of entries) {
174204
const fullPath = path.join(dir, entry.name);
175-
const relativePath = path.relative(rootDir, fullPath);
205+
const relativePath = normalizePath(path.relative(rootDir, fullPath));
176206

177207
// Follow symlinked directories, but skip symlinked files to non-project targets
178208
if (entry.isSymbolicLink()) {
179209
try {
180210
const realTarget = fs.realpathSync(fullPath);
181211
const stat = fs.statSync(realTarget);
182212
if (stat.isDirectory()) {
213+
// Check gitignore first (fast O(1) lookup)
214+
if (gitIgnoredDirs.has(relativePath)) {
215+
continue;
216+
}
183217
// Check exclusion, then recurse (cycle detection handles the rest)
184218
const dirPattern = relativePath + '/';
185219
let excluded = false;
@@ -208,6 +242,10 @@ export function scanDirectory(
208242
}
209243

210244
if (entry.isDirectory()) {
245+
// Check gitignore first (fast O(1) lookup)
246+
if (gitIgnoredDirs.has(relativePath)) {
247+
continue;
248+
}
211249
// Check if directory should be excluded
212250
const dirPattern = relativePath + '/';
213251
let excluded = false;

src/utils.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,14 @@ export function clamp(value: number, min: number, max: number): number {
7474
return Math.max(min, Math.min(max, value));
7575
}
7676

77+
/**
 * Rewrite a file path so that `/` is its only separator.
 *
 * Every backslash is replaced with a forward slash and all other characters
 * are returned unchanged, so Windows-style paths can be matched against
 * forward-slash glob patterns.
 */
export function normalizePath(filePath: string): string {
  const segments = filePath.split('\\');
  return segments.join('/');
}
84+
7785
/**
7886
* Cross-process file lock using lock files.
7987
* Prevents concurrent database writes from CLI, MCP server, and git hooks.

0 commit comments

Comments
 (0)