Skip to content

Commit 59ea5a4

Browse files
committed
feat: Add Ruby module extraction with containment and qualified names
Fixes Ruby methods inside modules missing their owner in `qualified_name` by adding a `visitNode` hook that extracts module AST nodes. Methods inside modules now get `Module::method` qualified names with proper containment relationships. Also wires up `ExtractorContext` with `pushScope`/`popScope` for language hooks and updates `isInsideClassLikeNode` to include the `module` kind for nested method handling.
1 parent 07d899b commit 59ea5a4

7 files changed

Lines changed: 126 additions & 6 deletions

File tree

__tests__/extraction.test.ts

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,6 +1787,70 @@ require_relative 'helper'
17871787
});
17881788
});
17891789

1790+
describe('Ruby modules', () => {
1791+
it('should extract module as module node with containment', () => {
1792+
const code = `
1793+
module CachedCounting
1794+
def self.disable
1795+
@enabled = false
1796+
end
1797+
1798+
def perform_increment!(key, count)
1799+
write_cache!(key, count)
1800+
end
1801+
end
1802+
`;
1803+
const result = extractFromSource('concerns/cached_counting.rb', code);
1804+
1805+
const moduleNode = result.nodes.find((n) => n.kind === 'module' && n.name === 'CachedCounting');
1806+
expect(moduleNode).toBeDefined();
1807+
expect(moduleNode?.qualifiedName).toBe('CachedCounting');
1808+
1809+
// Methods inside module should have module-qualified names
1810+
const disableMethod = result.nodes.find((n) => n.name === 'disable' && n.kind === 'method');
1811+
expect(disableMethod).toBeDefined();
1812+
expect(disableMethod?.qualifiedName).toBe('CachedCounting::disable');
1813+
1814+
const incrementMethod = result.nodes.find((n) => n.name === 'perform_increment!' && n.kind === 'method');
1815+
expect(incrementMethod).toBeDefined();
1816+
expect(incrementMethod?.qualifiedName).toBe('CachedCounting::perform_increment!');
1817+
1818+
// Containment edge from module to methods
1819+
const containsEdges = result.edges.filter((e) => e.source === moduleNode?.id && e.kind === 'contains');
1820+
expect(containsEdges.length).toBeGreaterThanOrEqual(2);
1821+
});
1822+
1823+
it('should handle nested modules with classes', () => {
1824+
const code = `
1825+
module Discourse
1826+
module Auth
1827+
class AuthProvider
1828+
def authenticate(params)
1829+
validate(params)
1830+
end
1831+
end
1832+
end
1833+
end
1834+
`;
1835+
const result = extractFromSource('lib/auth.rb', code);
1836+
1837+
const discourseModule = result.nodes.find((n) => n.kind === 'module' && n.name === 'Discourse');
1838+
expect(discourseModule).toBeDefined();
1839+
1840+
const authModule = result.nodes.find((n) => n.kind === 'module' && n.name === 'Auth');
1841+
expect(authModule).toBeDefined();
1842+
expect(authModule?.qualifiedName).toBe('Discourse::Auth');
1843+
1844+
const authProvider = result.nodes.find((n) => n.kind === 'class' && n.name === 'AuthProvider');
1845+
expect(authProvider).toBeDefined();
1846+
expect(authProvider?.qualifiedName).toBe('Discourse::Auth::AuthProvider');
1847+
1848+
const authMethod = result.nodes.find((n) => n.name === 'authenticate');
1849+
expect(authMethod).toBeDefined();
1850+
expect(authMethod?.qualifiedName).toBe('Discourse::Auth::AuthProvider::authenticate');
1851+
});
1852+
});
1853+
17901854
describe('C/C++ imports', () => {
17911855
it('should extract system include', () => {
17921856
const code = `#include <iostream>`;

docs/SEARCH_QUALITY_LOOP.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ test().catch(console.error);
447447
| Import edges missing | `extractImport` returns null for this syntax | `src/extraction/languages/<lang>.ts: extractImport` |
448448
| C++ classes/structs/enums missing from macro namespaces | Macros like `NLOHMANN_JSON_NAMESPACE_BEGIN` cause tree-sitter to misparse namespace blocks as `function_definition` | `src/extraction/languages/c-cpp.ts: isMisparsedFunction` filters bad names; `src/extraction/tree-sitter.ts: visitFunctionBody` extracts structural nodes |
449449
| C++ classes missing from `.h` headers | `.h` files default to `c` language which has `classTypes: []` | `src/extraction/grammars.ts: looksLikeCpp()` — content-based heuristic promotes `.h` files to `cpp` when C++ patterns detected |
450+
| Ruby methods inside modules missing owner in `qualified_name` | Ruby `module` AST nodes not being extracted | `src/extraction/languages/ruby.ts: visitNode` hook extracts modules; `src/extraction/tree-sitter.ts: isInsideClassLikeNode` includes `module` kind |
450451

451452
## After Fixing Issues
452453

@@ -529,6 +530,7 @@ if (receiverType) {
529530
- [x] **C** — NOT needed. No methods in C. Strong function/struct/enum extraction with excellent call edge density. Verified against Redis
530531
- [x] **C++** — NOT needed for header-only libs. `isMisparsedFunction` hook filters macro-caused misparse artifacts (e.g. `NLOHMANN_JSON_NAMESPACE_BEGIN`). `visitFunctionBody` now extracts structural nodes (classes/structs/enums) inside macro-confused "function" bodies. Content-based `.h` detection (`looksLikeCpp` in `grammars.ts`) promotes C++ headers to `cpp` language so classes in `.h` files are extracted. Verified against nlohmann/json and gRPC. Note: out-of-class `Type::method()` definitions would need `getReceiverType` but are uncommon in header-only codebases.
531532
- [x] **C#** — NOT needed. Methods nested in class body. Added `base_list` handling in `extractInheritance` for C#'s `: Parent, IInterface` syntax. Added `propertyTypes` support for C# `property_declaration` nodes. Fixed `extractField` to handle C#'s nested `variable_declaration > variable_declarator` structure. Verified against Jellyfin
533+
- [x] **Ruby** — NOT needed for `getReceiverType`. Methods nested in class body. Added `visitNode` hook to extract Ruby `module` nodes (concerns, namespaces) with proper containment and qualified names. Methods inside modules get `Module::method` qualified names. Also wired up the `ExtractorContext` with `pushScope`/`popScope` for language hooks. Verified against Discourse
532534

533535
### Needs Verification
534536

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@colbymchenry/codegraph",
3-
"version": "0.6.8",
3+
"version": "0.6.9",
44
"description": "Supercharge Claude Code with semantic code intelligence. 30% fewer tokens, 25% fewer tool calls, 100% local.",
55
"main": "dist/index.js",
66
"types": "dist/index.d.ts",

src/extraction/languages/ruby.ts

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ export const rubyExtractor: LanguageExtractor = {
66
functionTypes: ['method'],
77
classTypes: ['class'],
88
methodTypes: ['method', 'singleton_method'],
9-
interfaceTypes: [], // Ruby uses modules
9+
interfaceTypes: [], // Ruby uses modules (handled via visitNode hook)
1010
structTypes: [],
1111
enumTypes: [],
1212
typeAliasTypes: [],
@@ -16,6 +16,28 @@ export const rubyExtractor: LanguageExtractor = {
1616
nameField: 'name',
1717
bodyField: 'body',
1818
paramsField: 'parameters',
19+
visitNode: (node, ctx) => {
20+
if (node.type !== 'module') return false;
21+
22+
const nameNode = node.childForFieldName('name');
23+
if (!nameNode) return false;
24+
const name = nameNode.text;
25+
26+
const moduleNode = ctx.createNode('module', name, node);
27+
if (!moduleNode) return false;
28+
29+
// Push module onto scope stack so children get proper qualified names
30+
ctx.pushScope(moduleNode.id);
31+
const body = node.childForFieldName('body');
32+
if (body) {
33+
for (let i = 0; i < body.namedChildCount; i++) {
34+
const child = body.namedChild(i);
35+
if (child) ctx.visitNode(child);
36+
}
37+
}
38+
ctx.popScope();
39+
return true; // handled
40+
},
1941
getVisibility: (node) => {
2042
// Ruby visibility is based on preceding visibility modifiers
2143
let sibling = node.previousNamedSibling;

src/extraction/tree-sitter-types.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ export interface ExtractorContext {
5656
visitFunctionBody(body: SyntaxNode, functionId: string): void;
5757
/** Add an unresolved reference */
5858
addUnresolvedReference(ref: UnresolvedReference): void;
59+
/** Push a node ID onto the scope stack (for containment/qualified name building) */
60+
pushScope(nodeId: string): void;
61+
/** Pop the last node ID from the scope stack */
62+
popScope(): void;
5963
/** Current file path */
6064
readonly filePath: string;
6165
/** Current source text */

src/extraction/tree-sitter.ts

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import {
1717
} from '../types';
1818
import { getParser, detectLanguage, isLanguageSupported } from './grammars';
1919
import { generateNodeId, getNodeText, getChildByField, getPrecedingDocstring } from './tree-sitter-helpers';
20-
import type { LanguageExtractor } from './tree-sitter-types';
20+
import type { LanguageExtractor, ExtractorContext } from './tree-sitter-types';
2121
import { EXTRACTORS } from './languages';
2222
import { LiquidExtractor } from './liquid-extractor';
2323
import { SvelteExtractor } from './svelte-extractor';
@@ -223,6 +223,13 @@ export class TreeSitterExtractor {
223223
const nodeType = node.type;
224224
let skipChildren = false;
225225

226+
// Language-specific custom visitor hook
227+
if (this.extractor.visitNode) {
228+
const ctx = this.makeExtractorContext();
229+
const handled = this.extractor.visitNode(node, ctx);
230+
if (handled) return;
231+
}
232+
226233
// Pascal-specific AST handling
227234
if (this.language === 'pascal') {
228235
skipChildren = this.visitPascalNode(node);
@@ -409,6 +416,26 @@ export class TreeSitterExtractor {
409416
return parts.join('::');
410417
}
411418

419+
/**
420+
* Build an ExtractorContext for passing to language-specific visitNode hooks.
421+
*/
422+
private makeExtractorContext(): ExtractorContext {
423+
// eslint-disable-next-line @typescript-eslint/no-this-alias
424+
const self = this;
425+
return {
426+
createNode: (kind, name, node, extra) => self.createNode(kind, name, node, extra),
427+
visitNode: (node) => self.visitNode(node),
428+
visitFunctionBody: (body, functionId) => self.visitFunctionBody(body, functionId),
429+
addUnresolvedReference: (ref) => self.unresolvedReferences.push(ref),
430+
pushScope: (nodeId) => self.nodeStack.push(nodeId),
431+
popScope: () => self.nodeStack.pop(),
432+
get filePath() { return self.filePath; },
433+
get source() { return self.source; },
434+
get nodeStack() { return self.nodeStack; },
435+
get nodes() { return self.nodes; },
436+
};
437+
}
438+
412439
/**
413440
* Check if the current node stack indicates we are inside a class-like node
414441
* (class, struct, interface, trait, enum, module). File nodes do not count as class-like.
@@ -424,7 +451,8 @@ export class TreeSitterExtractor {
424451
parentNode.kind === 'struct' ||
425452
parentNode.kind === 'interface' ||
426453
parentNode.kind === 'trait' ||
427-
parentNode.kind === 'enum'
454+
parentNode.kind === 'enum' ||
455+
parentNode.kind === 'module'
428456
);
429457
}
430458

0 commit comments

Comments
 (0)