forked from colbymchenry/codegraph
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerated-detection.ts
More file actions
78 lines (76 loc) · 3.07 KB
/
generated-detection.ts
File metadata and controls
78 lines (76 loc) · 3.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/**
* Generated-file detection for symbol-disambiguation down-ranking.
*
* When a query like "Send" matches 17 symbols across protobuf scaffolding,
* test mocks, and the hand-written implementation, the FTS ranker often
* surfaces the generated stubs first because their names are identical
* to the implementation's name (validated empirically on cosmos-sdk —
* see project_go_multi_module_audit memory). Generated stubs frequently
* have no body to trace from, so the agent ends up reading source anyway.
*
* This helper is a pure path-based classifier consulted at disambiguation
* time (findSymbol / findAllSymbols / codegraph_search formatting), NOT
* a hard filter — generated nodes are still in the graph and remain
* reachable; they just rank LAST when there's a real implementation
* with the same name.
*
* Scope: filename patterns only (nearly all suffixes). Most generated files follow the
* `<basename>.<tool>.<ext>` convention (`.pb.go`, `_grpc.pb.go`,
* `.g.dart`, `_pb2.py`), and that covers ~all of what we saw in the
* Go audit. A future addition would be scanning for the canonical
* `// Code generated by` header during extraction, for the rare files
* that defy the suffix convention.
*/
/**
 * Filename patterns that mark a path as tool-generated output.
 *
 * Every pattern is anchored at the end of the string (`$`), so it is
 * matched against the tail of the path. None of them carries the `g` or
 * `y` flag, so `RegExp.prototype.test` keeps no state between calls and
 * the list can be shared safely.
 */
const GENERATED_PATTERNS: ReadonlyArray<RegExp> = [
  // Go — protobuf / gRPC / pulsar
  /\.pb\.go$/,
  /\.pulsar\.go$/,
  /_grpc\.pb\.go$/,
  // Go — mockgen output. Default emits `mock_<src>.go`; many projects
  // (cosmos-sdk uses `expected_*_mocks.go`) rename to `*_mock.go` /
  // `*_mocks.go`. Matching either suffix catches both conventions
  // without false-positive risk on hand-written sources.
  /_mock\.go$/,
  /_mocks\.go$/,
  // The `mock_` prefix must start a path segment: either the start of the
  // string (bare filename) or right after a `/` or `\` separator. Anchoring
  // on `^` alone would miss every file that lives inside a directory, and
  // excluding both separators from the basename keeps a directory named
  // `mock_*` from matching on backslash-separated paths.
  /(?:^|[\\/])mock_[^\\/]+\.go$/,
  // TypeScript / JavaScript — common codegen suffixes (Apollo / GraphQL
  // codegen, Prisma, Hasura, ts-proto, gRPC-web, swagger-codegen).
  /\.generated\.[jt]sx?$/,
  /\.gen\.[jt]sx?$/,
  /\.pb\.[jt]s$/,
  /_pb\.[jt]s$/,
  /_grpc_pb\.[jt]s$/,
  // Python — protobuf / gRPC / openapi-codegen
  /_pb2(_grpc)?\.py$/,
  /_pb2\.pyi$/,
  // C++ — protobuf
  /\.pb\.(cc|h)$/,
  // C# — protobuf / gRPC (protoc-gen-csharp puts output under obj/ but
  // many projects also commit *.g.cs and *Grpc.cs siblings)
  /\.g\.cs$/,
  /Grpc\.cs$/,
  // Java — protobuf / gRPC: protoc-gen-java emits `*OuterClass.java`,
  // protoc-gen-grpc-java emits `*Grpc.java`. The XxxImplBase abstract
  // class lives inside Xxx*Grpc.java.
  /OuterClass\.java$/,
  /Grpc\.java$/,
  // Swift — protobuf
  /\.pb\.swift$/,
  // Dart — build_runner / freezed / json_serializable / chopper
  /\.g\.dart$/,
  /\.freezed\.dart$/,
  /\.pb\.dart$/,
  /\.pbgrpc\.dart$/,
  /\.chopper\.dart$/,
  // Rust — common build.rs OUT_DIR outputs are usually outside the source
  // tree, but in-tree generated files often use `*.generated.rs`.
  /\.generated\.rs$/,
];
/**
 * Reports whether `filePath` matches one of the known generated-file
 * naming patterns in `GENERATED_PATTERNS`.
 *
 * The check is purely string-based: the path is tested against each
 * pattern in turn and the file itself is never opened. Treat the answer
 * as a ranking hint for disambiguation rather than a definitive verdict.
 *
 * @param filePath - Path (or bare filename) of the source file to classify.
 * @returns `true` as soon as any pattern matches, otherwise `false`.
 */
export function isGeneratedFile(filePath: string): boolean {
  for (const pattern of GENERATED_PATTERNS) {
    if (pattern.test(filePath)) {
      return true;
    }
  }
  return false;
}