diff --git a/go/extractor/BUILD.bazel b/go/extractor/BUILD.bazel index fbc53f20720b..23158e25b15f 100644 --- a/go/extractor/BUILD.bazel +++ b/go/extractor/BUILD.bazel @@ -1,4 +1,4 @@ -load("@rules_go//go:def.bzl", "go_library") +load("@rules_go//go:def.bzl", "go_library", "go_test") load("@rules_java//java:defs.bzl", "java_library") load("@rules_pkg//pkg:mappings.bzl", "pkg_files") @@ -60,3 +60,10 @@ pkg_files( }, visibility = ["//go:__pkg__"], ) + +go_test( + name = "extractor_test", + srcs = ["extractor_test.go"], + embed = [":extractor"], + deps = ["@org_golang_x_tools//go/packages"], +) diff --git a/go/extractor/extractor.go b/go/extractor/extractor.go index bbcd32c10d24..158f0029704d 100644 --- a/go/extractor/extractor.go +++ b/go/extractor/extractor.go @@ -59,6 +59,63 @@ func init() { } } +// isExactTestPackage checks if a package ID represents an exact test match. +// Returns true for IDs like "github.com/foo/bar [github.com/foo/bar.test]" +// Returns false for IDs like "github.com/foo/bar [github.com/foo/bar/nested.test]" +func isExactTestPackage(pkg *packages.Package) bool { + // Test packages have IDs in the format: "pkgpath [pkgpath.test]" + // or for nested test dependencies: "pkgpath [pkgpath/nested.test]" + expectedTestID := pkg.PkgPath + " [" + pkg.PkgPath + ".test]" + return pkg.ID == expectedTestID +} + +// isBetterPackage determines if pkg is a better choice than current for extraction. +// Preferences: +// 1. Exact test package (e.g., "pkg [pkg.test]") over nested test dependencies +// 2. More Syntax nodes (more files to extract) +// 3. Longer ID string as tiebreaker +func isBetterPackage(pkg, current *packages.Package) bool { + pkgIsExact := isExactTestPackage(pkg) + currentIsExact := isExactTestPackage(current) + + // Prefer exact test packages + if pkgIsExact != currentIsExact { + return pkgIsExact + } + + // Prefer packages with more syntax nodes (more files) + pkgSyntaxCount := len(pkg.Syntax) + currentSyntaxCount := len(current.Syntax) + if pkgSyntaxCount != currentSyntaxCount { + return pkgSyntaxCount > currentSyntaxCount + } + + // Fall back to string length + return len(pkg.ID) > len(current.ID) +} + +// selectBestPackages builds a map from package paths to their best package variants. +// In the context of a `go test -c` compilation, we see the same package more than +// once, with IDs like "abc.com/pkgname [abc.com/pkgname.test]" to distinguish the version +// that contains and is used by test code. +// We prefer the version with the most complete test coverage, which is typically: +// 1. The exact test package (e.g., "pkg [pkg.test]") over nested test dependencies +// 2. The package with the most Syntax nodes (most files to extract) +// 3. The longest ID string as a tiebreaker +func selectBestPackages(pkgs []*packages.Package) map[string]*packages.Package { + bestPackageIds := make(map[string]*packages.Package) + packages.Visit(pkgs, nil, func(pkg *packages.Package) { + if bestSoFar, present := bestPackageIds[pkg.PkgPath]; present { + if isBetterPackage(pkg, bestSoFar) { + bestPackageIds[pkg.PkgPath] = pkg + } + } else { + bestPackageIds[pkg.PkgPath] = pkg + } + }) + return bestPackageIds +} + // ExtractWithFlags extracts the packages specified by the given patterns and build flags func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool, sourceRoot string) error { startTime := time.Now() @@ -153,22 +210,8 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool, pkgsNotFound := make([]string, 0, len(pkgs)) - // Build a map from package paths to their longest IDs-- - // in the context of a `go test -c` compilation, we will see the same package more than - // once, with IDs like "abc.com/pkgname [abc.com/pkgname.test]" to distinguish the version - // that contains and is used by test code. - // For our purposes it is simplest to just ignore the non-test version, since the test - // version seems to be a superset of it. - longestPackageIds := make(map[string]string) - packages.Visit(pkgs, nil, func(pkg *packages.Package) { - if longestIDSoFar, present := longestPackageIds[pkg.PkgPath]; present { - if len(pkg.ID) > len(longestIDSoFar) { - longestPackageIds[pkg.PkgPath] = pkg.ID - } - } else { - longestPackageIds[pkg.PkgPath] = pkg.ID - } - }) + // Build a map from package paths to their best IDs + bestPackageIds := selectBestPackages(pkgs) // Do a post-order traversal and extract the package scope of each package packages.Visit(pkgs, nil, func(pkg *packages.Package) { @@ -257,15 +300,15 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool, // extract AST information for all packages packages.Visit(pkgs, nil, func(pkg *packages.Package) { - // If this is a variant of a package that also occurs with a longer ID, skip it; + // If this is a variant of a package that also occurs with a better ID, skip it; // otherwise we would extract the same file more than once including extracting the // body of methods twice, causing database inconsistencies. // - // We prefer the version with the longest ID because that is (so far as I know) always - // the version that defines more entities -- the only case I'm aware of being a test - // variant of a package, which includes test-only functions in addition to the complete - // contents of the main variant. - if pkg.ID != longestPackageIds[pkg.PkgPath] { + // We prefer the version with the most complete test coverage, prioritizing: + // 1. Exact test packages (e.g., "pkg [pkg.test]") over nested test dependencies + // 2. Packages with more Syntax nodes (more files to extract) + // 3. Longer ID strings as a tiebreaker + if pkg.ID != bestPackageIds[pkg.PkgPath].ID { return } diff --git a/go/extractor/extractor_test.go b/go/extractor/extractor_test.go new file mode 100644 index 000000000000..2b585ec7fa1f --- /dev/null +++ b/go/extractor/extractor_test.go @@ -0,0 +1,343 @@ +package extractor + +import ( + "go/ast" + "testing" + + "golang.org/x/tools/go/packages" +) + +func TestIsExactTestPackage(t *testing.T) { + tests := []struct { + name string + pkgID string + pkgPath string + expected bool + }{ + { + name: "exact test package", + pkgID: "github.com/foo/bar [github.com/foo/bar.test]", + pkgPath: "github.com/foo/bar", + expected: true, + }, + { + name: "nested test package", + pkgID: "github.com/foo/bar [github.com/foo/bar/nested.test]", + pkgPath: "github.com/foo/bar", + expected: false, + }, + { + name: "deeply nested test package", + pkgID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]", + pkgPath: "github.com/go-git/go-git/v6", + expected: false, + }, + { + name: "exact test package with version", + pkgID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]", + pkgPath: "github.com/go-git/go-git/v6", + expected: true, + }, + { + name: "non-test package", + pkgID: "github.com/foo/bar", + pkgPath: "github.com/foo/bar", + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pkg := &packages.Package{ + ID: tt.pkgID, + PkgPath: tt.pkgPath, + } + result := isExactTestPackage(pkg) + if result != tt.expected { + t.Errorf("isExactTestPackage(%q) = %v, want %v", tt.pkgID, result, tt.expected) + } + }) + } +} + +func TestIsBetterPackage(t *testing.T) { + // Helper to create a package with specified properties + makePkg := func(id, path string, syntaxCount int) *packages.Package { + syntax := make([]*ast.File, syntaxCount) + return &packages.Package{ + ID: id, + PkgPath: path, + Syntax: syntax, + } + } + + tests := []struct { + name string + pkg *packages.Package + current *packages.Package + expected bool // true if pkg is better than current + }{ + { + name: "exact test package beats nested test package", + pkg: makePkg( + "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]", + "github.com/go-git/go-git/v6", + 39, // 19 production + 20 test files + ), + current: makePkg( + "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]", + "github.com/go-git/go-git/v6", + 19, // production files only + ), + expected: true, + }, + { + name: "nested test package loses to exact test package", + pkg: makePkg( + "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]", + "github.com/go-git/go-git/v6", + 19, + ), + current: makePkg( + "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]", + "github.com/go-git/go-git/v6", + 39, + ), + expected: false, + }, + { + name: "more syntax nodes wins when both are exact tests", + pkg: makePkg( + "github.com/foo/bar [github.com/foo/bar.test]", + "github.com/foo/bar", + 50, + ), + current: makePkg( + "github.com/foo/bar [github.com/foo/bar.test]", + "github.com/foo/bar", + 30, + ), + expected: true, + }, + { + name: "fewer syntax nodes loses when both are exact tests", + pkg: makePkg( + "github.com/foo/bar [github.com/foo/bar.test]", + "github.com/foo/bar", + 30, + ), + current: makePkg( + "github.com/foo/bar [github.com/foo/bar.test]", + "github.com/foo/bar", + 50, + ), + expected: false, + }, + { + name: "more syntax nodes wins when both are nested tests", + pkg: makePkg( + "github.com/foo/bar [github.com/foo/bar/pkg1.test]", + "github.com/foo/bar", + 25, + ), + current: makePkg( + "github.com/foo/bar [github.com/foo/bar/pkg2.test]", + "github.com/foo/bar", + 20, + ), + expected: true, + }, + { + name: "longer ID wins when same syntax count", + pkg: makePkg( + "github.com/foo/bar [github.com/foo/bar/verylongpackagename.test]", + "github.com/foo/bar", + 20, + ), + current: makePkg( + "github.com/foo/bar [github.com/foo/bar/short.test]", + "github.com/foo/bar", + 20, + ), + expected: true, + }, + { + name: "test package beats non-test with same syntax count", + pkg: makePkg( + "github.com/foo/bar [github.com/foo/bar.test]", + "github.com/foo/bar", + 20, + ), + current: makePkg( + "github.com/foo/bar", + "github.com/foo/bar", + 20, + ), + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isBetterPackage(tt.pkg, tt.current) + if result != tt.expected { + t.Errorf("isBetterPackage() = %v, want %v\n pkg: %q (%d syntax nodes)\n current: %q (%d syntax nodes)", + result, tt.expected, + tt.pkg.ID, len(tt.pkg.Syntax), + tt.current.ID, len(tt.current.Syntax)) + } + }) + } +} + +// TestSelectBestPackages tests the selectBestPackages function +func TestSelectBestPackages(t *testing.T) { + // Helper to create a package with specified properties + makePkg := func(id, path string, syntaxCount int) *packages.Package { + syntax := make([]*ast.File, syntaxCount) + return &packages.Package{ + ID: id, + PkgPath: path, + Syntax: syntax, + } + } + + tests := []struct { + name string + pkgs []*packages.Package + expectedPkgIDs map[string]string // pkgPath -> expected selected ID + }{ + { + name: "single package", + pkgs: []*packages.Package{ + makePkg("example.com/pkg", "example.com/pkg", 10), + }, + expectedPkgIDs: map[string]string{ + "example.com/pkg": "example.com/pkg", + }, + }, + { + name: "test package preferred over production", + pkgs: []*packages.Package{ + makePkg("example.com/pkg", "example.com/pkg", 10), + makePkg("example.com/pkg [example.com/pkg.test]", "example.com/pkg", 15), + }, + expectedPkgIDs: map[string]string{ + "example.com/pkg": "example.com/pkg [example.com/pkg.test]", + }, + }, + { + name: "exact test preferred over nested test", + pkgs: []*packages.Package{ + makePkg("example.com/pkg [example.com/pkg.test]", "example.com/pkg", 20), + makePkg("example.com/pkg [example.com/pkg/nested.test]", "example.com/pkg", 15), + }, + expectedPkgIDs: map[string]string{ + "example.com/pkg": "example.com/pkg [example.com/pkg.test]", + }, + }, + { + name: "multiple packages with different paths", + pkgs: []*packages.Package{ + makePkg("example.com/pkg1", "example.com/pkg1", 10), + makePkg("example.com/pkg1 [example.com/pkg1.test]", "example.com/pkg1", 15), + makePkg("example.com/pkg2", "example.com/pkg2", 8), + makePkg("example.com/pkg2 [example.com/pkg2.test]", "example.com/pkg2", 12), + }, + expectedPkgIDs: map[string]string{ + "example.com/pkg1": "example.com/pkg1 [example.com/pkg1.test]", + "example.com/pkg2": "example.com/pkg2 [example.com/pkg2.test]", + }, + }, + { + name: "more syntax nodes wins among nested tests", + pkgs: []*packages.Package{ + makePkg("example.com/pkg [example.com/pkg/a.test]", "example.com/pkg", 10), + makePkg("example.com/pkg [example.com/pkg/b.test]", "example.com/pkg", 20), + }, + expectedPkgIDs: map[string]string{ + "example.com/pkg": "example.com/pkg [example.com/pkg/b.test]", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := selectBestPackages(tt.pkgs) + + // Check that all expected packages are present + for pkgPath, expectedID := range tt.expectedPkgIDs { + selected, found := result[pkgPath] + if !found { + t.Errorf("Expected package path %q not found in result", pkgPath) + continue + } + if selected.ID != expectedID { + t.Errorf("For package path %q: got ID %q, want %q", + pkgPath, selected.ID, expectedID) + } + } + + // Check that no unexpected packages are present + if len(result) != len(tt.expectedPkgIDs) { + t.Errorf("Expected %d packages in result, got %d", + len(tt.expectedPkgIDs), len(result)) + } + }) + } +} + +// TestPackageSelectionRealWorld simulates the real-world go-git scenario +func TestPackageSelectionRealWorld(t *testing.T) { + // Simulate the actual packages.Load result for go-git repository + // when EXTRACT_TESTS=true + pkgs := []*packages.Package{ + // Production package only + { + ID: "github.com/go-git/go-git/v6", + PkgPath: "github.com/go-git/go-git/v6", + Syntax: make([]*ast.File, 19), // 19 production files + }, + // Root test package - this is what we want! + { + ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]", + PkgPath: "github.com/go-git/go-git/v6", + Syntax: make([]*ast.File, 39), // 19 production + 20 test files + }, + // Nested test dependency 1 + { + ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/format/packfile.test]", + PkgPath: "github.com/go-git/go-git/v6", + Syntax: make([]*ast.File, 19), // production files only (dependency) + }, + // Nested test dependency 2 + { + ID: "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6/plumbing/object.test]", + PkgPath: "github.com/go-git/go-git/v6", + Syntax: make([]*ast.File, 19), // production files only (dependency) + }, + } + + // Use the actual selection logic from the extractor + bestPackageIds := selectBestPackages(pkgs) + + // Verify the correct package was selected + selected := bestPackageIds["github.com/go-git/go-git/v6"] + expectedID := "github.com/go-git/go-git/v6 [github.com/go-git/go-git/v6.test]" + expectedSyntaxCount := 39 + + if selected.ID != expectedID { + t.Errorf("Wrong package selected!\n got: %q (%d syntax nodes)\n want: %q (%d syntax nodes)", + selected.ID, len(selected.Syntax), + expectedID, expectedSyntaxCount) + } + + if len(selected.Syntax) != expectedSyntaxCount { + t.Errorf("Wrong syntax count: got %d, want %d", len(selected.Syntax), expectedSyntaxCount) + } + + // Verify it's recognized as an exact test package + if !isExactTestPackage(selected) { + t.Errorf("Selected package %q should be recognized as exact test package", selected.ID) + } +} diff --git a/go/ql/integration-tests/root-internal-tests/src/go.mod b/go/ql/integration-tests/root-internal-tests/src/go.mod new file mode 100644 index 000000000000..12e11856e552 --- /dev/null +++ b/go/ql/integration-tests/root-internal-tests/src/go.mod @@ -0,0 +1,3 @@ +module example.com/testpkg + +go 1.26 diff --git a/go/ql/integration-tests/root-internal-tests/src/main.go b/go/ql/integration-tests/root-internal-tests/src/main.go new file mode 100644 index 000000000000..fff083caa0ac --- /dev/null +++ b/go/ql/integration-tests/root-internal-tests/src/main.go @@ -0,0 +1,13 @@ +package main + +func PublicFunc() int { + return 42 +} + +func privateFunc() int { + return 24 +} + +func main() { + PublicFunc() +} diff --git a/go/ql/integration-tests/root-internal-tests/src/main_test.go b/go/ql/integration-tests/root-internal-tests/src/main_test.go new file mode 100644 index 000000000000..7c38d61d4c87 --- /dev/null +++ b/go/ql/integration-tests/root-internal-tests/src/main_test.go @@ -0,0 +1,16 @@ +package main + +import "testing" + +// Root internal test - tests private functions +func TestPrivateFunc(t *testing.T) { + if privateFunc() != 24 { + t.Error("privateFunc failed") + } +} + +func TestPublicFunc(t *testing.T) { + if PublicFunc() != 42 { + t.Error("PublicFunc failed") + } +} diff --git a/go/ql/integration-tests/root-internal-tests/src/nested/nested.go b/go/ql/integration-tests/root-internal-tests/src/nested/nested.go new file mode 100644 index 000000000000..427af1e44b63 --- /dev/null +++ b/go/ql/integration-tests/root-internal-tests/src/nested/nested.go @@ -0,0 +1,5 @@ +package nested + +func NestedFunc() string { + return "nested" +} diff --git a/go/ql/integration-tests/root-internal-tests/src/nested/nested_test.go b/go/ql/integration-tests/root-internal-tests/src/nested/nested_test.go new file mode 100644 index 000000000000..a7e063c61859 --- /dev/null +++ b/go/ql/integration-tests/root-internal-tests/src/nested/nested_test.go @@ -0,0 +1,9 @@ +package nested + +import "testing" + +func TestNestedFunc(t *testing.T) { + if NestedFunc() != "nested" { + t.Error("NestedFunc failed") + } +} diff --git a/go/ql/integration-tests/root-internal-tests/test.expected b/go/ql/integration-tests/root-internal-tests/test.expected new file mode 100644 index 000000000000..f68c14d1338b --- /dev/null +++ b/go/ql/integration-tests/root-internal-tests/test.expected @@ -0,0 +1,7 @@ +#select +| main_test.go | +| nested/nested_test.go | +testFunctions +| TestNestedFunc | nested/nested_test.go | +| TestPrivateFunc | main_test.go | +| TestPublicFunc | main_test.go | diff --git a/go/ql/integration-tests/root-internal-tests/test.py b/go/ql/integration-tests/root-internal-tests/test.py new file mode 100644 index 000000000000..a8f376e33975 --- /dev/null +++ b/go/ql/integration-tests/root-internal-tests/test.py @@ -0,0 +1,5 @@ +import os + +def test(codeql, go): + # Test that root internal test files are extracted when nested packages have tests + codeql.database.create(source_root="src", extractor_option = ["extract_tests=true"]) diff --git a/go/ql/integration-tests/root-internal-tests/test.ql b/go/ql/integration-tests/root-internal-tests/test.ql new file mode 100644 index 000000000000..234fd1a04203 --- /dev/null +++ b/go/ql/integration-tests/root-internal-tests/test.ql @@ -0,0 +1,15 @@ +import go + +// Verify that root internal test files are extracted +// when nested packages also have tests +from File f +where f.getBaseName().matches("%_test.go") +select f.getRelativePath() + +query predicate testFunctions(string name, string file) { + exists(FuncDecl fn | + fn.getName().matches("Test%") and + name = fn.getName() and + file = fn.getFile().getRelativePath() + ) +}