diff --git a/.github/scripts/gha_matrix_balancer.py b/.github/scripts/gha_matrix_balancer.py
new file mode 100644
index 000000000000..545ea4bd3bb6
--- /dev/null
+++ b/.github/scripts/gha_matrix_balancer.py
@@ -0,0 +1,72 @@
+import os
+import json
+import argparse
+
+# Complex, handwritten libraries with long-running test suites.
+# The load balancer isolates these onto dedicated VMs to prevent bottlenecks.
+HEAVY_LIFTERS = {
+    "google-cloud-spanner",
+    "google-cloud-compute",
+    "google-cloud-compute-v1beta",
+    "google-cloud-discoveryengine"
+}
+
+def get_valid_packages(directories):
+    """Filters a list of directories, returning only those containing a noxfile.py."""
+    return [p for p in directories if os.path.isfile(os.path.join(p, "noxfile.py"))]
+
+def distribute_packages(packages, max_buckets):
+    """Distributes packages into load-balanced buckets, isolating heavy lifters."""
+    if not packages:
+        return []
+
+    # Heavy lifters jump to the front of the line (sorted copy; don't mutate caller's list)
+    packages = sorted(packages, key=lambda p: os.path.basename(p) not in HEAVY_LIFTERS)
+
+    # Create the requested number of buckets (or fewer, if we have fewer packages than buckets)
+    actual_buckets = min(len(packages), max_buckets)
+    buckets = [{"weight": 0, "pkgs": []} for _ in range(actual_buckets)]
+
+    for pkg in packages:
+        # Find the bucket with the lowest weight, add the package, and update its weight
+        lightest = min(buckets, key=lambda b: b["weight"])
+        lightest["pkgs"].append(pkg)
+        lightest["weight"] += 9999 if os.path.basename(pkg) in HEAVY_LIFTERS else 1
+
+    return [b["pkgs"] for b in buckets]
+
+def build_github_actions_jobs(buckets):
+    """Formats the buckets into the specific JSON schema required by GitHub Actions."""
+    jobs = []
+    for bucket in buckets:
+        base_name = os.path.basename(bucket[0]).replace("google-cloud-", "")
+        job_label = f"{base_name} + {len(bucket) - 1}" if len(bucket) > 1 else base_name
+        jobs.append({"id": job_label, "packages": " ".join(bucket)})
+    return jobs
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--matrix-multiplier", type=int, required=True)
+    parser.add_argument("--max-vms", type=int, default=20)
+    args = parser.parse_args()
+
+    changed_dirs = os.environ.get("CHANGED_DIRS", "").split()
+    packages = get_valid_packages(changed_dirs)
+
+    if not packages:
+        return
+
+    # Protect against GitHub's 256-job hard limit
+    max_buckets = min(250 // args.matrix_multiplier, args.max_vms)
+    buckets = distribute_packages(packages, max_buckets)
+
+    jobs_json = json.dumps(build_github_actions_jobs(buckets))
+
+    if "GITHUB_OUTPUT" in os.environ:
+        with open(os.environ["GITHUB_OUTPUT"], "a") as f:
+            f.write(f"buckets={jobs_json}\n")
+    else:
+        print(jobs_json)
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/scripts/test_gha_matrix_balancer.py b/.github/scripts/test_gha_matrix_balancer.py
new file mode 100644
index 000000000000..a237b9e597df
--- /dev/null
+++ b/.github/scripts/test_gha_matrix_balancer.py
@@ -0,0 +1,78 @@
+import pytest
+from unittest.mock import patch
+from gha_matrix_balancer import (
+    get_valid_packages,
+    distribute_packages,
+    build_github_actions_jobs,
+    HEAVY_LIFTERS
+)
+
+@patch("os.path.isfile")
+def test_get_valid_packages(mock_isfile):
+    # Mock isfile to only return True for the exact valid path
+    mock_isfile.side_effect = lambda path: path == "packages/valid-pkg/noxfile.py"
+
+    dirs = ["packages/valid-pkg", "packages/invalid-pkg"]
+    result = get_valid_packages(dirs)
+
+    assert result == ["packages/valid-pkg"]
+
+def test_distribute_packages_isolates_heavy_lifters():
+    # Mix 1 heavy lifter with 5 normal packages
+    heavy_lifter = list(HEAVY_LIFTERS)[0]  # Grab one of the defined heavy lifters dynamically
+
+    packages = [
+        "packages/google-cloud-vision",
+        "packages/google-cloud-storage",
+        f"packages/{heavy_lifter}",
+        "packages/google-cloud-logging",
+        "packages/google-cloud-pubsub",
+        "packages/google-cloud-kms",
+    ]
+
+    # Request 3 buckets
+    buckets = distribute_packages(packages, max_buckets=3)
+
+    assert len(buckets) == 3
+
+    # Find the bucket containing the heavy lifter
+    heavy_bucket = next(b for b in buckets if f"packages/{heavy_lifter}" in b)
+
+    # Because it adds 9999 weight, it should be the ONLY package in its bucket
+    assert len(heavy_bucket) == 1
+    assert heavy_bucket[0] == f"packages/{heavy_lifter}"
+
+def test_distribute_packages_max_bucket_limit():
+    # 5 packages, but we only allow 2 buckets
+    packages = [f"pkg-{i}" for i in range(5)]
+    buckets = distribute_packages(packages, max_buckets=2)
+
+    assert len(buckets) == 2
+    # Packages should be distributed (3 in one, 2 in the other)
+    assert len(buckets[0]) + len(buckets[1]) == 5
+
+def test_distribute_packages_more_buckets_than_packages():
+    # 2 packages, but we allow up to 10 buckets
+    packages = ["pkg-1", "pkg-2"]
+    buckets = distribute_packages(packages, max_buckets=10)
+
+    # It should only create 2 buckets, not 10
+    assert len(buckets) == 2
+
+def test_build_github_actions_jobs():
+    buckets = [
+        ["packages/google-cloud-spanner"],  # Single item
+        ["packages/google-cloud-vision", "packages/google-cloud-storage", "packages/google-cloud-pubsub"]  # Multiple items
+    ]
+
+    jobs = build_github_actions_jobs(buckets)
+
+    assert len(jobs) == 2
+
+    # Test single-item label stripping
+    assert jobs[0]["id"] == "spanner"
+    assert jobs[0]["packages"] == "packages/google-cloud-spanner"
+
+    # Test multi-item label generation (+ N logic)
+    assert jobs[1]["id"] == "vision + 2"
+    assert jobs[1]["packages"] == "packages/google-cloud-vision packages/google-cloud-storage packages/google-cloud-pubsub"