googleapis · ohmayr · May 12, 2026 · gemini-code-assist · May 12, 2026 · gemini-code-assist
@@ -0,0 +1,72 @@
+import os
+import json
+import argparse
+
+# Handwritten libraries whose test suites are complex and slow to run.
+# The load balancer gives each of them a dedicated VM so they cannot become a bottleneck.
+HEAVY_LIFTERS = {
+    "google-cloud-compute",
+    "google-cloud-compute-v1beta",
+    "google-cloud-discoveryengine",
+    "google-cloud-spanner",
+}
+
+def get_valid_packages(directories):
+    """Return the entries of ``directories`` that contain a ``noxfile.py``.
+
+    Input order is preserved; a directory is kept only when a regular file
+    named ``noxfile.py`` sits directly inside it.
+    """
+    valid = []
+    for directory in directories:
+        noxfile = os.path.join(directory, "noxfile.py")
+        if os.path.isfile(noxfile):
+            valid.append(directory)
+    return valid
+
+def distribute_packages(packages, max_buckets):
+    """Distribute packages into load-balanced buckets, isolating heavy lifters.
+
+    Assignment is greedy: heavy lifters are placed first, and every package
+    goes to the bucket with the lowest accumulated weight (ties go to the
+    earliest bucket).
+
+    Args:
+        packages: Package directory paths. The caller's list is not modified.
+        max_buckets: Upper bound on the number of buckets. Values below 1 are
+            treated as 1 so that every package still lands in a bucket.
+
+    Returns:
+        A list of buckets, each a non-empty list of package paths in the
+        order they were assigned. Empty when ``packages`` is empty.
+    """
+    if not packages:
+        return []
+
+    def is_heavy(pkg):
+        # normpath drops a trailing separator, which would otherwise make
+        # basename() return "" and hide a heavy lifter.
+        return os.path.basename(os.path.normpath(pkg)) in HEAVY_LIFTERS
+
+    # Heavy lifters jump to the front of the line. sorted() is stable and
+    # works on a copy, so the caller's list keeps its original order.
+    ordered = sorted(packages, key=lambda p: not is_heavy(p))
+
+    # Never more buckets than packages, and never fewer than one.
+    bucket_count = max(1, min(len(ordered), max_buckets))
+    buckets = [{"weight": 0, "pkgs": []} for _ in range(bucket_count)]
+
+    for pkg in ordered:
+        # Find the bucket with the lowest weight and add the package to it.
+        lightest = min(buckets, key=lambda b: b["weight"])
+        lightest["pkgs"].append(pkg)
+        # The large weight keeps other packages out of a heavy lifter's
+        # bucket for as long as any lighter bucket exists.
+        lightest["weight"] += 9999 if is_heavy(pkg) else 1
+
+    return [b["pkgs"] for b in buckets]
+
+def build_github_actions_jobs(buckets):
+    """Format buckets as the list of job dicts emitted for GitHub Actions.
+
+    Each non-empty bucket becomes ``{"id": label, "packages": paths}`` where
+    ``paths`` is the bucket joined with single spaces and ``label`` is the
+    first package's directory name without a leading ``google-cloud-``,
+    followed by ``" + N"`` when the bucket holds N further packages.
+
+    Args:
+        buckets: Iterable of lists of package paths.
+
+    Returns:
+        One dict per non-empty bucket, in input order.
+    """
+    prefix = "google-cloud-"
+    jobs = []
+    for bucket in buckets:
+        if not bucket:
+            # Nothing to run; indexing bucket[0] would raise IndexError.
+            continue
+        # normpath drops a trailing separator so basename() is never "".
+        base_name = os.path.basename(os.path.normpath(bucket[0]))
+        # Strip the prefix only where it really is a prefix.
+        if base_name.startswith(prefix):
+            base_name = base_name[len(prefix):]
+        extra = len(bucket) - 1
+        job_label = f"{base_name} + {extra}" if extra else base_name
+        jobs.append({"id": job_label, "packages": " ".join(bucket)})
+    return jobs
+
+def main():
+    """Compute the CI job matrix for the changed packages and publish it.
+
+    Reads whitespace-separated directories from the ``CHANGED_DIRS``
+    environment variable, keeps those that contain a ``noxfile.py``, spreads
+    them over buckets and serializes the resulting jobs as JSON. The JSON is
+    appended as ``buckets=<json>`` to the file named by ``GITHUB_OUTPUT``, or
+    printed to stdout when that variable is unset or empty. Nothing is
+    emitted when no valid package is found.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--matrix-multiplier", type=int, required=True)
+    parser.add_argument("--max-vms", type=int, default=20)
+    args = parser.parse_args()
+
+    # A multiplier below 1 would divide by zero or give a negative bucket count.
+    if args.matrix_multiplier < 1:
+        parser.error("--matrix-multiplier must be at least 1")
+
+    # Normalize so a trailing slash does not leave os.path.basename() returning "",
+    # and drop directories that normalize to the same path (order is preserved).
+    raw_dirs = os.environ.get("CHANGED_DIRS", "").split()
+    changed_dirs = list(dict.fromkeys(os.path.normpath(d) for d in raw_dirs))
+    packages = get_valid_packages(changed_dirs)
+
+    if not packages:
+        return
+
+    # Protect against GitHub's 256-job hard limit, but always keep at least
+    # one bucket so the packages are still scheduled.
+    max_buckets = max(1, min(250 // args.matrix_multiplier, args.max_vms))
+    buckets = distribute_packages(packages, max_buckets)
+
+    jobs_json = json.dumps(build_github_actions_jobs(buckets))
+
+    output_path = os.environ.get("GITHUB_OUTPUT")
+    if output_path:
+        with open(output_path, "a", encoding="utf-8") as f:
+            f.write(f"buckets={jobs_json}\n")
+    else:
+        print(jobs_json)
+
+# Run the balancer only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,78 @@
+import pytest
+from unittest.mock import patch
+from gha_matrix_balancer import (
+    get_valid_packages,
+    distribute_packages,
+    build_github_actions_jobs,
+    HEAVY_LIFTERS
+)
+
+@patch("os.path.isfile")
+def test_get_valid_packages(mock_isfile):
+    """Only directories whose noxfile.py exists are kept."""
+    existing_noxfile = "packages/valid-pkg/noxfile.py"
+
+    def fake_isfile(path):
+        # Pretend the valid package's noxfile is the only file on disk.
+        return path == existing_noxfile
+
+    mock_isfile.side_effect = fake_isfile
+
+    kept = get_valid_packages(["packages/valid-pkg", "packages/invalid-pkg"])
+
+    assert kept == ["packages/valid-pkg"]
+
+def test_distribute_packages_isolates_heavy_lifters():
+    """A heavy lifter mixed with five ordinary packages gets a bucket to itself."""
+    # Any member of HEAVY_LIFTERS will do; take whichever the set yields first.
+    heavy_path = f"packages/{next(iter(HEAVY_LIFTERS))}"
+    ordinary = [
+        f"packages/google-cloud-{name}"
+        for name in ("vision", "storage", "logging", "pubsub", "kms")
+    ]
+    # Put the heavy lifter in the middle of the input rather than at the front.
+    packages = ordinary[:2] + [heavy_path] + ordinary[2:]
+
+    buckets = distribute_packages(packages, max_buckets=3)
+
+    assert len(buckets) == 3
+
+    # Locate the bucket that received the heavy lifter.
+    heavy_bucket = next(bucket for bucket in buckets if heavy_path in bucket)
+
+    # Its 9999 weight must keep every other package out of that bucket.
+    assert heavy_bucket == [heavy_path]
+
+def test_distribute_packages_max_bucket_limit():
+    """Five packages capped at two buckets are split 3/2 with nothing lost."""
+    packages = [f"pkg-{i}" for i in range(5)]
+    # Pass a copy so the expectations below do not depend on whether the
+    # function reorders its argument.
+    buckets = distribute_packages(list(packages), max_buckets=2)
+
+    assert len(buckets) == 2
+    # Equal-weight packages must be balanced: 3 in one bucket, 2 in the other.
+    assert sorted(len(bucket) for bucket in buckets) == [2, 3]
+    # Every package is scheduled exactly once.
+    assert sorted(pkg for bucket in buckets for pkg in bucket) == sorted(packages)
+
+def test_distribute_packages_more_buckets_than_packages():
+    """The bucket count never exceeds the number of packages."""
+    two_packages = ["pkg-1", "pkg-2"]
+    allowed_buckets = 10
+
+    result = distribute_packages(two_packages, max_buckets=allowed_buckets)
+
+    # Only two buckets are created even though ten were allowed.
+    assert len(result) == 2
+
+def test_build_github_actions_jobs():
+    """Job ids drop the "google-cloud-" prefix and count the extra packages."""
+    single = ["packages/google-cloud-spanner"]
+    multiple = [
+        "packages/google-cloud-vision",
+        "packages/google-cloud-storage",
+        "packages/google-cloud-pubsub",
+    ]
+
+    jobs = build_github_actions_jobs([single, multiple])
+
+    assert len(jobs) == 2
+    single_job, multi_job = jobs
+
+    # Single-package bucket: the label is just the stripped package name.
+    assert single_job["id"] == "spanner"
+    assert single_job["packages"] == "packages/google-cloud-spanner"
+
+    # Multi-package bucket: the label gains a "+ N" suffix for the extras.
+    assert multi_job["id"] == "vision + 2"
+    assert multi_job["packages"] == "packages/google-cloud-vision packages/google-cloud-storage packages/google-cloud-pubsub"