Skip to content

Commit c0de42c

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Eval SDK: Route gemini-3 model traffic to global region in local agent scraping
PiperOrigin-RevId: 893194564
1 parent 09794ba commit c0de42c

3 files changed

Lines changed: 115 additions & 23 deletions

File tree

tests/unit/vertexai/genai/test_evals.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
import pandas as pd
4545
import pytest
4646

47-
4847
_TEST_PROJECT = "test-project"
4948
_TEST_LOCATION = "us-central1"
5049

@@ -2079,6 +2078,73 @@ def test_has_tool_call_with_agent_event(self):
20792078

20802079

20812080
@pytest.mark.usefixtures("google_auth_mock")
class TestRunAgent:
    """Unit tests for the _run_agent function.

    Both tests verify that a short gemini-3 model name in
    UserSimulatorConfig is rewritten to a full global-region resource
    path, that GOOGLE_CLOUD_LOCATION is temporarily set to "global"
    while inference runs, and that the environment variable is restored
    (or removed) afterwards.
    """

    @mock.patch.object(_evals_common, "_execute_inference_concurrently")
    def test_run_agent_rewrites_gemini_3_model_name(
        self, mock_execute_inference_concurrently, mock_api_client_fixture
    ):
        """GOOGLE_CLOUD_LOCATION is overridden during the run and restored after."""
        mock_execute_inference_concurrently.return_value = []
        user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
            model_name="gemini-3-preview"
        )
        prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
        # Isolate the environment so the pre-existing value is controlled.
        with mock.patch.dict(os.environ, clear=True):
            os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1"

            def mock_execute(*args, **kwargs):
                # The override must be active while inference executes.
                assert os.environ["GOOGLE_CLOUD_LOCATION"] == "global"
                return []

            mock_execute_inference_concurrently.side_effect = mock_execute

            _evals_common._run_agent(
                api_client=mock_api_client_fixture,
                agent_engine=mock.Mock(),
                agent=None,
                prompt_dataset=prompt_dataset,
                user_simulator_config=user_simulator_config,
            )

            # Short model name must be expanded to the global-region path.
            assert (
                user_simulator_config.model_name
                == f"projects/{mock_api_client_fixture.project}/locations/global/publishers/google/models/gemini-3-preview"
            )
            # The original location must be restored after the run.
            assert os.environ.get("GOOGLE_CLOUD_LOCATION") == "us-central1"

    @mock.patch.object(_evals_common, "_execute_inference_concurrently")
    def test_run_agent_rewrites_gemini_3_model_name_empty_env(
        self, mock_execute_inference_concurrently, mock_api_client_fixture
    ):
        """When GOOGLE_CLOUD_LOCATION was unset, it is removed again afterwards."""
        mock_execute_inference_concurrently.return_value = []
        user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
            model_name="gemini-3-preview"
        )
        prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
        with mock.patch.dict(os.environ, clear=True):

            def mock_execute(*args, **kwargs):
                # The override must be active while inference executes.
                assert os.environ["GOOGLE_CLOUD_LOCATION"] == "global"
                return []

            mock_execute_inference_concurrently.side_effect = mock_execute

            _evals_common._run_agent(
                api_client=mock_api_client_fixture,
                agent_engine=mock.Mock(),
                agent=None,
                prompt_dataset=prompt_dataset,
                user_simulator_config=user_simulator_config,
            )

            # Short model name must be expanded to the global-region path.
            assert (
                user_simulator_config.model_name
                == f"projects/{mock_api_client_fixture.project}/locations/global/publishers/google/models/gemini-3-preview"
            )
            # The variable did not exist before, so it must not linger.
            assert "GOOGLE_CLOUD_LOCATION" not in os.environ
20822148
class TestRunAgentInternal:
20832149
"""Unit tests for the _run_agent_internal function."""
20842150

vertexai/_genai/_evals_common.py

Lines changed: 44 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,28 +1877,50 @@ def _run_agent(
18771877
]
18781878
]:
18791879
"""Internal helper to run inference using Gemini model with concurrency."""
1880-
if agent_engine:
1881-
return _execute_inference_concurrently(
1882-
api_client=api_client,
1883-
agent_engine=agent_engine,
1884-
prompt_dataset=prompt_dataset,
1885-
progress_desc="Agent Run",
1886-
gemini_config=None,
1887-
user_simulator_config=None,
1888-
inference_fn=_execute_agent_run_with_retry,
1889-
)
1890-
elif agent:
1891-
return _execute_inference_concurrently(
1892-
api_client=api_client,
1893-
agent=agent,
1894-
prompt_dataset=prompt_dataset,
1895-
progress_desc="Local Agent Run",
1896-
gemini_config=None,
1897-
user_simulator_config=user_simulator_config,
1898-
inference_fn=_execute_local_agent_run_with_retry,
1899-
)
1900-
else:
1901-
raise ValueError("Neither agent_engine nor agent is provided.")
1880+
original_location = os.environ.get("GOOGLE_CLOUD_LOCATION")
1881+
location_overridden = False
1882+
1883+
if user_simulator_config and user_simulator_config.model_name:
1884+
model_name = user_simulator_config.model_name
1885+
if model_name.startswith("gemini-3") and "/" not in model_name:
1886+
logger.warning(
1887+
"Model %s is only available in the global region. Routing to global.",
1888+
model_name,
1889+
)
1890+
user_simulator_config.model_name = f"projects/{api_client.project}/locations/global/publishers/google/models/{model_name}"
1891+
if original_location != "global":
1892+
os.environ["GOOGLE_CLOUD_LOCATION"] = "global"
1893+
location_overridden = True
1894+
1895+
try:
1896+
if agent_engine:
1897+
return _execute_inference_concurrently(
1898+
api_client=api_client,
1899+
agent_engine=agent_engine,
1900+
prompt_dataset=prompt_dataset,
1901+
progress_desc="Agent Run",
1902+
gemini_config=None,
1903+
user_simulator_config=None,
1904+
inference_fn=_execute_agent_run_with_retry,
1905+
)
1906+
elif agent:
1907+
return _execute_inference_concurrently(
1908+
api_client=api_client,
1909+
agent=agent,
1910+
prompt_dataset=prompt_dataset,
1911+
progress_desc="Local Agent Run",
1912+
gemini_config=None,
1913+
user_simulator_config=user_simulator_config,
1914+
inference_fn=_execute_local_agent_run_with_retry,
1915+
)
1916+
else:
1917+
raise ValueError("Neither agent_engine nor agent is provided.")
1918+
finally:
1919+
if location_overridden:
1920+
if original_location is None:
1921+
del os.environ["GOOGLE_CLOUD_LOCATION"]
1922+
else:
1923+
os.environ["GOOGLE_CLOUD_LOCATION"] = original_location
19021924

19031925

19041926
def _execute_agent_run_with_retry(

vertexai/_genai/evals.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2216,6 +2216,8 @@ def create_evaluation_run(
22162216
If `agent_info` is provided without `inference_configs`, this config is used
22172217
to automatically construct the inference configuration. If not specified,
22182218
or if `max_turn` is not set, `max_turn` defaults to 5.
2219+
The `model_name` inside this config can be either a full model path or a
2220+
short model name, e.g. `gemini-3-preview-flash`.
22192221
inference_configs: The candidate to inference config map for the evaluation run.
22202222
The key is the candidate name, and the value is the inference config.
22212223
If provided, `agent_info` must be None. If omitted and `agent_info` is provided,
@@ -3486,6 +3488,8 @@ async def create_evaluation_run(
34863488
If `agent_info` is provided without `inference_configs`, this config is used
34873489
to automatically construct the inference configuration. If not specified,
34883490
or if `max_turn` is not set, `max_turn` defaults to 5.
3491+
The `model_name` inside this config can be either a full model path or a
3492+
short model name, e.g. `gemini-3-preview-flash`.
34893493
inference_configs: The candidate to inference config map for the evaluation run.
34903494
The key is the candidate name, and the value is the inference config.
34913495
If provided, `agent_info` must be None. If omitted and `agent_info` is provided,

0 commit comments

Comments
 (0)