3131
3232from . import _evals_common
3333from . import _evals_constant
34+ from . import _evals_utils
3435from . import evals
3536from . import types
3637
@@ -1498,10 +1499,29 @@ class EvaluationRunConfig(_common.BaseModel):
14981499 """The number of response candidates for the evaluation run."""
14991500
15001501
def _rate_limited_get_metric_result(
    rate_limiter: _evals_utils.RateLimiter,
    handler: MetricHandler[Any],
    eval_case: types.EvalCase,
    response_index: int,
) -> types.EvalCaseMetricResult:
    """Computes one metric result after passing through the rate limiter.

    The limiter is consulted first, and only then is the handler invoked,
    so every handler call made through this function is preceded by exactly
    one `sleep_and_advance()` call on `rate_limiter`.

    Args:
        rate_limiter: Limiter whose `sleep_and_advance()` is called before
            the handler runs. Presumably this blocks the calling thread until
            the next request slot is available -- confirm against
            `_evals_utils.RateLimiter`.
        handler: The metric handler that produces the result.
        eval_case: The evaluation case forwarded unchanged to the handler.
        response_index: Index of the response candidate within `eval_case`,
            forwarded unchanged to the handler.

    Returns:
        Whatever `handler.get_metric_result(eval_case, response_index)`
        returns.
    """
    # Take a slot from the limiter before issuing the handler call.
    rate_limiter.sleep_and_advance()
    metric_result = handler.get_metric_result(eval_case, response_index)
    return metric_result
1511+
1512+
15011513def compute_metrics_and_aggregate (
15021514 evaluation_run_config : EvaluationRunConfig ,
1515+ evaluation_service_qps : Optional [float ] = None ,
15031516) -> types .EvaluationResult :
1504- """Computes metrics and aggregates them for a given evaluation run config."""
1517+ """Computes metrics and aggregates them for a given evaluation run config.
1518+
1519+ Args:
1520+ evaluation_run_config: The configuration for the evaluation run.
1521+ evaluation_service_qps: Optional QPS limit for the evaluation service.
1522+ Defaults to _DEFAULT_EVAL_SERVICE_QPS (10). Users with higher
1523+ quotas can increase this value.
1524+ """
15051525 metric_handlers = []
15061526 all_futures = []
15071527 results_by_case_response_metric : collections .defaultdict [
@@ -1511,6 +1531,10 @@ def compute_metrics_and_aggregate(
15111531 execution_errors = []
15121532 case_indices_with_errors = set ()
15131533
1534+ qps = evaluation_service_qps or _evals_utils ._DEFAULT_EVAL_SERVICE_QPS
1535+ rate_limiter = _evals_utils .RateLimiter (rate = qps )
1536+ logger .info ("Rate limiting evaluation service requests to %.1f QPS." , qps )
1537+
15141538 for eval_metric in evaluation_run_config .metrics :
15151539 metric_handlers .append (
15161540 get_handler_for_metric (evaluation_run_config .evals_module , eval_metric )
@@ -1553,7 +1577,9 @@ def compute_metrics_and_aggregate(
15531577 for response_index in range (actual_num_candidates_for_case ):
15541578 try :
15551579 future = executor .submit (
1556- metric_handler_instance .get_metric_result ,
1580+ _rate_limited_get_metric_result ,
1581+ rate_limiter ,
1582+ metric_handler_instance ,
15571583 eval_case ,
15581584 response_index ,
15591585 )
0 commit comments