From 44a3e780d8ea9185ab035db831e99cc8941838cc Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Mon, 11 May 2026 12:22:05 -0700 Subject: [PATCH] chore: GenAI Client(evals) - Make HTML visualization helpers public. PiperOrigin-RevId: 913824853 --- tests/unit/vertexai/genai/test_evals.py | 91 ++++++++++++++++++++++++- vertexai/_genai/_evals_visualization.py | 31 ++++++--- 2 files changed, 108 insertions(+), 14 deletions(-) diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index 77e6180340..9fd9511b3b 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -606,7 +606,7 @@ def test_response_structure(self): assert result.clusters[1].cluster_id == "cluster-2" def test_get_loss_analysis_html(self): - """Tests that _get_loss_analysis_html generates valid HTML with data.""" + """Tests that get_loss_analysis_html generates valid HTML with data.""" from vertexai._genai import _evals_visualization import json @@ -666,7 +666,7 @@ def test_get_loss_analysis_html(self): } ] } - html = _evals_visualization._get_loss_analysis_html(json.dumps(data)) + html = _evals_visualization.get_loss_analysis_html(json.dumps(data)) assert "Loss Pattern Analysis" in html assert "test_metric" not in html # data is Base64-encoded in the HTML assert "" in html @@ -676,6 +676,91 @@ def test_get_loss_analysis_html(self): assert "example-section-label" in html # labels for scenario/rubrics assert "Analysis Summary" in html # summary heading + def test_get_evaluation_html(self): + """Tests that get_evaluation_html generates valid HTML with data.""" + from vertexai._genai import _evals_visualization + import base64 + import json + + data = { + "summary_metrics": [{"metric_name": "test_metric", "mean_score": 0.85}], + "eval_case_results": [ + { + "eval_case_index": 0, + "response_candidate_results": [ + {"display_text": "candidate response"} + ], + } + ], + "metadata": {"dataset": []}, + } + payload_json = json.dumps(data) + html = _evals_visualization.get_evaluation_html(payload_json) + + assert "" in html + assert "Evaluation Report" in html + assert "test_metric" not in html + payload_b64 = base64.b64encode(payload_json.encode("utf-8")).decode("ascii") + assert payload_b64 in html + assert "DOMPurify" in html + + def test_get_comparison_html(self): + """Tests that get_comparison_html generates valid HTML with data.""" + from vertexai._genai import _evals_visualization + import base64 + import json + + data = { + "summary_metrics": [ + { + "metric_name": "test_metric", + "win_rate": 0.6, + "loss_rate": 0.4, + } + ], + "eval_case_results": [ + { + "eval_case_index": 0, + "response_candidate_results": [ + {"display_text": "candidate A"}, + {"display_text": "candidate B"}, + ], + } + ], + "metadata": {"dataset": []}, + } + payload_json = json.dumps(data) + html = _evals_visualization.get_comparison_html(payload_json) + + assert "" in html + assert "Eval Comparison Report" in html + assert "test_metric" not in html + payload_b64 = base64.b64encode(payload_json.encode("utf-8")).decode("ascii") + assert payload_b64 in html + assert "DOMPurify" in html + + def test_get_inference_html(self): + """Tests that get_inference_html generates valid HTML with data.""" + from vertexai._genai import _evals_visualization + import base64 + import json + + data = [ + { + "prompt": "What is the capital of France?", + "response": "Paris", + } + ] + payload_json = json.dumps(data, ensure_ascii=False) + html = _evals_visualization.get_inference_html(payload_json) + + assert "" in html + assert "Evaluation Dataset" in html + assert "Paris" not in html + payload_b64 = base64.b64encode(payload_json.encode("utf-8")).decode("ascii") + assert payload_b64 in html + assert "DOMPurify" in html + def test_display_loss_clusters_response_no_ipython(self): """Tests graceful fallback when not in IPython.""" from vertexai._genai import _evals_visualization @@ -1488,7 +1573,7 @@ def test_display_loss_analysis_results_html(self): }, ensure_ascii=False, ) - html = _evals_visualization._get_loss_analysis_html(payload_json) + html = _evals_visualization.get_loss_analysis_html(payload_json) # The HTML is a self-contained report with base64-encoded JSON payload # decoded by JavaScript at runtime. Verify structure, not content. assert "" in html diff --git a/vertexai/_genai/_evals_visualization.py b/vertexai/_genai/_evals_visualization.py index ed5e82a93a..45b50b6ebc 100644 --- a/vertexai/_genai/_evals_visualization.py +++ b/vertexai/_genai/_evals_visualization.py @@ -231,7 +231,7 @@ def _extract_dataset_rows(dataset: types.EvaluationDataset) -> list[dict[str, An return processed_rows -def _get_evaluation_html(eval_result_json: str) -> str: +def get_evaluation_html(eval_result_json: str) -> str: """Returns a self-contained HTML for single evaluation visualization.""" payload_b64 = _encode_to_base64(eval_result_json) return textwrap.dedent( @@ -787,7 +787,7 @@ def _get_evaluation_html(eval_result_json: str) -> str: ) -def _get_comparison_html(eval_result_json: str) -> str: +def get_comparison_html(eval_result_json: str) -> str: """Returns a self-contained HTML for a side-by-side eval comparison.""" payload_b64 = _encode_to_base64(eval_result_json) return textwrap.dedent( @@ -1277,7 +1277,7 @@ def _get_comparison_html(eval_result_json: str) -> str: ) -def _get_inference_html(dataframe_json: str) -> str: +def get_inference_html(dataframe_json: str) -> str: """Returns a self-contained HTML for displaying inference results.""" payload_b64 = _encode_to_base64(dataframe_json) return textwrap.dedent( @@ -1475,7 +1475,7 @@ def display_evaluation_result( summary.update(win_rates[summary["metric_name"]]) result_dump["metadata"] = metadata_payload - html_content = _get_comparison_html(json.dumps(result_dump)) + html_content = get_comparison_html(json.dumps(result_dump)) else: single_dataset = input_dataset_list[0] if input_dataset_list else None processed_rows = [] @@ -1499,7 +1499,7 @@ def display_evaluation_result( cand_res["raw_json"] = original_case["response_raw_json"] result_dump["metadata"] = metadata_payload - html_content = _get_evaluation_html(json.dumps(result_dump)) + html_content = get_evaluation_html(json.dumps(result_dump)) display.display(display.HTML(html_content)) @@ -1553,11 +1553,11 @@ def display_evaluation_dataset(eval_dataset_obj: types.EvaluationDataset) -> Non processed_rows.append(processed_row) dataframe_json_string = json.dumps(processed_rows, ensure_ascii=False, default=str) - html_content = _get_inference_html(dataframe_json_string) + html_content = get_inference_html(dataframe_json_string) display.display(display.HTML(html_content)) -def _get_loss_analysis_html(loss_analysis_json: str) -> str: +def get_loss_analysis_html(loss_analysis_json: str) -> str: """Returns self-contained HTML for loss pattern analysis visualization.""" payload_b64 = _encode_to_base64(loss_analysis_json) return textwrap.dedent( @@ -1865,7 +1865,7 @@ def display_loss_clusters_response( ) raise - html_content = _get_loss_analysis_html( + html_content = get_loss_analysis_html( json.dumps(result_dump, ensure_ascii=False, default=_pydantic_serializer) ) display.display(display.HTML(html_content)) @@ -1892,7 +1892,7 @@ def display_loss_analysis_result( ) raise - html_content = _get_loss_analysis_html( + html_content = get_loss_analysis_html( json.dumps(wrapped, ensure_ascii=False, default=_pydantic_serializer) ) display.display(display.HTML(html_content)) @@ -1968,7 +1968,7 @@ def display_loss_analysis_results( Wraps the list of LossAnalysisResult objects into the same JSON structure used by GenerateLossClustersResponse and renders using - the shared _get_loss_analysis_html() function. + the shared get_loss_analysis_html() function. When ``eval_item_map`` is provided (from ``get_evaluation_run(include_evaluation_items=True)``), the examples @@ -1997,7 +1997,7 @@ def display_loss_analysis_results( ) raise - html_content = _get_loss_analysis_html( + html_content = get_loss_analysis_html( json.dumps(wrapped, ensure_ascii=False, default=_pydantic_serializer) ) display.display(display.HTML(html_content)) @@ -2015,3 +2015,12 @@ def display_evaluation_run_status(eval_run_obj: "types.EvaluationRun") -> None: error_message = str(eval_run_obj.error) if eval_run_obj.error else None html_content = _get_status_html(status, error_message) display.display(display.HTML(html_content)) + + +# Backward-compatible private aliases for the public HTML generators. +# These are kept temporarily to avoid breaking existing callers that depend on +# the previous private names. New code should use the public names above. +_get_evaluation_html = get_evaluation_html +_get_comparison_html = get_comparison_html +_get_inference_html = get_inference_html +_get_loss_analysis_html = get_loss_analysis_html