diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py
index 77e6180340..9fd9511b3b 100644
--- a/tests/unit/vertexai/genai/test_evals.py
+++ b/tests/unit/vertexai/genai/test_evals.py
@@ -606,7 +606,7 @@ def test_response_structure(self):
assert result.clusters[1].cluster_id == "cluster-2"
def test_get_loss_analysis_html(self):
- """Tests that _get_loss_analysis_html generates valid HTML with data."""
+ """Tests that get_loss_analysis_html generates valid HTML with data."""
from vertexai._genai import _evals_visualization
import json
@@ -666,7 +666,7 @@ def test_get_loss_analysis_html(self):
}
]
}
- html = _evals_visualization._get_loss_analysis_html(json.dumps(data))
+ html = _evals_visualization.get_loss_analysis_html(json.dumps(data))
assert "Loss Pattern Analysis" in html
assert "test_metric" not in html # data is Base64-encoded in the HTML
assert "" in html
@@ -676,6 +676,91 @@ def test_get_loss_analysis_html(self):
assert "example-section-label" in html # labels for scenario/rubrics
assert "Analysis Summary" in html # summary heading
+ def test_get_evaluation_html(self):
+ """Tests that get_evaluation_html generates valid HTML with data."""
+ from vertexai._genai import _evals_visualization
+ import base64
+ import json
+
+ data = {
+ "summary_metrics": [{"metric_name": "test_metric", "mean_score": 0.85}],
+ "eval_case_results": [
+ {
+ "eval_case_index": 0,
+ "response_candidate_results": [
+ {"display_text": "candidate response"}
+ ],
+ }
+ ],
+ "metadata": {"dataset": []},
+ }
+ payload_json = json.dumps(data)
+ html = _evals_visualization.get_evaluation_html(payload_json)
+
+ assert "" in html
+        assert "Evaluation Report" in html
+ assert "test_metric" not in html
+ payload_b64 = base64.b64encode(payload_json.encode("utf-8")).decode("ascii")
+ assert payload_b64 in html
+ assert "DOMPurify" in html
+
+ def test_get_comparison_html(self):
+ """Tests that get_comparison_html generates valid HTML with data."""
+ from vertexai._genai import _evals_visualization
+ import base64
+ import json
+
+ data = {
+ "summary_metrics": [
+ {
+ "metric_name": "test_metric",
+ "win_rate": 0.6,
+ "loss_rate": 0.4,
+ }
+ ],
+ "eval_case_results": [
+ {
+ "eval_case_index": 0,
+ "response_candidate_results": [
+ {"display_text": "candidate A"},
+ {"display_text": "candidate B"},
+ ],
+ }
+ ],
+ "metadata": {"dataset": []},
+ }
+ payload_json = json.dumps(data)
+ html = _evals_visualization.get_comparison_html(payload_json)
+
+ assert "" in html
+ assert "Eval Comparison Report" in html
+ assert "test_metric" not in html
+ payload_b64 = base64.b64encode(payload_json.encode("utf-8")).decode("ascii")
+ assert payload_b64 in html
+ assert "DOMPurify" in html
+
+ def test_get_inference_html(self):
+ """Tests that get_inference_html generates valid HTML with data."""
+ from vertexai._genai import _evals_visualization
+ import base64
+ import json
+
+ data = [
+ {
+ "prompt": "What is the capital of France?",
+ "response": "Paris",
+ }
+ ]
+ payload_json = json.dumps(data, ensure_ascii=False)
+ html = _evals_visualization.get_inference_html(payload_json)
+
+ assert "" in html
+ assert "Evaluation Dataset" in html
+ assert "Paris" not in html
+ payload_b64 = base64.b64encode(payload_json.encode("utf-8")).decode("ascii")
+ assert payload_b64 in html
+ assert "DOMPurify" in html
+
def test_display_loss_clusters_response_no_ipython(self):
"""Tests graceful fallback when not in IPython."""
from vertexai._genai import _evals_visualization
@@ -1488,7 +1573,7 @@ def test_display_loss_analysis_results_html(self):
},
ensure_ascii=False,
)
- html = _evals_visualization._get_loss_analysis_html(payload_json)
+ html = _evals_visualization.get_loss_analysis_html(payload_json)
# The HTML is a self-contained report with base64-encoded JSON payload
# decoded by JavaScript at runtime. Verify structure, not content.
assert "" in html
diff --git a/vertexai/_genai/_evals_visualization.py b/vertexai/_genai/_evals_visualization.py
index ed5e82a93a..45b50b6ebc 100644
--- a/vertexai/_genai/_evals_visualization.py
+++ b/vertexai/_genai/_evals_visualization.py
@@ -231,7 +231,7 @@ def _extract_dataset_rows(dataset: types.EvaluationDataset) -> list[dict[str, An
return processed_rows
-def _get_evaluation_html(eval_result_json: str) -> str:
+def get_evaluation_html(eval_result_json: str) -> str:
"""Returns a self-contained HTML for single evaluation visualization."""
payload_b64 = _encode_to_base64(eval_result_json)
return textwrap.dedent(
@@ -787,7 +787,7 @@ def _get_evaluation_html(eval_result_json: str) -> str:
)
-def _get_comparison_html(eval_result_json: str) -> str:
+def get_comparison_html(eval_result_json: str) -> str:
"""Returns a self-contained HTML for a side-by-side eval comparison."""
payload_b64 = _encode_to_base64(eval_result_json)
return textwrap.dedent(
@@ -1277,7 +1277,7 @@ def _get_comparison_html(eval_result_json: str) -> str:
)
-def _get_inference_html(dataframe_json: str) -> str:
+def get_inference_html(dataframe_json: str) -> str:
"""Returns a self-contained HTML for displaying inference results."""
payload_b64 = _encode_to_base64(dataframe_json)
return textwrap.dedent(
@@ -1475,7 +1475,7 @@ def display_evaluation_result(
summary.update(win_rates[summary["metric_name"]])
result_dump["metadata"] = metadata_payload
- html_content = _get_comparison_html(json.dumps(result_dump))
+ html_content = get_comparison_html(json.dumps(result_dump))
else:
single_dataset = input_dataset_list[0] if input_dataset_list else None
processed_rows = []
@@ -1499,7 +1499,7 @@ def display_evaluation_result(
cand_res["raw_json"] = original_case["response_raw_json"]
result_dump["metadata"] = metadata_payload
- html_content = _get_evaluation_html(json.dumps(result_dump))
+ html_content = get_evaluation_html(json.dumps(result_dump))
display.display(display.HTML(html_content))
@@ -1553,11 +1553,11 @@ def display_evaluation_dataset(eval_dataset_obj: types.EvaluationDataset) -> Non
processed_rows.append(processed_row)
dataframe_json_string = json.dumps(processed_rows, ensure_ascii=False, default=str)
- html_content = _get_inference_html(dataframe_json_string)
+ html_content = get_inference_html(dataframe_json_string)
display.display(display.HTML(html_content))
-def _get_loss_analysis_html(loss_analysis_json: str) -> str:
+def get_loss_analysis_html(loss_analysis_json: str) -> str:
"""Returns self-contained HTML for loss pattern analysis visualization."""
payload_b64 = _encode_to_base64(loss_analysis_json)
return textwrap.dedent(
@@ -1865,7 +1865,7 @@ def display_loss_clusters_response(
)
raise
- html_content = _get_loss_analysis_html(
+ html_content = get_loss_analysis_html(
json.dumps(result_dump, ensure_ascii=False, default=_pydantic_serializer)
)
display.display(display.HTML(html_content))
@@ -1892,7 +1892,7 @@ def display_loss_analysis_result(
)
raise
- html_content = _get_loss_analysis_html(
+ html_content = get_loss_analysis_html(
json.dumps(wrapped, ensure_ascii=False, default=_pydantic_serializer)
)
display.display(display.HTML(html_content))
@@ -1968,7 +1968,7 @@ def display_loss_analysis_results(
Wraps the list of LossAnalysisResult objects into the same JSON
structure used by GenerateLossClustersResponse and renders using
- the shared _get_loss_analysis_html() function.
+ the shared get_loss_analysis_html() function.
When ``eval_item_map`` is provided (from
``get_evaluation_run(include_evaluation_items=True)``), the examples
@@ -1997,7 +1997,7 @@ def display_loss_analysis_results(
)
raise
- html_content = _get_loss_analysis_html(
+ html_content = get_loss_analysis_html(
json.dumps(wrapped, ensure_ascii=False, default=_pydantic_serializer)
)
display.display(display.HTML(html_content))
@@ -2015,3 +2015,12 @@ def display_evaluation_run_status(eval_run_obj: "types.EvaluationRun") -> None:
error_message = str(eval_run_obj.error) if eval_run_obj.error else None
html_content = _get_status_html(status, error_message)
display.display(display.HTML(html_content))
+
+
+# Backward-compatible private aliases for the public HTML generators.
+# These are kept temporarily to avoid breaking existing callers that depend on
+# the previous private names. New code should use the public names above.
+_get_evaluation_html = get_evaluation_html
+_get_comparison_html = get_comparison_html
+_get_inference_html = get_inference_html
+_get_loss_analysis_html = get_loss_analysis_html