# python-lsp-compare/tests/test_reporting.py (microsoft/python-lsp-compare, branch main)

from __future__ import annotations

import json
import sys
import tempfile
import unittest
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))

from python_lsp_compare.cli import main
from python_lsp_compare.report_csv import build_csv_rows
from python_lsp_compare.report_markdown import render_markdown_report
class ReportingTests(unittest.TestCase):
    def test_render_markdown_report_sorts_benchmark_tables_by_fastest_average(self) -> None:
        """Check that every rendered table lists the faster server first.

        Two synthetic reports ("fast" and "slow") are written to a temporary
        directory together with a summary that lists the slow server first and
        names it as the baseline. In the Overview, benchmark and per-point
        sections of the rendered Markdown, the "Fast Server" row must appear
        before the "Slow Server" row.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            fast_report_path = temp_path / "fast.json"
            slow_report_path = temp_path / "slow.json"
            summary_path = temp_path / "summary.json"

            def build_metric(duration_ms: float, completion_items: int, phase: str = "measured") -> dict[str, object]:
                """Build one completion-request metric entry."""
                return {
                    "kind": "request",
                    "method": "textDocument/completion",
                    "duration_ms": duration_ms,
                    "result_summary": {
                        "present": True,
                        "empty": False,
                        "completion_item_count": completion_items,
                    },
                    "context": {"phase": phase},
                }

            def build_report(total_duration_ms: float, mean_ms: float, measured_duration_ms: float, completion_items: int) -> dict[str, object]:
                """Build a single-benchmark, single-point report with two metrics."""
                return {
                    "benchmark_reports": [
                        {
                            "name": "fixture_suite",
                            "success": True,
                            "total_duration_ms": total_duration_ms,
                            "points": [
                                {
                                    "label": "completion fixture",
                                    "method": "textDocument/completion",
                                    "file_path": "fixture.py",
                                    # NOTE(review): no "line" entry accompanies "character" here;
                                    # confirm whether the point schema needs one.
                                    "character": 1,
                                    "success": True,
                                    "summary": {
                                        "mean_ms": mean_ms,
                                        "p95_ms": mean_ms,
                                        "validation": {"passed": True, "failure_count": 0},
                                    },
                                    "metrics": [
                                        build_metric(measured_duration_ms, completion_items),
                                        build_metric(measured_duration_ms, completion_items),
                                    ],
                                },
                            ],
                        },
                    ],
                }

            fast_report_path.write_text(
                json.dumps(build_report(total_duration_ms=10.0, mean_ms=1.5, measured_duration_ms=1.0, completion_items=2)),
                encoding="utf-8",
            )
            slow_report_path.write_text(
                json.dumps(build_report(total_duration_ms=30.0, mean_ms=4.5, measured_duration_ms=5.0, completion_items=4)),
                encoding="utf-8",
            )
            # The slow server is listed first on purpose, so a pass proves the
            # renderer reorders rows instead of echoing the summary order.
            summary_path.write_text(
                json.dumps(
                    {
                        "generated_at": "20260320T000000Z",
                        "baseline_server": "slow",
                        "requested_benchmarks": ["fixture_suite"],
                        "servers": [
                            {
                                "id": "slow",
                                "display_name": "Slow Server",
                                "output_path": str(slow_report_path),
                                "success": True,
                            },
                            {
                                "id": "fast",
                                "display_name": "Fast Server",
                                "output_path": str(fast_report_path),
                                "success": True,
                            },
                        ],
                    }
                ),
                encoding="utf-8",
            )

            markdown = render_markdown_report(summary_path, baseline_server_id="slow")

            # Slice the document into its three sections so each table is
            # checked independently of the others.
            overview_start = markdown.index("## Overview")
            benchmark_start = markdown.index("## Benchmark: fixture_suite")
            point_start = markdown.index("### completion fixture")
            overview_section = markdown[overview_start:benchmark_start]
            benchmark_section = markdown[benchmark_start:point_start]
            point_section = markdown[point_start:]

            self.assertLess(overview_section.index("| Fast Server |"), overview_section.index("| Slow Server |"))
            self.assertLess(benchmark_section.index("| Fast Server |"), benchmark_section.index("| Slow Server |"))
            self.assertLess(point_section.index("| Fast Server |"), point_section.index("| Slow Server |"))
    def test_bench_servers_writes_markdown_comparison_with_result_differences(self) -> None:
        """Run ``bench-servers`` against two fake servers and check each output.

        Two launch configurations of the fixture server are benchmarked with
        "large-results" as the baseline. The test then checks the Markdown
        report, the CSV export, the ``latest-results.md`` copy, the summary
        JSON and the first server's per-point result summary.
        """
        server_script = Path(__file__).parent / "fixtures" / "fake_lsp_server.py"
        benchmark_root = Path(__file__).parent / "fixtures"
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            config_path = temp_path / "servers.json"
            output_dir = temp_path / "results"
            markdown_path = temp_path / "comparison.md"
            csv_path = temp_path / "comparison.csv"
            # Not passed on the command line; the assertions below expect the
            # file in temp_path (presumably written beside the Markdown output;
            # confirm against the CLI).
            latest_results_path = temp_path / "latest-results.md"

            config_path.write_text(
                json.dumps(
                    {
                        "servers": [
                            {
                                "id": "small-results",
                                "displayName": "Small Results",
                                "launch": {
                                    "command": sys.executable,
                                    "args": [
                                        str(server_script),
                                        "--completion-items",
                                        # NOTE(review): value reconstructed; "1" matches the
                                        # completion_item_count mean of 1.0 asserted below.
                                        "1",
                                        "--hover-text",
                                        # NOTE(review): placeholder for a short hover text; the
                                        # original value is not recoverable from this listing.
                                        "short",
                                    ],
                                },
                            },
                            {
                                "id": "large-results",
                                "displayName": "Large Results",
                                "launch": {
                                    "command": sys.executable,
                                    "args": [
                                        str(server_script),
                                        "--completion-items",
                                        # NOTE(review): placeholder; it only has to differ from
                                        # the small server's count. Confirm the original value.
                                        "3",
                                        "--hover-text",
                                        "this is a much longer hover payload",
                                    ],
                                },
                            },
                        ],
                    }
                ),
                encoding="utf-8",
            )

            exit_code = main(
                [
                    "bench-servers",
                    "--config",
                    str(config_path),
                    "--benchmark-root",
                    str(benchmark_root),
                    "--output-dir",
                    str(output_dir),
                    "--markdown-output",
                    str(markdown_path),
                    "--csv-output",
                    str(csv_path),
                    "--baseline-server",
                    "large-results",
                ]
            )
            self.assertEqual(exit_code, 0)

            # Markdown comparison report.
            markdown = markdown_path.read_text(encoding="utf-8")
            self.assertIn("# Python LSP Benchmark Comparison", markdown)
            self.assertIn("Small Results", markdown)
            self.assertIn("Large Results", markdown)
            self.assertIn("Baseline server: Large Results (large-results)", markdown)
            self.assertIn("## Server Versions", markdown)
            self.assertIn("Completions found", markdown)
            self.assertIn("Avg measured ms", markdown)
            self.assertIn("Result Differences", markdown)
            self.assertIn("completion fixture: result differences detected", markdown)
            self.assertIn("Delta vs Large Results", markdown)
            self.assertNotIn("Avg result chars", markdown)

            # CSV export.
            csv_text = csv_path.read_text(encoding="utf-8")
            self.assertIn("baseline_server_id", csv_text)
            self.assertIn("large-results", csv_text)
            self.assertIn("completion_item_count", csv_text)
            self.assertNotIn("avg_result_chars", csv_text)

            # Copy of the latest report.
            self.assertTrue(latest_results_path.exists())
            latest_results = latest_results_path.read_text(encoding="utf-8")
            self.assertIn("# Python LSP Benchmark Comparison", latest_results)

            # Summary JSON and the first server's detailed report.
            summary = json.loads(next(output_dir.glob("summary-*.json")).read_text(encoding="utf-8"))
            self.assertEqual(summary["baseline_server"], "large-results")
            self.assertIn("version", summary["servers"][0])
            report_path = output_dir / Path(summary["servers"][0]["output_path"]).name
            report = json.loads(report_path.read_text(encoding="utf-8"))
            completion_point = next(
                point
                for point in report["benchmark_reports"][0]["points"]
                if point["method"] == "textDocument/completion"
            )
            result_metrics = completion_point["summary"]["result_summary"]["metrics"]
            self.assertEqual(result_metrics["completion_item_count"]["mean"], 1.0)
            self.assertEqual(completion_point["summary"]["result_summary"]["non_empty_count"], 2)
    def test_render_markdown_report_shows_tsp_type_name_differences(self) -> None:
        """Check that differing ``type_name`` results reach the Markdown and CSV output.

        Two otherwise identical ``typeServer/getComputedType`` reports are
        written, one resolving to "int" and the other to "str". The Markdown
        must contain a "Type name" label and both type names, and at least one
        CSV row must carry the ``type_name`` result metric.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            first_report_path = temp_path / "first.json"
            second_report_path = temp_path / "second.json"
            summary_path = temp_path / "summary.json"

            def build_tsp_metric(duration_ms: float, type_name: str) -> dict[str, object]:
                """Build one measured getComputedType metric entry."""
                return {
                    "kind": "request",
                    "method": "typeServer/getComputedType",
                    "duration_ms": duration_ms,
                    "result_summary": {
                        "present": True,
                        "empty": False,
                        "type_name": type_name,
                        "size_chars": 25,
                    },
                    "context": {"phase": "measured"},
                }

            def build_report(type_name: str) -> dict[str, object]:
                """Build a single-point report whose metrics all report ``type_name``."""
                return {
                    "benchmark_reports": [
                        {
                            "name": "tsp_fixture",
                            "success": True,
                            "total_duration_ms": 10.0,
                            "points": [
                                {
                                    "label": "narrowed type",
                                    "method": "typeServer/getComputedType",
                                    "file_path": "flow.py",
                                    # NOTE(review): no "line" entry accompanies "character" here;
                                    # confirm whether the point schema needs one.
                                    "character": 1,
                                    "success": True,
                                    "summary": {
                                        "mean_ms": 1.0,
                                        "validation": {"passed": True, "failure_count": 0},
                                    },
                                    "metrics": [build_tsp_metric(1.0, type_name), build_tsp_metric(1.0, type_name)],
                                },
                            ],
                        },
                    ],
                }

            first_report_path.write_text(json.dumps(build_report("int")), encoding="utf-8")
            second_report_path.write_text(json.dumps(build_report("str")), encoding="utf-8")
            summary_path.write_text(
                json.dumps(
                    {
                        "generated_at": "20260320T000000Z",
                        "baseline_server": "baseline",
                        "requested_benchmarks": ["tsp_fixture"],
                        "servers": [
                            {"id": "baseline", "display_name": "Baseline", "output_path": str(first_report_path), "success": True},
                            {"id": "candidate", "display_name": "Candidate", "output_path": str(second_report_path), "success": True},
                        ],
                    }
                ),
                encoding="utf-8",
            )

            markdown = render_markdown_report(summary_path, baseline_server_id="baseline")
            rows = build_csv_rows(summary_path, baseline_server_id="baseline")

            self.assertIn("Type name", markdown)
            self.assertIn("int", markdown)
            self.assertIn("str", markdown)
            self.assertTrue(any(row["result_metric_name"] == "type_name" for row in rows))
    def test_render_markdown_report_shows_friendly_semantic_token_method_name(self) -> None:
        """Check that ``typeServer/semanticTokens`` is rendered under a friendly name.

        A single-server report with one semantic-token point is rendered. The
        Markdown must contain the friendly description and must not contain the
        raw ``Method: `typeServer/semanticTokens``` line.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            report_path = temp_path / "semantic.json"
            summary_path = temp_path / "summary.json"

            report_path.write_text(
                json.dumps(
                    {
                        "benchmark_reports": [
                            {
                                "name": "tsp_semantic",
                                "success": True,
                                "total_duration_ms": 10.0,
                                "points": [
                                    {
                                        "label": "django semantic tokens",
                                        "method": "typeServer/semanticTokens",
                                        "file_path": "semantic.py",
                                        # NOTE(review): no "line" entry accompanies "character"
                                        # here; confirm whether the point schema needs one.
                                        "character": 1,
                                        "success": True,
                                        "summary": {
                                            "mean_ms": 1.0,
                                            "validation": {"passed": True, "failure_count": 0},
                                        },
                                        "metrics": [
                                            {
                                                "kind": "request",
                                                "method": "typeServer/semanticTokens",
                                                "duration_ms": 1.0,
                                                "result_summary": {
                                                    "present": True,
                                                    "empty": False,
                                                    "top_level_count": 12,
                                                },
                                                "context": {"phase": "measured"},
                                            },
                                        ],
                                    },
                                ],
                            },
                        ],
                    }
                ),
                encoding="utf-8",
            )
            summary_path.write_text(
                json.dumps(
                    {
                        "generated_at": "20260320T000000Z",
                        "baseline_server": "pyrefly",
                        "requested_benchmarks": ["tsp_semantic"],
                        "servers": [
                            {"id": "pyrefly", "display_name": "Pyrefly", "output_path": str(report_path), "success": True},
                        ],
                    }
                ),
                encoding="utf-8",
            )

            markdown = render_markdown_report(summary_path, baseline_server_id="pyrefly")

            self.assertIn("semantic token impl using typeServer/getComputedType", markdown)
            self.assertNotIn("Method: `typeServer/semanticTokens`", markdown)
    def test_render_markdown_report_includes_server_only_tsp_benchmark(self) -> None:
        """Check that a benchmark reported by only one server still gets a section.

        The "pyrefly" report contains a ``tsp_core`` benchmark. The baseline
        "pyright" report is empty, and ``tsp_core`` is not among the summary's
        requested benchmarks. The rendered Markdown must still include a
        ``## Benchmark: tsp_core`` heading.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            pyrefly_report_path = temp_path / "pyrefly.json"
            other_report_path = temp_path / "other.json"
            summary_path = temp_path / "summary.json"

            def build_tsp_report() -> dict[str, object]:
                """Build a report holding one ``tsp_core`` benchmark with a single point."""
                return {
                    "benchmark_reports": [
                        {
                            "name": "tsp_core",
                            "success": True,
                            "total_duration_ms": 10.0,
                            "points": [
                                {
                                    "label": "narrowed type",
                                    "method": "typeServer/getComputedType",
                                    "file_path": "flow.py",
                                    # NOTE(review): no "line" entry accompanies "character" here;
                                    # confirm whether the point schema needs one.
                                    "character": 1,
                                    "success": True,
                                    "summary": {
                                        "mean_ms": 1.0,
                                        "validation": {"passed": True, "failure_count": 0},
                                    },
                                    "metrics": [
                                        {
                                            "kind": "request",
                                            "method": "typeServer/getComputedType",
                                            "duration_ms": 1.0,
                                            "result_summary": {"present": True, "empty": False, "type_name": "int"},
                                            "context": {"phase": "measured"},
                                        },
                                    ],
                                },
                            ],
                        },
                    ],
                }

            pyrefly_report_path.write_text(json.dumps(build_tsp_report()), encoding="utf-8")
            other_report_path.write_text(json.dumps({"benchmark_reports": []}), encoding="utf-8")
            summary_path.write_text(
                json.dumps(
                    {
                        "generated_at": "20260320T000000Z",
                        "baseline_server": "pyright",
                        "requested_benchmarks": ["data_science", "web"],
                        "servers": [
                            {"id": "pyrefly", "display_name": "Pyrefly", "output_path": str(pyrefly_report_path), "success": True},
                            {"id": "pyright", "display_name": "Pyright", "output_path": str(other_report_path), "success": True},
                        ],
                    }
                ),
                encoding="utf-8",
            )

            markdown = render_markdown_report(summary_path, baseline_server_id="pyright")

            self.assertIn("## Benchmark: tsp_core", markdown)
    def test_render_report_rebuilds_markdown_from_summary_json(self) -> None:
        """Check that ``render-report`` regenerates reports from a saved summary.

        ``bench-servers`` is run once against the fixture server to produce a
        summary JSON. ``render-report`` is then pointed at that summary with a
        custom title. The test checks the Markdown, the CSV and the
        ``latest-results.md`` copy for the expected title and server name.
        """
        server_script = Path(__file__).parent / "fixtures" / "fake_lsp_server.py"
        benchmark_root = Path(__file__).parent / "fixtures"
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            config_path = temp_path / "servers.json"
            output_dir = temp_path / "results"
            summary_path = temp_path / "summary.json"
            markdown_path = temp_path / "rerendered.md"
            csv_path = temp_path / "rerendered.csv"
            # Not passed on the command line; the assertions below expect the
            # file in temp_path (presumably written beside the Markdown output;
            # confirm against the CLI).
            latest_results_path = temp_path / "latest-results.md"

            config_path.write_text(
                json.dumps(
                    {
                        "servers": [
                            {
                                "id": "demo",
                                "displayName": "Demo",
                                "launch": {
                                    "command": sys.executable,
                                    "args": [str(server_script)],
                                },
                            },
                        ],
                    }
                ),
                encoding="utf-8",
            )

            # Step 1: produce the summary JSON.
            exit_code = main(
                [
                    "bench-servers",
                    "--config",
                    str(config_path),
                    "--benchmark-root",
                    str(benchmark_root),
                    "--output-dir",
                    str(output_dir),
                    "--summary-output",
                    str(summary_path),
                ]
            )
            self.assertEqual(exit_code, 0)

            # Step 2: re-render reports from that summary alone.
            render_exit = main(
                [
                    "render-report",
                    "--summary",
                    str(summary_path),
                    "--output",
                    str(markdown_path),
                    "--csv-output",
                    str(csv_path),
                    "--title",
                    "Custom Comparison",
                ]
            )
            self.assertEqual(render_exit, 0)

            markdown = markdown_path.read_text(encoding="utf-8")
            csv_text = csv_path.read_text(encoding="utf-8")
            self.assertIn("# Custom Comparison", markdown)
            self.assertIn("Demo", markdown)
            self.assertIn("Avg measured ms", markdown)
            self.assertIn("server_id", csv_text)
            self.assertIn("demo", csv_text)

            self.assertTrue(latest_results_path.exists())
            latest_md = latest_results_path.read_text(encoding="utf-8")
            self.assertIn("# Custom Comparison", latest_md)
            self.assertIn("Demo", latest_md)
# Run the unittest runner when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
# (Web-page navigation text captured with this listing was here; it is not part of the module.)