"""Tests Sprint 43 — section calibration dans le rapport HTML. Couvre : 1. ``build_calibration_summary_html`` rend le tableau résumé (ECE/MCE par moteur). 2. ``build_reliability_diagram_svg`` rend un SVG avec les barres d'accuracy par bin, les points (avg_conf, accuracy) et la diagonale. 3. ``build_reliability_diagrams_grid_html`` génère un SVG par moteur ayant ``aggregated_calibration``, dans une grille. 4. **Masquage adaptatif** : les fonctions retournent ``""`` si aucun moteur n'a de ``aggregated_calibration`` (cas par défaut tant que les engines n'exposent pas leurs confidences natives). 5. **Anti-injection** : un nom de moteur avec balises HTML est échappé. 6. **Intégration ReportGenerator** : la section apparaît quand au moins un moteur a ``aggregated_calibration``, est omise sinon. 7. **i18n FR/EN** : les clés sont présentes et utilisées. """ from __future__ import annotations import json from pathlib import Path import pytest from picarones.evaluation.synthetic import generate_sample_benchmark from picarones.reports.html.renderers.calibration import ( build_calibration_summary_html, build_reliability_diagram_svg, build_reliability_diagrams_grid_html, ) from picarones.reports.html.generator import ReportGenerator # ────────────────────────────────────────────────────────────────────────── # Fixtures # ────────────────────────────────────────────────────────────────────────── def _make_calibration( ece: float = 0.05, mce: float = 0.12, n_predictions: int = 1000, n_bins: int = 10, ) -> dict: """Calibration agrégée réaliste avec quelques bins peuplés.""" bins = [] for k in range(n_bins): if k >= 5: bins.append({ "bin_low": k / n_bins, "bin_high": (k + 1) / n_bins, "avg_confidence": (k + 0.5) / n_bins, "accuracy": max(0, (k - 1) / n_bins), "count": n_predictions // 5, "gap": 0.1, }) else: bins.append({ "bin_low": k / n_bins, "bin_high": (k + 1) / n_bins, "avg_confidence": None, "accuracy": None, "count": 0, "gap": None, }) return { "ece": ece, "mce": mce, 
"n_bins": n_bins, "n_predictions": n_predictions, "overall_accuracy": 0.78, "overall_confidence": 0.82, "doc_count": 50, "bins": bins, } def _engine_with_calibration(name: str = "tess", **kwargs) -> dict: return {"name": name, "aggregated_calibration": _make_calibration(**kwargs)} def _engine_without_calibration(name: str = "no_cal") -> dict: return {"name": name, "aggregated_calibration": None} # ────────────────────────────────────────────────────────────────────────── # 1. Résumé # ────────────────────────────────────────────────────────────────────────── class TestSummaryTable: def test_renders_row_per_engine(self) -> None: engines = [_engine_with_calibration("a"), _engine_with_calibration("b")] html = build_calibration_summary_html(engines) assert "calibration-summary" in html assert "a" in html assert "b" in html # ECE et MCE rendus en pourcentage assert "5.00 %" in html # ECE 0.05 assert "12.00 %" in html # MCE 0.12 def test_includes_overall_accuracy_and_confidence(self) -> None: html = build_calibration_summary_html([_engine_with_calibration("x")]) assert "78.0 %" in html # accuracy assert "82.0 %" in html # confidence def test_n_predictions_formatted_with_thousand_sep(self) -> None: html = build_calibration_summary_html( [_engine_with_calibration("x", n_predictions=12345)], ) # 12 345 (espace insécable selon la convention française) assert "12 345" in html or "12345" in html def test_engine_without_calibration_omitted(self) -> None: engines = [_engine_with_calibration("a"), _engine_without_calibration("b")] html = build_calibration_summary_html(engines) assert "a" in html # Le moteur sans calibration ne doit pas avoir de ligne # (vérification approximative : son nom n'apparaît pas en gras) assert "b" not in html # ────────────────────────────────────────────────────────────────────────── # 2. 
# SVG reliability diagram
# ──────────────────────────────────────────────────────────────────────────


class TestReliabilityDiagramSvg:
    """Checks on the SVG produced by ``build_reliability_diagram_svg``."""

    def test_returns_svg_with_bars_and_diagonal(self) -> None:
        svg = build_reliability_diagram_svg(_make_calibration())
        # NOTE(review): the element-name literals in this test ("<svg",
        # "<rect", "<circle") were reconstructed after the markup was
        # stripped from the file — confirm against the renderer's output.
        assert "<svg" in svg
        # At least one bar per non-empty bin
        assert svg.count("<rect") >= 5
        # The dashed diagonal
        assert "stroke-dasharray" in svg
        # At least one point per non-empty bin
        assert svg.count("<circle") >= 5

    def test_returns_empty_when_no_data(self) -> None:
        assert build_reliability_diagram_svg(None) == ""
        assert build_reliability_diagram_svg({}) == ""
        # every bin has count = 0
        assert build_reliability_diagram_svg({
            "bins": [
                {"bin_low": 0, "bin_high": 1, "avg_confidence": None,
                 "accuracy": None, "count": 0, "gap": None},
            ],
        }) == ""

    def test_axis_labels_via_i18n(self) -> None:
        labels = {
            "reliability_x_axis": "CUSTOM_X",
            "reliability_y_axis": "CUSTOM_Y",
        }
        svg = build_reliability_diagram_svg(_make_calibration(), labels=labels)
        assert "CUSTOM_X" in svg
        assert "CUSTOM_Y" in svg


# ──────────────────────────────────────────────────────────────────────────
# 3. Grid of reliability diagrams
# ──────────────────────────────────────────────────────────────────────────


class TestGrid:
    """Checks on the grid produced by ``build_reliability_diagrams_grid_html``."""

    def test_one_svg_per_engine(self) -> None:
        engines = [
            _engine_with_calibration("a"),
            _engine_with_calibration("b"),
            _engine_with_calibration("c"),
        ]
        html = build_reliability_diagrams_grid_html(engines)
        # NOTE(review): "<svg" literal and expected count reconstructed
        # (three calibrated engines → three diagrams) — confirm.
        assert html.count("<svg") == 3

    # NOTE(review): the original method name was lost; this one is a
    # reconstruction — confirm.
    def test_engines_without_calibration_skipped(self) -> None:
        engines = [_engine_with_calibration("a"), _engine_without_calibration("b")]
        html = build_reliability_diagrams_grid_html(engines)
        # Exactly one SVG (for "a")
        assert html.count("<svg") == 1


# ──────────────────────────────────────────────────────────────────────────
# 4. Adaptive masking
# ──────────────────────────────────────────────────────────────────────────


# NOTE(review): the class name and the first method name were lost with the
# stripped markup; they are reconstructed from the module docstring (item 4)
# and the sibling method name — confirm.
class TestAdaptiveMasking:
    """The builders return ``""`` when no engine carries calibration data."""

    def test_summary_empty_when_no_engine_has_calibration(self) -> None:
        assert build_calibration_summary_html([]) == ""
        assert build_calibration_summary_html(
            [_engine_without_calibration("a")],
        ) == ""

    def test_grid_empty_when_no_engine_has_calibration(self) -> None:
        assert build_reliability_diagrams_grid_html([]) == ""
        assert build_reliability_diagrams_grid_html(
            [_engine_without_calibration("a")],
        ) == ""


# ──────────────────────────────────────────────────────────────────────────
# 5.
Anti-injection # ────────────────────────────────────────────────────────────────────────── class TestAntiInjection: def test_engine_name_escaped_in_summary(self) -> None: engine = _engine_with_calibration("") html = build_calibration_summary_html([engine]) assert "