"""Tests Sprint 46 — vue HTML stratifiée + détecteur narratif.
Couvre :
1. ``build_stratified_ranking_html`` rend un ``<details>`` par strate
avec tableau moteur × (médiane, moyenne, docs).
2. Bandeau d'hétérogénéité affiché si ``corpus_homogeneity`` fourni.
3. **Masquage adaptatif** : retourne ``""`` si pas de strates.
4. **Anti-injection** : noms de strates et de moteurs avec balises
HTML sont échappés.
5. **Détecteur ``STRATIFICATION_RECOMMENDED``** :
- se déclenche au-delà de 5 points d'écart inter-strate
- importance HIGH au-delà de 10 points, MEDIUM sinon
- ne se déclenche pas sans corpus_homogeneity
6. **Anti-hallucination** : chaque nombre rendu est dans le payload.
7. **Intégration ReportGenerator** : la section apparaît dans
``view_ranking`` quand ``doc_strata`` est peuplé.
8. **i18n FR/EN** : clés présentes pour la vue + le template narratif.
"""
from __future__ import annotations
import json
import re
from pathlib import Path
import pytest
from picarones.evaluation.metric_result import MetricsResult
from picarones.reports.narrative.detectors import detect_stratification_recommended
from picarones.domain.facts import FactImportance, FactType
from picarones.reports.narrative.renderer import extract_numbers, render_fact
from picarones.evaluation.benchmark_result import DocumentResult
from picarones.reports.html.generator import ReportGenerator
from picarones.reports.html.renderers.stratification import build_stratified_ranking_html
# ──────────────────────────────────────────────────────────────────────────
# Helpers
# ──────────────────────────────────────────────────────────────────────────
# Sample stratified ranking fed to the renderer in the tests below:
# stratum name -> list of per-engine rows (engine, median CER, mean CER,
# number of documents).
_SAMPLE_STRAT = {
    "gothique": [
        {
            "engine": "pero",
            "median_cer": 0.05,
            "mean_cer": 0.07,
            "documents": 10,
        },
        {
            "engine": "tess",
            "median_cer": 0.20,
            "mean_cer": 0.22,
            "documents": 10,
        },
    ],
    "imprimé": [
        {
            "engine": "tess",
            "median_cer": 0.02,
            "mean_cer": 0.03,
            "documents": 10,
        },
        {
            "engine": "pero",
            "median_cer": 0.05,
            "mean_cer": 0.06,
            "documents": 10,
        },
    ],
}

# Strata names, in the insertion order of the ranking above.
_SAMPLE_STRATA = list(_SAMPLE_STRAT)

# Sample ``corpus_homogeneity`` payload: the leader engine and its
# largest median gap between two strata (0.20 - 0.02 = 0.18).
_SAMPLE_HOMOG = {
    "leader": "tess",
    "n_strata": 2,
    "max_inter_strata_gap": 0.18,
    "leader_max_gap_strata": ["imprimé", "gothique"],
    "leader_per_stratum_median": {
        "imprimé": 0.02,
        "gothique": 0.20,
    },
}
def _make_dr(doc_id: str, cer: float) -> DocumentResult:
    """Build a minimal ``DocumentResult`` whose rate metrics all equal ``cer``.

    Ground truth and hypothesis are both the single character ``"x"``, both
    lengths are 1, and the image path is the string ``/tmp/<doc_id>.png``
    (this helper only formats the path; it does not touch the filesystem).
    """
    # All rate-style fields of MetricsResult receive the same value.
    rate_fields = (
        "cer", "cer_nfc", "cer_caseless",
        "wer", "wer_normalized", "mer", "wil",
    )
    metrics = MetricsResult(
        **dict.fromkeys(rate_fields, cer),
        reference_length=1,
        hypothesis_length=1,
    )
    return DocumentResult(
        doc_id=doc_id,
        image_path=f"/tmp/{doc_id}.png",
        ground_truth="x",
        hypothesis="x",
        metrics=metrics,
        duration_seconds=0.1,
    )
# ──────────────────────────────────────────────────────────────────────────
# 1-2. build_stratified_ranking_html
# ──────────────────────────────────────────────────────────────────────────
class TestRendering:
    """Rendering checks for ``build_stratified_ranking_html`` on the sample payload."""

    def test_renders_one_details_per_stratum(self) -> None:
        """One ``<details>`` element is expected for each stratum."""
        html = build_stratified_ranking_html(
            _SAMPLE_STRAT, _SAMPLE_STRATA, _SAMPLE_HOMOG,
        )
        # NOTE(review): this assertion and the header of the next test were
        # fused into one unparsable line in the source (the text between
        # ``<details`` and the ``>`` of ``->`` was lost). The tag name comes
        # from this test's name; the exact comparison and the next method's
        # name are reconstructions — confirm against version control.
        assert html.count("<details") == len(_SAMPLE_STRATA)

    def test_renders_median_as_percentage(self) -> None:
        """Median CER values are rendered as percentages with two decimals."""
        html = build_stratified_ranking_html(
            _SAMPLE_STRAT, _SAMPLE_STRATA, _SAMPLE_HOMOG,
        )
        # Medians expressed as percentages.
        assert "5.00 %" in html  # pero, gothique stratum
        assert "20.00 %" in html  # tess, gothique stratum
        assert "2.00 %" in html  # tess, imprimé stratum

    def test_homogeneity_banner_present(self) -> None:
        """The heterogeneity banner shows up when homogeneity data is given."""
        html = build_stratified_ranking_html(
            _SAMPLE_STRAT, _SAMPLE_STRATA, _SAMPLE_HOMOG,
        )
        # The warning banner must be shown: leader name and, presumably,
        # ``max_inter_strata_gap`` (0.18) expressed in points.
        assert "tess" in html
        assert "18.0" in html

    def test_no_homogeneity_no_banner(self) -> None:
        """Without homogeneity data the banner is omitted."""
        html = build_stratified_ranking_html(
            _SAMPLE_STRAT, _SAMPLE_STRATA, homogeneity=None,
        )
        # No yellow banner (identified here by its colour code).
        assert "#fff8e1" not in html

    def test_uses_i18n_labels(self) -> None:
        """Labels passed through ``labels`` appear in the rendered HTML."""
        labels = {
            "stratification_caption": "CUSTOM_CAPTION",
            "stratification_median_label": "MED",
            "stratification_mean_label": "MEAN",
        }
        html = build_stratified_ranking_html(
            _SAMPLE_STRAT, _SAMPLE_STRATA, None, labels=labels,
        )
        assert "CUSTOM_CAPTION" in html
        assert "MED" in html
        assert "MEAN" in html
# ──────────────────────────────────────────────────────────────────────────
# 3. Masquage adaptatif
# ──────────────────────────────────────────────────────────────────────────
class TestAdaptiveMasking:
    """The renderer returns an empty string when there is nothing to stratify."""

    def test_empty_when_no_stratified_ranking(self) -> None:
        """A missing or empty ranking yields ``""``."""
        for empty_ranking in (None, {}):
            rendered = build_stratified_ranking_html(empty_ranking, ["S1"])
            assert rendered == ""

    def test_empty_when_no_available_strata(self) -> None:
        """A missing or empty strata list yields ``""``."""
        for empty_strata in (None, []):
            rendered = build_stratified_ranking_html(_SAMPLE_STRAT, empty_strata)
            assert rendered == ""
# ──────────────────────────────────────────────────────────────────────────
# 4. Anti-injection
# ──────────────────────────────────────────────────────────────────────────
class TestAntiInjection:
def test_engine_name_escaped(self) -> None:
bad_strat = {
"S1": [
{"engine": "",
"median_cer": 0.1, "mean_cer": 0.1, "documents": 1},
],
}
html = build_stratified_ranking_html(bad_strat, ["S1"])
assert "