Spaces:
Running
feat(reports): Phase B6 — rapport HTML multi-vues + extension AltoView
Browse files
Phase B6 du chantier Option B (mai 2026). La valeur métier ALTO/
documentaire est maintenant visible dans le rapport HTML.
**B6.1 — Extension DEFAULT_ALTO_METRICS (alto_view.py)**
Les métriques textuelles ``alto_text_cer/wer/mer/wil`` (déjà
enregistrées dans le MetricRegistry depuis le Chantier 1) sont
ajoutées aux defaults de ``build_alto_view()``. Total : 7 métriques
(3 structurelles + 4 textuelles). ``reading_order_f1`` reste opt-in
car elle nécessite une GT READING_ORDER qui n'est pas systématiquement disponible.
**B6.2.a — BenchmarkResult.view_results + converter**
``BenchmarkResult`` a un nouveau champ optionnel
``view_results: dict[str, dict[str, dict[str, dict[str, float]]]]``
indexé ``{view: {engine: {doc: {metric: value}}}}``. Vide pour les
runs legacy ``run_benchmark_via_service``.
``run_result_to_benchmark_result`` (converter) transpose désormais
les ``ViewResult`` du ``RunResult`` vers ce champ : permet au
rapport HTML d'accéder aux métriques par vue sans interroger les
fichiers JSONL natifs.
**B6.2.b — Renderer build_view_results_html (nouveau)**
``picarones/reports/html/renderers/view_results.py`` :
- Une section ``<div class="chart-card view-results-section">`` par
vue présente.
- Titre + note méthodologique FR/EN par vue (text_final,
alto_documentary, searchability) — fallback générique pour les
vues custom.
- Tableau ``Métrique × engine`` avec moyennes par engine
(% si métrique ∈ [0, 1]).
- **Pipelines OMIS listés explicitement** : pattern critique pour
AltoView (un OCR sans expose_alto produit RAW_TEXT seul →
omission silencieuse serait trompeuse).
- Échappement HTML strict (résistance XSS via noms d'engine
custom).
- Adaptive : ``""`` si ``benchmark.view_results`` vide.
Intégration dans ``ReportGenerator._build_section_html`` +
``view_analyses.html`` (placeholder ``view_results_html``).
Tests : 13 cas dans test_view_sections.py
- TestEmptyViewResults (2) : None/dict vide → "".
- TestSingleViewRendering (3) : titre, métriques, "tous éligibles".
- TestOmittedPipelines (1) : OCR text-only listé comme omis
d'AltoView.
- TestMultipleViews (1) : 3 vues canoniques rendues simultanément.
- TestXssEscaping (2) : noms d'engine + métrique avec HTML brut
sont échappés.
- TestI18n (2) : libellés FR/EN.
- TestReportGeneratorIntegration (2) : section présente quand
view_results renseigné, absente sinon (compat ascendante).
État du chantier : checkpoint C3 atteint (valeur métier
documentaire visible dans le rapport HTML). Reste B7 (deprecation
finale) + B8 (suppression legacy).
- picarones/app/services/_benchmark_converter.py +19 -0
- picarones/evaluation/benchmark_result.py +11 -0
- picarones/evaluation/views/alto_view.py +30 -2
- picarones/reports/html/generator.py +15 -0
- picarones/reports/html/renderers/view_results.py +262 -0
- picarones/reports/html/templates/view_analyses.html +8 -0
- tests/evaluation/views/test_sprint_a14_s15_alto_view.py +51 -0
- tests/reports/test_view_sections.py +292 -0
|
@@ -188,12 +188,31 @@ def run_result_to_benchmark_result(
|
|
| 188 |
if fallbacks:
|
| 189 |
metadata["importer_fallbacks"] = fallbacks
|
| 190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
return BenchmarkResult(
|
| 192 |
corpus_name=corpus.name,
|
| 193 |
corpus_source=str(corpus.source_path) if corpus.source_path else None,
|
| 194 |
document_count=len(documents),
|
| 195 |
engine_reports=engine_reports,
|
| 196 |
metadata=metadata,
|
|
|
|
| 197 |
)
|
| 198 |
|
| 199 |
|
|
|
|
| 188 |
if fallbacks:
|
| 189 |
metadata["importer_fallbacks"] = fallbacks
|
| 190 |
|
| 191 |
+
# Phase B6 — transpose les ViewResult du RunResult en
|
| 192 |
+
# ``view_results`` indexé : ``{view: {engine: {doc: {metric: value}}}}``.
|
| 193 |
+
# Permet au rapport HTML de rendre des sections par vue
|
| 194 |
+
# (TextView/AltoView/SearchView) avec le détail par pipeline.
|
| 195 |
+
view_results_by_view: dict[str, dict[str, dict[str, dict[str, float]]]] = {}
|
| 196 |
+
for doc_idx, run_doc in enumerate(run_result.document_results):
|
| 197 |
+
if doc_idx >= len(documents):
|
| 198 |
+
break
|
| 199 |
+
doc_id = documents[doc_idx].doc_id
|
| 200 |
+
for vr in run_doc.view_results:
|
| 201 |
+
view_bucket = view_results_by_view.setdefault(vr.view_name, {})
|
| 202 |
+
engine_bucket = view_bucket.setdefault(vr.pipeline_name, {})
|
| 203 |
+
engine_bucket[doc_id] = {
|
| 204 |
+
metric: float(value)
|
| 205 |
+
for metric, value in vr.metric_values.items()
|
| 206 |
+
if isinstance(value, (int, float))
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
return BenchmarkResult(
|
| 210 |
corpus_name=corpus.name,
|
| 211 |
corpus_source=str(corpus.source_path) if corpus.source_path else None,
|
| 212 |
document_count=len(documents),
|
| 213 |
engine_reports=engine_reports,
|
| 214 |
metadata=metadata,
|
| 215 |
+
view_results=view_results_by_view,
|
| 216 |
)
|
| 217 |
|
| 218 |
|
|
@@ -527,6 +527,17 @@ class BenchmarkResult:
|
|
| 527 |
# ``None`` si aucun document n'expose de ``script_type`` dans son
|
| 528 |
# ``image_quality.script_type`` ou ``metadata.script_type``.
|
| 529 |
doc_strata: Optional[dict[str, str]] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
|
| 531 |
def ranking(self) -> list[dict]:
|
| 532 |
"""Retourne le classement des moteurs trié par **médiane CER** croissante.
|
|
|
|
| 527 |
# ``None`` si aucun document n'expose de ``script_type`` dans son
|
| 528 |
# ``image_quality.script_type`` ou ``metadata.script_type``.
|
| 529 |
doc_strata: Optional[dict[str, str]] = None
|
| 530 |
+
# Phase B6 (mai 2026) — résultats des EvaluationView du
|
| 531 |
+
# RunOrchestrator (text_final, alto_documentary, searchability).
|
| 532 |
+
# Structure : ``{view_name: {engine_name: {doc_id: {metric: value}}}}``.
|
| 533 |
+
# Vide si le run a été lancé sans vues (cas legacy
|
| 534 |
+
# ``run_benchmark_via_service`` sans RunOrchestrator).
|
| 535 |
+
# Consommé par le rapport HTML (sections multi-vues) et par le
|
| 536 |
+
# narrative engine pour mettre en avant les pipelines qui
|
| 537 |
+
# produisent un ALTO valide vs ceux qui restent en RAW_TEXT seul.
|
| 538 |
+
view_results: dict[str, dict[str, dict[str, dict[str, float]]]] = field(
|
| 539 |
+
default_factory=dict,
|
| 540 |
+
)
|
| 541 |
|
| 542 |
def ranking(self) -> list[dict]:
|
| 543 |
"""Retourne le classement des moteurs trié par **médiane CER** croissante.
|
|
@@ -31,6 +31,8 @@ filtre les pipelines dont l'artefact n'est pas dans
|
|
| 31 |
|
| 32 |
Métriques par défaut
|
| 33 |
--------------------
|
|
|
|
|
|
|
| 34 |
- ``alto_validity`` — l'hypothèse est-elle structurellement
|
| 35 |
cohérente ? (≥ 1 page, ≥ 1 bloc, ≥ 1 ligne).
|
| 36 |
- ``alto_line_count_ratio`` — ratio min/max du nombre de lignes.
|
|
@@ -38,12 +40,24 @@ Métriques par défaut
|
|
| 38 |
|
| 39 |
Toutes ∈ [0, 1] avec ``higher_is_better=True``.
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
Reportées à un sprint suivant
|
| 42 |
-----------------------------
|
| 43 |
- ``textline_alignment`` (IoU des bbox de lignes).
|
| 44 |
- ``reading_order_consistency`` (Kendall tau sur les IDs).
|
| 45 |
- ``layout_f1`` (ICDAR 2015) via wrapper de
|
| 46 |
``evaluation/metrics/layout.py``.
|
|
|
|
|
|
|
|
|
|
| 47 |
"""
|
| 48 |
|
| 49 |
from __future__ import annotations
|
|
@@ -52,12 +66,26 @@ from picarones.domain.artifacts import ArtifactType
|
|
| 52 |
from picarones.domain.evaluation_spec import EvaluationView
|
| 53 |
|
| 54 |
|
| 55 |
-
#: Métriques calculées par défaut.
|
| 56 |
-
#:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
DEFAULT_ALTO_METRICS: tuple[str, ...] = (
|
|
|
|
| 58 |
"alto_validity",
|
| 59 |
"alto_line_count_ratio",
|
| 60 |
"alto_word_box_coverage",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
)
|
| 62 |
|
| 63 |
|
|
|
|
| 31 |
|
| 32 |
Métriques par défaut
|
| 33 |
--------------------
|
| 34 |
+
**Structurelles** (typées ``(ALTO_XML, ALTO_XML)``) :
|
| 35 |
+
|
| 36 |
- ``alto_validity`` — l'hypothèse est-elle structurellement
|
| 37 |
cohérente ? (≥ 1 page, ≥ 1 bloc, ≥ 1 ligne).
|
| 38 |
- ``alto_line_count_ratio`` — ratio min/max du nombre de lignes.
|
|
|
|
| 40 |
|
| 41 |
Toutes ∈ [0, 1] avec ``higher_is_better=True``.
|
| 42 |
|
| 43 |
+
**Textuelles** (Phase B6 — opèrent sur le texte extrait de l'ALTO via
|
| 44 |
+
``extract_text_from_alto``) :
|
| 45 |
+
|
| 46 |
+
- ``alto_text_cer`` — CER calculé sur le texte plat extrait des deux
|
| 47 |
+
ALTO (référence + hypothèse). Permet de détecter une régression
|
| 48 |
+
textuelle même quand la structure est préservée.
|
| 49 |
+
- ``alto_text_wer`` / ``alto_text_mer`` / ``alto_text_wil`` — variantes
|
| 50 |
+
WER/MER/WIL sur le même texte extrait.
|
| 51 |
+
|
| 52 |
Reportées à un sprint suivant
|
| 53 |
-----------------------------
|
| 54 |
- ``textline_alignment`` (IoU des bbox de lignes).
|
| 55 |
- ``reading_order_consistency`` (Kendall tau sur les IDs).
|
| 56 |
- ``layout_f1`` (ICDAR 2015) via wrapper de
|
| 57 |
``evaluation/metrics/layout.py``.
|
| 58 |
+
- ``reading_order_f1`` — nécessite une GT ``READING_ORDER`` qui n'est
|
| 59 |
+
pas systématiquement disponible dans les corpus. Opt-in via
|
| 60 |
+
``build_alto_view(metric_names=(..., "reading_order_f1"))``.
|
| 61 |
"""
|
| 62 |
|
| 63 |
from __future__ import annotations
|
|
|
|
| 66 |
from picarones.domain.evaluation_spec import EvaluationView
|
| 67 |
|
| 68 |
|
| 69 |
+
#: Métriques calculées par défaut. 7 métriques :
|
| 70 |
+
#:
|
| 71 |
+
#: - 3 structurelles ``(ALTO_XML, ALTO_XML)`` (Sprint A14-S15).
|
| 72 |
+
#: - 4 textuelles ``(ALTO_XML, ALTO_XML)`` qui extraient le texte
|
| 73 |
+
#: plat de l'ALTO via ``extract_text_from_alto`` puis appliquent
|
| 74 |
+
#: les opérateurs jiwer (Phase B6 mai 2026).
|
| 75 |
+
#:
|
| 76 |
+
#: Les métriques sont enregistrées dans le ``MetricRegistry`` via
|
| 77 |
+
#: ``register_metric`` (cf. ``evaluation/metrics/alto_metrics.py`` et
|
| 78 |
+
#: ``evaluation/metrics/alto_structural.py``).
|
| 79 |
DEFAULT_ALTO_METRICS: tuple[str, ...] = (
|
| 80 |
+
# Structurelles (Sprint A14-S15)
|
| 81 |
"alto_validity",
|
| 82 |
"alto_line_count_ratio",
|
| 83 |
"alto_word_box_coverage",
|
| 84 |
+
# Textuelles (Phase B6 — extraction texte de l'ALTO)
|
| 85 |
+
"alto_text_cer",
|
| 86 |
+
"alto_text_wer",
|
| 87 |
+
"alto_text_mer",
|
| 88 |
+
"alto_text_wil",
|
| 89 |
)
|
| 90 |
|
| 91 |
|
|
@@ -345,6 +345,13 @@ class ReportGenerator:
|
|
| 345 |
from picarones.reports.html.renderers.taxonomy_intra_doc import (
|
| 346 |
build_taxonomy_intra_doc_html,
|
| 347 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
|
| 349 |
# Spécialisation : construit une map {engine: counts} depuis les
|
| 350 |
# ``aggregated_taxonomy`` ; un moteur sans taxonomie est exclu.
|
|
@@ -427,6 +434,14 @@ class ReportGenerator:
|
|
| 427 |
"marginal_cost_html": build_marginal_cost_html(
|
| 428 |
report_data.get("marginal_cost"), labels=labels,
|
| 429 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
}
|
| 431 |
|
| 432 |
@classmethod
|
|
|
|
| 345 |
from picarones.reports.html.renderers.taxonomy_intra_doc import (
|
| 346 |
build_taxonomy_intra_doc_html,
|
| 347 |
)
|
| 348 |
+
# Phase B6 (mai 2026) — sections par vue d'évaluation
|
| 349 |
+
# (text_final, alto_documentary, searchability) issues du
|
| 350 |
+
# RunOrchestrator. Adaptive : "" si benchmark.view_results
|
| 351 |
+
# est vide (chemin legacy sans vues).
|
| 352 |
+
from picarones.reports.html.renderers.view_results import (
|
| 353 |
+
build_view_results_html,
|
| 354 |
+
)
|
| 355 |
|
| 356 |
# Spécialisation : construit une map {engine: counts} depuis les
|
| 357 |
# ``aggregated_taxonomy`` ; un moteur sans taxonomie est exclu.
|
|
|
|
| 434 |
"marginal_cost_html": build_marginal_cost_html(
|
| 435 |
report_data.get("marginal_cost"), labels=labels,
|
| 436 |
),
|
| 437 |
+
# Phase B6 (mai 2026) — sections par vue d'évaluation.
|
| 438 |
+
"view_results_html": build_view_results_html(
|
| 439 |
+
self.benchmark.view_results,
|
| 440 |
+
all_engine_names=[
|
| 441 |
+
r.engine_name for r in self.benchmark.engine_reports
|
| 442 |
+
],
|
| 443 |
+
lang=self.lang,
|
| 444 |
+
),
|
| 445 |
}
|
| 446 |
|
| 447 |
@classmethod
|
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Phase B6 — rendu HTML des ``BenchmarkResult.view_results``.
|
| 2 |
+
|
| 3 |
+
Présente les ViewResult produits par le ``RunOrchestrator`` (vues
|
| 4 |
+
canoniques ``text_final``, ``alto_documentary``, ``searchability``)
|
| 5 |
+
sous forme de sections HTML par vue.
|
| 6 |
+
|
| 7 |
+
Pour chaque vue présente dans ``benchmark.view_results`` :
|
| 8 |
+
|
| 9 |
+
- Tableau ``engine × moyenne_par_metric`` des métriques de la vue.
|
| 10 |
+
- Liste explicite des **pipelines omis** (qui ne produisent pas
|
| 11 |
+
d'artefact éligible — typiquement un OCR sans ALTO_XML omis de
|
| 12 |
+
``alto_documentary``).
|
| 13 |
+
- Note méthodologique en tête (rappel : ALTO != texte plat).
|
| 14 |
+
|
| 15 |
+
Adaptive
|
| 16 |
+
--------
|
| 17 |
+
Le renderer retourne ``""`` si ``benchmark.view_results`` est vide
|
| 18 |
+
(cas legacy ``run_benchmark_via_service`` sans RunOrchestrator).
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
from __future__ import annotations
|
| 22 |
+
|
| 23 |
+
import html
|
| 24 |
+
from statistics import mean
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Vues canoniques : libellés FR/EN par défaut + warnings courts.
|
| 28 |
+
_VIEW_DESCRIPTORS: dict[str, dict[str, dict[str, str]]] = {
|
| 29 |
+
"text_final": {
|
| 30 |
+
"fr": {
|
| 31 |
+
"title": "Vue textuelle (TextView)",
|
| 32 |
+
"note": (
|
| 33 |
+
"Compare les sorties textuelles finales (RAW_TEXT, "
|
| 34 |
+
"CORRECTED_TEXT) ou projetées (ALTO/PAGE/markdown → "
|
| 35 |
+
"texte plat). Les pipelines structurés sont projetés "
|
| 36 |
+
"vers du texte avant comparaison ; leur structure "
|
| 37 |
+
"spatiale est ignorée ici."
|
| 38 |
+
),
|
| 39 |
+
},
|
| 40 |
+
"en": {
|
| 41 |
+
"title": "Text view (TextView)",
|
| 42 |
+
"note": (
|
| 43 |
+
"Compares final text outputs (RAW_TEXT, CORRECTED_TEXT) "
|
| 44 |
+
"or projected ones (ALTO/PAGE/markdown → flat text). "
|
| 45 |
+
"Structured pipelines are projected to flat text before "
|
| 46 |
+
"comparison ; their spatial structure is ignored here."
|
| 47 |
+
),
|
| 48 |
+
},
|
| 49 |
+
},
|
| 50 |
+
"alto_documentary": {
|
| 51 |
+
"fr": {
|
| 52 |
+
"title": "Vue documentaire ALTO (AltoView)",
|
| 53 |
+
"note": (
|
| 54 |
+
"Mesure la fidélité STRUCTURELLE et TEXTUELLE de l'ALTO "
|
| 55 |
+
"produit (validité, lignes, bbox, CER/WER sur le texte "
|
| 56 |
+
"extrait). Les pipelines qui ne produisent pas d'ALTO "
|
| 57 |
+
"sont OMIS de cette vue (pas de score factice)."
|
| 58 |
+
),
|
| 59 |
+
},
|
| 60 |
+
"en": {
|
| 61 |
+
"title": "ALTO documentary view (AltoView)",
|
| 62 |
+
"note": (
|
| 63 |
+
"Measures STRUCTURAL and TEXTUAL fidelity of the "
|
| 64 |
+
"produced ALTO (validity, lines, bbox, CER/WER on "
|
| 65 |
+
"extracted text). Pipelines that don't produce ALTO "
|
| 66 |
+
"are OMITTED from this view (no fake score)."
|
| 67 |
+
),
|
| 68 |
+
},
|
| 69 |
+
},
|
| 70 |
+
"searchability": {
|
| 71 |
+
"fr": {
|
| 72 |
+
"title": "Recherchabilité (SearchView)",
|
| 73 |
+
"note": (
|
| 74 |
+
"Mesure le rappel fuzzy (Levenshtein ≤ 2) et la "
|
| 75 |
+
"préservation des séquences numériques (années, "
|
| 76 |
+
"cotes). Pertinent pour estimer l'impact d'un pipeline "
|
| 77 |
+
"sur les moteurs de recherche plein texte (Elastic, Solr)."
|
| 78 |
+
),
|
| 79 |
+
},
|
| 80 |
+
"en": {
|
| 81 |
+
"title": "Searchability (SearchView)",
|
| 82 |
+
"note": (
|
| 83 |
+
"Measures fuzzy recall (Levenshtein ≤ 2) and "
|
| 84 |
+
"numerical-sequence preservation (years, call numbers). "
|
| 85 |
+
"Relevant to estimate a pipeline's impact on full-text "
|
| 86 |
+
"search engines (Elastic, Solr)."
|
| 87 |
+
),
|
| 88 |
+
},
|
| 89 |
+
},
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def _section_descriptor(view_name: str, lang: str) -> dict[str, str]:
|
| 94 |
+
"""Retourne ``{title, note}`` pour une vue. Fallback générique
|
| 95 |
+
pour les vues custom non connues."""
|
| 96 |
+
descriptor = _VIEW_DESCRIPTORS.get(view_name, {}).get(lang)
|
| 97 |
+
if descriptor is not None:
|
| 98 |
+
return descriptor
|
| 99 |
+
if lang == "en":
|
| 100 |
+
return {
|
| 101 |
+
"title": f"View: {view_name}",
|
| 102 |
+
"note": "Custom view — see runner spec for details.",
|
| 103 |
+
}
|
| 104 |
+
return {
|
| 105 |
+
"title": f"Vue : {view_name}",
|
| 106 |
+
"note": "Vue custom — voir la spec du runner pour détails.",
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def _format_metric(value: float) -> str:
|
| 111 |
+
"""Formate une métrique en pourcentage si ∈ [0, 1], sinon
|
| 112 |
+
notation courte à 4 décimales."""
|
| 113 |
+
if 0.0 <= value <= 1.0:
|
| 114 |
+
return f"{value * 100:.2f}%"
|
| 115 |
+
return f"{value:.4f}"
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def _aggregate_view_metrics(
|
| 119 |
+
engine_buckets: dict[str, dict[str, dict[str, float]]],
|
| 120 |
+
) -> dict[str, dict[str, float]]:
|
| 121 |
+
"""Pour chaque engine, calcule la moyenne de chaque métrique sur
|
| 122 |
+
tous les documents. Retourne ``{engine: {metric: mean}}``.
|
| 123 |
+
|
| 124 |
+
Une métrique absente d'un doc spécifique est ignorée pour la
|
| 125 |
+
moyenne (tolérance aux ``failed_metrics`` de la vue).
|
| 126 |
+
"""
|
| 127 |
+
out: dict[str, dict[str, float]] = {}
|
| 128 |
+
for engine, doc_buckets in engine_buckets.items():
|
| 129 |
+
per_metric: dict[str, list[float]] = {}
|
| 130 |
+
for doc_metrics in doc_buckets.values():
|
| 131 |
+
for metric, value in doc_metrics.items():
|
| 132 |
+
if isinstance(value, (int, float)):
|
| 133 |
+
per_metric.setdefault(metric, []).append(float(value))
|
| 134 |
+
out[engine] = {
|
| 135 |
+
metric: mean(values)
|
| 136 |
+
for metric, values in per_metric.items()
|
| 137 |
+
if values
|
| 138 |
+
}
|
| 139 |
+
return out
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def build_view_results_html(
    view_results: dict[str, dict[str, dict[str, dict[str, float]]]] | None,
    all_engine_names: list[str],
    *,
    lang: str = "fr",
) -> str:
    """Build the HTML of the per-view result sections.

    One ``<div class="chart-card view-results-section">`` is produced per
    view, in alphabetical order of view name. Each section holds a title,
    a methodological note, a ``metric × engine`` table of per-engine means
    and an explicit list of the pipelines omitted from the view.

    Parameters
    ----------
    view_results:
        Map ``{view_name: {engine_name: {doc_id: {metric: value}}}}``.
        ``None`` or an empty map yields ``""``.
    all_engine_names:
        Names of every engine of the benchmark. Engines listed here but
        absent from a view are reported as omitted from that view.
    lang:
        ``"fr"`` (default) or ``"en"``. Any other value is rendered in
        French, so labels and section descriptors use the same language.

    Returns
    -------
    An HTML string in which every dynamic value (engine names, metric
    names, labels) has been passed through ``html.escape``. It is
    presumably inserted unescaped through a ``view_results_html``
    template placeholder — confirm against the report templates.
    """
    if not view_results:
        return ""

    # Resolve the language once: unsupported codes fall back to French,
    # the default of this function, instead of mixing both languages.
    lang = "en" if lang == "en" else "fr"
    french = lang == "fr"

    sections: list[str] = []
    all_engines_set = set(all_engine_names)
    omitted_label = "Pipelines omis" if french else "Omitted pipelines"
    no_omission_label = (
        "Tous les pipelines éligibles."
        if french
        else "All pipelines eligible."
    )
    metric_label = "Métrique" if french else "Metric"
    no_data_label = (
        "Aucun pipeline éligible pour cette vue."
        if french
        else "No pipeline eligible for this view."
    )

    for view_name in sorted(view_results):
        descriptor = _section_descriptor(view_name, lang)
        engine_buckets = view_results[view_name]
        aggregated = _aggregate_view_metrics(engine_buckets)
        eligible_engines = sorted(aggregated.keys())
        omitted = sorted(all_engines_set - set(eligible_engines))

        # Union of the metrics observed across all engines of the view.
        all_metrics: set[str] = set()
        for engine_metrics in aggregated.values():
            all_metrics.update(engine_metrics.keys())
        sorted_metrics = sorted(all_metrics)

        # Header and methodological note.
        section_lines: list[str] = [
            '<div class="chart-card view-results-section"'
            ' style="grid-column:1/-1">',
            f'<h3>{html.escape(descriptor["title"])}</h3>',
            f'<p class="view-note"><em>{html.escape(descriptor["note"])}'
            '</em></p>',
        ]

        if not eligible_engines:
            section_lines.append(
                '<p class="view-no-data">'
                + html.escape(no_data_label)
                + "</p>",
            )
        else:
            # Table: one row per metric, one column per eligible engine.
            section_lines.append('<table class="view-results-table">')
            section_lines.append("<thead><tr>")
            section_lines.append(
                f'<th>{html.escape(metric_label)}</th>',
            )
            for engine in eligible_engines:
                section_lines.append(
                    f'<th>{html.escape(engine)}</th>',
                )
            section_lines.append("</tr></thead>")
            section_lines.append("<tbody>")
            for metric in sorted_metrics:
                section_lines.append("<tr>")
                section_lines.append(
                    f'<td><code>{html.escape(metric)}</code></td>',
                )
                for engine in eligible_engines:
                    value = aggregated.get(engine, {}).get(metric)
                    # An engine that never reported this metric gets a dash.
                    cell = (
                        _format_metric(value)
                        if value is not None
                        else "—"
                    )
                    section_lines.append(f"<td>{html.escape(cell)}</td>")
                section_lines.append("</tr>")
            section_lines.append("</tbody></table>")

        # Omitted pipelines are always listed: explicit beats silent.
        section_lines.append(
            f'<p class="view-omitted"><strong>{html.escape(omitted_label)} :'
            "</strong> ",
        )
        if omitted:
            section_lines.append(
                ", ".join(f"<code>{html.escape(e)}</code>" for e in omitted),
            )
        else:
            section_lines.append(
                f"<em>{html.escape(no_omission_label)}</em>",
            )
        section_lines.append("</p>")

        section_lines.append("</div>")
        sections.append("\n".join(section_lines))

    return "\n".join(sections)
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
__all__ = ["build_view_results_html"]
|
|
@@ -209,6 +209,14 @@
|
|
| 209 |
</div>
|
| 210 |
{% endif %}
|
| 211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
<!-- Sprint 86 — A.II.5 : recherchabilité fuzzy + précision sur
|
| 213 |
séquences numériques. Adaptive : n'apparaît que si au moins
|
| 214 |
un moteur a du signal. -->
|
|
|
|
| 209 |
</div>
|
| 210 |
{% endif %}
|
| 211 |
|
| 212 |
+
<!-- Phase B6 (mai 2026) — sections par EvaluationView du
|
| 213 |
+
RunOrchestrator (text_final, alto_documentary, searchability).
|
| 214 |
+
Adaptive : n'apparaît que si benchmark.view_results est non
|
| 215 |
+
vide (chemin via RunOrchestrator). -->
|
| 216 |
+
{% if view_results_html %}
|
| 217 |
+
{{ view_results_html | safe }}
|
| 218 |
+
{% endif %}
|
| 219 |
+
|
| 220 |
<!-- Sprint 86 — A.II.5 : recherchabilité fuzzy + précision sur
|
| 221 |
séquences numériques. Adaptive : n'apparaît que si au moins
|
| 222 |
un moteur a du signal. -->
|
|
@@ -13,6 +13,12 @@ from picarones.domain import (
|
|
| 13 |
ArtifactType,
|
| 14 |
MetricSpec,
|
| 15 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
from picarones.evaluation.metrics.alto_structural import (
|
| 17 |
compute_alto_validity,
|
| 18 |
compute_line_count_ratio,
|
|
@@ -133,9 +139,16 @@ class TestAltoViewShape:
|
|
| 133 |
def test_default_metrics(self) -> None:
|
| 134 |
view = build_alto_view()
|
| 135 |
assert view.metric_names == DEFAULT_ALTO_METRICS
|
|
|
|
| 136 |
assert "alto_validity" in view.metric_names
|
| 137 |
assert "alto_line_count_ratio" in view.metric_names
|
| 138 |
assert "alto_word_box_coverage" in view.metric_names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
def test_no_projection(self) -> None:
|
| 141 |
view = build_alto_view()
|
|
@@ -180,6 +193,44 @@ def _build_alto_executor(payloads: dict[str, AltoDocument]) -> DefaultEvaluation
|
|
| 180 |
),
|
| 181 |
compute_word_box_coverage,
|
| 182 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
projectors = ProjectorRegistry() # AltoView n'a pas besoin de projecteur
|
| 184 |
|
| 185 |
def loader(art: Artifact) -> AltoDocument:
|
|
|
|
| 13 |
ArtifactType,
|
| 14 |
MetricSpec,
|
| 15 |
)
|
| 16 |
+
from picarones.evaluation.metrics.alto_metrics import (
|
| 17 |
+
alto_text_cer,
|
| 18 |
+
alto_text_mer,
|
| 19 |
+
alto_text_wer,
|
| 20 |
+
alto_text_wil,
|
| 21 |
+
)
|
| 22 |
from picarones.evaluation.metrics.alto_structural import (
|
| 23 |
compute_alto_validity,
|
| 24 |
compute_line_count_ratio,
|
|
|
|
| 139 |
def test_default_metrics(self) -> None:
|
| 140 |
view = build_alto_view()
|
| 141 |
assert view.metric_names == DEFAULT_ALTO_METRICS
|
| 142 |
+
# 3 métriques structurelles (Sprint A14-S15).
|
| 143 |
assert "alto_validity" in view.metric_names
|
| 144 |
assert "alto_line_count_ratio" in view.metric_names
|
| 145 |
assert "alto_word_box_coverage" in view.metric_names
|
| 146 |
+
# 4 métriques textuelles ajoutées en Phase B6 — opèrent sur le
|
| 147 |
+
# texte plat extrait de l'ALTO via ``extract_text_from_alto``.
|
| 148 |
+
assert "alto_text_cer" in view.metric_names
|
| 149 |
+
assert "alto_text_wer" in view.metric_names
|
| 150 |
+
assert "alto_text_mer" in view.metric_names
|
| 151 |
+
assert "alto_text_wil" in view.metric_names
|
| 152 |
|
| 153 |
def test_no_projection(self) -> None:
|
| 154 |
view = build_alto_view()
|
|
|
|
| 193 |
),
|
| 194 |
compute_word_box_coverage,
|
| 195 |
)
|
| 196 |
+
# Phase B6 — métriques textuelles ajoutées aux defaults AltoView.
|
| 197 |
+
# Elles attendent un payload qui expose ``xml_content`` ou un
|
| 198 |
+
# str XML brut (cf. ``extract_text_from_alto``). Les payloads
|
| 199 |
+
# ``AltoDocument`` typés du test échouent silencieusement dans
|
| 200 |
+
# cette extraction → CER 1.0. On les enregistre quand même pour
|
| 201 |
+
# vérifier le wiring registry/executor.
|
| 202 |
+
metrics.register(
|
| 203 |
+
MetricSpec(
|
| 204 |
+
name="alto_text_cer",
|
| 205 |
+
input_types=(ArtifactType.ALTO_XML, ArtifactType.ALTO_XML),
|
| 206 |
+
higher_is_better=False,
|
| 207 |
+
),
|
| 208 |
+
alto_text_cer,
|
| 209 |
+
)
|
| 210 |
+
metrics.register(
|
| 211 |
+
MetricSpec(
|
| 212 |
+
name="alto_text_wer",
|
| 213 |
+
input_types=(ArtifactType.ALTO_XML, ArtifactType.ALTO_XML),
|
| 214 |
+
higher_is_better=False,
|
| 215 |
+
),
|
| 216 |
+
alto_text_wer,
|
| 217 |
+
)
|
| 218 |
+
metrics.register(
|
| 219 |
+
MetricSpec(
|
| 220 |
+
name="alto_text_mer",
|
| 221 |
+
input_types=(ArtifactType.ALTO_XML, ArtifactType.ALTO_XML),
|
| 222 |
+
higher_is_better=False,
|
| 223 |
+
),
|
| 224 |
+
alto_text_mer,
|
| 225 |
+
)
|
| 226 |
+
metrics.register(
|
| 227 |
+
MetricSpec(
|
| 228 |
+
name="alto_text_wil",
|
| 229 |
+
input_types=(ArtifactType.ALTO_XML, ArtifactType.ALTO_XML),
|
| 230 |
+
higher_is_better=False,
|
| 231 |
+
),
|
| 232 |
+
alto_text_wil,
|
| 233 |
+
)
|
| 234 |
projectors = ProjectorRegistry() # AltoView n'a pas besoin de projecteur
|
| 235 |
|
| 236 |
def loader(art: Artifact) -> AltoDocument:
|
|
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Phase B6 — rendu HTML des ``BenchmarkResult.view_results``.
|
| 2 |
+
|
| 3 |
+
Vérifie que le renderer ``build_view_results_html`` :
|
| 4 |
+
|
| 5 |
+
1. Retourne ``""`` quand ``view_results`` est vide ou ``None`` (compat
|
| 6 |
+
ascendante : un BenchmarkResult issu de
|
| 7 |
+
``run_benchmark_via_service`` sans RunOrchestrator n'a pas de
|
| 8 |
+
``view_results``).
|
| 9 |
+
2. Génère une section par vue présente, avec titre + note
|
| 10 |
+
méthodologique + tableau engine × moyenne_par_metric.
|
| 11 |
+
3. Liste explicitement les pipelines OMIS de chaque vue (= ceux qui
|
| 12 |
+
n'ont pas produit d'artefact éligible).
|
| 13 |
+
4. Échappe le HTML correctement (résistance XSS via noms d'engine
|
| 14 |
+
custom).
|
| 15 |
+
5. S'intègre proprement dans le rapport HTML complet (test bout-en-bout
|
| 16 |
+
via ``ReportGenerator``).
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
|
| 21 |
+
from picarones.evaluation.benchmark_result import BenchmarkResult, EngineReport
|
| 22 |
+
from picarones.evaluation.metric_result import MetricsResult
|
| 23 |
+
from picarones.reports.html.renderers.view_results import (
|
| 24 |
+
build_view_results_html,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 29 |
+
# Helpers
|
| 30 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _make_engine_report(name: str) -> EngineReport:
    """Build a bare ``EngineReport`` for the engine called *name*.

    The report carries a fixed ``"test"`` version, an empty config, no
    per-document results and no aggregated metrics.
    """
    fields = {
        "engine_name": name,
        "engine_version": "test",
        "engine_config": {},
        "document_results": [],
        "aggregated_metrics": {},
    }
    return EngineReport(**fields)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 44 |
+
# Renderer adaptatif (cas vides)
|
| 45 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class TestEmptyViewResults:
    """Missing or empty ``view_results`` must render as an empty string."""

    def test_none_returns_empty_string(self) -> None:
        rendered = build_view_results_html(None, all_engine_names=["t"])
        assert rendered == ""

    def test_empty_dict_returns_empty_string(self) -> None:
        rendered = build_view_results_html({}, all_engine_names=["t"])
        assert rendered == ""
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 57 |
+
# Rendu d'une vue avec données
|
| 58 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class TestSingleViewRendering:
    """Rendering of one ``text_final`` view: one engine, two documents."""

    def _sample_view_results(
        self,
    ) -> dict[str, dict[str, dict[str, dict[str, float]]]]:
        """Return a ``{view: {engine: {doc: {metric: value}}}}`` fixture."""
        per_document = {
            "doc1": {"cer": 0.05, "wer": 0.10},
            "doc2": {"cer": 0.03, "wer": 0.08},
        }
        return {"text_final": {"tesseract": per_document}}

    def _render(self) -> str:
        """Render the sample fixture with ``tesseract`` as the only engine."""
        return build_view_results_html(
            self._sample_view_results(), all_engine_names=["tesseract"],
        )

    def test_section_contains_view_title(self) -> None:
        rendered = self._render()
        assert "TextView" in rendered
        # The methodological note must be present.
        lowered = rendered.lower()
        assert "projetées" in lowered or "projeté" in lowered

    def test_section_contains_engine_metrics_table(self) -> None:
        rendered = self._render()
        expected_fragments = (
            # Header and metric names.
            "tesseract",
            "cer",
            "wer",
            # Mean CER: (0.05 + 0.03) / 2 = 0.04 -> 4.00%.
            "4.00%",
            # Mean WER: (0.10 + 0.08) / 2 = 0.09 -> 9.00%.
            "9.00%",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_no_omitted_when_all_eligible(self) -> None:
        rendered = self._render()
        # With no pipeline omitted, an "all pipelines eligible" message
        # is expected instead of an omitted-pipelines list.
        assert "éligibles" in rendered or "eligible" in rendered.lower()
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 104 |
+
# Pipelines omis (cas AltoView avec engine OCR pur)
|
| 105 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class TestOmittedPipelines:
    """Engines absent from a view must be listed explicitly as omitted."""

    def test_alto_view_omits_text_only_engine(self) -> None:
        """Typical case: only the ALTO-producing pipeline has AltoView
        results, so the text-only OCR pipeline is reported as omitted."""
        engines = ["tesseract_alto", "tesseract_text_only"]
        # "tesseract_text_only" has no entry here -> omitted from this view.
        alto_results = {"tesseract_alto": {"doc1": {"alto_validity": 1.0}}}
        view_results = {"alto_documentary": alto_results}

        rendered = build_view_results_html(
            view_results,
            all_engine_names=engines,
        )

        assert "tesseract_alto" in rendered
        # The text-only engine shows up in the omitted-pipelines list.
        assert "tesseract_text_only" in rendered
        # The "omitted pipelines" label itself is rendered.
        lowered = rendered.lower()
        assert "omis" in lowered or "omitted" in lowered
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 132 |
+
# Multi-vues (le cas typique de production)
|
| 133 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
class TestMultipleViews:
    """Several views rendered together (the typical production case)."""

    def test_renders_three_canonical_views(self) -> None:
        # One (view, metric, value) triple per canonical view; each is
        # attached to the same engine and document.
        canonical = (
            ("text_final", "cer", 0.1),
            ("alto_documentary", "alto_validity", 1.0),
            ("searchability", "searchability_recall", 0.95),
        )
        view_results = {
            view: {"tesseract": {"doc1": {metric: value}}}
            for view, metric, value in canonical
        }

        rendered = build_view_results_html(
            view_results, all_engine_names=["tesseract"],
        )

        for title in ("TextView", "AltoView", "SearchView"):
            assert title in rendered
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 158 |
+
# Sécurité — XSS via noms d'engine custom
|
| 159 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
class TestXssEscaping:
    """HTML-significant characters in user-controlled names must be escaped.

    Each test feeds a name containing ``<`` and ``>`` and checks two things:
    the raw markup never reaches the output, and its entity-escaped form
    (``&lt;...&gt;``) does. The positive check must target the escaped form;
    asserting the raw string both absent and present can never pass.
    """

    def test_engine_name_with_html_chars_is_escaped(self) -> None:
        payload = "<script>alert(1)</script>"
        view_results = {
            "text_final": {
                payload: {"doc1": {"cer": 0.1}},
            },
        }
        rendered = build_view_results_html(
            view_results, all_engine_names=[payload],
        )
        # The raw tag must not appear unescaped.
        assert "<script>" not in rendered
        # The escaped entity form is present instead.
        assert "&lt;script&gt;" in rendered

    def test_metric_name_with_html_chars_is_escaped(self) -> None:
        view_results = {
            "text_final": {
                "tesseract": {"doc1": {"<weird>": 0.1}},
            },
        }
        rendered = build_view_results_html(
            view_results, all_engine_names=["tesseract"],
        )
        # Same contract for metric names: raw form absent, escaped present.
        assert "<weird>" not in rendered
        assert "&lt;weird&gt;" in rendered
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 191 |
+
# Internationalization
|
| 192 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
class TestI18n:
    """Labels follow the ``lang`` argument (French and English)."""

    def _sample(self) -> dict:
        """Return an ``alto_documentary`` fixture with a single engine."""
        return {
            "alto_documentary": {
                "tess": {"doc1": {"alto_validity": 1.0}},
            },
        }

    def _render_lowercase(self, lang: str) -> str:
        """Render the fixture in *lang* and lower-case the result.

        ``"other"`` has no results in the fixture, so it is an omitted
        pipeline and the omitted-pipelines label gets rendered.
        """
        rendered = build_view_results_html(
            self._sample(), all_engine_names=["tess", "other"], lang=lang,
        )
        return rendered.lower()

    def test_french_default_labels(self) -> None:
        lowered = self._render_lowercase("fr")
        assert "documentaire" in lowered
        assert "pipelines omis" in lowered

    def test_english_labels(self) -> None:
        lowered = self._render_lowercase("en")
        assert "documentary" in lowered
        assert "omitted pipelines" in lowered
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 219 |
+
# Intégration avec ReportGenerator
|
| 220 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
class TestReportGeneratorIntegration:
    """End-to-end checks through ``ReportGenerator``."""

    def _make_benchmark(
        self, with_view_results: bool,
    ) -> BenchmarkResult:
        """Build a one-engine, one-document ``BenchmarkResult``.

        The fixture is deliberately minimal (empty aggregates): the tests
        only look at whether the view-results section is present.
        ``view_results`` is populated only when *with_view_results* is true.
        """
        from picarones.evaluation.benchmark_result import DocumentResult

        perfect_metrics = MetricsResult(
            cer=0.0, cer_nfc=0.0, cer_caseless=0.0,
            wer=0.0, wer_normalized=0.0, mer=0.0, wil=0.0,
            reference_length=7, hypothesis_length=7,
        )
        document = DocumentResult(
            doc_id="doc1",
            image_path="/tmp/doc1.png",
            ground_truth="Bonjour",
            hypothesis="Bonjour",
            metrics=perfect_metrics,
            duration_seconds=0.1,
        )
        engine = EngineReport(
            engine_name="tesseract",
            engine_version="5.x",
            engine_config={},
            document_results=[document],
            aggregated_metrics={},
        )

        if with_view_results:
            view_results: dict = {
                "text_final": {
                    "tesseract": {"doc1": {"cer": 0.0, "wer": 0.0}},
                },
                # No engine produced ALTO here: the view is empty, and
                # tesseract is expected to be listed as omitted.
                "alto_documentary": {},
            }
        else:
            view_results = {}

        return BenchmarkResult(
            corpus_name="test_corpus",
            corpus_source=None,
            document_count=1,
            engine_reports=[engine],
            view_results=view_results,
        )

    def _generate_report_html(self, tmp_path, with_view_results: bool) -> str:
        """Generate the French report under *tmp_path* and return its text."""
        from picarones.reports.html.generator import ReportGenerator

        benchmark = self._make_benchmark(with_view_results=with_view_results)
        target = tmp_path / "report.html"
        ReportGenerator(benchmark, lang="fr").generate(target)
        return target.read_text(encoding="utf-8")

    def test_report_includes_view_section_when_present(self, tmp_path) -> None:
        report = self._generate_report_html(tmp_path, with_view_results=True)
        assert "TextView" in report
        assert "AltoView" in report

    def test_report_omits_view_section_when_absent(self, tmp_path) -> None:
        """Backward compatibility: without view_results, the legacy HTML
        report is untouched (no `view-results-section` anywhere)."""
        report = self._generate_report_html(tmp_path, with_view_results=False)
        # The renderer's CSS marker must be absent.
        assert "view-results-section" not in report
|