Spaces:

Ma-Ri-Ba-Ku
/

Picarones

Running

Picarones / tests /integration /test_sprint_a14_s21_report_service.py

Claude

feat(sprint-H.3)!: renommage reports_v2/ → reports/

9011070 unverified about 1 month ago

20.8 kB

	"""Sprint A14-S21 — ``ReportService`` (rendu HTML depuis ``RunResult``).

	Couverture :

	- Rendu basique : header (corpus, run_id, code_version, timestamps),
	vue d'ensemble pipelines (succès/échecs/durée), une section par
	vue avec table pipeline × métriques.
	- Pattern d'omission visible : un pipeline qui ne produit pas
	d'artefact éligible affiche ``OMIS`` (pas un ``0`` factice).
	- Anti-injection : ``corpus_name`` / ``view.name`` /
	``pipeline_name`` contenant ``<script>`` sont échappés.
	- Persistance round-trip : ``BenchmarkService.persist`` → 3 fichiers
	→ ``ReportService.render_from_dir`` → HTML équivalent au rendu
	in-memory.
	- Bilingue : labels FR vs EN distincts.
	- Cas dégénérés : RunResult vide, vue sans aucun ViewResult.
	"""

	from __future__ import annotations

	import re
	from datetime import datetime, timezone
	from pathlib import Path

	import pytest

	from picarones.reports.html import HtmlReportRenderer as ReportService
	from picarones.domain.evaluation_spec import EvaluationView
	from picarones.domain.artifacts import ArtifactType
	from picarones.domain.run_manifest import RunManifest
	from picarones.app.results import RunResult
	from picarones.evaluation.views.base import ViewResult


	# ──────────────────────────────────────────────────────────────────
	# Helpers de fabrication de RunResult synthétique
	# ──────────────────────────────────────────────────────────────────


	def _empty_view(
	    *,
	    name: str = "text_final",
	    description: str = "Vue texte final",
	    candidate_types: frozenset[ArtifactType] | None = None,
	    metric_names: tuple[str, ...] = ("cer", "wer"),
	    warnings: tuple[str, ...] = (),
	    ignored_dimensions: tuple[str, ...] = (),
	) -> EvaluationView:
	    """Build a synthetic ``EvaluationView`` for the rendering tests.

	    All arguments are keyword-only and are forwarded unchanged to
	    ``EvaluationView``. The view is created without any projection
	    (``projection=None`` and an empty ``projections_by_source_type``).

	    Args:
	        name: View name.
	        description: Human-readable description of the view.
	        candidate_types: Artifact types accepted by the view. ``None``
	            selects the default ``{ArtifactType.RAW_TEXT}``.
	        metric_names: Names of the metrics attached to the view.
	        warnings: Warning strings attached to the view.
	        ignored_dimensions: Dimensions the view declares as ignored.

	    Returns:
	        The constructed ``EvaluationView``.
	    """
	    # ``None`` is the sentinel for "use the default candidate set"; an
	    # explicitly passed empty frozenset is forwarded as-is.
	    if candidate_types is None:
	        candidate_types = frozenset({ArtifactType.RAW_TEXT})
	    return EvaluationView(
	        name=name,
	        description=description,
	        candidate_types=candidate_types,
	        projection=None,
	        projections_by_source_type={},
	        metric_names=metric_names,
	        warnings=warnings,
	        ignored_dimensions=ignored_dimensions,
	    )


	def _manifest(
	    *,
	    corpus_name: str = "test_corpus",
	    pipeline_names: tuple[str, ...] = ("pA", "pB"),
	    views: tuple[EvaluationView, ...] = (),
	    run_id: str = "test_run_001",
	    code_version: str = "1.0.0-s21",
	    n_documents: int = 2,
	) -> RunManifest:
	    """Build a synthetic ``RunManifest`` with fixed UTC timestamps.

	    The run starts at 2026-05-04T10:00:00Z and completes one second
	    later. ``views`` is passed to the manifest as ``view_specs``;
	    ``dependencies_lock`` and ``metadata`` are always empty dicts.
	    """
	    run_started = datetime(2026, 5, 4, 10, 0, 0, tzinfo=timezone.utc)
	    run_completed = run_started.replace(second=1)
	    manifest_fields = {
	        "run_id": run_id,
	        "corpus_name": corpus_name,
	        "n_documents": n_documents,
	        "pipeline_names": pipeline_names,
	        "view_specs": views,
	        "code_version": code_version,
	        "started_at": run_started,
	        "completed_at": run_completed,
	        "dependencies_lock": {},
	        "metadata": {},
	    }
	    return RunManifest(**manifest_fields)


	# ──────────────────────────────────────────────────────────────────
	# Fixture : run BnF S18 — pour tests d'intégration end-to-end
	# ──────────────────────────────────────────────────────────────────


	@pytest.fixture
	def bnf_run_result(tmp_path: Path) -> RunResult:
	    """Reuse the S18 end-to-end scenario to obtain a realistic ``RunResult``.

	    The sibling test module ``test_sprint_a14_s18_bnf_e2e`` is imported by
	    its bare name, so this file's directory has to be on ``sys.path``.

	    Args:
	        tmp_path: pytest temporary directory handed to the benchmark run.

	    Returns:
	        The second element of the pair returned by
	        ``_run_full_benchmark(tmp_path)``.
	    """
	    import sys

	    tests_dir = str(Path(__file__).parent)
	    # This fixture runs once per requesting test; insert the directory only
	    # when it is missing so ``sys.path`` does not accumulate duplicates.
	    if tests_dir not in sys.path:
	        sys.path.insert(0, tests_dir)
	    from test_sprint_a14_s18_bnf_e2e import _run_full_benchmark

	    _, result = _run_full_benchmark(tmp_path)
	    return result


	# ──────────────────────────────────────────────────────────────────
	# Rendu basique
	# ──────────────────────────────────────────────────────────────────


	class TestBasicRendering:
	    """Basic rendering: document skeleton, header, pipeline list, view sections."""

	    def test_render_returns_complete_html_document(self) -> None:
	        """The output is a full HTML document with charset and inline styles."""
	        run = RunResult(
	            manifest=_manifest(views=(_empty_view(),)),
	            document_results=(),
	        )
	        page = ReportService().render(run)
	        assert page.startswith("<!DOCTYPE html>")
	        assert page.rstrip().endswith("</html>")
	        for fragment in ('<meta charset="utf-8">', "<style>"):
	            assert fragment in page

	    def test_header_contains_manifest_fields(self) -> None:
	        """Corpus name, run id, code version and start timestamp are rendered."""
	        header_fields = {
	            "corpus_name": "bnf_xviiie",
	            "run_id": "bnf_xviiie_20260504T100001Z",
	            "code_version": "2.1.0",
	        }
	        single_view = _empty_view()
	        run = RunResult(
	            manifest=_manifest(views=(single_view,), **header_fields),
	            document_results=(),
	        )
	        page = ReportService().render(run)
	        for expected in header_fields.values():
	            assert expected in page
	        # ISO timestamp of the manifest's start time.
	        assert "2026-05-04T10:00:00" in page

	    def test_pipelines_overview_lists_all_manifest_pipelines(self) -> None:
	        """Every manifest pipeline is listed, even without any PipelineResult."""
	        declared = ("alpha", "beta", "gamma")
	        single_view = _empty_view()
	        run = RunResult(
	            manifest=_manifest(pipeline_names=declared, views=(single_view,)),
	            document_results=(),
	        )
	        page = ReportService().render(run)
	        absent = [pipeline for pipeline in declared if pipeline not in page]
	        assert not absent

	    def test_one_section_per_view(self) -> None:
	        """Each view of the manifest gets its own ``id="view-<name>"`` anchor."""
	        view_names = ("text_final", "alto_documentary", "searchability")
	        all_views = tuple(_empty_view(name=label) for label in view_names)
	        run = RunResult(
	            manifest=_manifest(views=all_views),
	            document_results=(),
	        )
	        page = ReportService().render(run)
	        for label in view_names:
	            assert f'id="view-{label}"' in page


	# ──────────────────────────────────────────────────────────────────
	# Pattern d'omission visible
	# ──────────────────────────────────────────────────────────────────


	class TestOmissionVisibility:
	    """A pipeline without an eligible artifact must be shown as ``OMIS``."""

	    def test_pipeline_with_no_view_results_is_marked_omitted(
	        self, bnf_run_result: RunResult,
	    ) -> None:
	        """On the BnF S18 scenario, AltoView omits ``pipeline_simple_ocr``
	        and ``pipeline_ocr_plus_correction``."""
	        report_html = ReportService().render(bnf_run_result)
	        # Locate the AltoView section and check the omissions there.
	        alto_section = _extract_section(report_html, "alto_documentary")
	        # Both omitted pipelines must still be listed (flagged OMIS); the
	        # third pipeline is expected to carry numeric values.
	        assert "pipeline_simple_ocr" in alto_section
	        assert "pipeline_ocr_plus_correction" in alto_section
	        # At least two OMIS cells in the AltoView section.
	        assert alto_section.count("OMIS") >= 2

	    def test_omitted_cell_explains_why(
	        self, bnf_run_result: RunResult,
	    ) -> None:
	        """The omission tooltip carries its explanatory text."""
	        report_html = ReportService().render(bnf_run_result)
	        # The tooltip explains the omission (French by default). Per the
	        # original test note the text goes through ``html.escape``, which
	        # turns apostrophes into ``&#x27;`` -- so the escaped form is what
	        # must be searched for, not the raw apostrophe.
	        assert "ne produisant pas d&#x27;artefact" in report_html
	        assert "Pas de score factice" in report_html

	    def test_no_omitted_marker_on_view_where_all_eligible(
	        self, bnf_run_result: RunResult,
	    ) -> None:
	        """TextView accepts every BnF pipeline, so no OMIS marker appears."""
	        report_html = ReportService().render(bnf_run_result)
	        text_section = _extract_section(report_html, "text_final")
	        assert "OMIS" not in text_section


	# ──────────────────────────────────────────────────────────────────
	# Anti-injection HTML
	# ──────────────────────────────────────────────────────────────────


	class TestAntiInjection:
	    """User-controlled strings must reach the report HTML-escaped.

	    Every test checks two things: the raw markup payload is absent from the
	    rendered page, and its entity-escaped form (``&lt;`` / ``&gt;``) is
	    present, proving the value was escaped rather than dropped.
	    """

	    def test_corpus_name_with_html_is_escaped(self) -> None:
	        """A ``<script>`` payload in ``corpus_name`` is escaped."""
	        view = _empty_view()
	        manifest = _manifest(
	            corpus_name="<script>alert(1)</script>",
	            views=(view,),
	        )
	        result = RunResult(manifest=manifest, document_results=())
	        report_html = ReportService().render(result)
	        assert "<script>alert(1)</script>" not in report_html
	        assert "&lt;script&gt;alert(1)&lt;/script&gt;" in report_html

	    def test_pipeline_name_with_html_is_escaped(self) -> None:
	        """An ``<img onerror>`` payload in a pipeline name is escaped."""
	        view = _empty_view()
	        manifest = _manifest(
	            pipeline_names=("<img src=x onerror=alert(1)>",),
	            views=(view,),
	        )
	        result = RunResult(manifest=manifest, document_results=())
	        report_html = ReportService().render(result)
	        assert "<img src=x" not in report_html
	        assert "&lt;img src=x" in report_html

	    def test_view_name_and_description_are_escaped(self) -> None:
	        """A description trying to close ``<style>`` is escaped."""
	        view = _empty_view(
	            name="evil_name",
	            description='</style><script>x</script>',
	        )
	        manifest = _manifest(views=(view,))
	        result = RunResult(manifest=manifest, document_results=())
	        report_html = ReportService().render(result)
	        assert "</style><script>" not in report_html
	        assert "&lt;/style&gt;&lt;script&gt;" in report_html

	    def test_view_warning_is_escaped(self) -> None:
	        """Markup inside a view warning is escaped."""
	        view = _empty_view(warnings=("<b>injected</b>",))
	        manifest = _manifest(views=(view,))
	        result = RunResult(manifest=manifest, document_results=())
	        report_html = ReportService().render(result)
	        assert "<b>injected</b>" not in report_html
	        assert "&lt;b&gt;injected&lt;/b&gt;" in report_html


	# ──────────────────────────────────────────────────────────────────
	# Persistance round-trip
	# ──────────────────────────────────────────────────────────────────


	class TestPersistenceRoundTrip:
	    """Persist a run to disk, reload it, and compare with the in-memory run."""

	    @staticmethod
	    def _persist(result: RunResult, out_dir: Path) -> None:
	        """Write ``result`` into ``out_dir`` via ``BenchmarkService.persist``.

	        ``persist`` is reached through a ``BenchmarkService`` instance, so a
	        throwaway service is assembled here. According to the original test
	        notes, its runner and view executor are not called by ``persist()``;
	        they only exist to satisfy the constructor.
	        """
	        from picarones.app.services import BenchmarkService
	        from picarones.evaluation.registry import MetricRegistry
	        from picarones.evaluation.projectors import ProjectorRegistry
	        from picarones.evaluation.views import DefaultEvaluationViewExecutor
	        from picarones.pipeline import CorpusRunner, PipelineExecutor

	        def loader(art):
	            # Placeholder artifact loader; not expected to be called.
	            return ""

	        view_executor = DefaultEvaluationViewExecutor.from_registries(
	            MetricRegistry(), ProjectorRegistry(), loader,
	        )
	        runner = CorpusRunner(
	            PipelineExecutor(adapter_resolver=lambda n: None),
	            max_in_flight=1,
	            timeout_seconds_per_doc=1.0,
	            poll_interval_seconds=0.001,
	        )
	        bench = BenchmarkService(
	            corpus_runner=runner,
	            view_executor=view_executor,
	            code_version="1.0.0-s18-bnf-test",
	        )
	        bench.persist(result, out_dir)

	    def test_render_from_dir_matches_render(
	        self, bnf_run_result: RunResult, tmp_path: Path,
	    ) -> None:
	        """Persisting then re-rendering yields the SAME HTML as the in-memory
	        render: string-equality evidence that persistence is lossless for
	        the needs of the report."""
	        out_dir = tmp_path / "persisted"
	        self._persist(bnf_run_result, out_dir)

	        svc = ReportService()
	        html_in_memory = svc.render(bnf_run_result)
	        html_from_disk = svc.render_from_dir(out_dir)
	        assert html_from_disk == html_in_memory

	    def test_load_run_result_roundtrip_preserves_structure(
	        self, bnf_run_result: RunResult, tmp_path: Path,
	    ) -> None:
	        """A reloaded run keeps corpus name, document count and per-view counts."""
	        out_dir = tmp_path / "persisted2"
	        self._persist(bnf_run_result, out_dir)
	        loaded = ReportService.load_run_result(out_dir)
	        assert loaded.manifest.corpus_name == bnf_run_result.manifest.corpus_name
	        assert loaded.n_documents == bnf_run_result.n_documents
	        # Same number of view results for every view.
	        for view in bnf_run_result.manifest.view_specs:
	            assert (
	                len(loaded.view_results_for(view.name))
	                == len(bnf_run_result.view_results_for(view.name))
	            )

	    def test_load_run_result_raises_on_missing_files(
	        self, tmp_path: Path,
	    ) -> None:
	        """Loading from an empty directory reports the missing manifest file."""
	        empty_dir = tmp_path / "nothing"
	        empty_dir.mkdir()
	        with pytest.raises(FileNotFoundError, match="run_manifest.json"):
	            ReportService.load_run_result(empty_dir)


	# ──────────────────────────────────────────────────────────────────
	# Bilingue FR / EN
	# ──────────────────────────────────────────────────────────────────


	class TestI18N:
	    """Report language selection: French default, English, unknown code."""

	    def test_french_labels_by_default(self) -> None:
	        """Without a ``lang`` argument the report uses French labels."""
	        run = RunResult(
	            manifest=_manifest(views=(_empty_view(),)),
	            document_results=(),
	        )
	        page = ReportService().render(run)
	        assert 'lang="fr"' in page
	        assert "Pipelines exécutées" in page
	        # Either of these two French labels is enough.
	        assert any(label in page for label in ("Avertissements", "Démarré"))

	    def test_english_labels(self) -> None:
	        """``lang="en"`` switches the document language and labels."""
	        run = RunResult(
	            manifest=_manifest(views=(_empty_view(),)),
	            document_results=(),
	        )
	        page = ReportService(lang="en").render(run)
	        for expected in ('lang="en"', "Pipelines executed"):
	            assert expected in page

	    def test_unknown_lang_falls_back_to_french(self) -> None:
	        """An unsupported language code produces a French document."""
	        run = RunResult(
	            manifest=_manifest(views=(_empty_view(),)),
	            document_results=(),
	        )
	        page = ReportService(lang="xx").render(run)
	        assert 'lang="fr"' in page


	# ──────────────────────────────────────────────────────────────────
	# Cas dégénérés
	# ──────────────────────────────────────────────────────────────────


	class TestEdgeCases:
	    """Degenerate inputs: empty run, view without results, warnings, ignored dimensions."""

	    def test_empty_run_result_renders_without_crashing(self) -> None:
	        """A manifest with no views, pipelines or documents still renders."""
	        bare_manifest = _manifest(views=(), pipeline_names=(), n_documents=0)
	        page = ReportService().render(
	            RunResult(manifest=bare_manifest, document_results=()),
	        )
	        assert "<!DOCTYPE html>" in page

	    def test_view_with_no_view_results_shows_empty_message(self) -> None:
	        """A view without pipelines shows an empty message or an empty table."""
	        lonely = _empty_view(name="lonely_view")
	        run = RunResult(
	            manifest=_manifest(views=(lonely,), pipeline_names=()),
	            document_results=(),
	        )
	        page = ReportService().render(run)
	        section = _extract_section(page, "lonely_view")
	        # Either the "Aucun pipeline" message is rendered or the table body
	        # is empty (no rows). Both behaviours are acceptable for S21.
	        acceptable_markers = ("Aucun pipeline", "<tbody>\n\n</tbody>")
	        assert any(marker in section for marker in acceptable_markers)

	    def test_view_displays_warnings_block(self) -> None:
	        """View warnings are rendered inside a ``class="warnings"`` block."""
	        warned = _empty_view(warnings=("Attention : projection lossy.",))
	        run = RunResult(
	            manifest=_manifest(views=(warned,)),
	            document_results=(),
	        )
	        page = ReportService().render(run)
	        for expected in ("Attention : projection lossy.", 'class="warnings"'):
	            assert expected in page

	    def test_view_displays_ignored_dimensions(self) -> None:
	        """Ignored dimensions are listed comma-separated."""
	        dims = ("geometry", "block_structure")
	        run = RunResult(
	            manifest=_manifest(views=(_empty_view(ignored_dimensions=dims),)),
	            document_results=(),
	        )
	        page = ReportService().render(run)
	        assert "geometry, block_structure" in page


	# ──────────────────────────────────────────────────────────────────
	# Smoke : rendu complet du scénario BnF S18
	# ──────────────────────────────────────────────────────────────────


	class TestSmokeBnFScenario:
	    """Smoke test: full rendering of the BnF S18 scenario."""

	    def test_bnf_report_contains_all_3_pipelines_and_3_views(
	        self, bnf_run_result: RunResult,
	    ) -> None:
	        """All three pipelines and all three view anchors appear in the report."""
	        expected_pipelines = (
	            "pipeline_simple_ocr",
	            "pipeline_structured_ocr",
	            "pipeline_ocr_plus_correction",
	        )
	        expected_views = (
	            "text_final",
	            "alto_documentary",
	            "searchability",
	        )
	        page = ReportService().render(bnf_run_result)
	        for pipeline_name in expected_pipelines:
	            assert pipeline_name in page
	        for view_name in expected_views:
	            anchor = f'id="view-{view_name}"'
	            assert anchor in page

	    def test_bnf_metric_values_appear(
	        self, bnf_run_result: RunResult,
	    ) -> None:
	        """The TextView section shows at least one 4-decimal metric value."""
	        page = ReportService().render(bnf_run_result)
	        text_section = _extract_section(page, "text_final")
	        # Values use the ".4f" format, e.g. "0.0000" or "0.0250".
	        found = re.search(r"[01]\.\d{4}", text_section)
	        assert found is not None, (
	            "aucune valeur numérique 4-digit trouvée dans TextView"
	        )


	# ──────────────────────────────────────────────────────────────────
	# Helpers de tests
	# ──────────────────────────────────────────────────────────────────


	def _extract_section(html: str, view_name: str) -> str:
	"""Extrait le HTML de la section ``<section id="view-{view_name}">``
	jusqu'au ``</section>`` correspondant."""
	marker = f'id="view-{view_name}"'
	start = html.find(marker)
	assert start != -1, f"section {view_name!r} introuvable dans le HTML"
	# On remonte au début de <section.
	section_start = html.rfind("<section", 0, start)
	section_end = html.find("</section>", start) + len("</section>")
	return html[section_start:section_end]


	# Keeps pyflakes quiet: ViewResult is imported to signal the intended
	# signature of the service's internal helpers.
	_ = ViewResult