Spaces:
Sleeping
Sleeping
"""Sprint 67 tests — HTML view of a composed-pipeline benchmark.

Covers:

1. ``build_pipeline_summary_html``: shows pipeline, corpus,
   n_docs, succeeded/failed, total duration.
2. ``build_pipeline_steps_table_html``: per-step table with the
   expected columns, formatted junction metrics, error_breakdown
   displayed, empty when there is no step.
3. ``build_pipeline_report_html``: standalone HTML document
   (``<!doctype html>``, head, body, inline styles).
4. HTML anti-injection: pipeline / corpus / step names containing
   ``<script>`` are correctly escaped.
5. Adaptive masking: no step → empty table.
6. i18n completeness: every ``pipeline_*`` key is present in
   FR and EN.
"""
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| from picarones.measurements.pipeline_benchmark import ( | |
| PipelineBenchmarkResult, | |
| StepAggregate, | |
| ) | |
| from picarones.report.pipeline_render import ( | |
| build_pipeline_report_html, | |
| build_pipeline_steps_table_html, | |
| build_pipeline_summary_html, | |
| ) | |
def _make_bench(
    name: str = "ocr_then_fix",
    corpus: str = "demo",
) -> PipelineBenchmarkResult:
    """Build the benchmark fixture shared by the rendering tests.

    The result describes 10 documents and two step aggregates: ``ocr``
    (10 succeeded, 0 failed) and ``rewrite`` (8 succeeded, 2 failed, with
    an ``error_breakdown`` entry). ``per_doc_results`` is filled with 8
    passing and 2 failing lightweight fakes.

    Args:
        name: Value passed as ``pipeline_name``.
        corpus: Value passed as ``corpus_name``.

    Returns:
        The populated ``PipelineBenchmarkResult``.
    """

    class _FakePR:
        # Minimal per-document result: only answers ``succeeded()``.
        def __init__(self, ok):
            self._ok = ok

        def succeeded(self):
            return self._ok

    ocr_step = StepAggregate(
        step_name="ocr",
        n_docs=10,
        n_succeeded=10,
        n_failed=0,
        duration_seconds_total=2.5,
        duration_seconds_mean=0.25,
        duration_seconds_median=0.24,
        junction_metrics={
            "text": {"cer": {"mean": 0.182, "median": 0.18, "n": 10}},
        },
    )
    rewrite_step = StepAggregate(
        step_name="rewrite",
        n_docs=10,
        n_succeeded=8,
        n_failed=2,
        duration_seconds_total=1.2,
        duration_seconds_mean=0.15,
        duration_seconds_median=0.14,
        failing_doc_ids=["d3", "d7"],
        junction_metrics={
            "text": {"cer": {"mean": 0.05, "median": 0.04, "n": 8}},
        },
        error_breakdown={"raised_exception": 2},
    )

    result = PipelineBenchmarkResult(
        pipeline_name=name,
        corpus_name=corpus,
        n_docs=10,
        total_duration_seconds=12.345,
    )
    result.per_step_aggregates = [ocr_step, rewrite_step]

    # One shared fake per outcome, repeated: 8 successes then 2 failures.
    passing_doc = _FakePR(True)
    failing_doc = _FakePR(False)
    result.per_doc_results = [passing_doc] * 8 + [failing_doc] * 2
    return result
# ──────────────────────────────────────────────────────────────────────────
# 1. Summary
# ──────────────────────────────────────────────────────────────────────────
class TestSummary:
    """Checks on the output of ``build_pipeline_summary_html``."""

    def test_renders_pipeline_and_corpus_name(self) -> None:
        """Pipeline and corpus names both appear in the summary."""
        rendered = build_pipeline_summary_html(_make_bench())
        for expected in ("ocr_then_fix", "demo"):
            assert expected in rendered

    def test_includes_succeeded_and_failed(self) -> None:
        """Succeeded and failed document counts are shown."""
        rendered = build_pipeline_summary_html(_make_bench())
        # 8 succeeded out of 10 (from the fake per_doc_results).
        assert "8 / 10" in rendered
        # 2 failed.
        assert ">2<" in rendered

    def test_duration_formatted(self) -> None:
        """Total duration is rendered in seconds with two decimals."""
        rendered = build_pipeline_summary_html(_make_bench())
        # 12.345 s -> "12.35 s" (seconds format for > 1 s and < 60 s).
        assert "12.35 s" in rendered
# ──────────────────────────────────────────────────────────────────────────
# 2. Steps table
# ──────────────────────────────────────────────────────────────────────────
class TestStepsTable:
    """Checks on the output of ``build_pipeline_steps_table_html``."""

    def test_renders_step_names(self) -> None:
        """Every step name of the fixture appears in the table."""
        bench = _make_bench()
        html = build_pipeline_steps_table_html(bench)
        assert "ocr" in html
        assert "rewrite" in html

    def test_columns_present(self) -> None:
        """The default (French) column headers are all rendered."""
        bench = _make_bench()
        html = build_pipeline_steps_table_html(bench)
        # Default FR headers. The accented characters must be real UTF-8
        # text: mis-decoded literals can never match the rendered HTML.
        for col in (
            "Étape", "Réussies", "Échouées", "Taux succès",
            "Durée moyenne", "Durée médiane",
            "Métriques aux jonctions", "Erreurs",
        ):
            assert col in html

    def test_metrics_displayed(self) -> None:
        """Junction metrics are rendered with their mean and sample size."""
        bench = _make_bench()
        html = build_pipeline_steps_table_html(bench)
        # Formatted metrics: type.metric : mean (n=N)
        assert "text.cer" in html
        assert "0.182" in html
        assert "0.050" in html
        assert "n=10" in html
        assert "n=8" in html

    def test_error_breakdown_displayed(self) -> None:
        """Error categories from ``error_breakdown`` are rendered."""
        bench = _make_bench()
        html = build_pipeline_steps_table_html(bench)
        assert "raised_exception" in html

    def test_empty_when_no_aggregates(self) -> None:
        """A benchmark without step aggregates yields an empty string."""
        bench = PipelineBenchmarkResult(
            pipeline_name="x", corpus_name="y",
        )
        assert build_pipeline_steps_table_html(bench) == ""

    def test_success_rate_cell_colored(self) -> None:
        """The success-rate cell carries an inline background colour."""
        bench = _make_bench()
        html = build_pipeline_steps_table_html(bench)
        # The gradient uses hex colours.
        assert "background:#" in html
# ──────────────────────────────────────────────────────────────────────────
# 3. Standalone document
# ──────────────────────────────────────────────────────────────────────────
class TestStandaloneDocument:
    """Checks on the full document built by ``build_pipeline_report_html``."""

    def test_doctype_present(self) -> None:
        """The document opens with the HTML5 doctype."""
        document = build_pipeline_report_html(_make_bench())
        assert document.startswith("<!doctype html>")

    def test_html_head_body_structure(self) -> None:
        """The html / head / body skeleton is present."""
        document = build_pipeline_report_html(_make_bench())
        for marker in ("<html", "<head>", "<body>", "</html>"):
            assert marker in document

    def test_styles_inline(self) -> None:
        """Styles are embedded in the document."""
        document = build_pipeline_report_html(_make_bench())
        assert "<style>" in document
        # At least one CSS rule.
        for css_token in ("body", "font-family"):
            assert css_token in document

    def test_title_includes_pipeline_name(self) -> None:
        """A title element exists and the pipeline name is rendered."""
        document = build_pipeline_report_html(_make_bench())
        assert "<title>" in document
        assert "ocr_then_fix" in document

    def test_lang_attribute(self) -> None:
        """The ``lang`` argument is reflected in a ``lang`` attribute."""
        fixture = _make_bench()
        french_doc = build_pipeline_report_html(fixture, lang="fr")
        english_doc = build_pipeline_report_html(fixture, lang="en")
        assert 'lang="fr"' in french_doc
        assert 'lang="en"' in english_doc

    def test_summary_and_steps_included(self) -> None:
        """Both the summary block and the steps table are embedded."""
        document = build_pipeline_report_html(_make_bench())
        # The document contains both blocks.
        for expected in ("ocr_then_fix", "ocr", "rewrite"):
            assert expected in document
# ──────────────────────────────────────────────────────────────────────────
# 4. HTML anti-injection
# ──────────────────────────────────────────────────────────────────────────
class TestAntiInjection:
    """User-controlled strings must be HTML-escaped by the renderers.

    Each test checks two things: the raw markup is absent from the output,
    and its escaped form (``&lt;`` / ``&gt;`` entities) is present. The
    positive assertions must target the escaped form; asserting the raw
    tag both absent and present can never pass.
    """

    def test_pipeline_name_escaped(self) -> None:
        """A ``<script>`` payload in the pipeline name is neutralised."""
        bench = PipelineBenchmarkResult(
            pipeline_name="<script>alert(1)</script>",
            corpus_name="demo",
        )
        html = build_pipeline_summary_html(bench)
        assert "<script>alert(1)</script>" not in html
        assert "&lt;script&gt;" in html

    def test_corpus_name_escaped(self) -> None:
        """An ``<img onerror>`` payload in the corpus name is neutralised."""
        bench = PipelineBenchmarkResult(
            pipeline_name="p",
            corpus_name="<img src=x onerror=alert(1)>",
        )
        html = build_pipeline_report_html(bench)
        assert "<img src=x" not in html
        assert "&lt;img" in html

    def test_step_name_escaped(self) -> None:
        """A step literally named ``<script>`` is rendered escaped."""
        bench = PipelineBenchmarkResult(
            pipeline_name="p", corpus_name="c",
        )
        bench.per_step_aggregates = [
            StepAggregate(
                step_name="<script>", n_docs=1, n_succeeded=1,
                duration_seconds_mean=0.1, duration_seconds_median=0.1,
            ),
        ]
        html = build_pipeline_steps_table_html(bench)
        assert "<script>" not in html.replace(
            "<script>alert", "@@@",  # should not be present anyway
        )
        assert "&lt;script&gt;" in html

    def test_label_escaped_via_i18n(self) -> None:
        """Markup injected through a custom label is escaped too."""
        bench = _make_bench()
        labels = {"pipeline_summary_title": "<b>X</b>"}
        html = build_pipeline_summary_html(bench, labels=labels)
        assert "<b>X</b>" not in html
        assert "&lt;b&gt;X&lt;/b&gt;" in html
# ──────────────────────────────────────────────────────────────────────────
# 5. i18n completeness
# ──────────────────────────────────────────────────────────────────────────
class TestI18nCompleteness:
    """Every ``pipeline_*`` label must exist in both FR and EN catalogues."""

    def _load(self, lang: str) -> dict:
        """Load the i18n JSON catalogue for *lang*.

        The file is looked up three directories above this test module,
        under ``picarones/report/i18n/<lang>.json``, and read as UTF-8.
        """
        path = (
            Path(__file__).parent.parent.parent
            / "picarones" / "report" / "i18n" / f"{lang}.json"
        )
        return json.loads(path.read_text(encoding="utf-8"))

    def test_all_pipeline_keys_present_fr(self) -> None:
        """The FR catalogue contains every required pipeline key."""
        d = self._load("fr")
        required = (
            "pipeline_report_title", "pipeline_report_note",
            "pipeline_summary_title", "pipeline_name_label",
            "pipeline_corpus_label", "pipeline_n_docs_label",
            "pipeline_succeeded_label", "pipeline_failed_label",
            "pipeline_duration_label", "pipeline_steps_title",
            "pipeline_step_name_label", "pipeline_success_rate_label",
            "pipeline_duration_mean_label",
            "pipeline_duration_median_label",
            "pipeline_junction_metrics_label",
            "pipeline_error_breakdown_label",
            "pipeline_docs_short",
        )
        for key in required:
            assert key in d, f"manque clé FR : {key}"

    def test_all_pipeline_keys_present_en(self) -> None:
        """Every ``pipeline_*`` key of the FR catalogue also exists in EN."""
        d_fr = self._load("fr")
        d_en = self._load("en")
        for key in d_fr:
            if key.startswith("pipeline_"):
                assert key in d_en, f"manque clé EN : {key}"