"""Tests E2E API REST pour les champs B3-final de ``BenchmarkRunRequest``. Phase D3 audit B3-final (mai 2026) — l'audit implacable a identifié l'absence de couverture API REST pour les nouveaux champs ajoutés en Phase B3-final corr-A/B/C : - ``views``, ``profile``, ``partial_dir``, ``entity_extractor``, ``output_json`` (BenchmarkRunRequest) - ``expose_alto`` (PipelineConfig) Ces tests valident : 1. **Validation Pydantic positive** : payloads valides retournent 200 2. **Validation Pydantic négative** : payloads malformés retournent 422 3. **Sécurité path traversal** : ``../../etc`` refusé en 422 """ from __future__ import annotations import pytest from fastapi.testclient import TestClient @pytest.fixture def client(): from picarones.interfaces.web.app import app return TestClient(app) def _valid_corpus_payload(tmp_path): """Crée un corpus zip mini valide pour les tests.""" from PIL import Image img = Image.new("RGB", (50, 50), color=(255, 255, 255)) img.save(tmp_path / "doc01.png") (tmp_path / "doc01.gt.txt").write_text("hello", encoding="utf-8") return str(tmp_path) # ────────────────────────────────────────────────────────────────────── # 1. Validation positive — payloads B3-final acceptés # ────────────────────────────────────────────────────────────────────── class TestB3FinalFieldsAccepted: """Vérifie que ``BenchmarkRunRequest`` accepte tous les nouveaux champs B3-final ajoutés en Phase corr-A/B/C.""" def test_request_accepts_views_field(self, client) -> None: """``views`` accepte la liste des vues canoniques.""" from picarones.interfaces.web.models import BenchmarkRunRequest # Validation Pydantic isolée (sans HTTP, plus rapide). req = BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], views=["text_final", "alto_documentary", "searchability"], ) assert list(req.views) == [ "text_final", "alto_documentary", "searchability", ] def test_request_accepts_profile_field(self) -> None: from picarones.interfaces.web.models import BenchmarkRunRequest req = BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], profile="diagnostics", ) assert req.profile == "diagnostics" def test_request_accepts_partial_dir_field(self) -> None: from picarones.interfaces.web.models import BenchmarkRunRequest req = BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], partial_dir="partial/checkpoints", ) assert req.partial_dir == "partial/checkpoints" def test_request_accepts_entity_extractor_field(self) -> None: from picarones.interfaces.web.models import BenchmarkRunRequest req = BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], entity_extractor="picarones.adapters.ner:SpacyExtractor", ) assert req.entity_extractor == "picarones.adapters.ner:SpacyExtractor" def test_request_accepts_output_json_field(self) -> None: from picarones.interfaces.web.models import BenchmarkRunRequest req = BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], output_json="bench_legacy.json", ) assert req.output_json == "bench_legacy.json" def test_pipeline_config_accepts_expose_alto(self) -> None: from picarones.interfaces.web.models import PipelineConfig pc = PipelineConfig( engine_name="tesseract", expose_alto=True, ) assert pc.expose_alto is True def test_pipeline_config_default_no_expose_alto(self) -> None: from picarones.interfaces.web.models import PipelineConfig pc = PipelineConfig(engine_name="tesseract") assert pc.expose_alto is False def test_expose_alto_with_non_tesseract_engine_warns( self, caplog: pytest.LogCaptureFixture, ) -> None: """Phase D4 audit B3-final — l'UI envoie ``expose_alto=true`` mais le moteur cible n'est pas Tesseract. Le flag est ignoré mais on logue un warning explicite pour que l'utilisateur comprenne pourquoi son ``alto_documentary`` view ne fournit aucune métrique. """ import logging from picarones.interfaces.web.benchmark_utils import ( _engine_from_competitor, ) from picarones.interfaces.web.models import PipelineConfig with caplog.at_level(logging.WARNING): try: _engine_from_competitor(PipelineConfig( engine_name="precomputed_text", expose_alto=True, )) except Exception: # Le factory peut échouer car ``precomputed_text`` # demande des kwargs supplémentaires — on capture mais # le warning doit être émis AVANT cette erreur. pass warnings_text = "\n".join( r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING ) assert "expose_alto" in warnings_text or "alto" in warnings_text.lower() assert "precomputed_text" in warnings_text # ────────────────────────────────────────────────────────────────────── # 2. Validation négative — payloads malformés rejetés # ────────────────────────────────────────────────────────────────────── class TestB3FinalFieldsValidation: def test_invalid_view_name_rejected(self) -> None: """``views`` n'accepte que les noms canoniques (Literal).""" from pydantic import ValidationError from picarones.interfaces.web.models import BenchmarkRunRequest with pytest.raises(ValidationError): BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], views=["not_a_canonical_view"], ) def test_invalid_profile_rejected(self) -> None: """``profile`` n'accepte que les profils canoniques (Literal).""" from pydantic import ValidationError from picarones.interfaces.web.models import BenchmarkRunRequest with pytest.raises(ValidationError): BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], profile="not_a_real_profile", ) # ────────────────────────────────────────────────────────────────────── # 3. Sécurité — path traversal refusé (Phase D2 audit) # ────────────────────────────────────────────────────────────────────── class TestPathTraversalSecurity: def test_partial_dir_traversal_rejected(self) -> None: from pydantic import ValidationError from picarones.interfaces.web.models import BenchmarkRunRequest with pytest.raises(ValidationError, match="path traversal"): BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], partial_dir="../../etc/passwd", ) def test_partial_dir_absolute_rejected(self) -> None: from pydantic import ValidationError from picarones.interfaces.web.models import BenchmarkRunRequest with pytest.raises(ValidationError, match="chemin absolu"): BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], partial_dir="/etc/passwd", ) def test_output_json_traversal_rejected(self) -> None: from pydantic import ValidationError from picarones.interfaces.web.models import BenchmarkRunRequest with pytest.raises(ValidationError, match="path traversal"): BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], output_json="../../home/user/private.json", ) def test_entity_extractor_traversal_rejected(self) -> None: from pydantic import ValidationError from picarones.interfaces.web.models import BenchmarkRunRequest with pytest.raises(ValidationError, match="interdits"): BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], entity_extractor="../../etc/passwd:Bad", ) def test_entity_extractor_with_slash_rejected(self) -> None: from pydantic import ValidationError from picarones.interfaces.web.models import BenchmarkRunRequest with pytest.raises(ValidationError, match="interdits"): BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], entity_extractor="some/path:Class", ) def test_entity_extractor_with_space_rejected(self) -> None: from pydantic import ValidationError from picarones.interfaces.web.models import BenchmarkRunRequest with pytest.raises(ValidationError, match="interdits"): BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], entity_extractor="my package:Class", ) def test_entity_extractor_malformed_rejected(self) -> None: from pydantic import ValidationError from picarones.interfaces.web.models import BenchmarkRunRequest with pytest.raises(ValidationError, match="format invalide"): BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], entity_extractor="123invalid_start_with_digit", ) def test_empty_string_path_fields_accepted(self) -> None: """``""`` est explicitement autorisé (= feature désactivée).""" from picarones.interfaces.web.models import BenchmarkRunRequest req = BenchmarkRunRequest( corpus_path="./corpus", competitors=[{"engine_name": "tesseract"}], partial_dir="", output_json="", entity_extractor="", ) assert req.partial_dir == "" assert req.output_json == "" assert req.entity_extractor == ""