Spaces:
Sleeping
Sleeping
Claude
test+feat: D4 audit B3-final — assertions strictes + warning expose_alto cross-engine
b420e00 unverified | """Tests E2E API REST pour les champs B3-final de ``BenchmarkRunRequest``. | |
| Phase D3 audit B3-final (mai 2026) — l'audit implacable a identifié | |
| l'absence de couverture API REST pour les nouveaux champs ajoutés | |
| en Phase B3-final corr-A/B/C : | |
| - ``views``, ``profile``, ``partial_dir``, ``entity_extractor``, | |
| ``output_json`` (BenchmarkRunRequest) | |
| - ``expose_alto`` (PipelineConfig) | |
| Ces tests valident : | |
| 1. **Validation Pydantic positive** : payloads valides retournent 200 | |
| 2. **Validation Pydantic négative** : payloads malformés retournent 422 | |
| 3. **Sécurité path traversal** : ``../../etc`` refusé en 422 | |
| """ | |
| from __future__ import annotations | |
| import pytest | |
| from fastapi.testclient import TestClient | |
@pytest.fixture
def client():
    """Provide a FastAPI ``TestClient`` bound to the Picarones web app.

    Registered as a pytest fixture because tests in this module request
    ``client`` by parameter name; without the decorator pytest cannot
    resolve that argument.

    The application is imported inside the fixture so that it is only
    loaded when a test actually asks for the client.

    Returns:
        A ``TestClient`` wrapping ``picarones.interfaces.web.app.app``.
    """
    from picarones.interfaces.web.app import app

    return TestClient(app)
def _valid_corpus_payload(tmp_path):
    """Populate ``tmp_path`` with a minimal one-document corpus.

    Two files are written directly into ``tmp_path`` (no archive is
    produced):

    * ``doc01.png`` -- a 50x50 white RGB image;
    * ``doc01.gt.txt`` -- the ground-truth text ``hello``, UTF-8 encoded.

    Args:
        tmp_path: Directory to fill; used with the ``/`` path operator.

    Returns:
        ``tmp_path`` converted to ``str``.
    """
    from PIL import Image

    white = (255, 255, 255)
    blank_page = Image.new("RGB", (50, 50), color=white)
    image_file = tmp_path / "doc01.png"
    blank_page.save(image_file)

    ground_truth_file = tmp_path / "doc01.gt.txt"
    ground_truth_file.write_text("hello", encoding="utf-8")

    return str(tmp_path)
| # ────────────────────────────────────────────────────────────────────── | |
| # 1. Validation positive — payloads B3-final acceptés | |
| # ────────────────────────────────────────────────────────────────────── | |
class TestB3FinalFieldsAccepted:
    """Check that the B3-final fields are accepted by the web models.

    Covers the ``BenchmarkRunRequest`` fields added in phase corr-A/B/C
    and the ``expose_alto`` flag of ``PipelineConfig``. Models are built
    directly, without going through HTTP.
    """

    @staticmethod
    def _make_request(**extra_fields):
        """Build a ``BenchmarkRunRequest`` from the minimal payload.

        The minimal payload is ``corpus_path="./corpus"`` with a single
        ``tesseract`` competitor; ``extra_fields`` are passed through as
        additional keyword arguments.
        """
        from picarones.interfaces.web.models import BenchmarkRunRequest

        return BenchmarkRunRequest(
            corpus_path="./corpus",
            competitors=[{"engine_name": "tesseract"}],
            **extra_fields,
        )

    def test_request_accepts_views_field(self, client) -> None:
        """``views`` accepts a list of canonical view names."""
        # ``client`` is requested as a fixture but not used below: this is
        # an isolated model validation (no HTTP, hence faster).
        request = self._make_request(
            views=["text_final", "alto_documentary", "searchability"],
        )
        stored_views = list(request.views)
        assert stored_views == [
            "text_final", "alto_documentary", "searchability",
        ]

    def test_request_accepts_profile_field(self) -> None:
        """``profile`` accepts the ``diagnostics`` value."""
        request = self._make_request(profile="diagnostics")
        assert request.profile == "diagnostics"

    def test_request_accepts_partial_dir_field(self) -> None:
        """``partial_dir`` accepts a relative directory path."""
        request = self._make_request(partial_dir="partial/checkpoints")
        assert request.partial_dir == "partial/checkpoints"

    def test_request_accepts_entity_extractor_field(self) -> None:
        """``entity_extractor`` accepts a ``module:Class`` reference."""
        request = self._make_request(
            entity_extractor="picarones.adapters.ner:SpacyExtractor",
        )
        assert (
            request.entity_extractor
            == "picarones.adapters.ner:SpacyExtractor"
        )

    def test_request_accepts_output_json_field(self) -> None:
        """``output_json`` accepts a bare file name."""
        request = self._make_request(output_json="bench_legacy.json")
        assert request.output_json == "bench_legacy.json"

    def test_pipeline_config_accepts_expose_alto(self) -> None:
        """``PipelineConfig`` keeps ``expose_alto=True`` when provided."""
        from picarones.interfaces.web.models import PipelineConfig

        config = PipelineConfig(engine_name="tesseract", expose_alto=True)
        assert config.expose_alto is True

    def test_pipeline_config_default_no_expose_alto(self) -> None:
        """``expose_alto`` is ``False`` when the caller omits it."""
        from picarones.interfaces.web.models import PipelineConfig

        config = PipelineConfig(engine_name="tesseract")
        assert config.expose_alto is False

    def test_expose_alto_with_non_tesseract_engine_warns(
        self, caplog: pytest.LogCaptureFixture,
    ) -> None:
        """A warning is logged for ``expose_alto`` on a non-Tesseract engine.

        Phase D4 audit B3-final: the UI sends ``expose_alto=true`` while
        the target engine is not Tesseract. The expected behaviour is that
        the flag is ignored and an explicit warning is logged, so the user
        understands why the ``alto_documentary`` view yields no metrics.
        """
        import logging
        from picarones.interfaces.web.benchmark_utils import (
            _engine_from_competitor,
        )
        from picarones.interfaces.web.models import PipelineConfig

        with caplog.at_level(logging.WARNING):
            try:
                competitor = PipelineConfig(
                    engine_name="precomputed_text", expose_alto=True,
                )
                _engine_from_competitor(competitor)
            except Exception:
                # The factory may fail because ``precomputed_text`` needs
                # extra kwargs; that failure is swallowed on purpose, as
                # the warning must have been emitted BEFORE the error.
                pass

        warning_messages = [
            record.getMessage()
            for record in caplog.records
            if record.levelno >= logging.WARNING
        ]
        warnings_text = "\n".join(warning_messages)

        mentions_flag = "expose_alto" in warnings_text
        mentions_alto = "alto" in warnings_text.lower()
        assert mentions_flag or mentions_alto
        assert "precomputed_text" in warnings_text
| # ────────────────────────────────────────────────────────────────────── | |
| # 2. Validation négative — payloads malformés rejetés | |
| # ────────────────────────────────────────────────────────────────────── | |
class TestB3FinalFieldsValidation:
    """Check that malformed B3-final values raise a ``ValidationError``."""

    def test_invalid_view_name_rejected(self) -> None:
        """``views`` rejects a name outside the canonical set."""
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        payload = {
            "corpus_path": "./corpus",
            "competitors": [{"engine_name": "tesseract"}],
            "views": ["not_a_canonical_view"],
        }
        with pytest.raises(ValidationError):
            BenchmarkRunRequest(**payload)

    def test_invalid_profile_rejected(self) -> None:
        """``profile`` rejects a name outside the canonical set."""
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        payload = {
            "corpus_path": "./corpus",
            "competitors": [{"engine_name": "tesseract"}],
            "profile": "not_a_real_profile",
        }
        with pytest.raises(ValidationError):
            BenchmarkRunRequest(**payload)
| # ────────────────────────────────────────────────────────────────────── | |
| # 3. Sécurité — path traversal refusé (Phase D2 audit) | |
| # ────────────────────────────────────────────────────────────────────── | |
class TestPathTraversalSecurity:
    """Check that unsafe path-like values are refused at validation time.

    Every rejection test expects a ``ValidationError`` whose message
    matches a specific fragment; the fragments are the ones the tests
    search for and are kept verbatim.
    """

    @staticmethod
    def _assert_rejected(message_fragment, **extra_fields):
        """Expect ``ValidationError`` matching ``message_fragment``.

        The request is built from the minimal payload
        (``corpus_path="./corpus"``, one ``tesseract`` competitor) plus
        ``extra_fields``.
        """
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        with pytest.raises(ValidationError, match=message_fragment):
            BenchmarkRunRequest(
                corpus_path="./corpus",
                competitors=[{"engine_name": "tesseract"}],
                **extra_fields,
            )

    def test_partial_dir_traversal_rejected(self) -> None:
        """``partial_dir`` with ``..`` segments is rejected."""
        self._assert_rejected(
            "path traversal", partial_dir="../../etc/passwd",
        )

    def test_partial_dir_absolute_rejected(self) -> None:
        """``partial_dir`` given as an absolute path is rejected."""
        self._assert_rejected("chemin absolu", partial_dir="/etc/passwd")

    def test_output_json_traversal_rejected(self) -> None:
        """``output_json`` with ``..`` segments is rejected."""
        self._assert_rejected(
            "path traversal", output_json="../../home/user/private.json",
        )

    def test_entity_extractor_traversal_rejected(self) -> None:
        """``entity_extractor`` containing a traversal path is rejected."""
        self._assert_rejected(
            "interdits", entity_extractor="../../etc/passwd:Bad",
        )

    def test_entity_extractor_with_slash_rejected(self) -> None:
        """``entity_extractor`` containing a slash is rejected."""
        self._assert_rejected(
            "interdits", entity_extractor="some/path:Class",
        )

    def test_entity_extractor_with_space_rejected(self) -> None:
        """``entity_extractor`` containing a space is rejected."""
        self._assert_rejected(
            "interdits", entity_extractor="my package:Class",
        )

    def test_entity_extractor_malformed_rejected(self) -> None:
        """``entity_extractor`` starting with a digit is rejected."""
        self._assert_rejected(
            "format invalide",
            entity_extractor="123invalid_start_with_digit",
        )

    def test_empty_string_path_fields_accepted(self) -> None:
        """``""`` is explicitly allowed (it means the feature is off)."""
        from picarones.interfaces.web.models import BenchmarkRunRequest

        request = BenchmarkRunRequest(
            corpus_path="./corpus",
            competitors=[{"engine_name": "tesseract"}],
            partial_dir="",
            output_json="",
            entity_extractor="",
        )
        for field_name in ("partial_dir", "output_json", "entity_extractor"):
            assert getattr(request, field_name) == ""