Picarones / tests /web /test_benchmark_run_b3_final_fields.py
Claude
test+feat: D4 audit B3-final — assertions strictes + warning expose_alto cross-engine
b420e00 unverified
Raw
History Blame
11.5 kB
"""Tests E2E API REST pour les champs B3-final de ``BenchmarkRunRequest``.

Phase D3 audit B3-final (mai 2026) — l'audit implacable a identifié
l'absence de couverture API REST pour les nouveaux champs ajoutés
en Phase B3-final corr-A/B/C :

- ``views``, ``profile``, ``partial_dir``, ``entity_extractor``,
  ``output_json`` (BenchmarkRunRequest)
- ``expose_alto`` (PipelineConfig)

Ces tests valident :

1. **Validation Pydantic positive** : payloads valides retournent 200
2. **Validation Pydantic négative** : payloads malformés retournent 422
3. **Sécurité path traversal** : ``../../etc`` refusé en 422
"""
from __future__ import annotations
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def client():
    """Provide a FastAPI ``TestClient`` wrapping the Picarones web app."""
    # Function-scope import, like every other project import in this module.
    from picarones.interfaces.web.app import app

    test_client = TestClient(app)
    return test_client
def _valid_corpus_payload(tmp_path):
    """Populate ``tmp_path`` with a one-document corpus and return its path.

    Writes a 50x50 white RGB image ``doc01.png`` and a UTF-8 ground-truth
    file ``doc01.gt.txt`` containing ``hello`` into ``tmp_path``, then
    returns ``tmp_path`` converted to ``str``.
    """
    from PIL import Image

    image_file = tmp_path / "doc01.png"
    ground_truth_file = tmp_path / "doc01.gt.txt"

    Image.new("RGB", (50, 50), color=(255, 255, 255)).save(image_file)
    ground_truth_file.write_text("hello", encoding="utf-8")
    return str(tmp_path)
# ──────────────────────────────────────────────────────────────────────
# 1. Validation positive — payloads B3-final acceptés
# ──────────────────────────────────────────────────────────────────────
class TestB3FinalFieldsAccepted:
    """Check that the request models accept every new B3-final field.

    Covers ``views``, ``profile``, ``partial_dir``, ``entity_extractor`` and
    ``output_json`` on ``BenchmarkRunRequest``, plus ``expose_alto`` on
    ``PipelineConfig``. Models are instantiated directly; no HTTP request
    is issued by any test in this class.
    """

    def test_request_accepts_views_field(self) -> None:
        """``views`` accepts a list of canonical view names."""
        from picarones.interfaces.web.models import BenchmarkRunRequest

        # Isolated Pydantic validation (no HTTP, faster): the ``client``
        # fixture is deliberately not requested, so the web app is not
        # loaded for this test.
        req = BenchmarkRunRequest(
            corpus_path="./corpus",
            competitors=[{"engine_name": "tesseract"}],
            views=["text_final", "alto_documentary", "searchability"],
        )
        assert list(req.views) == [
            "text_final", "alto_documentary", "searchability",
        ]

    def test_request_accepts_profile_field(self) -> None:
        """``profile`` is stored as given when set to ``diagnostics``."""
        from picarones.interfaces.web.models import BenchmarkRunRequest

        req = BenchmarkRunRequest(
            corpus_path="./corpus",
            competitors=[{"engine_name": "tesseract"}],
            profile="diagnostics",
        )
        assert req.profile == "diagnostics"

    def test_request_accepts_partial_dir_field(self) -> None:
        """A relative ``partial_dir`` is accepted and kept unchanged."""
        from picarones.interfaces.web.models import BenchmarkRunRequest

        req = BenchmarkRunRequest(
            corpus_path="./corpus",
            competitors=[{"engine_name": "tesseract"}],
            partial_dir="partial/checkpoints",
        )
        assert req.partial_dir == "partial/checkpoints"

    def test_request_accepts_entity_extractor_field(self) -> None:
        """A ``module.path:ClassName`` extractor spec is accepted."""
        from picarones.interfaces.web.models import BenchmarkRunRequest

        req = BenchmarkRunRequest(
            corpus_path="./corpus",
            competitors=[{"engine_name": "tesseract"}],
            entity_extractor="picarones.adapters.ner:SpacyExtractor",
        )
        assert req.entity_extractor == "picarones.adapters.ner:SpacyExtractor"

    def test_request_accepts_output_json_field(self) -> None:
        """A bare ``output_json`` file name is accepted and kept unchanged."""
        from picarones.interfaces.web.models import BenchmarkRunRequest

        req = BenchmarkRunRequest(
            corpus_path="./corpus",
            competitors=[{"engine_name": "tesseract"}],
            output_json="bench_legacy.json",
        )
        assert req.output_json == "bench_legacy.json"

    def test_pipeline_config_accepts_expose_alto(self) -> None:
        """``expose_alto=True`` is accepted by ``PipelineConfig``."""
        from picarones.interfaces.web.models import PipelineConfig

        pc = PipelineConfig(
            engine_name="tesseract", expose_alto=True,
        )
        assert pc.expose_alto is True

    def test_pipeline_config_default_no_expose_alto(self) -> None:
        """``expose_alto`` is ``False`` when not supplied."""
        from picarones.interfaces.web.models import PipelineConfig

        pc = PipelineConfig(engine_name="tesseract")
        assert pc.expose_alto is False

    def test_expose_alto_with_non_tesseract_engine_warns(
        self, caplog: pytest.LogCaptureFixture,
    ) -> None:
        """A non-Tesseract engine with ``expose_alto=True`` logs a warning.

        Phase D4 of the B3-final audit: the UI sends ``expose_alto=true``
        while the target engine is not Tesseract. The flag is ignored, but
        an explicit warning is expected so the user understands why the
        ``alto_documentary`` view provides no metrics.
        """
        import contextlib
        import logging

        from picarones.interfaces.web.benchmark_utils import (
            _engine_from_competitor,
        )
        from picarones.interfaces.web.models import PipelineConfig

        with caplog.at_level(logging.WARNING):
            # The factory may fail because ``precomputed_text`` requires
            # extra kwargs; that failure is deliberately ignored, since the
            # warning must have been emitted BEFORE the error occurs.
            with contextlib.suppress(Exception):
                _engine_from_competitor(PipelineConfig(
                    engine_name="precomputed_text", expose_alto=True,
                ))

        warnings_text = "\n".join(
            record.getMessage()
            for record in caplog.records
            if record.levelno >= logging.WARNING
        )
        # A case-insensitive "alto" match also covers the literal
        # "expose_alto", so a single check is sufficient.
        assert "alto" in warnings_text.lower()
        assert "precomputed_text" in warnings_text
# ──────────────────────────────────────────────────────────────────────
# 2. Validation négative — payloads malformés rejetés
# ──────────────────────────────────────────────────────────────────────
class TestB3FinalFieldsValidation:
    """Check that non-canonical B3-final values raise ``ValidationError``."""

    def test_invalid_view_name_rejected(self) -> None:
        """``views`` only accepts canonical view names (Literal)."""
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        bad_payload = {
            "corpus_path": "./corpus",
            "competitors": [{"engine_name": "tesseract"}],
            "views": ["not_a_canonical_view"],
        }
        with pytest.raises(ValidationError):
            BenchmarkRunRequest(**bad_payload)

    def test_invalid_profile_rejected(self) -> None:
        """``profile`` only accepts canonical profile names (Literal)."""
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        bad_payload = {
            "corpus_path": "./corpus",
            "competitors": [{"engine_name": "tesseract"}],
            "profile": "not_a_real_profile",
        }
        with pytest.raises(ValidationError):
            BenchmarkRunRequest(**bad_payload)
# ──────────────────────────────────────────────────────────────────────
# 3. Sécurité — path traversal refusé (Phase D2 audit)
# ──────────────────────────────────────────────────────────────────────
class TestPathTraversalSecurity:
    """Check that unsafe path-like values are refused at model validation.

    Each rejection test asserts a ``ValidationError`` whose message matches
    a specific fragment; the final test checks that empty strings pass.
    """

    @staticmethod
    def _request_kwargs(**overrides):
        """Return the minimal request kwargs merged with ``overrides``."""
        return {
            "corpus_path": "./corpus",
            "competitors": [{"engine_name": "tesseract"}],
            **overrides,
        }

    def test_partial_dir_traversal_rejected(self) -> None:
        """A ``partial_dir`` containing ``..`` segments is rejected."""
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        kwargs = self._request_kwargs(partial_dir="../../etc/passwd")
        with pytest.raises(ValidationError, match="path traversal"):
            BenchmarkRunRequest(**kwargs)

    def test_partial_dir_absolute_rejected(self) -> None:
        """An absolute ``partial_dir`` is rejected."""
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        kwargs = self._request_kwargs(partial_dir="/etc/passwd")
        with pytest.raises(ValidationError, match="chemin absolu"):
            BenchmarkRunRequest(**kwargs)

    def test_output_json_traversal_rejected(self) -> None:
        """An ``output_json`` containing ``..`` segments is rejected."""
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        kwargs = self._request_kwargs(
            output_json="../../home/user/private.json",
        )
        with pytest.raises(ValidationError, match="path traversal"):
            BenchmarkRunRequest(**kwargs)

    def test_entity_extractor_traversal_rejected(self) -> None:
        """An ``entity_extractor`` shaped like a traversal path is rejected."""
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        kwargs = self._request_kwargs(entity_extractor="../../etc/passwd:Bad")
        with pytest.raises(ValidationError, match="interdits"):
            BenchmarkRunRequest(**kwargs)

    def test_entity_extractor_with_slash_rejected(self) -> None:
        """An ``entity_extractor`` containing a slash is rejected."""
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        kwargs = self._request_kwargs(entity_extractor="some/path:Class")
        with pytest.raises(ValidationError, match="interdits"):
            BenchmarkRunRequest(**kwargs)

    def test_entity_extractor_with_space_rejected(self) -> None:
        """An ``entity_extractor`` containing a space is rejected."""
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        kwargs = self._request_kwargs(entity_extractor="my package:Class")
        with pytest.raises(ValidationError, match="interdits"):
            BenchmarkRunRequest(**kwargs)

    def test_entity_extractor_malformed_rejected(self) -> None:
        """An ``entity_extractor`` starting with a digit is rejected."""
        from pydantic import ValidationError
        from picarones.interfaces.web.models import BenchmarkRunRequest

        kwargs = self._request_kwargs(
            entity_extractor="123invalid_start_with_digit",
        )
        with pytest.raises(ValidationError, match="format invalide"):
            BenchmarkRunRequest(**kwargs)

    def test_empty_string_path_fields_accepted(self) -> None:
        """``""`` is explicitly allowed (= feature disabled)."""
        from picarones.interfaces.web.models import BenchmarkRunRequest

        req = BenchmarkRunRequest(
            **self._request_kwargs(
                partial_dir="",
                output_json="",
                entity_extractor="",
            )
        )
        for field_name in ("partial_dir", "output_json", "entity_extractor"):
            assert getattr(req, field_name) == ""