Picarones / tests /architecture /test_s8_system_binaries_lock.py
Claude
fix(sprint-S8.5): capture Tesseract version in RunManifest (vrai fix S6.1)
74020d5 unverified
Raw
History Blame
4.77 kB
"""Sprint S8.5 โ€” capture des binaires systรจme dans le RunManifest.
Ferme le trou de reproductibilitรฉ laissรฉ par
``capture_dependencies_lock`` qui ne couvre que les paquets Python.
La version du binaire Tesseract (qui exรฉcute rรฉellement l'OCR) n'est
pas dans le wheel ``pytesseract`` et doit รชtre capturรฉe sรฉparรฉment.
Sans cette capture, deux runs avec le mรชme ``dependencies_lock``
peuvent produire des CER diffรฉrents si la version Tesseract change
entre temps (ex : Debian point-release).
"""
from __future__ import annotations
from unittest.mock import patch
import pytest
# ──────────────────────────────────────────────────────────────────────
# 1. capture_system_binaries_lock — best-effort
# ──────────────────────────────────────────────────────────────────────
class TestCaptureSystemBinariesLock:
def test_returns_dict(self) -> None:
from picarones.app.services.dependencies import (
capture_system_binaries_lock,
)
lock = capture_system_binaries_lock()
assert isinstance(lock, dict)
def test_includes_tesseract_when_installed(self) -> None:
"""Si ``tesseract`` est dans ``$PATH``, sa version doit รชtre
capturรฉe."""
from picarones.app.services.dependencies import (
capture_system_binaries_lock,
)
import shutil
if not shutil.which("tesseract"):
pytest.skip("tesseract non installรฉ sur ce systรจme")
lock = capture_system_binaries_lock()
assert "tesseract" in lock
assert "tesseract" in lock["tesseract"].lower() # ex : "tesseract 5.3.0"
def test_missing_binary_silently_omitted(self) -> None:
"""Si un binaire n'est pas dans ``$PATH``, sa clรฉ est absente
du dict (pas ``None``, pas d'exception)."""
from picarones.app.services.dependencies import (
_safe_capture_binary_version,
)
result = _safe_capture_binary_version("definitely_not_a_real_binary_xyz")
assert result is None
def test_safe_capture_handles_subprocess_error(self) -> None:
"""``subprocess.run`` qui timeout ou crash โ†’ ``None``, pas
de propagation."""
from picarones.app.services.dependencies import (
_safe_capture_binary_version,
)
# Mock pour simuler un binaire qui timeout.
import subprocess
with patch("shutil.which", return_value="/fake/path"):
with patch(
"subprocess.run",
side_effect=subprocess.TimeoutExpired("fake", 5),
):
result = _safe_capture_binary_version("fake")
assert result is None
# ──────────────────────────────────────────────────────────────────────
# 2. RunManifest accepts system_binaries_lock
# ──────────────────────────────────────────────────────────────────────
class TestRunManifestField:
    """Tests for the ``system_binaries_lock`` field of ``RunManifest``."""

    @staticmethod
    def _make_manifest(**extra_fields):
        """Build a minimal ``RunManifest``.

        ``extra_fields`` are forwarded to the constructor on top of the
        fixed set of fields used by every test in this class.
        """
        from datetime import datetime, timezone
        from picarones.domain.run_manifest import RunManifest

        return RunManifest(
            run_id="r",
            corpus_name="c",
            n_documents=0,
            code_version="1.0.0",
            started_at=datetime.now(timezone.utc),
            completed_at=datetime.now(timezone.utc),
            **extra_fields,
        )

    def test_default_empty_dict(self) -> None:
        """Omitting ``system_binaries_lock`` yields an empty dict.

        Stated intent: manifests written before S8.5, which lack the
        field, must remain deserializable.
        """
        manifest = self._make_manifest()
        assert manifest.system_binaries_lock == {}

    def test_field_persisted_in_serialization(self) -> None:
        """The field appears in the ``model_dump()`` output.

        Stated intent: external ingesters (BnF audit) read it from the dump.
        """
        manifest = self._make_manifest(
            system_binaries_lock={"tesseract": "tesseract 5.3.0"},
        )
        payload = manifest.model_dump()
        assert "system_binaries_lock" in payload
        captured = payload["system_binaries_lock"]
        assert captured["tesseract"] == "tesseract 5.3.0"