Spaces:
Sleeping
Sleeping
File size: 6,691 Bytes
71f166b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | """Smoke tests du harness lui-mΓͺme.
Phase 0 : avant que la moindre comparaison legacy vs rewrite ne soit
faite, il faut prouver que le harness :
1. Génère des corpus de référence reproductibles cross-OS.
2. Sait écrire et relire un golden snapshot.
3. Ses comparateurs sémantiques rejettent les vraies différences et
acceptent les non-significatives.
Ces tests sont marqués ``regression`` mais ne font pas de
comparaison legacy vs rewrite — ils valident l'infrastructure
elle-même.
Aux phases suivantes, des fichiers ``test_phaseN_<module>.py``
viendront s'ajouter à côté de celui-ci pour vérifier chaque
fonctionnalité migrée.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from tests.regression.legacy_vs_rewrite.conftest import (
assert_floats_equal,
assert_golden_match,
assert_json_semantic_equal,
assert_set_equal,
)
# Module-level pytest marker: tags every test in this file as ``regression``.
pytestmark = pytest.mark.regression
# ──────────────────────────────────────────────────────────────────
# Corpus
# ──────────────────────────────────────────────────────────────────
def test_small_corpus_has_three_documents(small_corpus_dir: Path) -> None:
    """Check that ``small_corpus_dir`` holds 3 (image, ground-truth) pairs.

    Counts the ``*.png`` and ``*.gt.txt`` files found directly in the
    directory, then verifies that every PNG has a sibling file named
    ``<stem>.gt.txt``.
    """
    pngs = sorted(small_corpus_dir.glob("*.png"))
    gts = sorted(small_corpus_dir.glob("*.gt.txt"))
    assert len(pngs) == 3, f"3 PNG attendus, {len(pngs)} trouvΓ©s."
    assert len(gts) == 3, f"3 GT attendues, {len(gts)} trouvΓ©es."
    for png in pngs:
        # Derive the sibling from the stem. Chaining ``with_suffix`` would
        # also eat a dotted part of the stem ("a.b.png" -> "a.gt.txt").
        gt = png.with_name(f"{png.stem}.gt.txt")
        assert gt.exists(), f"GT manquante pour {png.name}."
def test_medium_corpus_has_thirty_documents(medium_corpus_dir: Path) -> None:
    """Check that ``medium_corpus_dir`` holds 30 (image, ground-truth) pairs.

    Counts the ``*.png`` and ``*.gt.txt`` files found directly in the
    directory and verifies that every PNG has a sibling ``<stem>.gt.txt``.
    """
    pngs = sorted(medium_corpus_dir.glob("*.png"))
    # NOTE(review): assumes the medium corpus uses the same ``<stem>.gt.txt``
    # naming as the small corpus -- confirm against the fixture.
    gts = sorted(medium_corpus_dir.glob("*.gt.txt"))
    assert len(pngs) == 30, f"30 PNG attendus, obtenu {len(pngs)}."
    assert len(gts) == 30, f"30 GT attendues, obtenu {len(gts)}."
    for png in pngs:
        gt = png.with_name(f"{png.stem}.gt.txt")
        assert gt.exists(), f"GT manquante pour {png.name}."
def test_corpus_generation_is_idempotent(small_corpus_dir: Path) -> None:
    """Check that re-generating the corpus does not rewrite existing PNGs.

    The modification time of every ``*.png`` in ``small_corpus_dir`` is
    recorded, ``_generate_synthetic_corpus`` is called again on the same
    directory with three documents, and the modification times are compared.
    """
    # Integer nanoseconds: the float ``st_mtime`` cannot represent every
    # nanosecond timestamp, so two distinct writes could compare equal.
    mtimes_before = {
        p: p.stat().st_mtime_ns for p in small_corpus_dir.glob("*.png")
    }
    # Without this guard an empty directory would make the loop below vacuous
    # and the test would pass without checking anything.
    assert mtimes_before, "Aucun PNG a comparer dans le corpus."

    # Trigger generation again by calling the primitive directly: the
    # idempotence contract of that primitive is what is under test.
    from tests.regression.legacy_vs_rewrite.conftest import (
        _generate_synthetic_corpus,
    )

    # NOTE(review): this list presumably mirrors what the fixture generates;
    # verify against conftest.
    _generate_synthetic_corpus(
        small_corpus_dir,
        documents=[
            ("doc01", "BENEDICTUS DEUS"),
            ("doc02", "Anno Domini MCMXVII"),
            ("doc03", "Folio 23 recto"),
        ],
    )
    mtimes_after = {
        p: p.stat().st_mtime_ns for p in small_corpus_dir.glob("*.png")
    }
    for path, mtime_before in mtimes_before.items():
        # Report a vanished file as a test failure rather than a KeyError.
        assert path in mtimes_after, (
            f"{path.name} a disparu apres la re-generation."
        )
        assert mtimes_after[path] == mtime_before, (
            f"{path.name} a Γ©tΓ© rΓ©-Γ©crit alors qu'il existait dΓ©jΓ ."
        )
# ──────────────────────────────────────────────────────────────────
# Golden snapshots
# ──────────────────────────────────────────────────────────────────
def test_golden_path_creates_directories(golden_path, tmp_path) -> None:
    """``golden_path(phase, corpus, file)`` returns a path whose folder exists.

    ``tmp_path`` is requested as a fixture but is not read by the body.
    """
    snapshot_file = golden_path("phase0", "smoke", "tmp.txt")
    parent_dir = snapshot_file.parent
    assert parent_dir.exists()
    # Clean up so no stray file is left behind.
    if snapshot_file.exists():
        snapshot_file.unlink()
def test_golden_match_writes_on_first_run(
    tmp_path: Path,
    regen_golden: bool,
) -> None:
    """When the golden file does not exist yet, it gets written (first run).

    ``regen_golden`` is requested as a fixture, but the call below passes
    ``regen=False`` explicitly.
    """
    content = "hello"
    golden_file = tmp_path.joinpath("first.txt")
    assert_golden_match(content, golden_file, regen=False)  # writes the file
    written = golden_file.read_text()
    assert written == content
def test_golden_match_passes_when_identical(tmp_path: Path) -> None:
    """When actual == golden, the comparison passes silently."""
    content = "identical content"
    golden_file = tmp_path.joinpath("id.txt")
    golden_file.write_text(content)
    assert_golden_match(content, golden_file, regen=False)
def test_golden_match_fails_when_different(tmp_path: Path) -> None:
    """When actual != golden, an ``AssertionError`` is raised."""
    golden_file = tmp_path.joinpath("diff.txt")
    golden_file.write_text("expected text")
    expect_mismatch = pytest.raises(AssertionError, match="Golden mismatch")
    with expect_mismatch:
        assert_golden_match("actual text", golden_file, regen=False)
def test_golden_match_regen_overwrites(tmp_path: Path) -> None:
    """In regen mode the file is rewritten even when its content differs."""
    golden_file = tmp_path.joinpath("regen.txt")
    golden_file.write_text("old")
    fresh = "new"
    assert_golden_match(fresh, golden_file, regen=True)
    rewritten = golden_file.read_text()
    assert rewritten == fresh
# ──────────────────────────────────────────────────────────────────
# Comparateurs sémantiques
# ──────────────────────────────────────────────────────────────────
def test_assert_floats_equal_within_eps() -> None:
    """Two floats closer than ``eps`` are accepted."""
    actual = 1.0000000001
    expected = 1.0
    assert_floats_equal(actual, expected, eps=1e-9)
def test_assert_floats_equal_rejects_outside_eps() -> None:
    """Two floats further apart than ``eps`` raise ``AssertionError``."""
    actual = 1.001
    expected = 1.0
    expect_failure = pytest.raises(AssertionError, match="diff=")
    with expect_failure:
        assert_floats_equal(actual, expected, eps=1e-9)
def test_assert_set_equal_accepts_reorder() -> None:
    """The same elements in a different order are accepted."""
    shuffled = [3, 1, 2]
    ordered = [1, 2, 3]
    assert_set_equal(shuffled, ordered)
def test_assert_set_equal_rejects_missing() -> None:
    """An element absent from the actual collection raises ``AssertionError``."""
    actual = [1, 2]
    expected = [1, 2, 3]
    expect_failure = pytest.raises(AssertionError, match="manquants")
    with expect_failure:
        assert_set_equal(actual, expected)
def test_assert_set_equal_rejects_extra() -> None:
    """An unexpected extra element raises ``AssertionError``."""
    actual = [1, 2, 3, 4]
    expected = [1, 2, 3]
    expect_failure = pytest.raises(AssertionError, match="en trop")
    with expect_failure:
        assert_set_equal(actual, expected)
def test_assert_json_semantic_ignores_key_order() -> None:
    """Dicts with the same items in a different key order are accepted."""
    actual = {"b": 2, "a": 1}
    expected = {"a": 1, "b": 2}
    assert_json_semantic_equal(actual, expected)
def test_assert_json_semantic_detects_real_diff() -> None:
    """A differing value under the same key raises ``AssertionError``."""
    actual = {"a": 1}
    expected = {"a": 2}
    expect_failure = pytest.raises(AssertionError, match="JSON diffΓ©rents")
    with expect_failure:
        assert_json_semantic_equal(actual, expected)
def test_assert_json_semantic_handles_lists() -> None:
    """Lists keep their order: reordered lists raise ``AssertionError``."""
    actual = [1, 2]
    expected = [2, 1]
    expect_failure = pytest.raises(AssertionError)
    with expect_failure:
        assert_json_semantic_equal(actual, expected)
|