Picarones / tests /measurements /test_sprint61_philological_runner.py
Claude
fix(security,metrics): Sprint A14-S1 — boucher les 6 P0 du rewrite ciblé
a2bea75 unverified
Raw
History Blame
13.5 kB
"""Tests Sprint 61 — câblage backend des métriques philologiques.

Couvre :
1. Champs ``DocumentResult.philological_metrics`` et
   ``EngineReport.aggregated_philological`` posés.
2. Sérialisation conditionnelle dans ``as_dict``.
3. Libération par ``compact``.
4. ``compute_philological_metrics`` :
   - GT médiéval déclenche abbreviations + mufi
   - GT imprimé ancien déclenche early_modern
   - GT moderne déclenche modern_archives
   - GT avec numéraux romains déclenche roman_numerals
   - GT avec caractères hors Basic Latin déclenche unicode_blocks
   - GT en ASCII pur sans marqueur → ``None``
   - GT vide / None → ``None``
5. ``aggregate_philological_metrics`` :
   - Somme correcte des compteurs par module
   - Recalcul correct des scores globaux
   - Doc count cohérent
   - Aucun document avec signal → ``None``
6. Intégration runner end-to-end via fixture mock.
"""
from __future__ import annotations
from picarones.measurements.philological_hooks import (
aggregate_philological_metrics,
compute_philological_metrics,
)
from picarones.core.results import DocumentResult, EngineReport
from picarones.measurements.metrics import MetricsResult
def _make_doc(
    doc_id: str = "d1",
    gt: str = "",
    hyp: str = "",
    philological: dict | None = None,
) -> DocumentResult:
    """Build a minimal ``DocumentResult`` for the tests in this module.

    Every error-rate metric is set to zero; only the reference and
    hypothesis lengths are derived from the texts passed in.

    Args:
        doc_id: Document identifier, also used to build the image path.
        gt: Ground-truth text.
        hyp: Hypothesis text.
        philological: Value stored in ``philological_metrics``.

    Returns:
        The populated ``DocumentResult``.
    """
    zeroed_metrics = MetricsResult(
        cer=0.0,
        cer_nfc=0.0,
        cer_caseless=0.0,
        wer=0.0,
        wer_normalized=0.0,
        mer=0.0,
        wil=0.0,
        reference_length=len(gt),
        hypothesis_length=len(hyp),
    )
    return DocumentResult(
        doc_id=doc_id,
        image_path=f"/tmp/{doc_id}.png",
        ground_truth=gt,
        hypothesis=hyp,
        metrics=zeroed_metrics,
        duration_seconds=0.1,
        philological_metrics=philological,
    )
# ──────────────────────────────────────────────────────────────────────────
# 1. Champs posés sur DocumentResult / EngineReport
# ──────────────────────────────────────────────────────────────────────────
class TestFields:
    """The philological fields exist, default to ``None`` and accept a dict."""

    @staticmethod
    def _report(**extra) -> EngineReport:
        """Return an ``EngineReport`` without documents, plus ``extra`` kwargs."""
        return EngineReport(
            engine_name="test",
            engine_version="1.0",
            engine_config={},
            document_results=[],
            **extra,
        )

    def test_document_result_default_none(self) -> None:
        assert _make_doc().philological_metrics is None

    def test_document_result_accepts_dict(self) -> None:
        doc = _make_doc(philological={"mufi": {"coverage": 0.9}})
        expected = {"mufi": {"coverage": 0.9}}
        assert doc.philological_metrics == expected

    def test_engine_report_default_none(self) -> None:
        assert self._report().aggregated_philological is None

    def test_engine_report_accepts_dict(self) -> None:
        report = self._report(
            aggregated_philological={"mufi": {"coverage": 0.9}},
        )
        expected = {"mufi": {"coverage": 0.9}}
        assert report.aggregated_philological == expected
# ──────────────────────────────────────────────────────────────────────────
# 2. Sérialisation as_dict
# ──────────────────────────────────────────────────────────────────────────
class TestSerialization:
    """``as_dict`` emits the philological keys only when a value is set."""

    @staticmethod
    def _report(**extra) -> EngineReport:
        """Return an ``EngineReport`` without documents, plus ``extra`` kwargs."""
        return EngineReport(
            engine_name="t",
            engine_version="1",
            engine_config={},
            document_results=[],
            **extra,
        )

    def test_as_dict_omits_none(self) -> None:
        serialized = _make_doc().as_dict()
        assert "philological_metrics" not in serialized

    def test_as_dict_includes_when_present(self) -> None:
        doc = _make_doc(philological={"mufi": {"coverage": 1.0}})
        serialized = doc.as_dict()
        assert serialized["philological_metrics"] == {"mufi": {"coverage": 1.0}}

    def test_engine_report_as_dict_omits_none(self) -> None:
        serialized = self._report().as_dict()
        assert "aggregated_philological" not in serialized

    def test_engine_report_as_dict_includes_when_present(self) -> None:
        report = self._report(
            aggregated_philological={"mufi": {"coverage": 0.5}},
        )
        serialized = report.as_dict()
        assert serialized["aggregated_philological"] == {"mufi": {"coverage": 0.5}}
# ──────────────────────────────────────────────────────────────────────────
# 3. Libération par compact()
# ──────────────────────────────────────────────────────────────────────────
class TestCompact:
    """``compact(drop_analyses=True)`` resets ``philological_metrics``."""

    def test_compact_clears_philological(self) -> None:
        doc = _make_doc(philological={"mufi": {"coverage": 1.0}})
        # Sprint A14-S1: dropping analyses is opt-in, hence the explicit flag.
        doc.compact(drop_analyses=True)
        assert doc.philological_metrics is None
# ──────────────────────────────────────────────────────────────────────────
# 4. compute_philological_metrics — adaptive masking
# ──────────────────────────────────────────────────────────────────────────
class TestComputeAdaptive:
    """``compute_philological_metrics`` reports only the triggered modules.

    Each test passes the same text as ground truth and hypothesis and
    checks which module keys appear in the result, or that the result is
    ``None`` when the text carries no philological signal.

    The inputs are literal UTF-8 text. They were previously stored as
    mojibake (UTF-8 bytes decoded with a single-byte code page), so the
    intended characters never reached the function under test.
    """

    def test_medieval_triggers_abbreviations_and_mufi(self) -> None:
        # U+A751 (p with stroke) plus thorn (U+00FE) and ash (U+00E6).
        gt = "fait en lan ꝑ regem þæt"
        m = compute_philological_metrics(gt, gt)
        assert m is not None
        assert "abbreviations" in m
        assert "mufi" in m

    def test_early_modern_triggers_typography(self) -> None:
        # Long s (U+017F), ampersand and a-tilde (U+00E3).
        gt = "le ſerpent finement & ã"
        m = compute_philological_metrics(gt, gt)
        assert m is not None
        assert "early_modern" in m

    def test_modern_archives_triggers_module(self) -> None:
        gt = "Mme Dupont au bd Voltaire vol. II"
        m = compute_philological_metrics(gt, gt)
        assert m is not None
        assert "modern_archives" in m

    def test_roman_numerals_triggers_module(self) -> None:
        gt = "Louis XIV mourut en MDCCXV"
        m = compute_philological_metrics(gt, gt)
        assert m is not None
        assert "roman_numerals" in m

    def test_unicode_blocks_triggered_only_outside_basic_latin(self) -> None:
        # Pure ASCII without any marker: unicode_blocks is omitted (a
        # Basic-Latin-only breakdown is trivial), so nothing is reported.
        m = compute_philological_metrics("hello world", "hello world")
        assert m is None

    def test_unicode_blocks_triggered_with_diacritics(self) -> None:
        # Accented letters outside Basic Latin: unicode_blocks is included.
        gt = "café à é ô"
        m = compute_philological_metrics(gt, gt)
        assert m is not None
        assert "unicode_blocks" in m

    def test_empty_returns_none(self) -> None:
        assert compute_philological_metrics("", "") is None
        assert compute_philological_metrics(None, None) is None

    def test_no_signal_returns_none(self) -> None:
        # Pure Basic Latin without any philological marker.
        m = compute_philological_metrics("hello", "hello")
        assert m is None
# ──────────────────────────────────────────────────────────────────────────
# 5. aggregate_philological_metrics
# ──────────────────────────────────────────────────────────────────────────
class TestAggregation:
    """``aggregate_philological_metrics`` merges per-document results.

    Per-document inputs are produced with ``compute_philological_metrics``
    and the aggregate is checked for module presence, summed counters,
    recomputed global scores and per-module document counts.
    """

    def test_no_data_returns_none(self) -> None:
        assert aggregate_philological_metrics([]) is None
        assert aggregate_philological_metrics([None, None]) is None

    def test_aggregates_only_present_modules(self) -> None:
        # Doc 1 carries abbreviation marks, doc 2 only a Roman numeral.
        d1 = compute_philological_metrics("ꝑ ꝓ ꝗ", "per pro qui")
        d2 = compute_philological_metrics("Louis XIV", "Louis 14")
        agg = aggregate_philological_metrics([d1, d2])
        assert agg is not None
        # Only the abbreviations and roman_numerals modules are asserted;
        # the original note also expected a mufi entry for doc 1, but that
        # is not checked here.
        assert "abbreviations" in agg
        assert "roman_numerals" in agg
        # Each module was triggered by exactly one document.
        assert agg["abbreviations"]["doc_count"] == 1
        assert agg["roman_numerals"]["doc_count"] == 1

    def test_aggregation_sums_counters(self) -> None:
        # Three identical docs; the text holds 3 MUFI characters:
        # thorn (U+00FE), ash (U+00E6) and p with stroke (U+A751).
        gt = "þæt ꝑ"
        per_doc = [compute_philological_metrics(gt, gt) for _ in range(3)]
        agg = aggregate_philological_metrics(per_doc)
        assert agg is not None
        assert "mufi" in agg
        # 3 characters x 3 documents = 9.
        assert agg["mufi"]["n_mufi_chars_reference"] == 9
        assert agg["mufi"]["n_mufi_chars_preserved"] == 9
        assert agg["mufi"]["coverage"] == 1.0
        assert agg["mufi"]["doc_count"] == 3

    def test_aggregation_recomputes_global_score(self) -> None:
        # Doc 1 preserves 100 %, doc 2 preserves 0 %: the global score is
        # recomputed from the summed counters, not averaged per document.
        d1 = compute_philological_metrics("XIV", "XIV")
        d2 = compute_philological_metrics("V", "perdu")
        agg = aggregate_philological_metrics([d1, d2])
        # Fail with a clear assertion rather than a TypeError on subscript.
        assert agg is not None
        roman = agg["roman_numerals"]
        # Doc 1: one numeral strictly preserved (XIV).
        # Doc 2: one numeral lost (V).
        # Total: 2 numerals, 1 strict match, hence 0.5.
        assert roman["n_numerals_reference"] == 2
        assert roman["global_strict_score"] == 0.5

    def test_per_category_aggregation_modern_archives(self) -> None:
        # Two docs hitting different modern_archives categories.
        d1 = compute_philological_metrics("Mme bd", "Mme bd")
        d2 = compute_philological_metrics("vol. p.", "vol. p.")
        agg = aggregate_philological_metrics([d1, d2])
        # Fail with a clear assertion rather than a TypeError on subscript.
        assert agg is not None
        per_cat = agg["modern_archives"]["per_category"]
        # Doc 1: civility_titles + address; doc 2: bibliographic.
        assert "civility_titles" in per_cat
        assert "address" in per_cat
        assert "bibliographic" in per_cat
        for cat in per_cat.values():
            assert cat["strict_score"] == 1.0
# ──────────────────────────────────────────────────────────────────────────
# 6. Intégration end-to-end (mock léger sur le runner)
# ──────────────────────────────────────────────────────────────────────────
class TestRunnerIntegration:
    """Check what ``_compute_document_result`` stores in ``philological_metrics``.

    The field must be populated when the ground truth carries
    philological signal and left as ``None`` for plain text.
    """

    @staticmethod
    def _run_with_mock_engine(tmp_path, gt: str):
        """Run ``_compute_document_result`` with an engine result echoing ``gt``."""
        from picarones.measurements.runner import _compute_document_result
        from picarones.engines.base import EngineResult

        # Placeholder image with no content. Per the original note, the
        # image_quality step is expected to fail gracefully on it; its
        # outcome is not inspected by these tests.
        image_file = tmp_path / "doc.png"
        image_file.write_bytes(b"")
        image_path = str(image_file)

        engine_output = EngineResult(
            engine_name="mock",
            image_path=image_path,
            text=gt,
            duration_seconds=0.1,
            error=None,
        )
        return _compute_document_result(
            doc_id="d1",
            image_path=image_path,
            ground_truth=gt,
            ocr_result=engine_output,
            char_exclude=None,
        )

    def test_runner_attaches_philological(self, tmp_path) -> None:
        result = self._run_with_mock_engine(tmp_path, "ꝑ regem mcclxxxij")
        metrics = result.philological_metrics
        assert metrics is not None
        assert "abbreviations" in metrics
        assert "roman_numerals" in metrics

    def test_runner_omits_philological_on_plain_text(self, tmp_path) -> None:
        # Pure ASCII text without any philological marker.
        result = self._run_with_mock_engine(
            tmp_path, "hello world without any markers"
        )
        assert result.philological_metrics is None