Picarones / tests /evaluation /views /test_sprint_a14_s15_alto_view.py
Claude
refactor: kill bricolage S49-S57 — fixes structurels (audit cleanup)
88add17 unverified
raw
history blame
14.2 kB
"""Sprint A14-S15 — AltoView (vue canonique 2).
6 cas couvrant la fidélité documentaire ALTO + le pattern
d'omission explicite des pipelines qui ne produisent pas d'ALTO.
"""
from __future__ import annotations
import pytest
from picarones.domain import (
Artifact,
ArtifactType,
MetricSpec,
)
from picarones.evaluation.metrics.alto_structural import (
compute_alto_validity,
compute_line_count_ratio,
compute_word_box_coverage,
)
from picarones.evaluation.projectors import ProjectorRegistry
from picarones.evaluation.registry import MetricRegistry
from picarones.evaluation.views import (
DEFAULT_ALTO_METRICS,
DefaultEvaluationViewExecutor,
build_alto_view,
build_text_view,
)
from picarones.formats.alto.types import (
AltoBBox,
AltoDocument,
AltoLine,
AltoPage,
AltoString,
AltoTextBlock,
)
# ──────────────────────────────────────────────────────────────────────
# Fixtures ALTO
# ──────────────────────────────────────────────────────────────────────
def _line(*words: str, with_bbox: bool = True) -> AltoLine:
    """Build an ``AltoLine`` holding one ``AltoString`` per word.

    Args:
        *words: Text content of each string, in line order.
        with_bbox: When true (the default) every string receives a
            placeholder 10x10 box at position (0, 0); when false the
            ``bbox`` of every string is ``None``.

    Returns:
        An ``AltoLine`` whose ``strings`` is a tuple with one entry per
        word.
    """
    built = []
    for word in words:
        # A fresh box object is created for each word.
        box = AltoBBox(hpos=0, vpos=0, width=10, height=10) if with_bbox else None
        built.append(AltoString(content=word, bbox=box))
    return AltoLine(strings=tuple(built))
def _doc(*lines: AltoLine, n_blocks: int = 1) -> AltoDocument:
    """Build a single-page ``AltoDocument`` from the given lines.

    Args:
        *lines: Lines placed in every text block of the page.
        n_blocks: Number of text blocks on the page. Each block receives
            the *same* full tuple of lines (they are repeated, not
            split), which keeps the fixture simple.

    Returns:
        An ``AltoDocument`` containing exactly one ``AltoPage``.
    """
    if n_blocks == 1:
        blocks = (AltoTextBlock(lines=lines),)
    else:
        # Every block carries the identical ``lines`` tuple.
        blocks = tuple(AltoTextBlock(lines=lines) for _ in range(n_blocks))
    page = AltoPage(blocks=blocks)
    return AltoDocument(pages=(page,))
def _empty_doc() -> AltoDocument:
    """Return an ``AltoDocument`` constructed with no arguments.

    Used by the tests below as the "empty document" case.
    """
    empty = AltoDocument()
    return empty
# ──────────────────────────────────────────────────────────────────────
# Métriques individuelles
# ──────────────────────────────────────────────────────────────────────
class TestAltoMetrics:
    """Direct checks of the three ALTO structural metric functions."""

    def test_validity_full_doc(self) -> None:
        """A populated document compared with itself is expected to score 1.0."""
        doc = _doc(_line("a", "b"))
        score = compute_alto_validity(doc, doc)
        assert score == 1.0

    def test_validity_empty_doc(self) -> None:
        """An empty document as second argument is expected to score 0.0."""
        populated = _doc(_line("a"))
        score = compute_alto_validity(populated, _empty_doc())
        assert score == 0.0

    def test_line_count_ratio_equal(self) -> None:
        """Two documents with three lines each are expected to give 1.0."""
        first = _doc(*(_line(word) for word in "abc"))
        second = _doc(*(_line(word) for word in "xyz"))
        ratio = compute_line_count_ratio(first, second)
        assert ratio == 1.0

    def test_line_count_ratio_partial(self) -> None:
        """Four lines in the first document, two in the second: expect 0.5."""
        four_lines = _doc(*(_line(word) for word in "abcd"))
        two_lines = _doc(*(_line(word) for word in "xy"))
        ratio = compute_line_count_ratio(four_lines, two_lines)
        assert ratio == 0.5

    def test_line_count_ratio_both_empty(self) -> None:
        """Two empty documents are expected to give a ratio of 1.0."""
        ratio = compute_line_count_ratio(_empty_doc(), _empty_doc())
        assert ratio == 1.0

    def test_word_box_coverage_full(self) -> None:
        """When every word has a box, coverage is expected to be 1.0."""
        doc = _doc(_line("a", "b", "c", with_bbox=True))
        coverage = compute_word_box_coverage(doc, doc)
        assert coverage == 1.0

    def test_word_box_coverage_partial(self) -> None:
        """Two boxed words out of three: coverage is expected to be 2/3."""
        # Built by hand because ``_line`` applies the same bbox choice to
        # every word; here the third word has no box.
        boxed_a = AltoString(content="a", bbox=AltoBBox(hpos=0, vpos=0, width=1, height=1))
        boxed_b = AltoString(content="b", bbox=AltoBBox(hpos=0, vpos=0, width=1, height=1))
        unboxed_c = AltoString(content="c", bbox=None)
        mixed_line = AltoLine(strings=(boxed_a, boxed_b, unboxed_c))
        block = AltoTextBlock(lines=(mixed_line,))
        doc = AltoDocument(pages=(AltoPage(blocks=(block,)),))
        expected = 2 / 3
        coverage = compute_word_box_coverage(doc, doc)
        assert abs(coverage - expected) < 1e-9

    def test_word_box_coverage_no_bbox(self) -> None:
        """When no word has a box, coverage is expected to be 0.0."""
        doc = _doc(_line("a", "b", with_bbox=False))
        coverage = compute_word_box_coverage(doc, doc)
        assert coverage == 0.0
# ──────────────────────────────────────────────────────────────────────
# AltoView shape
# ──────────────────────────────────────────────────────────────────────
class TestAltoViewShape:
    """Checks on the static shape of the view returned by ``build_alto_view``."""

    def test_default_view_accepts_only_alto_xml(self) -> None:
        """Case 1 - the ALTO view accepts ALTO_XML and rejects the other types."""
        view = build_alto_view()
        assert view.accepts(ArtifactType.ALTO_XML)
        rejected_types = (
            ArtifactType.RAW_TEXT,
            ArtifactType.PAGE_XML,
            ArtifactType.CANONICAL_DOCUMENT,
            ArtifactType.IMAGE,
        )
        for artifact_type in rejected_types:
            assert not view.accepts(artifact_type)

    def test_default_metrics(self) -> None:
        """The view lists ``DEFAULT_ALTO_METRICS``, including the three ALTO metrics."""
        view = build_alto_view()
        assert view.metric_names == DEFAULT_ALTO_METRICS
        for expected_name in (
            "alto_validity",
            "alto_line_count_ratio",
            "alto_word_box_coverage",
        ):
            assert expected_name in view.metric_names

    def test_no_projection(self) -> None:
        """The view declares no projection, globally or for ALTO_XML sources."""
        view = build_alto_view()
        assert view.projection is None
        # No per-source-type projection either.
        assert view.projection_for(ArtifactType.ALTO_XML) is None

    def test_warnings_signal_omission_pattern(self) -> None:
        """The view's warnings mention the omission pattern ("OMIS" or "omis")."""
        view = build_alto_view()
        joined = " ".join(view.warnings)
        assert any(marker in joined for marker in ("OMIS", "omis"))
# ──────────────────────────────────────────────────────────────────────
# AltoView avec executor
# ──────────────────────────────────────────────────────────────────────
def _build_alto_executor(payloads: dict[str, AltoDocument]) -> DefaultEvaluationViewExecutor:
    """Assemble an executor wired with the three ALTO metrics and an in-memory loader.

    Args:
        payloads: Mapping from artifact id to the payload the loader
            should hand back for that artifact.

    Returns:
        The executor produced by
        ``DefaultEvaluationViewExecutor.from_registries`` from a metric
        registry holding ``alto_validity``, ``alto_line_count_ratio`` and
        ``alto_word_box_coverage``, an empty ``ProjectorRegistry`` and
        the loader defined below.
    """
    alto_pair = (ArtifactType.ALTO_XML, ArtifactType.ALTO_XML)
    registry = MetricRegistry()
    # Registration order matches the order of the names below.
    for metric_name, metric_fn in (
        ("alto_validity", compute_alto_validity),
        ("alto_line_count_ratio", compute_line_count_ratio),
        ("alto_word_box_coverage", compute_word_box_coverage),
    ):
        spec = MetricSpec(
            name=metric_name,
            input_types=alto_pair,
            higher_is_better=True,
        )
        registry.register(spec, metric_fn)

    # The ALTO view needs no projector, so the registry stays empty.
    projector_registry = ProjectorRegistry()

    def loader(art: Artifact) -> AltoDocument:
        """Return the payload stored under ``art.id``; raise ``KeyError`` if absent."""
        if art.id in payloads:
            return payloads[art.id]
        raise KeyError(f"missing payload {art.id}")

    return DefaultEvaluationViewExecutor.from_registries(registry, projector_registry, loader)
class TestAltoViewWithExecutor:
    """End-to-end checks: the ALTO view run through the in-memory executor."""

    def test_perfect_alto_yields_all_ones(self) -> None:
        """Case 2 - candidate identical to the ground truth: every metric is 1.0."""
        reference = _doc(_line("a", "b"), _line("c", "d"))
        # The same document object serves as ground truth and candidate.
        executor = _build_alto_executor({"gt": reference, "cand": reference})
        view = build_alto_view()
        gt_artifact = Artifact(id="gt", document_id="d", type=ArtifactType.ALTO_XML)
        cand_artifact = Artifact(id="cand", document_id="d", type=ArtifactType.ALTO_XML)

        outcome = executor.evaluate(view, cand_artifact, gt_artifact, pipeline_name="test")

        for metric_name in (
            "alto_validity",
            "alto_line_count_ratio",
            "alto_word_box_coverage",
        ):
            assert outcome.metric_values[metric_name] == 1.0
        assert outcome.failed_metrics == {}

    def test_partial_quality_alto(self) -> None:
        """Case 3 - candidate with fewer lines: line ratio < 1, other metrics unchanged."""
        reference = _doc(_line("a"), _line("b"), _line("c"), _line("d"))  # 4 lines
        candidate = _doc(_line("x"), _line("y"))  # 2 lines
        executor = _build_alto_executor({"gt": reference, "cand": candidate})
        view = build_alto_view()
        gt_artifact = Artifact(id="gt", document_id="d", type=ArtifactType.ALTO_XML)
        cand_artifact = Artifact(id="cand", document_id="d", type=ArtifactType.ALTO_XML)

        outcome = executor.evaluate(view, cand_artifact, gt_artifact, pipeline_name="test")
        values = outcome.metric_values

        # Validity is still expected to be 1.0 for the shorter candidate.
        assert values["alto_validity"] == 1.0
        assert values["alto_line_count_ratio"] == 0.5
        assert values["alto_word_box_coverage"] == 1.0
# ──────────────────────────────────────────────────────────────────────
# Pattern d'omission : pipelines sans ALTO ne sont PAS dans AltoView
# ──────────────────────────────────────────────────────────────────────
class TestOmissionPattern:
    """The caller (application service) must OMIT pipelines that do not
    produce ALTO_XML instead of giving them a fake score of 0.

    These tests demonstrate the recommended pattern.
    """

    def test_caller_filters_pipelines_by_view_acceptance(self) -> None:
        """Case 4 - pattern: iterate over the candidates and keep only
        those whose artifact type the view accepts."""
        view = build_alto_view()
        # Three simulated pipelines with their main output type.
        candidates = [
            ("tesseract_text", ArtifactType.RAW_TEXT),  # no ALTO
            ("ocr_llm_alto", ArtifactType.ALTO_XML),  # ALTO
            ("vlm_alto_reconstructed", ArtifactType.ALTO_XML),  # ALTO
        ]

        # The caller partitions the candidates, preserving input order.
        eligible = []
        omitted = []
        for entry in candidates:
            _, artifact_type = entry
            if view.accepts(artifact_type):
                eligible.append(entry)
            else:
                omitted.append(entry)

        assert len(eligible) == 2
        for expected in (
            ("ocr_llm_alto", ArtifactType.ALTO_XML),
            ("vlm_alto_reconstructed", ArtifactType.ALTO_XML),
        ):
            assert expected in eligible
        assert len(omitted) == 1
        omitted_name = omitted[0][0]
        assert omitted_name == "tesseract_text"

    def test_executor_raises_value_error_if_caller_doesnt_filter(self) -> None:
        """Case 5 - safety net: when the caller did not filter and hands a
        RAW_TEXT candidate to the ALTO view, ``executor.evaluate`` is
        expected to raise an explicit ``ValueError``."""
        executor = _build_alto_executor(
            {"cand": "this is text", "gt": _doc(_line("a"))}
        )
        view = build_alto_view()
        text_candidate = Artifact(id="cand", document_id="d", type=ArtifactType.RAW_TEXT)
        ground_truth = Artifact(id="gt", document_id="d", type=ArtifactType.ALTO_XML)
        with pytest.raises(ValueError, match="n'accepte pas"):
            executor.evaluate(view, text_candidate, ground_truth, pipeline_name="test")
# ──────────────────────────────────────────────────────────────────────
# Cas central BnF : TextView + AltoView complémentaires
# ──────────────────────────────────────────────────────────────────────
class TestBnFDualViewUsage:
    """Show that the target BnF report can present TextView AND AltoView
    over one list of pipelines, with each view keeping its own eligible
    subset.

    Pipeline 1: Tesseract raw text -> present in TextView, OMITTED from AltoView.
    Pipeline 2: OCR+LLM with ALTO -> present in BOTH.
    Pipeline 3: VLM with reconstructed ALTO -> present in BOTH.

    The test does NOT run a full evaluation. It only checks the
    **pattern**: for each view, which pipelines are eligible.
    """

    def test_two_views_select_different_pipeline_sets(self) -> None:
        """Case 6 - definition of done for S15:

        * Tesseract -> omitted from AltoView, present in TextView
        * OCR+LLM+ALTO -> in both
        * VLM+ALTO -> in both
        """
        text_view = build_text_view()
        alto_view = build_alto_view()
        pipelines = [
            ("tesseract", ArtifactType.RAW_TEXT),
            ("ocr_llm_alto", ArtifactType.ALTO_XML),
            ("vlm_alto", ArtifactType.ALTO_XML),
        ]

        def eligible_names(view):
            """Names of the pipelines whose output type ``view`` accepts."""
            return {
                name
                for name, artifact_type in pipelines
                if view.accepts(artifact_type)
            }

        text_eligible = eligible_names(text_view)
        alto_eligible = eligible_names(alto_view)

        # TextView accepts all three.
        assert text_eligible == {"tesseract", "ocr_llm_alto", "vlm_alto"}
        # AltoView omits Tesseract and keeps the two ALTO pipelines.
        assert alto_eligible == {"ocr_llm_alto", "vlm_alto"}
        assert "tesseract" not in alto_eligible
        # The pipelines present in AltoView are a SUBSET of those present
        # in TextView (consistency: if a pipeline produces ALTO, its text
        # can be extracted too).
        assert alto_eligible <= text_eligible