Picarones / tests /measurements /test_sprint54_layout.py
Claude
test: réorganiser les 110 fichiers tests/test_*.py par cercle architectural
d109222 unverified
Raw
History Blame
12.2 kB
"""Sprint 54 tests — layout F1 per region type.

Covers:

1. ``Region`` validation (invalid bbox → ValueError, computed area).
2. ``_iou_bbox`` arithmetic (identical, disjoint, partial overlap).
3. **Standard cases**:
   - Perfect layout → F1 = 1
   - Wrong type on the same bbox → 0 TP for that type
   - Hallucination (invented region) → FP
   - Missed region → FN
   - IoU below the threshold → no match
4. **Multi-type**: the per_type breakdown is consistent with the global
   counts.
5. **Greedy alignment**: 2 hypotheses for 1 GT → the best one wins, the
   other becomes an FP.
6. **Degenerate cases**: empty lists, None, dict inputs, custom IoU.
7. **Type normalization**: region type matching is case-insensitive.
8. ``layout_f1`` shortcut, equivalent to
   ``compute_layout_metrics(...)["global"]["f1"]``.
"""
from __future__ import annotations
import pytest
from picarones.measurements.layout import (
Region,
_iou_bbox,
compute_layout_metrics,
layout_f1,
)
# ──────────────────────────────────────────────────────────────────────────
# 1. Region validation
# ──────────────────────────────────────────────────────────────────────────
class TestRegionDataclass:
    """Construction-time checks on ``Region``."""

    def test_valid_construction(self) -> None:
        """A well-formed region keeps its id and reports an area of 20 000."""
        region = Region("r1", "TextRegion", (0, 0, 100, 200))
        expected_area = 20_000
        assert region.id == "r1"
        assert region.area == expected_area

    def test_invalid_bbox_raises(self) -> None:
        """Boxes with a zero or negative extent are rejected with ``ValueError``."""
        # Third component is zero in the first box, fourth is negative in the
        # second (presumably width and height — confirm against ``Region``).
        zero_extent = (0, 0, 0, 100)
        negative_extent = (0, 0, 100, -5)
        for bad_bbox in (zero_extent, negative_extent):
            with pytest.raises(ValueError, match="bbox invalide"):
                Region("r1", "TextRegion", bad_bbox)
# ──────────────────────────────────────────────────────────────────────────
# 2. IoU bbox
# ──────────────────────────────────────────────────────────────────────────
class TestIouBbox:
    """Checks ``_iou_bbox`` on identical, disjoint and partially overlapping boxes."""

    def test_identical_bbox_iou_one(self) -> None:
        """A region compared with itself yields an IoU of 1."""
        box = Region("a", "X", (0, 0, 100, 100))
        iou = _iou_bbox(box, box)
        assert iou == pytest.approx(1.0)

    def test_disjoint_bbox_iou_zero(self) -> None:
        """Two boxes that do not touch yield an IoU of exactly 0."""
        first = Region("a", "X", (0, 0, 100, 100))
        second = Region("b", "X", (200, 200, 50, 50))
        iou = _iou_bbox(first, second)
        assert iou == 0.0

    def test_partial_overlap(self) -> None:
        """Two 100x100 boxes offset by (50, 50) share a 50x50 patch."""
        # first = [0,0,100,100], second = [50,50,100,100]
        # intersection: 50x50 = 2500
        # union: 10000 + 10000 - 2500 = 17500
        # iou = 2500/17500 ≈ 0.143
        first = Region("a", "X", (0, 0, 100, 100))
        second = Region("b", "X", (50, 50, 100, 100))
        expected_iou = 2500 / 17500
        assert _iou_bbox(first, second) == pytest.approx(expected_iou)
# ──────────────────────────────────────────────────────────────────────────
# 3. Cas standards
# ──────────────────────────────────────────────────────────────────────────
class TestStandardCases:
    """Everyday scenarios: perfect match, wrong type, extra/missing regions, IoU threshold."""

    def test_perfect_layout(self) -> None:
        """A hypothesis identical to the reference scores F1 = 1 with no errors."""
        reference = [
            Region("r1", "TextRegion", (0, 0, 100, 100)),
            Region("r2", "MarginNote", (200, 0, 50, 100)),
        ]
        hypothesis = list(reference)
        metrics = compute_layout_metrics(reference, hypothesis)
        assert metrics["global"]["f1"] == pytest.approx(1.0)
        assert metrics["true_positives"] == 2
        assert metrics["false_positives"] == 0
        assert metrics["false_negatives"] == 0

    def test_wrong_type_breaks_match(self) -> None:
        """Same bbox but a different type: no pairing, so one FN and one FP."""
        bbox = (0, 0, 100, 100)
        reference = [Region("r1", "TextRegion", bbox)]
        hypothesis = [Region("r1", "MarginNote", bbox)]
        metrics = compute_layout_metrics(reference, hypothesis)
        assert metrics["true_positives"] == 0
        assert metrics["false_negatives"] == 1
        assert metrics["false_positives"] == 1

    def test_hallucinated_region_is_fp(self) -> None:
        """An extra hypothesis region with no counterpart is reported as an FP."""
        reference = [Region("r1", "TextRegion", (0, 0, 100, 100))]
        matching = Region("r1", "TextRegion", (0, 0, 100, 100))
        invented = Region("rX", "TextRegion", (500, 500, 50, 50))
        metrics = compute_layout_metrics(reference, [matching, invented])
        assert metrics["true_positives"] == 1
        assert metrics["false_positives"] == 1
        first_hallucination = metrics["hallucinated_regions"][0]
        assert first_hallucination["id"] == "rX"

    def test_missing_region_is_fn(self) -> None:
        """A reference region absent from the hypothesis is reported as an FN."""
        found = Region("r1", "TextRegion", (0, 0, 100, 100))
        missed = Region("r2", "TextRegion", (200, 0, 100, 100))
        hypothesis = [Region("r1", "TextRegion", (0, 0, 100, 100))]
        metrics = compute_layout_metrics([found, missed], hypothesis)
        assert metrics["true_positives"] == 1
        assert metrics["false_negatives"] == 1
        first_miss = metrics["missed_regions"][0]
        assert first_miss["id"] == "r2"

    def test_iou_below_threshold_no_match(self) -> None:
        """An overlap of IoU ≈ 0.14 is not enough at a 0.5 threshold."""
        # Overlap IoU = 2500/17500 ≈ 0.14 < 0.5
        reference = [Region("r1", "TextRegion", (0, 0, 100, 100))]
        hypothesis = [Region("r1", "TextRegion", (50, 50, 100, 100))]
        metrics = compute_layout_metrics(reference, hypothesis, iou_threshold=0.5)
        assert metrics["true_positives"] == 0

    def test_iou_above_threshold_matches(self) -> None:
        """The same pair is rejected at threshold 0.5 and accepted at 0.4."""
        # Overlap IoU = 6400/13600 ≈ 0.47: below 0.5 but above 0.4
        reference = [Region("r1", "TextRegion", (0, 0, 100, 100))]
        hypothesis = [Region("r1", "TextRegion", (20, 20, 100, 100))]
        strict = compute_layout_metrics(reference, hypothesis, iou_threshold=0.5)
        loose = compute_layout_metrics(reference, hypothesis, iou_threshold=0.4)
        assert strict["true_positives"] == 0
        assert loose["true_positives"] == 1
# ──────────────────────────────────────────────────────────────────────────
# 4. Multi-type breakdown
# ──────────────────────────────────────────────────────────────────────────
class TestPerTypeBreakdown:
    """Per-type F1 breakdown on a layout mixing several region types."""

    def test_per_type_metrics(self) -> None:
        """Each type gets its own F1 and the global counts add up.

        Scenario: two of the four reference regions are found (one
        ``TextRegion``, the ``MarginNote``), two are missed (the second
        ``TextRegion`` and the ``Header``), and the hypothesis adds one
        ``Footer`` that has no reference counterpart.
        """
        ref = [
            Region("r1", "TextRegion", (0, 0, 100, 100)),
            Region("r2", "TextRegion", (200, 0, 100, 100)),
            Region("r3", "MarginNote", (0, 200, 100, 50)),
            Region("r4", "Header", (0, 300, 200, 30)),
        ]
        hyp = [
            Region("r1", "TextRegion", (0, 0, 100, 100)),  # match
            # r2 missing → FN TextRegion
            Region("r3", "MarginNote", (0, 200, 100, 50)),  # match
            Region("rX", "Footer", (0, 400, 200, 30)),  # FP Footer
            # r4 Header missing → FN Header
        ]
        m = compute_layout_metrics(ref, hyp)
        per_type = m["per_type"]

        # TextRegion: 1 TP + 1 FN → P=1, R=0.5, F1=2/3
        assert per_type["TextRegion"]["f1"] == pytest.approx(2 / 3)
        # MarginNote: 1 TP, perfect
        assert per_type["MarginNote"]["f1"] == pytest.approx(1.0)
        # Header: 1 FN → P=0, R=0, F1=0
        assert per_type["Header"]["f1"] == 0.0
        # Footer: 1 FP → P=0, R=0
        assert per_type["Footer"]["f1"] == 0.0

        # The breakdown must agree with the global counters:
        # 2 matches (r1, r3), 1 invented region (rX), 2 missed (r2, r4).
        assert m["true_positives"] == 2
        assert m["false_positives"] == 1
        assert m["false_negatives"] == 2
# ──────────────────────────────────────────────────────────────────────────
# 5. Alignement greedy
# ──────────────────────────────────────────────────────────────────────────
class TestGreedyAlignment:
    """Two hypotheses competing for a single reference region."""

    def test_best_iou_wins(self) -> None:
        """The hypothesis with the higher IoU is paired; the other becomes an FP.

        Both hypotheses must clear ``iou_threshold`` for the test to
        exercise the competition. The weak box therefore overlaps the
        reference with IoU = 2500/17500 ≈ 0.143, above the 0.1 threshold.
        A box at (60, 60, 100, 100) would only reach 1600/18400 ≈ 0.087,
        would never be a candidate, and the test would pass even with a
        naive first-match aligner.
        """
        # GT: 1 region. Hypothesis: 2 regions, one perfect, the other
        # weakly overlapping. The weak one is listed first on purpose so
        # that a "first candidate wins" implementation would fail here.
        ref = [Region("r1", "TextRegion", (0, 0, 100, 100))]
        hyp = [
            Region("h_weak", "TextRegion", (50, 50, 100, 100)),  # IoU ≈ 0.143
            Region("h_strong", "TextRegion", (0, 0, 100, 100)),  # IoU = 1.0
        ]
        m = compute_layout_metrics(ref, hyp, iou_threshold=0.1)
        # The strong one wins, the weak one becomes an FP.
        assert m["true_positives"] == 1
        assert m["false_positives"] == 1
        assert m["hallucinated_regions"][0]["id"] == "h_weak"
# ──────────────────────────────────────────────────────────────────────────
# 6. Cas dΓ©gΓ©nΓ©rΓ©s
# ──────────────────────────────────────────────────────────────────────────
class TestDegenerateCases:
    """Empty, ``None`` and dict-shaped inputs."""

    def test_both_empty(self) -> None:
        """Two empty lists give a global F1 of 0 and an empty per-type map."""
        metrics = compute_layout_metrics([], [])
        assert metrics["global"]["f1"] == 0.0
        assert metrics["per_type"] == {}

    def test_only_reference_empty(self) -> None:
        """With no reference, the single hypothesis region is an FP."""
        lone_hypothesis = Region("r1", "X", (0, 0, 10, 10))
        metrics = compute_layout_metrics([], [lone_hypothesis])
        assert metrics["false_positives"] == 1
        assert metrics["true_positives"] == 0

    def test_only_hypothesis_empty(self) -> None:
        """With no hypothesis, the single reference region is an FN."""
        lone_reference = Region("r1", "X", (0, 0, 10, 10))
        metrics = compute_layout_metrics([lone_reference], [])
        assert metrics["false_negatives"] == 1
        assert metrics["true_positives"] == 0

    def test_none_inputs(self) -> None:
        """``None`` for both arguments is accepted and gives a global F1 of 0."""
        metrics = compute_layout_metrics(None, None)
        assert metrics["global"]["f1"] == 0.0

    def test_dict_input_coerced(self) -> None:
        """Plain dicts shaped ``{id, type, bbox}`` are accepted in place of ``Region``."""
        # Callers may pass dicts instead of Region objects; each list gets
        # its own copy so the two sides never share a mutable object.
        spec = {"id": "r1", "type": "TextRegion", "bbox": (0, 0, 100, 100)}
        reference = [dict(spec)]
        hypothesis = [dict(spec)]
        assert layout_f1(reference, hypothesis) == pytest.approx(1.0)
# ──────────────────────────────────────────────────────────────────────────
# 7. Type matching case-insensitive
# ──────────────────────────────────────────────────────────────────────────
class TestTypeNormalization:
    """Region type comparison ignores letter case."""

    def test_type_case_insensitive(self) -> None:
        """``TextRegion`` and ``textregion`` on the same bbox are a perfect match."""
        bbox = (0, 0, 100, 100)
        reference = [Region("r1", "TextRegion", bbox)]
        hypothesis = [Region("r1", "textregion", bbox)]
        score = layout_f1(reference, hypothesis)
        assert score == pytest.approx(1.0)
# ──────────────────────────────────────────────────────────────────────────
# 8. Shortcut layout_f1
# ──────────────────────────────────────────────────────────────────────────
class TestShortcut:
    """``layout_f1`` must agree with the full metrics computation."""

    def test_shortcut_matches_full_call(self) -> None:
        """The shortcut returns the same value as ``["global"]["f1"]`` of the full call."""
        text_region = Region("r1", "TextRegion", (0, 0, 100, 100))
        margin_note = Region("r2", "MarginNote", (200, 0, 50, 100))
        reference = [text_region, margin_note]
        # r2 is deliberately left out of the hypothesis.
        hypothesis = [Region("r1", "TextRegion", (0, 0, 100, 100))]

        full = compute_layout_metrics(reference, hypothesis)
        expected_f1 = full["global"]["f1"]
        assert layout_f1(reference, hypothesis) == pytest.approx(expected_f1)