Spaces:
Sleeping
Sleeping
Claude
sprint54: A.II.2.2 Layout F1 par type — couche de calcul (clôture A.II.2)
f6c8252 unverified | """Layout F1 par type de région — Sprint 54. | |
| Sprint 54 — A.II.2.2 du plan d'évolution 2026. | |
| Pourquoi ce module | |
| ------------------ | |
| Un médiéviste qui édite un manuscrit glosé veut savoir : *« le moteur | |
| sépare-t-il bien le texte principal de la glose ? »*. Le score de | |
| structure global de Picarones (Sprint 5) agrège fusion/fragmentation | |
| de lignes en un seul nombre — utile mais non typé. Ce module | |
| discrimine par **type de région** ALTO/PAGE (``TextRegion``, | |
| ``MarginNote``, ``Header``, ``Footer``, ``Drop-Cap``...) en | |
| appliquant le pattern ICDAR layout standard : | |
| - **TP** : région GT et région hypothèse de **même type** avec | |
| chevauchement IoU ≥ seuil (alignement greedy par IoU décroissant), | |
| - **FN** : région GT non matchée, | |
| - **FP** : région hypothèse non matchée, | |
| - F1 calculé global et par type. | |
| Le pattern d'alignement est le même que pour le NER (Sprint 38) — on | |
| réutilise une approche éprouvée plutôt que d'en inventer une nouvelle. | |
| Stratégie de découpage | |
| ---------------------- | |
| Cohérente avec NER (Sprint 38), Flesch (Sprint 52), Reading order F1 | |
| (Sprint 53) : couche de calcul pure d'abord. L'utilisateur fournit | |
| deux listes de ``Region`` (typiquement extraites de ALTO/PAGE par un | |
| parser amont — le parser ALTO/PAGE standard de Picarones suivra | |
| dans un sprint dédié). Pas de câblage runner ni de vue HTML ici. | |
| Convention de coordonnées | |
| ------------------------- | |
| Une bbox est un tuple ``(x, y, width, height)`` en pixels (origine | |
| en haut à gauche, axe y vers le bas — convention ALTO et PAGE | |
| standard). L'IoU est calculée sur l'aire d'intersection / union des | |
| rectangles. | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| from dataclasses import dataclass | |
| from typing import Iterable | |
| logger = logging.getLogger(__name__) | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # Modèle de données | |
| # ────────────────────────────────────────────────────────────────────────── | |
@dataclass(frozen=True)
class Region:
    """An ALTO/PAGE layout region that can be aligned against ground truth.

    Instances are immutable (frozen dataclass), so the bbox validated at
    construction time cannot be invalidated afterwards.

    Attributes
    ----------
    id:
        Identifier, unique within its sequence (e.g. ``"r_1"``,
        ``"region_main"``). Informational only: alignment is driven by
        IoU, not by ID.
    type:
        Region category (``"TextRegion"``, ``"MarginNote"``, ``"Header"``,
        etc.). Comparison is **case-insensitive**.
    bbox:
        Rectangle ``(x, y, width, height)`` in pixels, origin at the top
        left. ``width`` and ``height`` must be strictly positive.

    Raises
    ------
    ValueError
        If ``bbox`` has a non-positive width or height.
    """

    id: str
    type: str
    bbox: tuple[int, int, int, int]

    def __post_init__(self) -> None:
        # Reject degenerate rectangles up front: a zero or negative side
        # would make every IoU computed with this region meaningless.
        _x, _y, w, h = self.bbox
        if w <= 0 or h <= 0:
            raise ValueError(
                f"Region {self.id!r} : bbox invalide (w={w}, h={h}). "
                "width et height doivent être strictement positifs."
            )

    @property
    def area(self) -> int:
        """Area of the bounding box (``width * height``)."""
        _, _, w, h = self.bbox
        return w * h
def _to_region(obj: Region | dict) -> Region:
    """Return *obj* as a ``Region``, building one from a mapping if needed.

    A ``Region`` instance is passed through untouched. Anything else is
    read as a mapping with the keys ``id``, ``type`` and ``bbox``; ``id``
    and ``type`` are converted with ``str`` and ``bbox`` with ``tuple``.
    """
    if not isinstance(obj, Region):
        region_id = str(obj["id"])
        region_type = str(obj["type"])
        box = tuple(obj["bbox"])
        obj = Region(
            id=region_id,
            type=region_type,
            bbox=box,  # type: ignore[arg-type]
        )
    return obj
| # ────────────────────────────────────────────────────────────────────────── | |
| # IoU + alignement greedy | |
| # ────────────────────────────────────────────────────────────────────────── | |
def _iou_bbox(a: Region, b: Region) -> float:
    """Return the Intersection-over-Union of two ``(x, y, w, h)`` bboxes.

    Parameters
    ----------
    a, b:
        Objects exposing a ``bbox`` attribute laid out as
        ``(x, y, width, height)``.

    Returns
    -------
    float
        ``intersection / union`` of the two rectangles, or ``0.0`` when
        they do not overlap (rectangles that merely share an edge have a
        zero-area intersection and therefore score ``0.0``).
    """
    ax, ay, aw, ah = a.bbox
    bx, by, bw, bh = b.bbox

    # The overlap rectangle spans from the larger of the two origins to
    # the smaller of the two far edges; clamp to 0 when they are disjoint.
    inter_w = max(0, min(ax + aw, bx + bw) - max(ax, bx))
    inter_h = max(0, min(ay + ah, by + bh) - max(ay, by))
    inter = inter_w * inter_h
    if inter == 0:
        return 0.0

    # Areas are derived from the sides already unpacked above, so the
    # result does not depend on how the region object exposes its area.
    union = aw * ah + bw * bh - inter
    if union <= 0:
        return 0.0
    return inter / union
def _align_regions(
    references: list[Region],
    hypotheses: list[Region],
    iou_threshold: float,
) -> tuple[list[tuple[int, int, float]], set[int], set[int]]:
    """Greedily pair references with hypotheses of the same type.

    Every (reference, hypothesis) pair whose types are equal after
    ``casefold()`` and whose IoU is at least ``iou_threshold`` is a
    candidate. Candidates are consumed by decreasing IoU; each reference
    and each hypothesis is used at most once.

    Returns
    -------
    tuple
        ``(matches, unmatched_refs, unmatched_hyps)`` where ``matches`` is
        a list of ``(idx_ref, idx_hyp, iou)`` and the two sets hold the
        indices left without a partner.
    """
    scored: list[tuple[float, int, int]] = []
    for ref_idx, ref in enumerate(references):
        for hyp_idx, hyp in enumerate(hypotheses):
            if ref.type.casefold() == hyp.type.casefold():
                overlap = _iou_bbox(ref, hyp)
                if overlap >= iou_threshold:
                    scored.append((overlap, ref_idx, hyp_idx))

    # Highest IoU first; ties broken by ascending indices so that the
    # outcome is deterministic.
    ranked = sorted(scored, key=lambda c: (-c[0], c[1], c[2]))

    free_refs: set[int] = set(range(len(references)))
    free_hyps: set[int] = set(range(len(hypotheses)))
    pairs: list[tuple[int, int, float]] = []
    for overlap, ref_idx, hyp_idx in ranked:
        if ref_idx in free_refs and hyp_idx in free_hyps:
            free_refs.discard(ref_idx)
            free_hyps.discard(hyp_idx)
            pairs.append((ref_idx, hyp_idx, overlap))

    return pairs, free_refs, free_hyps
| # ────────────────────────────────────────────────────────────────────────── | |
| # Métrique principale | |
| # ────────────────────────────────────────────────────────────────────────── | |
def _prf(tp: int, fp: int, fn: int) -> dict[str, float]:
    """Build a precision / recall / F1 / support record from raw counts.

    Any ratio whose denominator is zero is reported as ``0.0``.
    ``support`` is the number of reference items, i.e. ``tp + fn``.
    """
    predicted = tp + fp
    expected = tp + fn

    precision = tp / predicted if predicted > 0 else 0.0
    recall = tp / expected if expected > 0 else 0.0

    if (precision + recall) > 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0

    return {
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "support": expected,
    }
def compute_layout_metrics(
    reference_regions: Iterable[Region | dict] | None,
    hypothesis_regions: Iterable[Region | dict] | None,
    iou_threshold: float = 0.5,
) -> dict:
    """Compute layout precision/recall/F1, globally and per region type.

    Parameters
    ----------
    reference_regions:
        Ground-truth regions (``Region`` or dict ``{id, type, bbox}``).
        ``None`` is treated as an empty list.
    hypothesis_regions:
        Regions produced by the system under evaluation, same accepted
        forms as ``reference_regions``.
    iou_threshold:
        Minimum IoU for a reference/hypothesis pair to count as a match
        (default: 0.5).

    Returns
    -------
    dict
        ``{
            "global": {"precision", "recall", "f1", "support"},
            "per_type": {type_name: {"precision", ...}},
            "true_positives": int,
            "false_positives": int,
            "false_negatives": int,
            "missed_regions": list[dict],        # unmatched GT
            "hallucinated_regions": list[dict],  # unmatched hypotheses
            "iou_threshold": float,
        }``

    Notes
    -----
    Region types are matched case-insensitively, so the per-type
    breakdown groups them the same way: all spellings that are equal
    after ``casefold()`` share one ``per_type`` entry, reported under the
    first spelling encountered (ground truth first, then hypotheses).
    ``missed_regions`` and ``hallucinated_regions`` keep each region's
    own spelling.

    Degenerate cases
    ----------------
    - Both lists empty: F1 = 0 and every counter is 0.
    - Empty GT, non-empty hypotheses: F1 = 0 (every hypothesis is a FP).
    - Empty hypotheses, non-empty GT: F1 = 0 (every GT region is a FN).
    """
    refs = [_to_region(r) for r in (reference_regions or [])]
    hyps = [_to_region(h) for h in (hypothesis_regions or [])]
    matches, unmatched_refs, unmatched_hyps = _align_regions(
        refs, hyps, iou_threshold,
    )
    tp = len(matches)
    fn = len(unmatched_refs)
    fp = len(unmatched_hyps)

    # One display label per case-folded type. Without this, "TextRegion"
    # and "textregion" would be aligned together but scored in two
    # separate per-type buckets.
    labels: dict[str, str] = {}
    for region in refs + hyps:
        labels.setdefault(region.type.casefold(), region.type)

    # label -> [tp, fp, fn], in the argument order expected by _prf.
    counts: dict[str, list[int]] = {}

    def _bump(region: Region, slot: int) -> None:
        label = labels[region.type.casefold()]
        counts.setdefault(label, [0, 0, 0])[slot] += 1

    for i, _j, _iou in matches:
        _bump(refs[i], 0)
    for j in unmatched_hyps:
        _bump(hyps[j], 1)
    for i in unmatched_refs:
        _bump(refs[i], 2)

    per_type = {label: _prf(*counts[label]) for label in sorted(counts)}

    return {
        "global": _prf(tp, fp, fn),
        "per_type": per_type,
        "true_positives": tp,
        "false_positives": fp,
        "false_negatives": fn,
        "missed_regions": [
            {"id": refs[i].id, "type": refs[i].type, "bbox": list(refs[i].bbox)}
            for i in sorted(unmatched_refs)
        ],
        "hallucinated_regions": [
            {"id": hyps[j].id, "type": hyps[j].type, "bbox": list(hyps[j].bbox)}
            for j in sorted(unmatched_hyps)
        ],
        "iou_threshold": iou_threshold,
    }
def layout_f1(
    reference_regions: Iterable[Region | dict] | None,
    hypothesis_regions: Iterable[Region | dict] | None,
    iou_threshold: float = 0.5,
) -> float:
    """Shortcut returning only the global layout F1.

    Delegates to ``compute_layout_metrics`` with the same arguments and
    extracts ``["global"]["f1"]`` from its result.
    """
    metrics = compute_layout_metrics(
        reference_regions,
        hypothesis_regions,
        iou_threshold,
    )
    overall = metrics["global"]
    return overall["f1"]
# Names exported by ``from <module> import *``.
__all__ = ["Region", "compute_layout_metrics", "layout_f1"]