Spaces:
Running
Running
| """Sprint A14-S15 β AltoView (vue canonique 2). | |
| 6 cas couvrant la fidΓ©litΓ© documentaire ALTO + le pattern | |
| d'omission explicite des pipelines qui ne produisent pas d'ALTO. | |
| """ | |
| from __future__ import annotations | |
| import pytest | |
| from picarones.domain import ( | |
| Artifact, | |
| ArtifactType, | |
| MetricSpec, | |
| ) | |
| from picarones.evaluation.metrics.alto_structural import ( | |
| compute_alto_validity, | |
| compute_line_count_ratio, | |
| compute_word_box_coverage, | |
| ) | |
| from picarones.evaluation.projectors import ProjectorRegistry | |
| from picarones.evaluation.registry import MetricRegistry | |
| from picarones.evaluation.views import ( | |
| DEFAULT_ALTO_METRICS, | |
| DefaultEvaluationViewExecutor, | |
| build_alto_view, | |
| build_text_view, | |
| ) | |
| from picarones.formats.alto.types import ( | |
| AltoBBox, | |
| AltoDocument, | |
| AltoLine, | |
| AltoPage, | |
| AltoString, | |
| AltoTextBlock, | |
| ) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Fixtures ALTO | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _line(*words: str, with_bbox: bool = True) -> AltoLine: | |
| strings = tuple( | |
| AltoString( | |
| content=w, | |
| bbox=AltoBBox(hpos=0, vpos=0, width=10, height=10) if with_bbox else None, | |
| ) | |
| for w in words | |
| ) | |
| return AltoLine(strings=strings) | |
| def _doc(*lines: AltoLine, n_blocks: int = 1) -> AltoDocument: | |
| """Construit un AltoDocument avec ``n_blocks`` blocs partageant | |
| les lignes.""" | |
| if n_blocks == 1: | |
| return AltoDocument(pages=(AltoPage( | |
| blocks=(AltoTextBlock(lines=lines),), | |
| ),),) | |
| # Distribute lines across blocks (tous identiques pour simplifier) | |
| chunks = [lines] * n_blocks | |
| return AltoDocument(pages=(AltoPage( | |
| blocks=tuple(AltoTextBlock(lines=c) for c in chunks), | |
| ),),) | |
| def _empty_doc() -> AltoDocument: | |
| return AltoDocument() | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # MΓ©triques individuelles | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| class TestAltoMetrics: | |
| def test_validity_full_doc(self) -> None: | |
| d = _doc(_line("a", "b")) | |
| assert compute_alto_validity(d, d) == 1.0 | |
| def test_validity_empty_doc(self) -> None: | |
| assert compute_alto_validity(_doc(_line("a")), _empty_doc()) == 0.0 | |
| def test_line_count_ratio_equal(self) -> None: | |
| d1 = _doc(_line("a"), _line("b"), _line("c")) | |
| d2 = _doc(_line("x"), _line("y"), _line("z")) | |
| assert compute_line_count_ratio(d1, d2) == 1.0 | |
| def test_line_count_ratio_partial(self) -> None: | |
| d1 = _doc(_line("a"), _line("b"), _line("c"), _line("d")) # 4 | |
| d2 = _doc(_line("x"), _line("y")) # 2 | |
| assert compute_line_count_ratio(d1, d2) == 0.5 | |
| def test_line_count_ratio_both_empty(self) -> None: | |
| assert compute_line_count_ratio(_empty_doc(), _empty_doc()) == 1.0 | |
| def test_word_box_coverage_full(self) -> None: | |
| d = _doc(_line("a", "b", "c", with_bbox=True)) | |
| assert compute_word_box_coverage(d, d) == 1.0 | |
| def test_word_box_coverage_partial(self) -> None: | |
| # 2 mots avec bbox, 1 sans | |
| line = AltoLine(strings=( | |
| AltoString(content="a", bbox=AltoBBox(hpos=0, vpos=0, width=1, height=1)), | |
| AltoString(content="b", bbox=AltoBBox(hpos=0, vpos=0, width=1, height=1)), | |
| AltoString(content="c", bbox=None), | |
| )) | |
| d = AltoDocument(pages=(AltoPage(blocks=(AltoTextBlock(lines=(line,),),),),),) | |
| assert abs(compute_word_box_coverage(d, d) - 2 / 3) < 1e-9 | |
| def test_word_box_coverage_no_bbox(self) -> None: | |
| d = _doc(_line("a", "b", with_bbox=False)) | |
| assert compute_word_box_coverage(d, d) == 0.0 | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # AltoView shape | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| class TestAltoViewShape: | |
| def test_default_view_accepts_only_alto_xml(self) -> None: | |
| """Cas 1 β AltoView n'accepte que ALTO_XML.""" | |
| view = build_alto_view() | |
| assert view.accepts(ArtifactType.ALTO_XML) | |
| assert not view.accepts(ArtifactType.RAW_TEXT) | |
| assert not view.accepts(ArtifactType.PAGE_XML) | |
| assert not view.accepts(ArtifactType.CANONICAL_DOCUMENT) | |
| assert not view.accepts(ArtifactType.IMAGE) | |
| def test_default_metrics(self) -> None: | |
| view = build_alto_view() | |
| assert view.metric_names == DEFAULT_ALTO_METRICS | |
| assert "alto_validity" in view.metric_names | |
| assert "alto_line_count_ratio" in view.metric_names | |
| assert "alto_word_box_coverage" in view.metric_names | |
| def test_no_projection(self) -> None: | |
| view = build_alto_view() | |
| assert view.projection is None | |
| # Pas de projection mΓͺme par type source. | |
| assert view.projection_for(ArtifactType.ALTO_XML) is None | |
| def test_warnings_signal_omission_pattern(self) -> None: | |
| view = build_alto_view() | |
| warnings_text = " ".join(view.warnings) | |
| assert "OMIS" in warnings_text or "omis" in warnings_text | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # AltoView avec executor | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _build_alto_executor(payloads: dict[str, AltoDocument]) -> DefaultEvaluationViewExecutor: | |
| metrics = MetricRegistry() | |
| metrics.register( | |
| MetricSpec( | |
| name="alto_validity", | |
| input_types=(ArtifactType.ALTO_XML, ArtifactType.ALTO_XML), | |
| higher_is_better=True, | |
| ), | |
| compute_alto_validity, | |
| ) | |
| metrics.register( | |
| MetricSpec( | |
| name="alto_line_count_ratio", | |
| input_types=(ArtifactType.ALTO_XML, ArtifactType.ALTO_XML), | |
| higher_is_better=True, | |
| ), | |
| compute_line_count_ratio, | |
| ) | |
| metrics.register( | |
| MetricSpec( | |
| name="alto_word_box_coverage", | |
| input_types=(ArtifactType.ALTO_XML, ArtifactType.ALTO_XML), | |
| higher_is_better=True, | |
| ), | |
| compute_word_box_coverage, | |
| ) | |
| projectors = ProjectorRegistry() # AltoView n'a pas besoin de projecteur | |
| def loader(art: Artifact) -> AltoDocument: | |
| if art.id not in payloads: | |
| raise KeyError(f"missing payload {art.id}") | |
| return payloads[art.id] | |
| return DefaultEvaluationViewExecutor.from_registries(metrics, projectors, loader) | |
| class TestAltoViewWithExecutor: | |
| def test_perfect_alto_yields_all_ones(self) -> None: | |
| """Cas 2 β HypothΓ¨se identique Γ la GT β toutes mΓ©triques = 1.0.""" | |
| gt = _doc(_line("a", "b"), _line("c", "d")) | |
| payloads = {"gt": gt, "cand": gt} | |
| executor = _build_alto_executor(payloads) | |
| view = build_alto_view() | |
| gt_art = Artifact(id="gt", document_id="d", type=ArtifactType.ALTO_XML) | |
| cand = Artifact(id="cand", document_id="d", type=ArtifactType.ALTO_XML) | |
| result = executor.evaluate(view, cand, gt_art, pipeline_name="test") | |
| assert result.metric_values["alto_validity"] == 1.0 | |
| assert result.metric_values["alto_line_count_ratio"] == 1.0 | |
| assert result.metric_values["alto_word_box_coverage"] == 1.0 | |
| assert result.failed_metrics == {} | |
| def test_partial_quality_alto(self) -> None: | |
| """Cas 3 β HypothΓ¨se avec moins de lignes β ratio < 1, autres OK.""" | |
| gt = _doc(_line("a"), _line("b"), _line("c"), _line("d")) # 4 lignes | |
| cand = _doc(_line("x"), _line("y")) # 2 lignes | |
| payloads = {"gt": gt, "cand": cand} | |
| executor = _build_alto_executor(payloads) | |
| view = build_alto_view() | |
| gt_art = Artifact(id="gt", document_id="d", type=ArtifactType.ALTO_XML) | |
| cand_art = Artifact(id="cand", document_id="d", type=ArtifactType.ALTO_XML) | |
| result = executor.evaluate(view, cand_art, gt_art, pipeline_name="test") | |
| assert result.metric_values["alto_validity"] == 1.0 # cohΓ©rent | |
| assert result.metric_values["alto_line_count_ratio"] == 0.5 | |
| assert result.metric_values["alto_word_box_coverage"] == 1.0 | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Pattern d'omission : pipelines sans ALTO ne sont PAS dans AltoView | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| class TestOmissionPattern: | |
| """Le caller (service applicatif) doit OMETTRE les pipelines qui | |
| ne produisent pas d'ALTO_XML, plutΓ΄t que de leur attribuer un | |
| score factice Γ 0. | |
| Le test dΓ©montre le pattern recommandΓ©. | |
| """ | |
| def test_caller_filters_pipelines_by_view_acceptance(self) -> None: | |
| """Cas 4 β Pattern : boucler sur (vue, candidats), filtrer | |
| ceux dont le type n'est pas dans candidate_types.""" | |
| view = build_alto_view() | |
| # Simulons 3 pipelines avec leurs sorties principales : | |
| candidates = [ | |
| ("tesseract_text", ArtifactType.RAW_TEXT), # PAS d'ALTO | |
| ("ocr_llm_alto", ArtifactType.ALTO_XML), # ALTO β | |
| ("vlm_alto_reconstructed", ArtifactType.ALTO_XML), # ALTO β | |
| ] | |
| # Le caller filtre : | |
| eligible = [ | |
| (name, art_type) | |
| for name, art_type in candidates | |
| if view.accepts(art_type) | |
| ] | |
| omitted = [ | |
| (name, art_type) | |
| for name, art_type in candidates | |
| if not view.accepts(art_type) | |
| ] | |
| assert len(eligible) == 2 | |
| assert ("ocr_llm_alto", ArtifactType.ALTO_XML) in eligible | |
| assert ("vlm_alto_reconstructed", ArtifactType.ALTO_XML) in eligible | |
| assert len(omitted) == 1 | |
| assert omitted[0][0] == "tesseract_text" | |
| def test_executor_raises_value_error_if_caller_doesnt_filter(self) -> None: | |
| """Cas 5 β Garde-fou : si le caller n'a pas filtrΓ© et passe | |
| un RAW_TEXT à AltoView, ``executor.evaluate`` lève ``ValueError`` | |
| explicite.""" | |
| payloads = {"cand": "this is text", "gt": _doc(_line("a"))} | |
| executor = _build_alto_executor(payloads) | |
| view = build_alto_view() | |
| cand = Artifact(id="cand", document_id="d", type=ArtifactType.RAW_TEXT) | |
| gt = Artifact(id="gt", document_id="d", type=ArtifactType.ALTO_XML) | |
| with pytest.raises(ValueError, match="n'accepte pas"): | |
| executor.evaluate(view, cand, gt, pipeline_name="test") | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Cas central BnF : TextView + AltoView complΓ©mentaires | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| class TestBnFDualViewUsage: | |
| """DΓ©montre que le rapport BnF cible peut prΓ©senter TextView ET | |
| AltoView pour les **mΓͺmes** pipelines, mais avec des sets de | |
| pipelines diffΓ©rents. | |
| Pipeline 1 : Tesseract texte brut β prΓ©sent dans TextView, OMIS d'AltoView. | |
| Pipeline 2 : OCR+LLM avec ALTO β prΓ©sent dans les DEUX. | |
| Pipeline 3 : VLM avec ALTO reconstruit β prΓ©sent dans les DEUX. | |
| Le test ne fait PAS l'évaluation complète (la stub mémoire ne | |
| porte que ce qui est utile). Il vΓ©rifie le **pattern** : pour | |
| chaque vue, quels pipelines sont Γ©ligibles. | |
| """ | |
| def test_two_views_select_different_pipeline_sets(self) -> None: | |
| """Cas 6 β DΓ©finition de done S15 : | |
| * Tesseract β omis d'AltoView, prΓ©sent dans TextView | |
| * OCR+LLM+ALTO β dans les deux | |
| * VLM+ALTO β dans les deux | |
| """ | |
| text_view = build_text_view() | |
| alto_view = build_alto_view() | |
| pipelines = [ | |
| ("tesseract", ArtifactType.RAW_TEXT), | |
| ("ocr_llm_alto", ArtifactType.ALTO_XML), | |
| ("vlm_alto", ArtifactType.ALTO_XML), | |
| ] | |
| text_eligible = { | |
| n for n, t in pipelines if text_view.accepts(t) | |
| } | |
| alto_eligible = { | |
| n for n, t in pipelines if alto_view.accepts(t) | |
| } | |
| # TextView accepte les 3. | |
| assert text_eligible == {"tesseract", "ocr_llm_alto", "vlm_alto"} | |
| # AltoView omet Tesseract, garde les 2 ALTO. | |
| assert alto_eligible == {"ocr_llm_alto", "vlm_alto"} | |
| assert "tesseract" not in alto_eligible | |
| # Les pipelines prΓ©sents dans AltoView sont un SOUS-ENSEMBLE de | |
| # ceux prΓ©sents dans TextView (cohΓ©rence : si un pipeline | |
| # produit de l'ALTO, son texte est aussi extractible). | |
| assert alto_eligible.issubset(text_eligible) | |