Spaces:
Running
Running
"""Sprint A14-S16 — SearchView + métriques de recherchabilité."""
| from __future__ import annotations | |
| import pytest | |
| from picarones.domain import Artifact, ArtifactType, MetricSpec | |
| from picarones.evaluation.metrics.search import ( | |
| levenshtein_distance, | |
| numerical_sequence_preservation, | |
| searchability_recall, | |
| ) | |
| from picarones.evaluation.projectors import ( | |
| AltoToText, | |
| CanonicalToText, | |
| PageToText, | |
| ProjectorRegistry, | |
| ) | |
| from picarones.evaluation.registry import MetricRegistry | |
| from picarones.evaluation.views import ( | |
| DEFAULT_SEARCH_METRICS, | |
| DefaultEvaluationViewExecutor, | |
| build_search_view, | |
| ) | |
# ──────────────────────────────────────────────────────────────────
# Métriques individuelles
# ──────────────────────────────────────────────────────────────────
class TestLevenshtein:
    """Check ``levenshtein_distance`` on identical, empty and classic inputs."""

    def test_identical(self) -> None:
        """Two equal strings are at distance zero."""
        distance = levenshtein_distance("hello", "hello")
        assert distance == 0

    def test_empty(self) -> None:
        """With an empty side, the distance is the length of the other side."""
        cases = (
            ("", "", 0),
            ("abc", "", 3),
            ("", "abc", 3),
        )
        for left, right, expected in cases:
            assert levenshtein_distance(left, right) == expected

    def test_single_substitution(self) -> None:
        """One replaced character costs exactly one edit."""
        distance = levenshtein_distance("hello", "hallo")
        assert distance == 1

    def test_kitten_sitting(self) -> None:
        """Textbook case: kitten -> sitting (k->s, e->i, +g) takes 3 edits."""
        distance = levenshtein_distance("kitten", "sitting")
        assert distance == 3
class TestSearchabilityRecall:
    """Check ``searchability_recall`` (ground truth first, hypothesis second)."""

    def test_perfect_match(self) -> None:
        """Identical texts give a recall of 1.0."""
        assert searchability_recall("hello world", "hello world") == 1.0

    def test_fuzzy_match_within_threshold(self) -> None:
        """A word one substitution away still counts as found."""
        # "monde" vs "monds": 1 substitution, which the original comment
        # describes as within the allowed distance (<= 2), so it matches.
        score = searchability_recall("le monde", "le monds")
        assert score == 1.0

    def test_fuzzy_match_beyond_threshold(self) -> None:
        """A word too far from every hypothesis word is not found."""
        # "monde" vs "rabbit": distance > 2, so no match for "monde".
        score = searchability_recall("le monde", "le rabbit")
        # "le" matches, "monde" does not: 1/2 = 0.5.
        assert score == 0.5

    def test_empty_gt_returns_zero(self) -> None:
        """An empty ground truth yields 0.0."""
        score = searchability_recall("", "hello")
        assert score == 0.0

    def test_multiplicity_respected(self) -> None:
        """A hypothesis word cannot satisfy two ground-truth occurrences."""
        # The ground truth has "le" twice but the hypothesis only once, so
        # one "le" and "monde" match while the second "le" does not: 2/3.
        score = searchability_recall("le le monde", "le monde")
        expected = 2 / 3
        assert abs(score - expected) < 1e-9

    def test_case_insensitive_by_default(self) -> None:
        """With default arguments, letter case does not prevent a match."""
        score = searchability_recall("Bonjour", "bonjour")
        assert score == 1.0

    def test_negative_max_distance_raises(self) -> None:
        """A negative ``max_distance`` raises ValueError naming the parameter."""
        with pytest.raises(ValueError, match="max_distance"):
            searchability_recall("a", "b", max_distance=-1)
class TestNumericalSequencePreservation:
    """Check ``numerical_sequence_preservation`` (ground truth, hypothesis).

    The expected values below show the score to be the share of ground-truth
    years that are also found among the hypothesis years.
    """

    def test_perfect_year_preservation(self) -> None:
        """The same year on both sides scores 1.0."""
        score = numerical_sequence_preservation(
            "fait à Paris en 1789",
            "fait à Paris en 1789",
        )
        assert score == 1.0

    def test_year_corrupted(self) -> None:
        """A transposed year is lost, even though it is itself a valid year."""
        # "1798" in the hypothesis is a well-formed four-digit year, but the
        # metric looks for the GT years among the hypothesis years, and
        # "1789" is not among them: 0/1 = 0.0.
        score = numerical_sequence_preservation(
            "année 1789",
            "année 1798",
        )
        assert score == 0.0

    def test_partial_preservation(self) -> None:
        """Two of three ground-truth years surviving scores 2/3."""
        score = numerical_sequence_preservation(
            "1789, 1799, 1815",
            "1789 et 1815",  # 1799 is missing
        )
        assert abs(score - 2 / 3) < 1e-9

    def test_no_years_in_gt(self) -> None:
        """By convention, a ground truth without any year scores 0.0."""
        score = numerical_sequence_preservation(
            "pas de date ici",
            "pas de date là",
        )
        assert score == 0.0

    def test_year_regex_bounds(self) -> None:
        """A three-digit number is not a year; 1000 is.

        NOTE(review): the original comment also mentions an upper bound
        around 2099, but no assertion here exercises it.
        """
        # "999" is too short (3 digits) to be a year and "1000" is accepted,
        # so only "1000" is a GT year and it is present in the hypothesis.
        score = numerical_sequence_preservation("an 999 et 1000", "an 999 et 1000")
        assert score == 1.0
        # Identical inputs score 1.0 whatever gets detected, so also drop
        # "999" from the hypothesis: if it were counted as a GT year, the
        # score would fall to 1/2 instead of staying at 1.0.
        assert numerical_sequence_preservation("an 999 et 1000", "an 1000") == 1.0
# ──────────────────────────────────────────────────────────────────
# SearchView shape
# ──────────────────────────────────────────────────────────────────
class TestSearchViewShape:
    """Check the static shape of the view returned by ``build_search_view``."""

    def test_default_view_accepts_5_types(self) -> None:
        """The default view accepts each of the five listed artifact types."""
        view = build_search_view()
        accepted_types = (
            ArtifactType.RAW_TEXT,
            ArtifactType.CORRECTED_TEXT,
            ArtifactType.ALTO_XML,
            ArtifactType.PAGE_XML,
            ArtifactType.CANONICAL_DOCUMENT,
        )
        for artifact_type in accepted_types:
            assert view.accepts(artifact_type)

    def test_default_metrics(self) -> None:
        """The default view exposes exactly ``DEFAULT_SEARCH_METRICS``."""
        view = build_search_view()
        assert view.metric_names == DEFAULT_SEARCH_METRICS

    def test_projection_for_alto_routes_correctly(self) -> None:
        """ALTO XML input is routed to the ``alto_to_text`` projector."""
        view = build_search_view()
        spec = view.projection_for(ArtifactType.ALTO_XML)
        assert spec is not None
        assert spec.projector_name == "alto_to_text"

    def test_warnings_signal_higher_is_better_inversion(self) -> None:
        """The view's warnings mention the ``higher_is_better`` inversion."""
        view = build_search_view()
        joined = " ".join(view.warnings)
        # The French keyword had been mis-decoded to "OPPOSΓ", which could
        # never occur in real warning text; restored to the intended word.
        assert "higher_is_better" in joined or "OPPOSÉ" in joined
# ──────────────────────────────────────────────────────────────────
# SearchView avec executor
# ──────────────────────────────────────────────────────────────────
def _build_search_executor(payloads: dict[str, str]) -> DefaultEvaluationViewExecutor:
    """Build an executor wired with both search metrics and three projectors.

    Args:
        payloads: Maps an artifact id to the text the loader returns for it.

    Returns:
        Whatever ``DefaultEvaluationViewExecutor.from_registries`` builds
        from a fresh metric registry, a fresh projector registry and an
        in-memory loader backed by ``payloads``.
    """
    text_pair = (ArtifactType.RAW_TEXT, ArtifactType.RAW_TEXT)

    # Both metrics compare raw text with raw text and are higher-is-better.
    metric_registry = MetricRegistry()
    for metric_name, metric_fn in (
        ("searchability_recall", searchability_recall),
        ("numerical_sequence_preservation", numerical_sequence_preservation),
    ):
        metric_registry.register(
            MetricSpec(
                name=metric_name,
                input_types=text_pair,
                higher_is_better=True,
            ),
            metric_fn,
        )

    # Instantiate and register one projector at a time, in the same order.
    projector_registry = ProjectorRegistry()
    for projector_cls in (AltoToText, PageToText, CanonicalToText):
        projector_registry.register(projector_cls())

    def lookup_payload(art: Artifact) -> str:
        """Return the stored text for ``art``; raise KeyError if unknown."""
        if art.id in payloads:
            return payloads[art.id]
        raise KeyError(art.id)

    return DefaultEvaluationViewExecutor.from_registries(
        metric_registry, projector_registry, lookup_payload
    )
class TestSearchViewWithExecutor:
    """Run the search view end to end on raw-text artifacts."""

    @staticmethod
    def _evaluate(candidate_text: str, reference_text: str):
        """Evaluate a candidate text against a reference via the search view."""
        payloads = {
            "cand": candidate_text,
            "gt": reference_text,
        }
        executor = _build_search_executor(payloads)
        view = build_search_view()
        cand = Artifact(id="cand", document_id="d", type=ArtifactType.RAW_TEXT)
        gt = Artifact(id="gt", document_id="d", type=ArtifactType.RAW_TEXT)
        return executor.evaluate(view, cand, gt, pipeline_name="test")

    def test_perfect_text_yields_recall_1(self) -> None:
        """Identical candidate and reference score 1.0 on both metrics."""
        text = "le petit chat noir 1789"
        values = self._evaluate(text, text).metric_values
        assert values["searchability_recall"] == 1.0
        assert values["numerical_sequence_preservation"] == 1.0

    def test_partial_text_quality_with_year_loss(self) -> None:
        """A typo keeps recall high, but a corrupted year zeroes preservation."""
        values = self._evaluate(
            "le pelit chat noir 1798",  # one typo plus a corrupted year
            "le petit chat noir 1789",
        ).metric_values
        # "petit" vs "pelit" is 1 substitution and "1789" vs "1798" is 2, so
        # both still pass the fuzzy match and recall stays close to 1.0.
        assert values["searchability_recall"] >= 0.8
        # The exact year 1789 is absent from the candidate: preservation is 0.
        assert values["numerical_sequence_preservation"] == 0.0