Spaces:
Sleeping
Sleeping
| """Tests Sprint 85 — A.II.5b : précision sur séquences numériques. | |
| Couvre : | |
| 1. Détection par catégorie (year, roman, foliation, currency, regnal). | |
| 2. ``compute_numerical_sequence_metrics`` : | |
| - identité → 1.0 sur strict et value | |
| - perte totale → 0.0 | |
| - GT vide → scores 0.0 (pas None — convention float) | |
| - value préservée mais pas strict (XIV → 14) | |
| - foliotation recto/verso non interchangeables | |
| - multiplicité respectée | |
| 3. Cas réalistes : charte XVIII, registre paroissial. | |
| 4. Enregistrement registre typé : strict + value. | |
| """ | |
| from __future__ import annotations | |
| from picarones.measurements.numerical_sequences import ( | |
| CATEGORIES, | |
| _detect_currencies, | |
| _detect_foliations, | |
| _detect_regnal, | |
| _detect_romans_with_values, | |
| _detect_years, | |
| compute_numerical_sequence_metrics, | |
| numerical_sequence_strict_score, | |
| numerical_sequence_value_score, | |
| ) | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # 1. Détection par catégorie | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestDetectYears:
    """Checks for ``_detect_years``, the year detector."""

    def test_classic_year(self) -> None:
        """Two years in running text come back as (form, value) pairs, in order."""
        years = _detect_years("né en 1789 puis mort en 1856")
        assert years == [("1789", 1789), ("1856", 1856)]

    def test_year_with_context(self) -> None:
        """Only the year is reported; the leading ordinal day is not."""
        years = _detect_years("1ᵉʳ janvier 1789")
        assert years == [("1789", 1789)]

    def test_outside_range_ignored(self) -> None:
        """Numbers outside the accepted year range yield nothing."""
        # 999 (3 digits) and 2123 (beyond 2099): not detected.
        assert _detect_years("999 et 2123") == []

    def test_empty(self) -> None:
        """An empty string yields an empty list."""
        assert _detect_years("") == []
class TestDetectRomans:
    """Checks for ``_detect_romans_with_values``, the Roman-numeral detector."""

    def test_classic(self) -> None:
        """Both a short and a long Roman numeral are found in the text."""
        out = _detect_romans_with_values("Tome IV, MDCLXVIII")
        forms = [form for form, _ in out]
        assert "IV" in forms
        assert "MDCLXVIII" in forms

    def test_min_length_filters_single_letter(self) -> None:
        """A lone single-letter numeral is not reported."""
        # I, V, X on their own are ignored (min_length=2).
        out = _detect_romans_with_values("I prononce le V")
        forms = [form for form, _ in out]
        assert "I" not in forms
class TestDetectFoliations:
    """Checks for ``_detect_foliations``, the folio/page reference detector."""

    def test_recto_verso_preserved(self) -> None:
        """Recto and verso of the same folio produce two distinct value keys."""
        out = _detect_foliations("voir f. 12r et f. 12v")
        keys = [key for _, key in out]
        assert "12r" in keys
        assert "12v" in keys

    def test_page_range(self) -> None:
        """A page range is reported as one item whose key is the range."""
        out = _detect_foliations("pp. 12-15")
        assert ("pp. 12-15", "12-15") in out

    def test_n_degree(self) -> None:
        """An item number written with the degree sign is detected."""
        out = _detect_foliations("voir n° 42")
        assert any(key == "42" for _, key in out)
class TestDetectCurrencies:
    """Checks for ``_detect_currencies``, the monetary amount detector."""

    def test_ancien_regime(self) -> None:
        """Livres, sols and deniers are reported with singular unit names."""
        out = _detect_currencies("12 livres 5 sols 8 deniers")
        # Each detected value is indexable; position 1 holds the unit.
        units = [value[1] for _, value in out]
        assert "livre" in units
        assert "sol" in units
        assert "denier" in units

    def test_modern_units(self) -> None:
        """Pound and euro symbols are reported as units."""
        out = _detect_currencies("100 £ et 50 €")
        units = [value[1] for _, value in out]
        assert "£" in units
        assert "€" in units
class TestDetectRegnal:
    """Checks for ``_detect_regnal``, the regnal/era year detector."""

    def test_simple_regnal(self) -> None:
        """A Roman-numbered Republican year is reported with value 3."""
        out = _detect_regnal("l'an III de la République")
        # « l'an III » and « an III de la République » are merged into a
        # single occurrence by the regex.
        assert any(value == 3 for _, value in out)

    def test_an_de_grace(self) -> None:
        """An « an de grâce » year is reported with its integer value."""
        out = _detect_regnal("écrit en l'an de grâce 1450")
        assert any(value == 1450 for _, value in out)
| # ────────────────────────────────────────────────────────────────────────── | |
| # 2. compute_numerical_sequence_metrics | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestComputeMetrics:
    """Checks for ``compute_numerical_sequence_metrics`` (GT vs. hypothesis)."""

    def test_identity(self) -> None:
        """Comparing a text with itself gives 1.0 on both global scores."""
        gt = "Tome IV, an de grâce 1789, f. 12r, 5 livres"
        r = compute_numerical_sequence_metrics(gt, gt)
        assert r["global_strict_score"] == 1.0
        assert r["global_value_score"] == 1.0

    def test_total_loss(self) -> None:
        """A hypothesis with no numerical sequence scores 0.0 on both."""
        gt = "1789 IV f. 12r 5 livres"
        hyp = "alpha beta gamma delta"
        r = compute_numerical_sequence_metrics(gt, hyp)
        assert r["global_strict_score"] == 0.0
        assert r["global_value_score"] == 0.0
        assert r["n_total"] >= 1

    def test_empty_gt_returns_zero(self) -> None:
        """An empty ground truth yields zero scores and a zero total."""
        r = compute_numerical_sequence_metrics("", "anything")
        # No sequence in the GT: scores are 0 (no division by zero).
        assert r["global_strict_score"] == 0.0
        assert r["global_value_score"] == 0.0
        assert r["n_total"] == 0

    def test_value_preserved_form_lost(self) -> None:
        """A different surface form with the same value key counts for value only."""
        # A Roman numeral in the GT (« XIV », or « MMXX » = 2020) cannot be
        # matched by an Arabic « 14 » / « 2020 » in the hypothesis: 14 is
        # below 1000 and outside the year range, and the Roman detector
        # finds nothing in the hypothesis, so the GT Roman numeral is lost
        # in value as well (consistent: there is no cross-category match).
        # Strict vs. value is therefore exercised on foliation instead.
        gt = "voir f. 12r"
        hyp = "voir fol. 12r"  # different form, identical value (12r)
        r = compute_numerical_sequence_metrics(gt, hyp)
        # « f. 12r » and « fol. 12r » share the same value key (« 12r »),
        # hence value=1, strict=0.
        assert r["per_category"]["foliation"]["value"] == 1
        assert r["per_category"]["foliation"]["strict"] == 0

    def test_recto_verso_not_interchangeable(self) -> None:
        """Recto and verso of the same folio do not match each other."""
        # f. 12r (GT) vs. f. 12v (hyp): different sides, so the item is
        # lost both in value and in strict mode.
        r = compute_numerical_sequence_metrics("f. 12r", "f. 12v")
        assert r["per_category"]["foliation"]["strict"] == 0
        assert r["per_category"]["foliation"]["value"] == 0

    def test_multiplicity(self) -> None:
        """Repeated GT items are counted individually."""
        # 2 occurrences in the GT, 1 in the hypothesis: 1 preserved.
        gt = "1789 et 1789"
        hyp = "1789"
        r = compute_numerical_sequence_metrics(gt, hyp)
        assert r["per_category"]["year"]["n_total"] == 2
        assert r["per_category"]["year"]["strict"] == 1
        assert "1789" in r["per_category"]["year"]["lost_items"]

    def test_categories_constant(self) -> None:
        """``CATEGORIES`` declares exactly the five expected names."""
        # Sanity check: the 5 categories are declared.
        assert set(CATEGORIES) == {
            "year", "roman", "foliation", "currency", "regnal",
        }

    def test_per_category_breakdown_keys(self) -> None:
        """Every category has an entry carrying all breakdown keys."""
        r = compute_numerical_sequence_metrics("1789", "1789")
        expected_keys = (
            "n_total", "strict", "value",
            "strict_score", "value_score", "lost_items",
        )
        for cat in CATEGORIES:
            assert cat in r["per_category"]
            for k in expected_keys:
                assert k in r["per_category"][cat]
| # ────────────────────────────────────────────────────────────────────────── | |
| # 3. Cas réalistes | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestRealistic:
    """End-to-end checks on sentences resembling real archival documents."""

    def test_charte_18e_strict_preserved(self) -> None:
        """An identical charter transcription gets a strict score of 1.0."""
        gt = (
            "Donné à Paris l'an de grâce 1789, "
            "f. 12r, contre 25 livres 4 sols et 6 deniers."
        )
        hyp = (
            "Donné à Paris l'an de grâce 1789, "
            "f. 12r, contre 25 livres 4 sols et 6 deniers."
        )
        r = compute_numerical_sequence_metrics(gt, hyp)
        assert r["global_strict_score"] == 1.0

    def test_baptismal_register_modernized(self) -> None:
        """A modernising OCR loses the Roman numeral but keeps year and folio."""
        # Modernising OCR: XVIII -> 18 (Roman form lost).
        gt = "Au siècle XVIII, en l'an 1750, f. 3r"
        hyp = "Au siècle 18, en l'an 1750, f. 3r"
        r = compute_numerical_sequence_metrics(gt, hyp)
        # XVIII is lost in form (the hypothesis has no recognisable Roman
        # numeral).
        assert "XVIII" in r["per_category"]["roman"]["lost_items"]
        # Year and foliation are preserved.
        assert r["per_category"]["year"]["strict"] == 1
        assert r["per_category"]["foliation"]["strict"] == 1
| # ────────────────────────────────────────────────────────────────────────── | |
| # 4. Registre typé | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestRegistry:
    """Checks that both scores are exposed through the typed metric registry."""

    def test_strict_and_value_metrics_registered(self) -> None:
        """Both metric names are selected for a (TEXT, TEXT) junction."""
        from picarones.core.metric_registry import select_metrics
        from picarones.core.modules import ArtifactType

        metrics = select_metrics((ArtifactType.TEXT, ArtifactType.TEXT))
        names = [m.name for m in metrics]
        assert "numerical_sequence_strict_score" in names
        assert "numerical_sequence_value_score" in names

    def test_strict_score_callable(self) -> None:
        """The strict score function returns 1.0 on identical inputs."""
        v = numerical_sequence_strict_score("1789", "1789")
        assert v == 1.0

    def test_value_score_with_form_drift(self) -> None:
        """A changed surface form drops the strict score but not the value score."""
        # « f. 12r » vs. « fol. 12r »: value preserved, strict lost.
        strict = numerical_sequence_strict_score("f. 12r", "fol. 12r")
        value = numerical_sequence_value_score("f. 12r", "fol. 12r")
        assert strict == 0.0
        assert value == 1.0

    def test_metric_via_compute_at_junction(self) -> None:
        """``compute_at_junction`` reports both scores as 1.0 on identical text."""
        from picarones.core.metric_registry import compute_at_junction
        from picarones.core.modules import ArtifactType

        results = compute_at_junction(
            "1789, IV", "1789, IV",
            (ArtifactType.TEXT, ArtifactType.TEXT),
        )
        assert results.get("numerical_sequence_strict_score") == 1.0
        assert results.get("numerical_sequence_value_score") == 1.0