Spaces:
Sleeping
Sleeping
"""Tests Sprint 58 — marqueurs typographiques imprimé ancien.
Couvre :
1. ``get_category`` : caractères classés correctement dans les
   5 catégories (ligatures, long_s, dotless_i, ampersand,
   nasal_tildes) ; caractères non typographiques → ``None``.
2. ``detect_markers`` :
   - reconnaissance des marqueurs pré-composés
   - reconnaissance des séquences ``voyelle + U+0303`` comme
     nasal_tildes
   - texte vide / None
3. ``compute_early_modern_metrics`` :
   - **Diplomatique** : tous marqueurs préservés → preservation = 1
   - **Modernisant** : marqueurs remplacés (ſ→s, ﬁ→fi, ı→i, ã→a) →
     preservation faible
   - **Mixte** : breakdown per_category cohérent
   - cas dégénérés (GT sans marqueur, vide, None)
4. **Cas réaliste** : un document XVIIᵉ avec 5 marqueurs ; trois
   moteurs avec trois conventions différentes → discriminés.
5. ``missed_markers`` : liste les marqueurs ratés avec leur index
   et catégorie.
6. Intégration registre typé.
"""
| from __future__ import annotations | |
| import pytest | |
| from picarones.measurements.early_modern_typography import ( | |
| AMPERSAND, | |
| DOTLESS_I, | |
| LIGATURES, | |
| LONG_S, | |
| NASAL_TILDE_PRECOMPOSED, | |
| compute_early_modern_metrics, | |
| detect_markers, | |
| early_modern_preservation, | |
| get_category, | |
| ) | |
| from picarones.core.metric_registry import compute_at_junction, select_metrics | |
| from picarones.core.modules import ArtifactType | |
# ──────────────────────────────────────────────────────────────────────────
# 1. get_category
# ──────────────────────────────────────────────────────────────────────────
class TestGetCategory:
    """Check that ``get_category`` maps a single character to its category."""

    # NOTE(review): SOURCE shows ``test_categorize`` taking ``char`` and
    # ``expected`` with no parametrization, which pytest cannot resolve.
    # The cases below are reconstructed from the module docstring (five
    # categories, non-typographic characters -> None) and from the inputs
    # used by the sibling ``detect_markers`` tests; confirm them against
    # ``picarones.measurements.early_modern_typography``.
    @pytest.mark.parametrize(
        ("char", "expected"),
        [
            ("\ufb01", "ligatures"),     # LATIN SMALL LIGATURE FI
            ("\u017f", "long_s"),        # LATIN SMALL LETTER LONG S
            ("\u0131", "dotless_i"),     # LATIN SMALL LETTER DOTLESS I
            ("&", "ampersand"),
            ("\u00e3", "nasal_tildes"),  # LATIN SMALL LETTER A WITH TILDE
            ("a", None),                 # ordinary letter: not a marker
        ],
    )
    def test_categorize(self, char: str, expected: str | None) -> None:
        """Each sample character is classified into the expected category."""
        assert get_category(char) == expected

    def test_empty_input(self) -> None:
        """The empty string has no category."""
        assert get_category("") is None
# ──────────────────────────────────────────────────────────────────────────
# 2. detect_markers
# ──────────────────────────────────────────────────────────────────────────
class TestDetectMarkers:
    """Exercise ``detect_markers`` on markers, combining sequences and empty input.

    The tests unpack each returned item as an ``(index, marker, category)``
    triple.
    """

    def test_detects_all_categories(self) -> None:
        """One marker per category yields all five category names."""
        # ligature fi, long s, dotless i, ampersand, pre-composed a-tilde
        text = "ﬁ ſ ı & ã"
        markers = detect_markers(text)
        cats = sorted(cat for _i, _m, cat in markers)
        assert cats == [
            "ampersand", "dotless_i", "ligatures", "long_s", "nasal_tildes",
        ]

    def test_detects_combining_tilde_sequence(self) -> None:
        """A vowel followed by U+0303 is reported as one nasal-tilde marker."""
        # 'a' + U+0303 (combining tilde) -> nasal_tildes. Written with an
        # explicit escape so the decomposed form survives editors that
        # normalise text to NFC.
        text = "a\u0303"
        markers = detect_markers(text)
        assert len(markers) == 1
        idx, marker, cat = markers[0]
        assert cat == "nasal_tildes"
        assert marker == "a\u0303"
        assert idx == 0

    def test_preserves_order(self) -> None:
        """Markers are returned in the order they appear in the text."""
        text = "ı puis ſ puis ﬁ"
        markers = detect_markers(text)
        cats = [cat for _i, _m, cat in markers]
        assert cats == ["dotless_i", "long_s", "ligatures"]

    def test_empty_input(self) -> None:
        """Both the empty string and ``None`` give an empty list."""
        assert detect_markers("") == []
        assert detect_markers(None) == []

    def test_text_without_markers(self) -> None:
        """Plain ASCII text contains no marker."""
        assert detect_markers("hello world") == []
# ──────────────────────────────────────────────────────────────────────────
# 3. compute_early_modern_metrics — cas standards
# ──────────────────────────────────────────────────────────────────────────
class TestComputeMetrics:
    """Standard cases for ``compute_early_modern_metrics``.

    The shared ground truth holds five markers, one per category: long s,
    fi ligature, ampersand, a-tilde and dotless i.
    """

    # The tests below request ``gt`` as an argument, so it has to be
    # registered as a fixture for pytest to inject it.
    @pytest.fixture
    def gt(self) -> str:
        """Ground-truth line containing one marker of each category."""
        return "le ſerpent ﬁnement & ã ı"

    def test_diplomatic_full_preservation(self, gt: str) -> None:
        """A hypothesis identical to the ground truth preserves everything."""
        m = compute_early_modern_metrics(gt, gt)
        assert m["global_preservation"] == pytest.approx(1.0)
        assert m["n_markers_preserved"] == m["n_markers_reference"]
        assert m["missed_markers"] == []

    def test_modernizing_loses_typographic_markers(self, gt: str) -> None:
        """A modernising transcription keeps only the ampersand."""
        # Every typographic marker is replaced by its modern equivalent;
        # '&' is kept (it is rarely modernised).
        hyp = "le serpent finement & a i"
        m = compute_early_modern_metrics(gt, hyp)
        # Ampersand preserved, everything else lost.
        assert m["per_category"]["ampersand"]["preservation"] == 1.0
        assert m["per_category"]["long_s"]["preservation"] == 0.0
        assert m["per_category"]["ligatures"]["preservation"] == 0.0
        assert m["per_category"]["dotless_i"]["preservation"] == 0.0
        assert m["per_category"]["nasal_tildes"]["preservation"] == 0.0
        # Global: 1/5 = 0.2
        assert m["global_preservation"] == pytest.approx(0.2)

    def test_mixed_engine_per_category(self, gt: str) -> None:
        """A partially diplomatic engine is scored category by category."""
        # Keeps the long s and the ampersand, loses the others.
        hyp = "le ſerpent finement & a i"
        m = compute_early_modern_metrics(gt, hyp)
        assert m["per_category"]["long_s"]["preservation"] == 1.0
        assert m["per_category"]["ampersand"]["preservation"] == 1.0
        assert m["per_category"]["ligatures"]["preservation"] == 0.0
        # 2/5 = 0.4
        assert m["global_preservation"] == pytest.approx(0.4)
# ──────────────────────────────────────────────────────────────────────────
# 4. Cas dégénérés
# ──────────────────────────────────────────────────────────────────────────
class TestDegenerateCases:
    """Degenerate inputs: no marker in the ground truth, empty or ``None`` text."""

    def test_gt_without_markers(self) -> None:
        """With no reference marker the score is 0.0 and the breakdown is empty."""
        m = compute_early_modern_metrics("hello", "hello")
        assert m["n_markers_reference"] == 0
        assert m["global_preservation"] == 0.0
        assert m["per_category"] == {}

    def test_empty_gt(self) -> None:
        """An empty ground truth has zero reference markers."""
        m = compute_early_modern_metrics("", "anything")
        assert m["n_markers_reference"] == 0
        assert m["global_preservation"] == 0.0

    def test_none_inputs(self) -> None:
        """``None`` for both arguments is accepted and counts zero markers."""
        m = compute_early_modern_metrics(None, None)
        assert m["n_markers_reference"] == 0

    def test_empty_hyp_with_markers_in_gt(self) -> None:
        """An empty hypothesis misses every reference marker."""
        # Ground truth: fi ligature + long s.
        m = compute_early_modern_metrics("ﬁ ſ", "")
        assert m["n_markers_preserved"] == 0
        assert m["global_preservation"] == 0.0
        assert len(m["missed_markers"]) == 2
# ──────────────────────────────────────────────────────────────────────────
# 5. missed_markers
# ──────────────────────────────────────────────────────────────────────────
class TestMissedMarkers:
    """Shape of the ``missed_markers`` entries returned by the metric."""

    def test_missed_markers_have_index_and_category(self) -> None:
        """Each missed marker is reported with index, marker and category."""
        gt = "ﬁ et ſ"
        # Long s replaced by 's', fi ligature replaced by 'fi'.
        hyp = "fi et s"
        m = compute_early_modern_metrics(gt, hyp)
        # Both markers are missed.
        assert len(m["missed_markers"]) == 2
        cats = {entry["category"] for entry in m["missed_markers"]}
        assert cats == {"ligatures", "long_s"}
        # Every entry exposes an index, a marker and a category.
        for entry in m["missed_markers"]:
            assert "index" in entry
            assert "marker" in entry
            assert "category" in entry
# ──────────────────────────────────────────────────────────────────────────
# 6. Comptage exhaustif
# ──────────────────────────────────────────────────────────────────────────
class TestExhaustiveAccounting:
    """Every reference marker is counted as either preserved or missed."""

    def test_preserved_plus_missed_equals_total(self) -> None:
        """Preserved count plus missed entries equals the reference count."""
        # NOTE(review): the literals were mis-encoded in SOURCE. The first two
        # ligatures are inferred from the hypothesis ("fi", "fl"); the last
        # one is unchanged between both strings and is assumed to be U+FB00
        # (ff ligature) - confirm against the original file.
        gt = "ﬁ ſ ı & ã ﬂ ﬀ"
        hyp = "fi s i & a fl ﬀ"
        m = compute_early_modern_metrics(gt, hyp)
        assert (
            m["n_markers_preserved"] + len(m["missed_markers"])
            == m["n_markers_reference"]
        )
# ──────────────────────────────────────────────────────────────────────────
# 7. Sets exposés
# ──────────────────────────────────────────────────────────────────────────
class TestExposedSets:
    """Sanity checks on the character sets exported by the module."""

    def test_ligatures_non_empty(self) -> None:
        """``LIGATURES`` holds at least five entries."""
        assert len(LIGATURES) >= 5

    def test_categories_disjoint(self) -> None:
        """No character belongs to two pre-composed category sets."""
        # The pre-composed sets must not overlap; compare every unordered
        # pair exactly once.
        sets = [LIGATURES, LONG_S, DOTLESS_I, AMPERSAND, NASAL_TILDE_PRECOMPOSED]
        for i, a in enumerate(sets):
            for b in sets[i + 1:]:
                assert a & b == frozenset(), (
                    f"Chevauchement entre catégories : {a & b!r}"
                )
# ──────────────────────────────────────────────────────────────────────────
# 8. Raccourci
# ──────────────────────────────────────────────────────────────────────────
class TestShortcut:
    """``early_modern_preservation`` agrees with the full metrics call."""

    def test_shortcut_matches_full_call(self) -> None:
        """The shortcut returns the ``global_preservation`` of the full call."""
        # fi ligature and long s are modernised, the ampersand is kept.
        gt = "ﬁ ſ &"
        hyp = "fi s &"
        full = compute_early_modern_metrics(gt, hyp)
        assert early_modern_preservation(gt, hyp) == pytest.approx(
            full["global_preservation"],
        )
# ──────────────────────────────────────────────────────────────────────────
# 9. Intégration registre typé
# ──────────────────────────────────────────────────────────────────────────
class TestRegistryIntegration:
    """The metric is reachable through the typed metric registry."""

    def test_metric_registered(self) -> None:
        """``early_modern_preservation`` is selected for a TEXT/TEXT junction."""
        # Force the import that populates the registry.
        import picarones.measurements.early_modern_typography  # noqa: F401
        selected = select_metrics(
            (ArtifactType.TEXT, ArtifactType.TEXT),
        )
        names = {spec.name for spec in selected}
        assert "early_modern_preservation" in names

    def test_compute_at_junction(self) -> None:
        """Identical texts score 1.0 when computed through the registry."""
        # Same string on both sides: fi ligature, long s, ampersand.
        out = compute_at_junction(
            "ﬁ ſ &", "ﬁ ſ &",
            (ArtifactType.TEXT, ArtifactType.TEXT),
        )
        assert out["early_modern_preservation"] == pytest.approx(1.0)