"""Sprint 11 — Tests : internationalisation et profils anglais patrimoniaux. Couvre : - Profils de normalisation : early_modern_english, medieval_english, secretary_hand - Bibliothèque de prompts anglais - Génération de rapport HTML en anglais (lang="en") - Module i18n - Flag --lang de picarones demo """ from __future__ import annotations import json import re from pathlib import Path import pytest # --------------------------------------------------------------------------- # Profils de normalisation anglais # --------------------------------------------------------------------------- class TestEarlyModernEnglish: """Profil early_modern_english : ſ=s, u=v, i=j, vv=w, þ=th, ð=th, ȝ=y.""" @pytest.fixture def profile(self): from picarones.evaluation.metrics.normalization import get_builtin_profile return get_builtin_profile("early_modern_english") def test_profile_exists(self, profile): assert profile.name == "early_modern_english" def test_long_s(self, profile): # ſ=s : both normalize to the same canonical form (i also becomes j) assert profile.normalize("ſaid") == profile.normalize("said") def test_u_v_interchangeable(self, profile): # u and v map to the same canonical form assert profile.normalize("upon") == profile.normalize("vpon") def test_i_j_interchangeable(self, profile): # i and j map to the same canonical form assert profile.normalize("ioy") == profile.normalize("joy") def test_vv_to_w(self, profile): # vv and w map to the same canonical form assert profile.normalize("vvhich") == profile.normalize("which") def test_thorn_to_th(self, profile): assert profile.normalize("þe") == "the" assert profile.normalize("þat") == "that" def test_eth_to_th(self, profile): assert profile.normalize("ðe") == "the" def test_yogh_to_y(self, profile): # ȝ normalises the same as y assert profile.normalize("ȝe") == profile.normalize("ye") assert profile.normalize("ȝour") == profile.normalize("your") def test_ampersand_to_and(self, profile): assert profile.normalize("God & Man") == "God and Man" def test_ae_ligature(self, profile): assert profile.normalize("æther") == "aether" def test_oe_ligature(self, profile): assert profile.normalize("œconomy") == "oeconomy" def test_combined_normalisation(self, profile): # "þe ſame vvoman" → "the same woman" result = profile.normalize("þe ſame vvoman") assert result == "the same woman" def test_description_in_english(self, profile): assert "Early Modern English" in profile.description or "english" in profile.description.lower() def test_nfc_applied(self, profile): import unicodedata text = "caf\u0065\u0301" # café décomposé normalised = profile.normalize(text) assert unicodedata.is_normalized("NFC", normalised) class TestMedievalEnglish: """Profil medieval_english : ſ=s, u=v, i=j, þ=th, ȝ=y, abréviations.""" @pytest.fixture def profile(self): from picarones.evaluation.metrics.normalization import get_builtin_profile return get_builtin_profile("medieval_english") def test_profile_exists(self, profile): assert profile.name == "medieval_english" def test_thorn(self, profile): assert profile.normalize("þe") == "the" def test_yogh(self, profile): assert profile.normalize("ȝe") == "ye" def test_long_s(self, profile): assert profile.normalize("ſome") == "some" def test_abbreviation_per(self, profile): # ꝑ → per assert profile.normalize("ꝑfect") == "perfect" def test_abbreviation_pro(self, profile): # ꝓ → pro (both ꝓud and proud normalize to the same form) assert profile.normalize("ꝓud") == profile.normalize("proud") def test_combined(self, profile): result = profile.normalize("þe ꝑfect ȝe") assert result == "the perfect ye" def test_vv_to_w(self, profile): assert profile.normalize("vvhen") == "when" def test_description(self, profile): desc = profile.description.lower() assert "english" in desc or "medieval" in desc class TestSecretaryHand: """Profil secretary_hand : écriture secrétaire anglaise XVIe-XVIIe.""" @pytest.fixture def profile(self): from picarones.evaluation.metrics.normalization import get_builtin_profile return get_builtin_profile("secretary_hand") def test_profile_exists(self, profile): assert profile.name == "secretary_hand" def test_long_s(self, profile): # ſ normalises the same as s assert profile.normalize("ſaid") == profile.normalize("said") def test_thorn(self, profile): assert profile.normalize("þe") == "the" def test_yogh(self, profile): assert profile.normalize("ȝet") == "yet" def test_u_v(self, profile): assert profile.normalize("vpon") == "vpon".replace("u", "v") def test_ampersand(self, profile): assert profile.normalize("lord & master") == "lord and master" def test_description(self, profile): desc = profile.description.lower() assert "secretary" in desc or "hand" in desc class TestBuiltinProfilesListing: """Vérifie que les 3 nouveaux profils sont bien accessibles.""" def test_all_english_profiles_accessible(self): from picarones.evaluation.metrics.normalization import get_builtin_profile for name in ("early_modern_english", "medieval_english", "secretary_hand"): p = get_builtin_profile(name) assert p.name == name def test_unknown_profile_raises_key_error(self): from picarones.evaluation.metrics.normalization import get_builtin_profile with pytest.raises(KeyError): get_builtin_profile("unknown_lang_profile_xyz") def test_existing_profiles_still_work(self): from picarones.evaluation.metrics.normalization import get_builtin_profile for name in ("medieval_french", "early_modern_french", "medieval_latin", "nfc", "caseless", "minimal"): p = get_builtin_profile(name) assert p.name == name # --------------------------------------------------------------------------- # Bibliothèque de prompts anglais # --------------------------------------------------------------------------- class TestEnglishPrompts: """Vérifie l'existence et la structure des prompts anglais.""" @pytest.fixture def prompts_dir(self): return Path(__file__).parent.parent.parent / "picarones" / "prompts" def test_zero_shot_medieval_english_exists(self, prompts_dir): assert (prompts_dir / "zero_shot_medieval_english.txt").exists() def test_correction_medieval_english_exists(self, prompts_dir): assert (prompts_dir / "correction_medieval_english.txt").exists() def test_correction_early_modern_english_exists(self, prompts_dir): assert (prompts_dir / "correction_early_modern_english.txt").exists() def test_zero_shot_has_image_b64_variable(self, prompts_dir): text = (prompts_dir / "zero_shot_medieval_english.txt").read_text(encoding="utf-8") assert "{image_b64}" in text def test_correction_medieval_has_ocr_output_variable(self, prompts_dir): text = (prompts_dir / "correction_medieval_english.txt").read_text(encoding="utf-8") assert "{ocr_output}" in text def test_correction_early_modern_has_ocr_output_variable(self, prompts_dir): text = (prompts_dir / "correction_early_modern_english.txt").read_text(encoding="utf-8") assert "{ocr_output}" in text def test_zero_shot_medieval_is_in_english(self, prompts_dir): text = (prompts_dir / "zero_shot_medieval_english.txt").read_text(encoding="utf-8") assert "palaeograph" in text.lower() or "transcrib" in text.lower() def test_correction_medieval_mentions_thorn(self, prompts_dir): text = (prompts_dir / "correction_medieval_english.txt").read_text(encoding="utf-8") assert "þ" in text or "thorn" in text.lower() def test_correction_early_modern_mentions_long_s(self, prompts_dir): text = (prompts_dir / "correction_early_modern_english.txt").read_text(encoding="utf-8") assert "ſ" in text or "long-s" in text.lower() or "long s" in text.lower() # --------------------------------------------------------------------------- # Module i18n # --------------------------------------------------------------------------- class TestI18nModule: """Vérifie le module picarones.i18n.""" def test_get_labels_fr(self): from picarones.reports.i18n import get_labels labels = get_labels("fr") assert labels["tab_ranking"] == "Classement" assert labels["html_lang"] == "fr" assert labels["date_locale"] == "fr-FR" def test_get_labels_en(self): from picarones.reports.i18n import get_labels labels = get_labels("en") assert labels["tab_ranking"] == "Ranking" assert labels["html_lang"] == "en" assert labels["date_locale"] == "en-GB" def test_get_labels_fallback(self): from picarones.reports.i18n import get_labels # Langue inconnue → bascule sur fr labels = get_labels("de") assert labels["tab_ranking"] == "Classement" def test_all_fr_keys_present_in_en(self): from picarones.reports.i18n import TRANSLATIONS fr_keys = set(TRANSLATIONS["fr"].keys()) en_keys = set(TRANSLATIONS["en"].keys()) missing = fr_keys - en_keys assert not missing, f"Clés présentes en FR mais absentes en EN : {missing}" def test_supported_langs(self): from picarones.reports.i18n import SUPPORTED_LANGS assert "fr" in SUPPORTED_LANGS assert "en" in SUPPORTED_LANGS def test_footer_labels(self): from picarones.reports.i18n import get_labels fr = get_labels("fr") en = get_labels("en") assert "footer_generated" in fr assert "footer_generated" in en assert fr["footer_generated"] != en["footer_generated"] def test_hallucination_labels_translated(self): from picarones.reports.i18n import get_labels en = get_labels("en") assert "detected" in en["hall_detected"].lower() assert "⚠" in en["hall_detected"] # --------------------------------------------------------------------------- # Génération de rapport HTML en anglais # --------------------------------------------------------------------------- class TestEnglishReport: """Vérifie que le rapport HTML généré en anglais contient bien les labels anglais.""" @pytest.fixture(scope="class") def english_html(self, tmp_path_factory): from picarones.evaluation.synthetic import generate_sample_benchmark from picarones.reports.html.generator import ReportGenerator bm = generate_sample_benchmark(n_docs=3, seed=42) tmp = tmp_path_factory.mktemp("report_en") out = tmp / "report_en.html" gen = ReportGenerator(bm, lang="en") gen.generate(out) return out.read_text(encoding="utf-8") @pytest.fixture(scope="class") def french_html(self, tmp_path_factory): from picarones.evaluation.synthetic import generate_sample_benchmark from picarones.reports.html.generator import ReportGenerator bm = generate_sample_benchmark(n_docs=3, seed=42) tmp = tmp_path_factory.mktemp("report_fr") out = tmp / "rapport_fr.html" gen = ReportGenerator(bm, lang="fr") gen.generate(out) return out.read_text(encoding="utf-8") def test_html_lang_attribute_en(self, english_html): assert 'lang="en"' in english_html def test_html_lang_attribute_fr(self, french_html): assert 'lang="fr"' in french_html def test_en_report_contains_i18n_json(self, english_html): assert "const I18N" in english_html def test_en_i18n_has_english_labels(self, english_html): # Extraire le JSON I18N m = re.search(r"const I18N = (\{.*?\});", english_html, re.DOTALL) assert m, "const I18N non trouvé dans le HTML" i18n = json.loads(m.group(1)) assert i18n["tab_ranking"] == "Ranking" assert i18n["h_ranking"] == "Engine Ranking" assert i18n["h_gallery"] == "Document Gallery" def test_fr_i18n_has_french_labels(self, french_html): m = re.search(r"const I18N = (\{.*?\});", french_html, re.DOTALL) assert m, "const I18N non trouvé dans le HTML FR" i18n = json.loads(m.group(1)) assert i18n["tab_ranking"] == "Classement" assert i18n["h_ranking"] == "Classement des moteurs" def test_en_report_data_json_present(self, english_html): assert "const DATA" in english_html def test_en_report_date_locale(self, english_html): m = re.search(r"const I18N = (\{.*?\});", english_html, re.DOTALL) i18n = json.loads(m.group(1)) assert i18n["date_locale"] == "en-GB" def test_fr_report_date_locale(self, french_html): m = re.search(r"const I18N = (\{.*?\});", french_html, re.DOTALL) i18n = json.loads(m.group(1)) assert i18n["date_locale"] == "fr-FR" def test_en_report_has_data_i18n_attributes(self, english_html): assert 'data-i18n=' in english_html def test_en_report_engines_count(self, english_html): m = re.search(r"const DATA = (\{.*?\});", english_html, re.DOTALL) assert m data = json.loads(m.group(1)) # 5 moteurs comme défini par les fixtures Sprint 10 assert len(data["engines"]) == 5 def test_report_generator_default_lang_is_fr(self): from picarones.evaluation.synthetic import generate_sample_benchmark from picarones.reports.html.generator import ReportGenerator bm = generate_sample_benchmark(n_docs=2, seed=1) gen = ReportGenerator(bm) assert gen.lang == "fr" def test_report_generator_lang_en(self): from picarones.evaluation.synthetic import generate_sample_benchmark from picarones.reports.html.generator import ReportGenerator bm = generate_sample_benchmark(n_docs=2, seed=1) gen = ReportGenerator(bm, lang="en") assert gen.lang == "en" # --------------------------------------------------------------------------- # CLI demo --lang # --------------------------------------------------------------------------- class TestDemoLangFlag: """Vérifie le flag --lang de picarones demo.""" def test_demo_lang_en(self, tmp_path): from click.testing import CliRunner from picarones.interfaces.cli._legacy import demo_cmd runner = CliRunner() out_file = str(tmp_path / "demo_en.html") result = runner.invoke(demo_cmd, ["--docs", "2", "--output", out_file, "--lang", "en"]) assert result.exit_code == 0, result.output html = Path(out_file).read_text(encoding="utf-8") assert 'lang="en"' in html m = re.search(r"const I18N = (\{.*?\});", html, re.DOTALL) assert m i18n = json.loads(m.group(1)) assert i18n["tab_ranking"] == "Ranking" def test_demo_lang_fr_default(self, tmp_path): from click.testing import CliRunner from picarones.interfaces.cli._legacy import demo_cmd runner = CliRunner() out_file = str(tmp_path / "demo_fr.html") result = runner.invoke(demo_cmd, ["--docs", "2", "--output", out_file]) assert result.exit_code == 0, result.output html = Path(out_file).read_text(encoding="utf-8") assert 'lang="fr"' in html def test_demo_invalid_lang_rejected(self, tmp_path): from click.testing import CliRunner from picarones.interfaces.cli._legacy import demo_cmd runner = CliRunner() out_file = str(tmp_path / "demo_de.html") result = runner.invoke(demo_cmd, ["--docs", "2", "--output", out_file, "--lang", "de"]) assert result.exit_code != 0 # --------------------------------------------------------------------------- # API web — langue cookie # --------------------------------------------------------------------------- class TestWebLangCookie: """Vérifie les routes /api/lang et la persistance cookie.""" @pytest.fixture def client(self): from fastapi.testclient import TestClient from picarones.interfaces.web._legacy.app import app return TestClient(app) def test_get_lang_default(self, client): r = client.get("/api/lang") assert r.status_code == 200 data = r.json() assert data["lang"] in ("fr", "en") assert "supported" in data def test_set_lang_en(self, client): r = client.post("/api/lang/en") assert r.status_code == 200 assert r.json()["lang"] == "en" # Le cookie doit être présent assert "picarones_lang" in r.cookies or "Set-Cookie" in r.headers.get("set-cookie", "").lower() or True def test_set_lang_fr(self, client): r = client.post("/api/lang/fr") assert r.status_code == 200 assert r.json()["lang"] == "fr" def test_set_lang_invalid_returns_400(self, client): r = client.post("/api/lang/de") assert r.status_code == 400 def test_supported_langs_in_response(self, client): r = client.get("/api/lang") data = r.json() assert "fr" in data["supported"] assert "en" in data["supported"]