Spaces:
Running
Running
Claude
Sprint 11 : internationalisation complète — support anglais patrimonial
ce0bff3 unverified | """Labels i18n pour le rapport HTML et l'interface Picarones. | |
| Langues supportées | |
| ------------------ | |
| - ``"fr"`` : français (défaut) | |
| - ``"en"`` : anglais patrimonial (heritage English) | |
| """ | |
| from __future__ import annotations | |
# Label tables keyed by language code, then by label identifier.
# Both language tables expose the same set of label identifiers.
#
# NOTE(review): the strings below were repaired from mis-decoded UTF-8.
# Accented letters, "ſ", "α", "…" and "⚠️" are unambiguous. Symbols whose
# trailing bytes were lost (em/en dashes, "→", "↔", "✓", the "⊞" button icon
# and the ﬁ/ﬂ/ﬀ ligature samples) were restored from context — confirm them
# against the canonical file.
TRANSLATIONS: dict[str, dict[str, str]] = {
    "fr": {
        # ── HTML meta ─────────────────────────────────────────────────────
        "html_lang": "fr",
        "date_locale": "fr-FR",
        # ── Navigation ────────────────────────────────────────────────────
        "nav_report": "rapport OCR",
        "tab_ranking": "Classement",
        "tab_gallery": "Galerie",
        "tab_document": "Document",
        "tab_characters": "Caractères",
        "tab_analyses": "Analyses",
        "btn_present": "⊞ Présentation",
        # ── Ranking ───────────────────────────────────────────────────────
        "h_ranking": "Classement des moteurs",
        "col_rank": "#",
        "col_engine": "Concurrent",
        "col_cer": "CER exact",
        "col_cer_diplo": "CER diplo.",
        "col_cer_diplo_title": "CER après normalisation diplomatique (ſ=s, u=v, i=j…) — mesure les erreurs substantielles en ignorant les variantes graphiques codifiées",
        "col_wer": "WER",
        "col_mer": "MER",
        "col_wil": "WIL",
        "col_ligatures": "Ligatures",
        "col_ligatures_title": "Taux de reconnaissance des ligatures (ﬁ, ﬂ, œ, æ, ﬀ…)",
        "col_diacritics": "Diacritiques",
        "col_diacritics_title": "Taux de conservation des diacritiques (accents, cédilles, trémas…)",
        "col_gini": "Gini",
        "col_gini_title": "Coefficient de Gini des erreurs CER par ligne — 0 = erreurs uniformes, 1 = erreurs concentrées. Un bon moteur a CER bas ET Gini bas.",
        "col_anchor": "Ancrage",
        "col_anchor_title": "Score d'ancrage : proportion des trigrammes de la sortie trouvant un ancrage dans le GT — faible score = hallucinations probables (LLM/VLM)",
        "col_cer_median": "CER médian",
        "col_cer_min": "CER min",
        "col_cer_max": "CER max",
        "col_overnorm": "Sur-norm.",
        "col_overnorm_title": "Classe 10 — Sur-normalisation LLM : taux de mots corrects dégradés par le LLM",
        "col_docs": "Docs",
        # ── Gallery ───────────────────────────────────────────────────────
        "h_gallery": "Galerie des documents",
        "gallery_sort_label": "Trier par :",
        "gallery_sort_id": "Identifiant",
        "gallery_sort_cer": "CER moyen",
        "gallery_sort_difficulty": "Difficulté",
        "gallery_sort_best": "Meilleur moteur",
        "gallery_filter_cer_label": "Filtrer CER >",
        "gallery_filter_engine_label": "Moteur :",
        "gallery_filter_all": "Tous",
        "gallery_empty": "Aucun document ne correspond aux filtres.",
        # ── Document ──────────────────────────────────────────────────────
        "doc_sidebar_header": "Documents",
        "doc_title_default": "Sélectionner un document",
        "h_image": "Image originale",
        "h_gt": "Vérité terrain (GT)",
        "h_diff": "Sorties OCR — diff par moteur",
        "h_line_metrics": "Distribution des erreurs par ligne",
        "h_hallucination": "Analyse des hallucinations",
        # ── Analyses ──────────────────────────────────────────────────────
        "h_characters": "Analyse des caractères",
        "char_engine_label": "Moteur :",
        "h_cer_dist": "Distribution du CER par moteur",
        "h_radar": "Profil des moteurs (radar)",
        "radar_note": "Axe radar : CER, WER, MER, WIL — valeurs inversées (plus c'est haut, meilleur est le moteur).",
        "h_cer_doc": "CER par document (tous moteurs)",
        "h_duration": "Temps d'exécution moyen (secondes/document)",
        "h_quality_cer": "Qualité image ↔ CER (scatter plot)",
        "quality_cer_note": "Chaque point = un document. Axe X = score qualité image [0–1]. Axe Y = CER. Corrélation négative attendue.",
        "h_taxonomy": "Taxonomie des erreurs par moteur",
        "taxonomy_note": "Distribution des classes d'erreurs (classes 1–9 de la taxonomie Picarones).",
        "h_reliability": "Courbes de fiabilité",
        "reliability_note": "Pour les X% documents les plus faciles (triés par CER croissant), quel est le CER moyen cumulé ? Une courbe basse = moteur performant même sur les documents faciles.",
        "h_bootstrap": "Intervalles de confiance à 95 % (bootstrap)",
        "bootstrap_note": "IC à 95% sur le CER moyen par moteur (1000 itérations bootstrap).",
        "h_venn": "Erreurs communes / exclusives (Venn)",
        "venn_note": "Intersection des ensembles d'erreurs entre les 2 ou 3 premiers concurrents. Erreurs communes = segments partagés.",
        "h_pairwise": "Tests de Wilcoxon — comparaisons par paires",
        "pairwise_note": "Test signé-rangé de Wilcoxon (non-paramétrique). Seuil α = 0.05.",
        "h_clusters": "Clustering des patterns d'erreurs",
        "h_gini_cer": "Gini vs CER moyen",
        "gini_cer_ideal": "— idéal : bas-gauche",
        "gini_cer_note": "Axe X = CER moyen, Axe Y = coefficient de Gini. Un moteur idéal a CER bas ET Gini bas (erreurs rares et uniformes).",
        "h_ratio_anchor": "Ratio longueur vs ancrage",
        "ratio_anchor_subtitle": "— hallucinations VLM",
        "ratio_anchor_note": "Axe X = score d'ancrage trigrammes [0–1]. Axe Y = ratio longueur sortie/GT. Zone ⚠️ : ancrage < 0.5 ou ratio > 1.2 → hallucinations probables.",
        "h_correlation": "Matrice de corrélation entre métriques",
        "corr_engine_label": "Moteur :",
        "corr_note": "Coefficient de Pearson entre les métriques CER, WER, qualité image, ligatures, diacritiques. Vert = corrélation positive, Rouge = corrélation négative.",
        # ── Footer ────────────────────────────────────────────────────────
        "footer_generated": "Rapport généré le",
        "footer_by": "par Picarones",
        # ── Dynamic JS strings ────────────────────────────────────────────
        "heatmap_start": "Début",
        "heatmap_mid": "Milieu",
        "heatmap_end": "Fin",
        "heatmap_title": "CARTE THERMIQUE (position)",
        "percentile_title": "PERCENTILES CER",
        "lines": "lignes",
        "no_line_metrics": "Aucune métrique de ligne disponible.",
        "no_hall_metrics": "Aucune métrique d'hallucination disponible.",
        "no_hall_blocks": "Aucun bloc halluciné détecté.",
        "hall_detected": "⚠️ Hallucinations détectées",
        "hall_ok": "✓ Ancrage satisfaisant",
        "hall_blocks_title": "Blocs sans ancrage dans le GT :",
        "hall_block_label": "Bloc halluciné",
        "hall_more_blocks": "bloc(s) supplémentaire(s)",
        "no_gini": "Données Gini non disponibles.",
        "no_scatter": "Données non disponibles.",
        "total_errors": "Total :",
        "errors_classified": "erreurs classifiées.",
        "class_col": "Classe",
        "proportion_col": "Proportion",
        "taxonomy_engine_label": "Moteur :",
    },
    "en": {
        # ── HTML meta ─────────────────────────────────────────────────────
        "html_lang": "en",
        "date_locale": "en-GB",
        # ── Navigation ────────────────────────────────────────────────────
        "nav_report": "OCR report",
        "tab_ranking": "Ranking",
        "tab_gallery": "Gallery",
        "tab_document": "Document",
        "tab_characters": "Characters",
        "tab_analyses": "Analyses",
        "btn_present": "⊞ Presentation",
        # ── Ranking ───────────────────────────────────────────────────────
        "h_ranking": "Engine Ranking",
        "col_rank": "#",
        "col_engine": "Engine",
        "col_cer": "Exact CER",
        "col_cer_diplo": "Diplo. CER",
        "col_cer_diplo_title": "CER after diplomatic normalisation (ſ=s, u=v, i=j…) — measures substantial errors ignoring codified graphical variants",
        "col_wer": "WER",
        "col_mer": "MER",
        "col_wil": "WIL",
        "col_ligatures": "Ligatures",
        "col_ligatures_title": "Ligature recognition rate (ﬁ, ﬂ, œ, æ, ﬀ…)",
        "col_diacritics": "Diacritics",
        "col_diacritics_title": "Diacritic preservation rate (accents, cedillas, umlauts…)",
        "col_gini": "Gini",
        "col_gini_title": "Gini coefficient of per-line CER errors — 0 = uniform errors, 1 = concentrated errors. A good engine has low CER AND low Gini.",
        "col_anchor": "Anchor",
        "col_anchor_title": "Anchor score: proportion of output trigrams found in the GT — low score = probable hallucinations (LLM/VLM)",
        "col_cer_median": "Median CER",
        "col_cer_min": "Min CER",
        "col_cer_max": "Max CER",
        "col_overnorm": "Over-norm.",
        "col_overnorm_title": "Class 10 — LLM over-normalisation: rate of correct words degraded by the LLM",
        "col_docs": "Docs",
        # ── Gallery ───────────────────────────────────────────────────────
        "h_gallery": "Document Gallery",
        "gallery_sort_label": "Sort by:",
        "gallery_sort_id": "Identifier",
        "gallery_sort_cer": "Mean CER",
        "gallery_sort_difficulty": "Difficulty",
        "gallery_sort_best": "Best engine",
        "gallery_filter_cer_label": "Filter CER >",
        "gallery_filter_engine_label": "Engine:",
        "gallery_filter_all": "All",
        "gallery_empty": "No documents match the filters.",
        # ── Document ──────────────────────────────────────────────────────
        "doc_sidebar_header": "Documents",
        "doc_title_default": "Select a document",
        "h_image": "Original Image",
        "h_gt": "Ground Truth (GT)",
        "h_diff": "OCR Output — diff by engine",
        "h_line_metrics": "Error Distribution by Line",
        "h_hallucination": "Hallucination Analysis",
        # ── Analyses ──────────────────────────────────────────────────────
        "h_characters": "Character Analysis",
        "char_engine_label": "Engine:",
        "h_cer_dist": "CER Distribution by Engine",
        "h_radar": "Engine Profile (radar)",
        "radar_note": "Radar axes: CER, WER, MER, WIL — inverted values (higher = better engine).",
        "h_cer_doc": "CER by Document (all engines)",
        "h_duration": "Average Execution Time (seconds/document)",
        "h_quality_cer": "Image Quality ↔ CER (scatter plot)",
        "quality_cer_note": "Each point = one document. X-axis = image quality score [0–1]. Y-axis = CER. Negative correlation expected.",
        "h_taxonomy": "Error Taxonomy by Engine",
        "taxonomy_note": "Distribution of error classes (classes 1–9 of the Picarones taxonomy).",
        "h_reliability": "Reliability Curves",
        "reliability_note": "For the X% easiest documents (sorted by ascending CER), what is the cumulative mean CER? A low curve = engine performing well even on easy documents.",
        "h_bootstrap": "95% Bootstrap Confidence Intervals",
        "bootstrap_note": "95% CI on mean CER per engine (1000 bootstrap iterations).",
        "h_venn": "Shared / Exclusive Errors (Venn)",
        "venn_note": "Intersection of error sets between the 2 or 3 top engines. Shared errors = overlapping segments.",
        "h_pairwise": "Wilcoxon Tests — pairwise comparisons",
        "pairwise_note": "Wilcoxon signed-rank test (non-parametric). Threshold α = 0.05.",
        "h_clusters": "Frequent Error Clusters",
        "h_gini_cer": "Gini vs Mean CER",
        "gini_cer_ideal": "— ideal: bottom-left",
        "gini_cer_note": "X-axis = mean CER, Y-axis = Gini coefficient. An ideal engine has low CER AND low Gini (rare, uniform errors).",
        "h_ratio_anchor": "Length Ratio vs Anchor Score",
        "ratio_anchor_subtitle": "— VLM hallucinations",
        "ratio_anchor_note": "X-axis = trigram anchor score [0–1]. Y-axis = output/GT length ratio. ⚠️ Zone: anchor < 0.5 or ratio > 1.2 → probable hallucinations.",
        "h_correlation": "Metric Correlation Matrix",
        "corr_engine_label": "Engine:",
        "corr_note": "Pearson coefficient between CER, WER, image quality, ligatures, diacritics. Green = positive correlation, Red = negative.",
        # ── Footer ────────────────────────────────────────────────────────
        "footer_generated": "Report generated on",
        "footer_by": "by Picarones",
        # ── Dynamic JS strings ────────────────────────────────────────────
        "heatmap_start": "Start",
        "heatmap_mid": "Middle",
        "heatmap_end": "End",
        "heatmap_title": "HEATMAP (position)",
        "percentile_title": "CER PERCENTILES",
        "lines": "lines",
        "no_line_metrics": "No line metrics available.",
        "no_hall_metrics": "No hallucination metrics available.",
        "no_hall_blocks": "No hallucinated blocks detected.",
        "hall_detected": "⚠️ Hallucinations detected",
        "hall_ok": "✓ Satisfactory anchoring",
        "hall_blocks_title": "Blocks with no anchor in GT:",
        "hall_block_label": "Hallucinated block",
        "hall_more_blocks": "additional block(s)",
        "no_gini": "Gini data not available.",
        "no_scatter": "Data not available.",
        "total_errors": "Total:",
        "errors_classified": "classified errors.",
        "class_col": "Class",
        "proportion_col": "Proportion",
        "taxonomy_engine_label": "Engine:",
    },
}
def get_labels(lang: str = "fr") -> dict[str, str]:
    """Return the label dictionary for the requested language.

    Parameters
    ----------
    lang:
        Language code: ``"fr"`` (default) or ``"en"``.

    Returns
    -------
    dict
        The translated labels. A table is always returned: any code that is
        not a key of ``TRANSLATIONS`` yields the ``"fr"`` labels. The table is
        the one stored in ``TRANSLATIONS``, not a copy.
    """
    # Resolve the French table first so it is always looked up, exactly as a
    # default argument to ``dict.get`` would be.
    fallback = TRANSLATIONS["fr"]
    try:
        return TRANSLATIONS[lang]
    except KeyError:
        return fallback
# Language codes that have a label table, in the order they appear in TRANSLATIONS.
SUPPORTED_LANGS: list[str] = list(TRANSLATIONS)