Picarones / picarones /report /lexical_modernization_render.py
Claude
refactor(report): consolidate 27 render helpers into render_helpers.py
2d6c41d unverified
Raw
History Blame
3.89 kB
"""Rendu HTML de la vue « Modernisation lexicale » — Sprint 80.
A.I.7 du plan d'évolution 2026.
Suite directe de ``picarones/measurements/lexical_modernization.py``.
Pattern identique aux autres rendus (Sprints 41/43/62/67/72/74/75/76/77) :
**server-side**, pas de JavaScript, anti-injection systématique.
Vue
---
Tableau trié par taux de modernisation décroissant : forme
historique GT → forme(s) modernisée(s), occurrences GT, %.
Couleur de cellule pour le %.
"""
from __future__ import annotations
from html import escape as _e
from typing import Optional
from picarones.measurements.lexical_modernization import top_modernized_tokens
from picarones.report.render_helpers import (
GRADIENT_TARGET_ORANGE,
color_single_gradient,
)
def _format_variants(variants: dict, max_show: int = 3) -> str:
    """Render the modernized variants as a compact, HTML-escaped list.

    Variants are ordered by descending count (ties keep the mapping's own
    iteration order, since the sort is stable). Only the first ``max_show``
    entries are spelled out as ``form (count)``; any remainder is summarized
    by a trailing ``+N`` item.
    """
    ranked = sorted(variants.items(), key=lambda pair: -pair[1])
    hidden = len(ranked) - max_show

    rendered = []
    for form, count in ranked[:max_show]:
        # Only the form is escaped; the count is interpolated as-is.
        rendered.append(f"{_e(form)} ({count})")
    if hidden > 0:
        rendered.append(f"+{hidden}")

    return ", ".join(rendered)
def build_lexical_modernization_html(
    data: Optional[dict],
    labels: Optional[dict[str, str]] = None,
    *,
    top_n: int = 20,
    min_total: int = 1,
) -> str:
    """Build the HTML table for the lexical-modernization view.

    Args:
        data: Lexical-modernization payload, passed unchanged to
            ``top_modernized_tokens``. Any falsy value (``None``, ``{}``)
            short-circuits to an empty result.
        labels: Optional overrides for the user-facing strings, keyed by
            ``lexmod_title``, ``lexmod_note``, ``lexmod_gt_label``,
            ``lexmod_hyp_label``, ``lexmod_n_label`` and
            ``lexmod_rate_label``. Missing keys fall back to the built-in
            French defaults.
        top_n: Forwarded to ``top_modernized_tokens`` as ``n``.
        min_total: Forwarded to ``top_modernized_tokens`` as ``min_total``.

    Returns:
        An HTML fragment (a ``<div class="lexmod">`` wrapping a title, a
        note and a table with one row per token), or ``""`` when ``data``
        is falsy or ``top_modernized_tokens`` yields no rows.
    """
    if not data:
        return ""
    rows = top_modernized_tokens(data, n=top_n, min_total=min_total)
    if not rows:
        return ""

    labels = labels or {}
    title = labels.get(
        "lexmod_title", "Modernisation lexicale (top tokens)",
    )
    note = labels.get(
        "lexmod_note",
        "Tokens GT que le moteur réécrit le plus souvent. "
        "Lecture : « maistre → maître modernisé dans 85 % des cas » "
        "indique de quoi corriger dans le prompt pour préserver "
        "l'orthographe historique.",
    )
    headers = (
        labels.get("lexmod_gt_label", "Forme historique GT"),
        labels.get("lexmod_hyp_label", "Variantes OCR"),
        labels.get("lexmod_n_label", "n GT"),
        labels.get("lexmod_rate_label", "% modernisé"),
    )

    # Padding shared by every header and body cell.
    cell_pad = "padding:.3rem .5rem"

    parts = [
        '<div class="lexmod" style="margin:1rem 0">',
        f'<div style="font-weight:600;margin-bottom:.4rem">{_e(title)}</div>',
        f'<div style="font-size:.85rem;opacity:.75;margin-bottom:.5rem">'
        f'{_e(note)}</div>',
        '<table style="border-collapse:collapse;width:100%;'
        'font-size:.85rem">',
        '<thead><tr>',
    ]
    parts.extend(
        f'<th scope="col" style="{cell_pad};text-align:left;'
        f'border-bottom:1px solid #ccc;font-weight:600">'
        f'{_e(col)}</th>'
        for col in headers
    )
    parts.append("</tr></thead><tbody>")

    for gt_token, slot in rows:
        # Tolerate explicit ``None`` values the same way ``variants`` does,
        # instead of crashing on ``None * 100``.
        rate = slot.get("rate_modernized") or 0.0
        n_total = slot.get("n_total") or 0
        # _format_variants already escapes each variant form.
        variants_str = _format_variants(slot.get("variants") or {})
        rate_color = color_single_gradient(rate, end_rgb=GRADIENT_TARGET_ORANGE)
        parts.append(
            '<tr>'
            f'<td style="{cell_pad};font-family:monospace">'
            f'{_e(gt_token)}</td>'
            f'<td style="{cell_pad};font-size:.85rem">'
            f'{variants_str}</td>'
            # n_total comes from the data payload: escape it like every
            # other data-derived value (no-op for plain integers).
            f'<td style="{cell_pad};text-align:right;'
            f'font-family:monospace">{_e(str(n_total))}</td>'
            f'<td style="{cell_pad};text-align:right;'
            f'background:{rate_color};font-family:monospace">'
            f'{rate * 100:.0f}%</td>'
            '</tr>'
        )

    parts.append("</tbody></table></div>")
    return "".join(parts)
# Public API of this module: only the HTML builder is exported.
__all__ = [
"build_lexical_modernization_html",
]