Spaces:

Ma-Ri-Ba-Ku
/

Picarones

Running

Picarones / picarones /report /lexical_modernization_render.py

Claude

refactor(report): consolidate 27 render helpers into render_helpers.py

2d6c41d unverified about 2 months ago

3.89 kB

	"""Rendu HTML de la vue « Modernisation lexicale » — Sprint 80.

	A.I.7 du plan d'évolution 2026.

	Suite directe de ``picarones/measurements/lexical_modernization.py``.
	Pattern identique aux autres rendus (Sprints 41/43/62/67/72/74/75/76/77) :
	server-side, pas de JavaScript, anti-injection systématique.

	Vue
	---
	Tableau trié par taux de modernisation décroissant : forme
	historique GT → forme(s) modernisée(s), occurrences GT, %.
	Couleur de cellule pour le %.
	"""

	from __future__ import annotations

	from html import escape as _e
	from typing import Optional

	from picarones.measurements.lexical_modernization import top_modernized_tokens
	from picarones.report.render_helpers import (
	GRADIENT_TARGET_ORANGE,
	color_single_gradient,
	)


	def _format_variants(variants: dict, max_show: int = 3) -> str:
	"""Liste compacte des variants modernisés."""
	items = sorted(variants.items(), key=lambda kv: -kv[1])
	shown = items[:max_show]
	rest = len(items) - max_show
	parts = [
	f"{_e(form)} ({count})"
	for form, count in shown
	]
	if rest > 0:
	parts.append(f"+{rest}")
	return ", ".join(parts)


	def build_lexical_modernization_html(
	    data: Optional[dict],
	    labels: Optional[dict[str, str]] = None,
	    *,
	    top_n: int = 20,
	    min_total: int = 1,
	) -> str:
	    """Build the HTML table of the lexical-modernization view.

	    Args:
	        data: Payload handed unchanged to ``top_modernized_tokens``; its
	            schema is defined by that helper.
	        labels: Optional overrides for the user-facing strings, looked up
	            under the ``lexmod_*`` keys. Missing keys fall back to the
	            built-in French defaults.
	        top_n: Forwarded to ``top_modernized_tokens`` as ``n``.
	        min_total: Forwarded to ``top_modernized_tokens`` as ``min_total``.

	    Returns:
	        An HTML fragment (a ``<div>`` wrapping a four-column ``<table>``),
	        or ``""`` when ``data`` is falsy or the helper yields no rows.
	    """
	    if not data:
	        return ""
	    rows = top_modernized_tokens(data, n=top_n, min_total=min_total)
	    if not rows:
	        return ""

	    labels = labels or {}
	    title = labels.get(
	        "lexmod_title", "Modernisation lexicale (top tokens)",
	    )
	    note = labels.get(
	        "lexmod_note",
	        "Tokens GT que le moteur réécrit le plus souvent. "
	        "Lecture : « maistre → maître modernisé dans 85 % des cas » "
	        "indique de quoi corriger dans le prompt pour préserver "
	        "l'orthographe historique.",
	    )
	    gt_label = labels.get("lexmod_gt_label", "Forme historique GT")
	    hyp_label = labels.get("lexmod_hyp_label", "Variantes OCR")
	    n_label = labels.get("lexmod_n_label", "n GT")
	    rate_label = labels.get("lexmod_rate_label", "% modernisé")

	    parts = [
	        '<div class="lexmod" style="margin:1rem 0">',
	        f'<div style="font-weight:600;margin-bottom:.4rem">{_e(title)}</div>',
	        f'<div style="font-size:.85rem;opacity:.75;margin-bottom:.5rem">'
	        f'{_e(note)}</div>',
	        '<table style="border-collapse:collapse;width:100%;'
	        'font-size:.85rem">',
	        '<thead><tr>',
	    ]
	    # One header cell per column, in display order.
	    parts.extend(
	        '<th scope="col" style="padding:.3rem .5rem;text-align:left;'
	        'border-bottom:1px solid #ccc;font-weight:600">'
	        f'{_e(col)}</th>'
	        for col in (gt_label, hyp_label, n_label, rate_label)
	    )
	    parts.append("</tr></thead><tbody>")

	    for gt_token, slot in rows:
	        # ``or`` fallbacks (same convention as ``variants``) also cover keys
	        # that are present but set to None, which would otherwise break the
	        # percentage arithmetic below.
	        rate = slot.get("rate_modernized") or 0.0
	        n_total = slot.get("n_total") or 0
	        variants_html = _format_variants(slot.get("variants") or {})
	        # The colour string comes from the project's own gradient helper and
	        # is inserted as-is; every value read from ``data`` is escaped.
	        rate_color = color_single_gradient(rate, end_rgb=GRADIENT_TARGET_ORANGE)
	        parts.append(
	            '<tr>'
	            '<td style="padding:.3rem .5rem;font-family:monospace">'
	            f'{_e(gt_token)}</td>'
	            '<td style="padding:.3rem .5rem;font-size:.85rem">'
	            f'{variants_html}</td>'
	            '<td style="padding:.3rem .5rem;text-align:right;'
	            f'font-family:monospace">{_e(str(n_total))}</td>'
	            '<td style="padding:.3rem .5rem;text-align:right;'
	            f'background:{rate_color};font-family:monospace">'
	            f'{rate * 100:.0f}%</td>'
	            '</tr>'
	        )
	    parts.append("</tbody></table></div>")
	    return "".join(parts)


	# Public API of this module: only the HTML builder is exported;
	# ``_format_variants`` remains a private helper.
	__all__ = [
	"build_lexical_modernization_html",
	]