Spaces:

Ma-Ri-Ba-Ku
/

Picarones

Running

Picarones / picarones /report /multirun_stability_render.py

Claude

refactor(report): consolidate 27 render helpers into render_helpers.py

2d6c41d unverified about 2 months ago

5.84 kB

	"""Rendu HTML « Stabilité multi-runs » — Sprint 90 (A.II.4).

	Suite directe ``picarones/core/reliability.compute_multirun_stability``
	(Sprint 83). Pattern identique aux autres rendus : server-side,
	pas de JS, anti-injection systématique.

	Note d'intégration
	------------------
	La stabilité multi-runs n'est pas calculée automatiquement par
	le runner — l'utilisateur doit relancer son moteur LLM/VLM
	plusieurs fois (option ``--repeats N`` du runner reportée à un
	sprint dédié) et appeler ``compute_multirun_stability`` lui-
	même. Cette vue est donc un module de rendu pur que
	l'utilisateur compose :

	.. code-block:: python

	from picarones.measurements.reliability import compute_multirun_stability
	from picarones.report.multirun_stability_render import (
	build_multirun_stability_html,
	)

	stability = []
	for engine_name, runs in per_engine_runs.items():
	s = compute_multirun_stability(runs, reference=ref)
	if s is not None:
	s["engine_name"] = engine_name
	stability.append(s)
	html = build_multirun_stability_html(stability, labels)

	Vue
	---
	Tableau moteur × {n_runs, CER moyen ± écart-type, CV (%),
	% paires identiques, n outputs distincts}. Cellule CV colorée
	par gradient vert (stable) → rouge (instable, CV > 20 %).

	Adaptive : ``""`` si la liste est vide ou que tous les
	``cer_cv`` sont ``None``.
	"""

	from __future__ import annotations

	from html import escape as _e
	from typing import Optional

	from picarones.report.render_helpers import color_traffic_light


	def build_multirun_stability_html(
	stability: Optional[list],
	labels: Optional[dict[str, str]] = None,
	) -> str:
	"""Construit la vue HTML de stabilité multi-runs.

	Parameters
	----------
	stability:
	Liste de dicts (un par moteur) issus de
	``compute_multirun_stability`` enrichis d'un
	``engine_name``. Si vide ou ``None``, retourne ``""``.
	labels:
	Dict i18n. Clés sous le préfixe ``stability_*``.
	"""
	if not stability:
	return ""
	rows = [s for s in stability if isinstance(s, dict) and s.get("engine_name")]
	if not rows:
	return ""
	labels = labels or {}
	title = labels.get("stability_title", "Stabilité multi-runs")
	note = labels.get(
	"stability_note",
	"Quand un moteur LLM/VLM est non déterministe, la "
	"variance entre runs successifs sur les mêmes documents "
	"est un proxy de la fiabilité scientifique. Un CV élevé "
	"ou un faible taux de runs identiques discrédite "
	"l'interprétation du CER moyen.",
	)
	h_engine = labels.get("stability_engine", "Moteur")
	h_n_runs = labels.get("stability_n_runs", "Runs")
	h_cer = labels.get("stability_cer", "CER moyen ± σ")
	h_cv = labels.get("stability_cv", "CV (%)")
	h_identical = labels.get("stability_identical", "% runs identiques")
	h_distinct = labels.get("stability_distinct", "Sorties distinctes")

	parts = [
	'<section class="stability-section" style="margin:1rem 0">',
	f'<h3 style="margin:0 0 .3rem 0">{_e(title)}</h3>',
	f'<div style="font-size:.85rem;opacity:.75;margin-bottom:.6rem">'
	f'{_e(note)}</div>',
	'<table style="border-collapse:collapse;width:100%;'
	'font-size:.9rem">',
	'<thead><tr>',
	]
	for col in (h_engine, h_n_runs, h_cer, h_cv, h_identical, h_distinct):
	parts.append(
	f'<th scope=\"col\" style="padding:.4rem .6rem;text-align:left;'
	f'border-bottom:1px solid #ccc;font-weight:600">'
	f'{_e(col)}</th>'
	)
	parts.append("</tr></thead><tbody>")
	for stab in rows:
	engine = str(stab.get("engine_name") or "?")
	n_runs = int(stab.get("n_runs") or 0)
	cer_mean = stab.get("cer_mean")
	cer_stdev = stab.get("cer_stdev")
	cer_cv = stab.get("cer_cv")
	identical = stab.get("identical_run_rate")
	n_distinct = stab.get("n_distinct_outputs")
	if isinstance(cer_mean, (int, float)) and isinstance(cer_stdev, (int, float)):
	cer_str = f"{cer_mean * 100:.2f}% ± {cer_stdev * 100:.2f}%"
	elif isinstance(cer_mean, (int, float)):
	cer_str = f"{cer_mean * 100:.2f}%"
	else:
	cer_str = "—"
	if isinstance(cer_cv, (int, float)):
	cv_color = color_traffic_light(float(cer_cv), low_is_good=True, scale_max=0.25)
	cv_cell = (
	f'<td style="padding:.4rem .6rem;text-align:right;'
	f'background:{cv_color};font-family:monospace;'
	f'font-weight:600">{float(cer_cv) * 100:.1f}</td>'
	)
	else:
	cv_cell = (
	'<td style="padding:.4rem .6rem;text-align:right;'
	'opacity:.4">—</td>'
	)
	identical_str = (
	f"{float(identical) * 100:.1f}"
	if isinstance(identical, (int, float)) else "—"
	)
	distinct_str = str(n_distinct) if isinstance(n_distinct, int) else "—"
	parts.append(
	f'<tr>'
	f'<td style="padding:.4rem .6rem">{_e(engine)}</td>'
	f'<td style="padding:.4rem .6rem;text-align:right;'
	f'font-family:monospace">{n_runs}</td>'
	f'<td style="padding:.4rem .6rem;text-align:right;'
	f'font-family:monospace">{cer_str}</td>'
	f'{cv_cell}'
	f'<td style="padding:.4rem .6rem;text-align:right;'
	f'font-family:monospace">{identical_str}</td>'
	f'<td style="padding:.4rem .6rem;text-align:right;'
	f'font-family:monospace">{distinct_str}</td>'
	f'</tr>'
	)
	parts.append("</tbody></table></section>")
	return "".join(parts)


	__all__ = ["build_multirun_stability_html"]