Spaces:
Sleeping
Sleeping
File size: 4,601 Bytes
"""HTML rendering for "Readability (Flesch delta)" — Sprint 87 (A.II.2).

Direct follow-up to ``picarones/core/readability.py`` (Sprint 52) and
the Sprint 87 runner wiring.

Same pattern as the other renderers: server-side, no JS, systematic
escaping against injection.

View
----
Summary table: engine × {delta_mean, delta_median, % over-normalized,
number of under-normalized docs, n_docs}. The delta_mean cell is
coloured along a gradient:

- green (delta ≈ 0): OCR matches the GT in complexity.
- orange (delta > 5): over-normalization (typical of LLMs).
- blue (delta < -5): abrupt OCR degradation.

Adaptive: returns ``""`` when no engine has an
``aggregated_readability``.
"""
from __future__ import annotations
from html import escape as _e
from typing import Optional
from picarones.report.render_helpers import color_diverging
def _bg_for_flesch_delta(delta: float) -> str:
    """Pick the CSS background colour for a mean Flesch delta cell.

    Deltas whose absolute value is at most 1.0 get a fixed light green
    (``#a7f0a7``), treated as indistinguishable from noise. Any other
    value is delegated to ``color_diverging`` with the same green as the
    neutral colour, an orange endpoint for positive deltas
    (over-normalization) and a blue endpoint for negative deltas
    (under-normalization).

    Args:
        delta: Mean Flesch delta for one engine.

    Returns:
        A CSS colour string.
    """
    # ``max_abs=15.0`` is the intended saturation point (±15 Flesch
    # points); the clamping itself is presumably done by color_diverging.
    palette = {
        "max_abs": 15.0,
        "neutral_rgb": (167, 240, 167),
        "positive_rgb": (220, 140, 60),
        "negative_rgb": (90, 160, 210),
    }
    in_noise_band = abs(delta) <= 1.0
    return "#a7f0a7" if in_noise_band else color_diverging(delta, **palette)
def build_readability_summary_html(
    engines: list[dict],
    labels: Optional[dict[str, str]] = None,
) -> str:
    """Render the readability summary table as an HTML fragment.

    Only engines whose ``"aggregated_readability"`` entry is a dict get
    a row. Each row shows the engine name, the mean and median delta,
    the over-normalized rate as a percentage, the under-normalized
    document count and the document count. Missing or falsy numeric
    fields are rendered as zero. Titles, the note, column headers and
    engine names are passed through ``html.escape``.

    Args:
        engines: Engine dicts; the keys read here are ``"name"`` and
            ``"aggregated_readability"``.
        labels: Optional text overrides, looked up under the
            ``readability_*`` keys used below. French defaults apply
            for any key that is absent.

    Returns:
        The HTML fragment, or ``""`` when no engine carries a dict
        ``aggregated_readability``.
    """
    scored = [
        entry for entry in engines
        if isinstance(entry.get("aggregated_readability"), dict)
    ]
    if not scored:
        return ""

    text = (labels or {}).get
    heading = text("readability_title", "Lisibilité (delta Flesch)")
    blurb = text(
        "readability_note",
        "Différence de lisibilité Flesch entre la sortie OCR et la "
        "GT. Δ > +5 : over-normalisation (typique des LLM qui "
        "modernisent un texte ancien). Δ < -5 : dégradation "
        "brutale. Δ ≈ 0 : fidélité au registre linguistique.",
    )
    # Column order here must match the order of the <td> cells below.
    column_titles = (
        text("readability_engine", "Moteur"),
        text("readability_delta_mean", "Δ moyen"),
        text("readability_delta_median", "Δ médian"),
        text("readability_over_norm_rate", "% over-normalisé"),
        text("readability_under_norm_count", "Docs under-normalisés"),
        text("readability_docs", "Docs"),
    )

    # Shared inline-CSS fragments, so every cell of a kind is styled alike.
    cell_pad = "padding:.4rem .6rem"
    numeric_cell = f"{cell_pad};text-align:right;font-family:monospace"
    header_cell = (
        f"{cell_pad};text-align:left;"
        "border-bottom:1px solid #ccc;font-weight:600"
    )

    html = [
        '<div class="readability-section" style="margin:1rem 0">',
        f'<h3 style="margin:0 0 .3rem 0">{_e(heading)}</h3>',
        '<div style="font-size:.85rem;opacity:.75;margin-bottom:.5rem">'
        f'{_e(blurb)}</div>',
        '<table style="border-collapse:collapse;width:100%;'
        'font-size:.9rem">',
        '<thead><tr>',
    ]
    html.extend(
        f'<th scope="col" style="{header_cell}">{_e(caption)}</th>'
        for caption in column_titles
    )
    html.append("</tr></thead><tbody>")

    for entry in scored:
        stats = entry["aggregated_readability"]
        engine_name = entry.get("name") or "?"
        # ``or`` maps None / missing / 0 to a zero default before casting.
        mean = float(stats.get("delta_mean") or 0.0)
        median = float(stats.get("delta_median") or 0.0)
        over_share = float(stats.get("over_normalized_rate") or 0.0)
        under_count = int(stats.get("n_under_normalized") or 0)
        doc_count = int(stats.get("n_docs") or 0)
        # Only the mean-delta cell carries the gradient background.
        shade = _bg_for_flesch_delta(mean)
        html.append(
            "<tr>"
            f'<td style="{cell_pad}">{_e(str(engine_name))}</td>'
            f'<td style="{cell_pad};text-align:right;'
            f'background:{shade};font-family:monospace;font-weight:600">'
            f'{mean:+.2f}</td>'
            f'<td style="{numeric_cell}">{median:+.2f}</td>'
            f'<td style="{numeric_cell}">{over_share * 100:.0f}%</td>'
            f'<td style="{numeric_cell}">{under_count}</td>'
            f'<td style="{numeric_cell}">{doc_count}</td>'
            "</tr>"
        )

    html.append("</tbody></table></div>")
    return "".join(html)
# Public API of this module: only the table builder is exported.
__all__ = ["build_readability_summary_html"]
|