"""Rendu HTML « Recherchabilité fuzzy » — Sprint 86 (A.II.5a HTML). Suite directe ``picarones/core/searchability.py`` (Sprint 84) + câblage runner (Sprint 86). Pattern identique aux autres rendus (Sprints 41/43/62/67/72) : **server-side**, pas de JavaScript, anti-injection systématique. Vue --- Tableau résumé : moteur × (rappel, n_searchable / n_gt_tokens, docs). Cellule rappel colorée par gradient rouge → vert. Adaptative : ``""`` si aucun moteur n'a de ``aggregated_searchability``. """ from __future__ import annotations from html import escape as _e from typing import Optional def _color_for_recall(recall: float) -> str: """Gradient rouge → jaune → vert pour rappel ∈ [0, 1].""" f = max(0.0, min(1.0, recall)) if f < 0.5: # rouge → jaune t = f / 0.5 r = 235 g = int(70 + (200 - 70) * t) b = 70 else: # jaune → vert t = (f - 0.5) / 0.5 r = int(235 + (60 - 235) * t) g = int(200 + (160 - 200) * t) b = int(70 + (90 - 70) * t) return f"#{r:02x}{g:02x}{b:02x}" def build_searchability_summary_html( engines: list[dict], labels: Optional[dict[str, str]] = None, ) -> str: """Construit la table HTML de recherchabilité. Parameters ---------- engines: Liste de dicts moteur ; chacun peut avoir ``aggregated_searchability``. labels: Dict i18n, clés ``search_*``. Returns ------- str ``""`` si aucun moteur n'a de signal. """ rows = [ e for e in engines if isinstance(e.get("aggregated_searchability"), dict) ] if not rows: return "" labels = labels or {} title = labels.get("search_title", "Recherchabilité fuzzy") note = labels.get( "search_note", "Proportion de tokens GT retrouvés dans la sortie OCR à " "distance de Levenshtein ≤ 2 — proxy direct de la " "qualité pour la recherche plein-texte (Elastic, Solr).", ) col_engine = labels.get("search_engine", "Moteur") col_recall = labels.get("search_recall", "Rappel") col_count = labels.get("search_count", "Tokens retrouvés / total") col_docs = labels.get("search_docs", "Docs") parts = [ '
', f'

{_e(title)}

', f'
' f'{_e(note)}
', '', '', ] for col in (col_engine, col_recall, col_count, col_docs): parts.append( f'' ) parts.append("") for engine in rows: agg = engine["aggregated_searchability"] name = engine.get("name") or "?" recall = float(agg.get("recall") or 0.0) n_search = int(agg.get("n_searchable") or 0) n_total = int(agg.get("n_gt_tokens") or 0) n_docs = int(agg.get("n_docs") or 0) color = _color_for_recall(recall) parts.append( f'' f'' f'' f'' f'' f'' ) parts.append("
' f'{_e(col)}
{_e(str(name))}' f'{recall * 100:.1f}%{n_search} / {n_total}{n_docs}
") return "".join(parts) __all__ = ["build_searchability_summary_html"]