Spaces:
Running
feat(sprint-2): rapport HTML interactif auto-contenu
Browse files
Nouveau module picarones/report/ :
Générateur HTML (report/generator.py) :
- Fichier HTML unique auto-contenu (~84 ko avec 12 docs et images PNG base64)
- Données embarquées en JSON inline (const DATA = {...})
- Chart.js 4.4.1 + diff2html 3.4.47 chargés depuis cdnjs
4 vues navigables :
1. Classement — tableau triable au clic sur chaque colonne (CER, WER, MER, WIL)
avec barres proportionnelles, badges couleur (vert→rouge), version moteur
2. Galerie — grille d'images avec badges CER par moteur, filtres CER/moteur,
tri par identifiant / CER moyen / meilleur moteur
3. Document — sidebar avec liste + CER badge, image zoomable (molette, drag),
vérité terrain + panneau diff coloré par moteur
(insertions vert, suppressions rouge, substitutions orange)
4. Analyses — 4 graphiques : histogramme CER par plage, radar multi-métriques
inversé, courbe CER par document, durée d'exécution moyenne
Diff mot-à-mot (report/diff_utils.py) :
- compute_word_diff() via difflib.SequenceMatcher → ops JSON sérialisables
- compute_char_diff() pour tokens courts
- diff_stats() pour comptage des opérations
Données de test (fixtures.py) :
- generate_sample_benchmark() : 12 textes GT médiévaux réalistes
- 3 moteurs simulés : pero_ocr (2.3% CER), tesseract (0% CER), ancien_moteur (18.9%)
- Images PNG placeholder générées en pur Python (sans Pillow)
- Reproductible via paramètre seed
CLI (cli.py) :
- picarones report --results results.json --output rapport.html
- picarones demo [--docs N] [--json-output results.json]
Tests : 50 nouveaux tests (108 au total, 100% passants en 2.95s)
- test_diff_utils.py : 16 tests (word diff, char diff, stats)
- test_report.py : 34 tests (fixtures, _build_report_data, generator, couleurs)
dont : validation JSON embarqué parseable, round-trip JSON→HTML,
présence Chart.js et diff2html, structure HTML valide
https://claude.ai/code/session_017gXea9mxBQqDTAsSQd7aAq
- picarones/cli.py +90 -0
- picarones/fixtures.py +247 -0
- picarones/report/__init__.py +5 -0
- picarones/report/diff_utils.py +89 -0
- picarones/report/generator.py +1358 -0
- tests/test_diff_utils.py +97 -0
- tests/test_report.py +244 -0
|
@@ -3,6 +3,8 @@
|
|
| 3 |
Commandes disponibles
|
| 4 |
---------------------
|
| 5 |
picarones run — Lance un benchmark complet
|
|
|
|
|
|
|
| 6 |
picarones metrics — Calcule CER/WER entre deux fichiers texte
|
| 7 |
picarones engines — Liste les moteurs disponibles
|
| 8 |
picarones info — Informations de version
|
|
@@ -291,5 +293,93 @@ def info_cmd() -> None:
|
|
| 291 |
click.echo(f" {name:<15} {status}")
|
| 292 |
|
| 293 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
if __name__ == "__main__":
|
| 295 |
cli()
|
|
|
|
| 3 |
Commandes disponibles
|
| 4 |
---------------------
|
| 5 |
picarones run — Lance un benchmark complet
|
| 6 |
+
picarones report — Génère le rapport HTML depuis un JSON de résultats
|
| 7 |
+
picarones demo — Génère un rapport de démonstration avec données fictives
|
| 8 |
picarones metrics — Calcule CER/WER entre deux fichiers texte
|
| 9 |
picarones engines — Liste les moteurs disponibles
|
| 10 |
picarones info — Informations de version
|
|
|
|
| 293 |
click.echo(f" {name:<15} {status}")
|
| 294 |
|
| 295 |
|
| 296 |
+
# ---------------------------------------------------------------------------
|
| 297 |
+
# picarones report
|
| 298 |
+
# ---------------------------------------------------------------------------
|
| 299 |
+
|
| 300 |
+
@cli.command("report")
@click.option(
    "--results", "-r",
    required=True,
    type=click.Path(exists=True, dir_okay=False, resolve_path=True),
    help="Fichier JSON de résultats produit par 'picarones run'",
)
@click.option(
    "--output", "-o",
    default="rapport.html",
    show_default=True,
    type=click.Path(resolve_path=True),
    help="Fichier HTML de sortie",
)
@click.option("--verbose", "-v", is_flag=True, default=False, help="Mode verbeux")
def report_cmd(results: str, output: str, verbose: bool) -> None:
    """Génère le rapport HTML interactif depuis un fichier JSON de résultats.

    Le rapport est un fichier HTML auto-contenu, lisible hors-ligne,
    avec tableau de classement, galerie, vue document et graphiques.
    """
    # NOTE: the docstring above is kept in French on purpose: click displays
    # it verbatim as the command's --help text.
    _setup_logging(verbose)

    # Imported inside the command rather than at module level
    # (presumably to keep the other sub-commands cheap to start -- confirm).
    from picarones.report.generator import ReportGenerator

    click.echo(f"Chargement des résultats : {results}")
    try:
        generator = ReportGenerator.from_json(results)
    except Exception as load_error:
        # CLI boundary: report any loading failure on stderr and exit with
        # status 1 instead of showing a traceback.
        click.echo(f"Erreur lors du chargement : {load_error}", err=True)
        sys.exit(1)

    click.echo("Génération du rapport HTML…")
    report_path = generator.generate(output)
    click.echo(f"Rapport généré : {report_path}")
    click.echo(f"Ouvrez-le dans un navigateur : file://{report_path}")
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
# ---------------------------------------------------------------------------
|
| 339 |
+
# picarones demo
|
| 340 |
+
# ---------------------------------------------------------------------------
|
| 341 |
+
|
| 342 |
+
@cli.command("demo")
@click.option(
    "--output", "-o",
    default="rapport_demo.html",
    show_default=True,
    type=click.Path(resolve_path=True),
    help="Fichier HTML de sortie",
)
@click.option(
    "--docs", "-n",
    default=12,
    show_default=True,
    type=click.IntRange(1, 12),
    help="Nombre de documents fictifs (1–12)",
)
@click.option(
    "--json-output", "-j",
    default=None,
    type=click.Path(resolve_path=True),
    help="Exporte aussi les résultats JSON",
)
def demo_cmd(output: str, docs: int, json_output: str | None) -> None:
    """Génère un rapport de démonstration avec des données fictives réalistes.

    Utile pour tester le rendu HTML sans installer Tesseract ni Pero OCR.
    """
    # NOTE: the docstring above is kept in French on purpose: click displays
    # it verbatim as the command's --help text.

    # Both imports are local to the command, as in the original code.
    from picarones.fixtures import generate_sample_benchmark
    from picarones.report.generator import ReportGenerator

    click.echo(f"Génération des données fictives ({docs} documents, 3 moteurs)…")
    sample = generate_sample_benchmark(n_docs=docs)

    # Optional side export of the raw results, only when a path was given.
    if json_output:
        exported_to = sample.to_json(json_output)
        click.echo(f"Résultats JSON : {exported_to}")

    report_path = ReportGenerator(sample).generate(output)
    click.echo(f"Rapport de démonstration : {report_path}")
    click.echo(f"Ouvrez-le dans un navigateur : file://{report_path}")
|
| 382 |
+
|
| 383 |
+
|
| 384 |
if __name__ == "__main__":
|
| 385 |
cli()
|
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Données de test réalistes pour valider le rapport HTML sans moteurs OCR installés.
|
| 2 |
+
|
| 3 |
+
Usage :
|
| 4 |
+
from picarones.fixtures import generate_sample_benchmark
|
| 5 |
+
bm = generate_sample_benchmark()
|
| 6 |
+
bm.to_json("sample_results.json")
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import base64
|
| 12 |
+
import random
|
| 13 |
+
import struct
|
| 14 |
+
import zlib
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from typing import Optional
|
| 17 |
+
|
| 18 |
+
from picarones.core.metrics import MetricsResult, aggregate_metrics
|
| 19 |
+
from picarones.core.results import BenchmarkResult, DocumentResult, EngineReport
|
| 20 |
+
|
| 21 |
+
# ---------------------------------------------------------------------------
|
| 22 |
+
# Textes GT réalistes (documents patrimoniaux BnF)
|
| 23 |
+
# ---------------------------------------------------------------------------
|
| 24 |
+
|
| 25 |
+
_GT_TEXTS = [
|
| 26 |
+
"Icy commence le prologue de maistre Jehan Froissart sus les croniques de France & d'Angleterre.",
|
| 27 |
+
"En l'an de grace mil trois cens soixante, regnoit en France le noble roy Jehan, filz du roy Phelippe de Valois.",
|
| 28 |
+
"Item ledit jour furent menez en ladicte ville de Paris plusieurs prisonniers sarasins & mahommetans.",
|
| 29 |
+
"Le chancellier du roy manda à tous les baillifs & seneschaulx que on feist crier & publier par tous les carrefours.",
|
| 30 |
+
"Cy après sensuyt la copie des lettres patentes données par nostre seigneur le roy à ses très chiers & feaulx.",
|
| 31 |
+
"Nous Charles, par la grace de Dieu roy de France, à tous ceulx qui ces presentes lettres verront, salut.",
|
| 32 |
+
"Savoir faisons que pour considéracion des bons & aggreables services que nostre amé & feal conseillier.",
|
| 33 |
+
"Donné à Paris, le vingt & deuxième jour du mois de juillet, l'an de grace mil quatre cens & troys.",
|
| 34 |
+
"Les dessus ditz ambassadeurs respondirent que leur seigneur & maistre estoit très joyeulx de ceste aliance.",
|
| 35 |
+
"Après lesquelles choses ainsi faictes & passées, le dit traictié fut ratiffié & confirmé de toutes parties.",
|
| 36 |
+
"Item, en ladicte année, fut faicte grant assemblée de gens d'armes tant à cheval que à pied.",
|
| 37 |
+
"Et pour ce que la chose est notoire & manifeste, nous avons fait mettre nostre scel à ces presentes.",
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
# ---------------------------------------------------------------------------
|
| 41 |
+
# Erreurs OCR typiques par moteur (transformations appliquées au GT)
|
| 42 |
+
# ---------------------------------------------------------------------------
|
| 43 |
+
|
| 44 |
+
def _tesseract_errors(text: str, rng: random.Random) -> str:
|
| 45 |
+
"""Simule les erreurs typiques de Tesseract sur documents médiévaux."""
|
| 46 |
+
replacements = [
|
| 47 |
+
("ſ", "f"), ("œ", "oe"), ("æ", "ae"),
|
| 48 |
+
("&", "8"), ("é", "e"), ("è", "e"),
|
| 49 |
+
("nostre", "noltre"), ("maistre", "inaistre"),
|
| 50 |
+
("faictes", "faictcs"), ("ledit", "Ledit"),
|
| 51 |
+
("regnoit", "regnoit"), ("Froissart", "Froiflart"),
|
| 52 |
+
("conseillie", "conlcillier"), ("consideracion", "confideration"),
|
| 53 |
+
("ny", "uy"), ("lx", "le"),
|
| 54 |
+
]
|
| 55 |
+
for src, tgt in rng.sample(replacements, k=min(rng.randint(2, 5), len(replacements))):
|
| 56 |
+
text = text.replace(src, tgt, 1)
|
| 57 |
+
if rng.random() < 0.3:
|
| 58 |
+
words = text.split()
|
| 59 |
+
if len(words) > 5:
|
| 60 |
+
idx = rng.randint(1, len(words) - 2)
|
| 61 |
+
words.pop(idx)
|
| 62 |
+
text = " ".join(words)
|
| 63 |
+
return text
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _pero_errors(text: str, rng: random.Random) -> str:
|
| 67 |
+
"""Pero OCR : moins d'erreurs, mais confusions diacritiques persistantes."""
|
| 68 |
+
replacements = [
|
| 69 |
+
("é", "é"), ("è", "e"), ("ê", "e"),
|
| 70 |
+
("œ", "oe"), ("&", "&"),
|
| 71 |
+
("uy", "ny"), ("rr", "ri"),
|
| 72 |
+
("nostre", "noſtre"), ("maistre", "maistre"),
|
| 73 |
+
]
|
| 74 |
+
for src, tgt in rng.sample(replacements, k=rng.randint(0, 3)):
|
| 75 |
+
text = text.replace(src, tgt, 1)
|
| 76 |
+
return text
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _bad_engine_errors(text: str, rng: random.Random) -> str:
|
| 80 |
+
"""Moteur de mauvaise qualité : nombreuses erreurs."""
|
| 81 |
+
words = text.split()
|
| 82 |
+
result = []
|
| 83 |
+
for word in words:
|
| 84 |
+
r = rng.random()
|
| 85 |
+
if r < 0.15:
|
| 86 |
+
pass # mot supprimé
|
| 87 |
+
elif r < 0.30:
|
| 88 |
+
# substitution partielle
|
| 89 |
+
chars = list(word)
|
| 90 |
+
if len(chars) > 2:
|
| 91 |
+
i = rng.randint(0, len(chars) - 1)
|
| 92 |
+
chars[i] = rng.choice("abcdefghijklmnopqrstuvwxyz")
|
| 93 |
+
result.append("".join(chars))
|
| 94 |
+
else:
|
| 95 |
+
result.append(word)
|
| 96 |
+
if rng.random() < 0.2:
|
| 97 |
+
result.insert(rng.randint(0, len(result)), rng.choice(["|||", "---", "###"]))
|
| 98 |
+
return " ".join(result)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# ---------------------------------------------------------------------------
|
| 102 |
+
# Génération d'une image PNG placeholder (pur Python, sans Pillow)
|
| 103 |
+
# ---------------------------------------------------------------------------
|
| 104 |
+
|
| 105 |
+
def _make_placeholder_png(width: int = 300, height: int = 200, text_hint: str = "") -> bytes:
|
| 106 |
+
"""Génère un PNG minimal représentant une page de document (gris clair).
|
| 107 |
+
|
| 108 |
+
Le PNG est valide et affichable dans tous les navigateurs.
|
| 109 |
+
On dessine une zone blanche avec une bordure et quelques lignes simulant du texte.
|
| 110 |
+
"""
|
| 111 |
+
# Créer les données de pixels RGB
|
| 112 |
+
pixels = []
|
| 113 |
+
for y in range(height):
|
| 114 |
+
row = []
|
| 115 |
+
for x in range(width):
|
| 116 |
+
# Fond légèrement crème (#f5f0e8)
|
| 117 |
+
if x < 3 or x >= width - 3 or y < 3 or y >= height - 3:
|
| 118 |
+
row.extend([180, 160, 140]) # bordure grise
|
| 119 |
+
elif 20 < y < 24 or 35 < y < 39:
|
| 120 |
+
# Lignes de titre simulées
|
| 121 |
+
if 30 < x < width - 30:
|
| 122 |
+
row.extend([80, 80, 80]) # texte gris foncé
|
| 123 |
+
else:
|
| 124 |
+
row.extend([245, 240, 232])
|
| 125 |
+
elif y > 50 and (y - 50) % 18 < 2 and 20 < x < width - 20:
|
| 126 |
+
row.extend([120, 120, 120]) # lignes de texte simulées
|
| 127 |
+
else:
|
| 128 |
+
row.extend([245, 240, 232])
|
| 129 |
+
pixels.append(bytes(row))
|
| 130 |
+
|
| 131 |
+
def make_png(w: int, h: int, rows: list[bytes]) -> bytes:
|
| 132 |
+
def png_chunk(chunk_type: bytes, data: bytes) -> bytes:
|
| 133 |
+
c = chunk_type + data
|
| 134 |
+
return struct.pack(">I", len(data)) + c + struct.pack(">I", zlib.crc32(c) & 0xFFFFFFFF)
|
| 135 |
+
|
| 136 |
+
sig = b"\x89PNG\r\n\x1a\n"
|
| 137 |
+
ihdr = png_chunk(b"IHDR", struct.pack(">IIBBBBB", w, h, 8, 2, 0, 0, 0))
|
| 138 |
+
raw = b"".join(b"\x00" + row for row in rows)
|
| 139 |
+
idat = png_chunk(b"IDAT", zlib.compress(raw))
|
| 140 |
+
iend = png_chunk(b"IEND", b"")
|
| 141 |
+
return sig + ihdr + idat + iend
|
| 142 |
+
|
| 143 |
+
return make_png(width, height, pixels)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def _png_to_data_uri(png_bytes: bytes) -> str:
|
| 147 |
+
b64 = base64.b64encode(png_bytes).decode("ascii")
|
| 148 |
+
return f"data:image/png;base64,{b64}"
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# ---------------------------------------------------------------------------
|
| 152 |
+
# Génération du benchmark de test
|
| 153 |
+
# ---------------------------------------------------------------------------
|
| 154 |
+
|
| 155 |
+
def _make_metrics(reference: str, hypothesis: str) -> MetricsResult:
    """Compute the metrics of *hypothesis* against *reference*.

    Delegates to ``picarones.core.metrics.compute_metrics``; the module is
    imported at call time, as in the original code.
    """
    import picarones.core.metrics as metrics_module

    return metrics_module.compute_metrics(reference, hypothesis)
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def generate_sample_benchmark(
    n_docs: int = 12,
    seed: int = 42,
    include_images: bool = True,
) -> BenchmarkResult:
    """Generate a fictitious but realistic BenchmarkResult.

    Three simulated engines (``pero_ocr``, ``tesseract``, ``ancien_moteur``)
    are run over the first ``n_docs`` ground-truth texts: each engine's
    error function degrades the text and the metrics are computed against
    the original.

    Parameters
    ----------
    n_docs:
        Number of documents in the test corpus. Clamped to the range
        ``0 .. len(_GT_TEXTS)``; negative values are treated as 0.
    seed:
        Random seed, for reproducibility.
    include_images:
        If True, generate one base64-encoded placeholder PNG per document.

    Returns
    -------
    BenchmarkResult
        Ready for the HTML report or the JSON export. The placeholder
        images are stored under ``metadata["_images_b64"]`` as a mapping
        from document id to data URI (empty when ``include_images`` is
        False).
    """
    rng = random.Random(seed)
    # Clamp on both sides: a negative value would otherwise slice
    # _GT_TEXTS from the end and disagree with document_count.
    n_docs = max(0, min(n_docs, len(_GT_TEXTS)))
    gt_texts = _GT_TEXTS[:n_docs]

    # (name, version, configuration, error simulation function)
    engines_config = [
        ("pero_ocr", "0.7.2", {"config": "/models/pero_printed.ini"}, _pero_errors),
        ("tesseract", "5.3.3", {"lang": "fra", "psm": 6}, _tesseract_errors),
        ("ancien_moteur", "2.1.0", {"lang": "fra"}, _bad_engine_errors),
    ]

    engine_reports: list[EngineReport] = []
    image_b64_cache: dict[str, str] = {}

    for engine_name, engine_version, engine_cfg, error_fn in engines_config:
        doc_results: list[DocumentResult] = []

        for i, gt in enumerate(gt_texts):
            doc_id = f"folio_{i+1:03d}"
            image_path = f"/corpus/images/{doc_id}.jpg"

            # Images are shared by all engines: build each one only once.
            if include_images and doc_id not in image_b64_cache:
                png = _make_placeholder_png(320, 220, gt[:20])
                image_b64_cache[doc_id] = _png_to_data_uri(png)

            # Simulated OCR output; the RNG is consumed here and again for
            # the duration below, in that order.
            hypothesis = error_fn(gt, rng)

            metrics = _make_metrics(gt, hypothesis)

            doc_results.append(
                DocumentResult(
                    doc_id=doc_id,
                    image_path=image_path,
                    ground_truth=gt,
                    hypothesis=hypothesis,
                    metrics=metrics,
                    duration_seconds=round(rng.uniform(0.3, 4.5), 3),
                )
            )

        engine_reports.append(
            EngineReport(
                engine_name=engine_name,
                engine_version=engine_version,
                engine_config=engine_cfg,
                document_results=doc_results,
            )
        )

    bm = BenchmarkResult(
        corpus_name="Corpus de test — Chroniques médiévales BnF",
        corpus_source="/corpus/chroniques/",
        document_count=n_docs,
        engine_reports=engine_reports,
        metadata={
            "description": "Données de démonstration générées par picarones.fixtures",
            "script": "gothique textura",
            "langue": "Français médiéval (XIVe-XVe siècle)",
            "institution": "BnF — Département des manuscrits",
        },
    )

    # Attach the base64 images to the benchmark (outside the standard
    # schema; the HTML generator is expected to read this extra field).
    bm.metadata["_images_b64"] = image_b64_cache  # type: ignore[assignment]

    return bm
|
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Module de génération du rapport HTML interactif."""
|
| 2 |
+
|
| 3 |
+
from picarones.report.generator import ReportGenerator
|
| 4 |
+
|
| 5 |
+
__all__ = ["ReportGenerator"]
|
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Calcul du diff mot-à-mot entre vérité terrain et sortie OCR.
|
| 2 |
+
|
| 3 |
+
Produit une liste d'opérations sérialisables en JSON, consommée
|
| 4 |
+
par le rendu JS dans le rapport HTML.
|
| 5 |
+
|
| 6 |
+
Opérations possibles
|
| 7 |
+
--------------------
|
| 8 |
+
{"op": "equal", "text": "mot"}
|
| 9 |
+
{"op": "insert", "text": "mot"} -- présent dans l'OCR mais pas dans la GT
|
| 10 |
+
{"op": "delete", "text": "mot"} -- présent dans la GT mais pas dans l'OCR
|
| 11 |
+
{"op": "replace", "old": "…", "new": "…"} -- substitution (orange)
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import difflib
|
| 17 |
+
import re
|
| 18 |
+
from typing import Any
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _tokenize(text: str) -> list[str]:
|
| 22 |
+
"""Découpe le texte en tokens (mots + ponctuation + espaces)."""
|
| 23 |
+
# Conserver les espaces comme tokens pour un rendu fidèle
|
| 24 |
+
return re.split(r"(\s+)", text)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def compute_word_diff(reference: str, hypothesis: str) -> list[dict[str, Any]]:
    """Compute a word-level diff between two texts.

    Both texts are split on whitespace and aligned with
    ``difflib.SequenceMatcher`` (autojunk disabled). Consecutive words
    covered by the same opcode are joined with single spaces.

    Parameters
    ----------
    reference:
        Ground-truth text.
    hypothesis:
        Text produced by the OCR engine.

    Returns
    -------
    list of dict
        JSON-serialisable operations, in reading order:
        ``{"op": "equal" | "insert" | "delete", "text": ...}`` or
        ``{"op": "replace", "old": ..., "new": ...}``.
    """
    gt_words = reference.split()
    ocr_words = hypothesis.split()
    opcodes = difflib.SequenceMatcher(
        None, gt_words, ocr_words, autojunk=False
    ).get_opcodes()

    operations: list[dict[str, Any]] = []
    for kind, gt_lo, gt_hi, ocr_lo, ocr_hi in opcodes:
        gt_text = " ".join(gt_words[gt_lo:gt_hi])
        ocr_text = " ".join(ocr_words[ocr_lo:ocr_hi])

        if kind == "replace":
            operations.append({"op": "replace", "old": gt_text, "new": ocr_text})
        elif kind == "insert":
            # Present in the OCR output only.
            operations.append({"op": "insert", "text": ocr_text})
        elif kind in ("equal", "delete"):
            # Both carry the ground-truth side of the alignment.
            operations.append({"op": kind, "text": gt_text})

    return operations
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def compute_char_diff(reference: str, hypothesis: str) -> list[dict[str, Any]]:
    """Compute a character-level diff, useful for short tokens.

    Same operation format as ``compute_word_diff``, but the alignment is
    done character by character and chunks are plain substrings.
    """
    # A str is already a sequence of one-character items, so it can be
    # handed to SequenceMatcher directly.
    opcodes = difflib.SequenceMatcher(
        None, reference, hypothesis, autojunk=False
    ).get_opcodes()

    operations: list[dict[str, Any]] = []
    for kind, ref_lo, ref_hi, hyp_lo, hyp_hi in opcodes:
        ref_part = reference[ref_lo:ref_hi]
        hyp_part = hypothesis[hyp_lo:hyp_hi]

        if kind == "replace":
            operations.append({"op": "replace", "old": ref_part, "new": hyp_part})
        elif kind == "insert":
            operations.append({"op": "insert", "text": hyp_part})
        elif kind in ("equal", "delete"):
            operations.append({"op": kind, "text": ref_part})

    return operations
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def diff_stats(ops: list[dict[str, Any]]) -> dict[str, int]:
    """Count the operations of a diff by kind.

    Returns a dict with the keys ``equal``, ``insert``, ``delete`` and
    ``replace`` (always present, in that order). An operation whose
    ``"op"`` value is not one of these raises ``KeyError``.
    """
    counts = dict.fromkeys(("equal", "insert", "delete", "replace"), 0)
    for entry in ops:
        kind = entry["op"]
        counts[kind] = counts[kind] + 1
    return counts
|
|
@@ -0,0 +1,1358 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Générateur du rapport HTML interactif auto-contenu.
|
| 2 |
+
|
| 3 |
+
Le rapport produit est un fichier HTML unique embarquant :
|
| 4 |
+
- Toutes les données (JSON inline)
|
| 5 |
+
- Chart.js et diff2html (depuis cdnjs)
|
| 6 |
+
- CSS et JavaScript de l'application
|
| 7 |
+
|
| 8 |
+
Vues disponibles
|
| 9 |
+
----------------
|
| 10 |
+
1. Classement — tableau triable par colonne (CER, WER, MER, WIL)
|
| 11 |
+
2. Galerie — grille d'images avec badge CER coloré
|
| 12 |
+
3. Document — image zoomable + diff coloré GT / OCR par moteur
|
| 13 |
+
4. Analyses — histogramme CER + graphique radar
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
import json
|
| 19 |
+
import math
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from typing import Optional
|
| 22 |
+
|
| 23 |
+
from picarones.core.results import BenchmarkResult
|
| 24 |
+
from picarones.report.diff_utils import compute_word_diff
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ---------------------------------------------------------------------------
|
| 28 |
+
# Helpers
|
| 29 |
+
# ---------------------------------------------------------------------------
|
| 30 |
+
|
| 31 |
+
def _cer_color(cer: float) -> str:
    """Return the CSS foreground colour of the quality band a CER falls into.

    Bands use exclusive upper bounds: below 0.05 is green, below 0.15
    yellow-orange, below 0.30 orange, and everything else is red.
    """
    bands = (
        (0.05, "#16a34a"),  # green
        (0.15, "#ca8a04"),  # yellow-orange
        (0.30, "#ea580c"),  # orange
    )
    for upper_bound, colour in bands:
        if cer < upper_bound:
            return colour
    return "#dc2626"  # red
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _cer_bg(cer: float) -> str:
    """Return the CSS background colour of the quality band a CER falls into.

    Uses the thresholds 0.05, 0.15 and 0.30 as exclusive upper bounds; a
    CER at or above 0.30 gets the last (reddish) background.
    """
    thresholds = (
        (0.05, "#dcfce7"),
        (0.15, "#fef9c3"),
        (0.30, "#ffedd5"),
    )
    # First band whose upper bound is not reached, else the fallback shade.
    return next((shade for limit, shade in thresholds if cer < limit), "#fee2e2")
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _pct(v: Optional[float], decimals: int = 2) -> str:
    """Format a ratio as a percentage string.

    Args:
        v: ratio to format (1.0 means 100 %), or None when unavailable.
        decimals: number of digits after the decimal point.

    Returns:
        The value multiplied by 100 followed by " %", or an em dash when
        ``v`` is None.
    """
    if v is not None:
        return "{:.{precision}f} %".format(v * 100, precision=decimals)
    return "—"
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _safe(v: Optional[float], decimals: int = 4) -> float:
    """Round a possibly-missing metric for JSON output.

    None (like any other falsy value) is replaced by 0.0 before rounding
    to ``decimals`` digits.
    """
    value = v if v else 0.0
    return round(value, decimals)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# ---------------------------------------------------------------------------
|
| 63 |
+
# Préparation des données
|
| 64 |
+
# ---------------------------------------------------------------------------
|
| 65 |
+
|
| 66 |
+
def _build_report_data(benchmark: BenchmarkResult, images_b64: dict[str, str]) -> dict:
    """Turn a BenchmarkResult into the JSON-serialisable dict embedded in the report.

    Args:
        benchmark: benchmark run to render. Read through its
            ``engine_reports``, ``ranking()`` and corpus/run attributes.
        images_b64: maps a document id to the string stored under
            ``image_b64``; documents without an entry get ``""``.

    Returns:
        A dict with the keys ``meta``, ``ranking``, ``engines`` (one summary
        per engine report) and ``documents`` (one entry per document id,
        each holding the per-engine results and their word diff).
    """
    engines_summary = []
    for report in benchmark.engine_reports:
        agg = report.aggregated_metrics
        cer_stats = agg.get("cer", {})
        engines_summary.append({
            "name": report.engine_name,
            "version": report.engine_version,
            "cer": _safe(cer_stats.get("mean")),
            "wer": _safe(agg.get("wer", {}).get("mean")),
            "mer": _safe(agg.get("mer", {}).get("mean")),
            "wil": _safe(agg.get("wil", {}).get("mean")),
            "cer_median": _safe(cer_stats.get("median")),
            "cer_min": _safe(cer_stats.get("min")),
            "cer_max": _safe(cer_stats.get("max")),
            "doc_count": agg.get("document_count", 0),
            "failed": agg.get("failed_count", 0),
            # Histogram input: individual CER values of the documents
            # whose metrics carry no error.
            "cer_values": [
                _safe(dr.metrics.cer)
                for dr in report.document_results
                if dr.metrics.error is None
            ],
        })

    # Cross index: doc_id -> {engine_name -> DocumentResult}.
    # Dicts keep insertion order, so documents come out in the order of the
    # first engine; ids reported only by a later engine are appended instead
    # of raising KeyError.
    doc_engine_map: dict[str, dict] = {}
    for report in benchmark.engine_reports:
        for dr in report.document_results:
            doc_engine_map.setdefault(dr.doc_id, {})[report.engine_name] = dr

    # Loop-invariant: engine order used for every document.
    engine_names = [report.engine_name for report in benchmark.engine_reports]

    documents = []
    for doc_id, by_engine in doc_engine_map.items():
        engine_results = []
        gt = ""
        image_path = ""
        for engine_name in engine_names:
            dr = by_engine.get(engine_name)
            if dr is None:
                continue
            # Ground truth and image path are taken from the last engine
            # that processed this document.
            gt = dr.ground_truth
            image_path = dr.image_path
            diff_ops = compute_word_diff(dr.ground_truth, dr.hypothesis)
            engine_results.append({
                "engine": engine_name,
                "hypothesis": dr.hypothesis,
                "cer": _safe(dr.metrics.cer),
                "wer": _safe(dr.metrics.wer),
                "duration": dr.duration_seconds,
                "error": dr.engine_error,
                "diff": diff_ops,
            })

        # Mean CER for the gallery badge, over successful runs only
        # (1.0 when no engine succeeded).
        ok_results = [er for er in engine_results if er["error"] is None]
        if ok_results:
            mean_cer = sum(er["cer"] for er in ok_results) / len(ok_results)
        else:
            mean_cer = 1.0
        # Failed runs carry a CER coerced to 0.0 by _safe(None) and must not
        # win; they are only considered when every engine failed.
        best_engine = min(
            ok_results or engine_results,
            key=lambda er: er["cer"],
            default=None,
        )

        documents.append({
            "doc_id": doc_id,
            "image_path": image_path,
            "image_b64": images_b64.get(doc_id, ""),
            "ground_truth": gt,
            "mean_cer": _safe(mean_cer),
            "best_engine": best_engine["engine"] if best_engine else "",
            "engine_results": engine_results,
        })

    return {
        "meta": {
            "corpus_name": benchmark.corpus_name,
            "corpus_source": benchmark.corpus_source,
            "document_count": benchmark.document_count,
            "run_date": benchmark.run_date,
            "picarones_version": benchmark.picarones_version,
            "metadata": benchmark.metadata,
        },
        "ranking": benchmark.ranking(),
        "engines": engines_summary,
        "documents": documents,
    }
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
# ---------------------------------------------------------------------------
|
| 157 |
+
# Template HTML
|
| 158 |
+
# ---------------------------------------------------------------------------
|
| 159 |
+
|
| 160 |
+
_HTML_TEMPLATE = """\
|
| 161 |
+
<!DOCTYPE html>
|
| 162 |
+
<html lang="fr">
|
| 163 |
+
<head>
|
| 164 |
+
<meta charset="UTF-8">
|
| 165 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 166 |
+
<title>Picarones — {corpus_name}</title>
|
| 167 |
+
|
| 168 |
+
<!-- Chart.js -->
|
| 169 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.4.1/chart.umd.min.js"
|
| 170 |
+
integrity="sha512-CQBWl4fJHWbryGE+Pc3UJWW1h3Q8IkkvNnPTozals+S49OTEQPoQj/m1LZRM28Wr/7bJCMlpYS3/Zp4hHuWQ=="
|
| 171 |
+
crossorigin="anonymous"></script>
|
| 172 |
+
|
| 173 |
+
<!-- diff2html -->
|
| 174 |
+
<link rel="stylesheet"
|
| 175 |
+
href="https://cdnjs.cloudflare.com/ajax/libs/diff2html/3.4.47/diff2html.min.css"
|
| 176 |
+
crossorigin="anonymous">
|
| 177 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/diff2html/3.4.47/diff2html.min.js"
|
| 178 |
+
crossorigin="anonymous"></script>
|
| 179 |
+
|
| 180 |
+
<style>
|
| 181 |
+
/* ── Reset & base ─────────────────────────────────────────────────── */
|
| 182 |
+
*, *::before, *::after {{ box-sizing: border-box; margin: 0; padding: 0; }}
|
| 183 |
+
:root {{
|
| 184 |
+
--bg: #f1f5f9;
|
| 185 |
+
--surface: #ffffff;
|
| 186 |
+
--border: #e2e8f0;
|
| 187 |
+
--primary: #1e40af;
|
| 188 |
+
--primary-lt: #dbeafe;
|
| 189 |
+
--text: #1e293b;
|
| 190 |
+
--text-muted: #64748b;
|
| 191 |
+
--ins: #16a34a;
|
| 192 |
+
--ins-bg: #dcfce7;
|
| 193 |
+
--del: #dc2626;
|
| 194 |
+
--del-bg: #fee2e2;
|
| 195 |
+
--rep: #c2410c;
|
| 196 |
+
--rep-bg: #ffedd5;
|
| 197 |
+
--radius: 8px;
|
| 198 |
+
--shadow: 0 1px 3px rgba(0,0,0,.08), 0 1px 2px rgba(0,0,0,.05);
|
| 199 |
+
--nav-h: 56px;
|
| 200 |
+
}}
|
| 201 |
+
html {{ font-size: 14px; scroll-behavior: smooth; }}
|
| 202 |
+
body {{
|
| 203 |
+
font-family: system-ui, -apple-system, 'Segoe UI', sans-serif;
|
| 204 |
+
background: var(--bg);
|
| 205 |
+
color: var(--text);
|
| 206 |
+
min-height: 100vh;
|
| 207 |
+
}}
|
| 208 |
+
|
| 209 |
+
/* ── Navigation ───────────────────────────────────────────────────── */
|
| 210 |
+
nav {{
|
| 211 |
+
position: fixed; top: 0; left: 0; right: 0; z-index: 100;
|
| 212 |
+
height: var(--nav-h);
|
| 213 |
+
background: var(--primary);
|
| 214 |
+
display: flex; align-items: center;
|
| 215 |
+
padding: 0 1.5rem;
|
| 216 |
+
gap: 2rem;
|
| 217 |
+
box-shadow: 0 2px 8px rgba(0,0,0,.25);
|
| 218 |
+
}}
|
| 219 |
+
nav .brand {{
|
| 220 |
+
color: #fff; font-weight: 700; font-size: 1.1rem;
|
| 221 |
+
letter-spacing: -.3px; white-space: nowrap;
|
| 222 |
+
display: flex; align-items: center; gap: .4rem;
|
| 223 |
+
}}
|
| 224 |
+
nav .brand span {{ opacity: .7; font-weight: 400; font-size: .85rem; }}
|
| 225 |
+
nav .tabs {{
|
| 226 |
+
display: flex; gap: .25rem; flex: 1;
|
| 227 |
+
}}
|
| 228 |
+
.tab-btn {{
|
| 229 |
+
background: transparent; border: none; cursor: pointer;
|
| 230 |
+
color: rgba(255,255,255,.7);
|
| 231 |
+
padding: .4rem .9rem; border-radius: 6px;
|
| 232 |
+
font-size: .9rem; font-weight: 500;
|
| 233 |
+
transition: background .15s, color .15s;
|
| 234 |
+
}}
|
| 235 |
+
.tab-btn:hover {{ background: rgba(255,255,255,.12); color: #fff; }}
|
| 236 |
+
.tab-btn.active {{ background: rgba(255,255,255,.18); color: #fff; }}
|
| 237 |
+
nav .meta {{
|
| 238 |
+
color: rgba(255,255,255,.6); font-size: .78rem;
|
| 239 |
+
white-space: nowrap; margin-left: auto;
|
| 240 |
+
}}
|
| 241 |
+
|
| 242 |
+
/* ── Layout ───────────────────────────────────────────────────────── */
|
| 243 |
+
main {{
|
| 244 |
+
margin-top: var(--nav-h);
|
| 245 |
+
padding: 1.5rem;
|
| 246 |
+
max-width: 1400px;
|
| 247 |
+
margin-left: auto; margin-right: auto;
|
| 248 |
+
}}
|
| 249 |
+
.view {{ display: none; }}
|
| 250 |
+
.view.active {{ display: block; }}
|
| 251 |
+
.card {{
|
| 252 |
+
background: var(--surface);
|
| 253 |
+
border-radius: var(--radius);
|
| 254 |
+
border: 1px solid var(--border);
|
| 255 |
+
box-shadow: var(--shadow);
|
| 256 |
+
padding: 1.25rem;
|
| 257 |
+
margin-bottom: 1.25rem;
|
| 258 |
+
}}
|
| 259 |
+
h2 {{
|
| 260 |
+
font-size: 1rem; font-weight: 700;
|
| 261 |
+
color: var(--text); margin-bottom: .75rem;
|
| 262 |
+
border-bottom: 2px solid var(--primary-lt);
|
| 263 |
+
padding-bottom: .4rem;
|
| 264 |
+
}}
|
| 265 |
+
h3 {{ font-size: .9rem; font-weight: 600; margin-bottom: .5rem; }}
|
| 266 |
+
|
| 267 |
+
/* ── Ranking table ────────────────────────────────────────────────── */
|
| 268 |
+
.table-wrap {{ overflow-x: auto; }}
|
| 269 |
+
table {{
|
| 270 |
+
width: 100%; border-collapse: collapse;
|
| 271 |
+
font-size: .88rem;
|
| 272 |
+
}}
|
| 273 |
+
thead tr {{ background: var(--bg); }}
|
| 274 |
+
th {{
|
| 275 |
+
text-align: left; padding: .6rem .75rem;
|
| 276 |
+
border-bottom: 2px solid var(--border);
|
| 277 |
+
cursor: pointer; white-space: nowrap;
|
| 278 |
+
color: var(--text-muted); font-weight: 600; font-size: .8rem;
|
| 279 |
+
text-transform: uppercase; letter-spacing: .04em;
|
| 280 |
+
user-select: none;
|
| 281 |
+
}}
|
| 282 |
+
th.sortable:hover {{ color: var(--primary); }}
|
| 283 |
+
th .sort-icon {{ opacity: .4; margin-left: .25rem; font-style: normal; }}
|
| 284 |
+
th.sorted .sort-icon {{ opacity: 1; color: var(--primary); }}
|
| 285 |
+
td {{
|
| 286 |
+
padding: .55rem .75rem;
|
| 287 |
+
border-bottom: 1px solid var(--border);
|
| 288 |
+
vertical-align: middle;
|
| 289 |
+
}}
|
| 290 |
+
tr:last-child td {{ border-bottom: none; }}
|
| 291 |
+
tbody tr:hover {{ background: #f8fafc; }}
|
| 292 |
+
.rank-badge {{
|
| 293 |
+
display: inline-flex; align-items: center; justify-content: center;
|
| 294 |
+
width: 1.6rem; height: 1.6rem; border-radius: 50%;
|
| 295 |
+
font-weight: 700; font-size: .75rem;
|
| 296 |
+
background: var(--primary-lt); color: var(--primary);
|
| 297 |
+
}}
|
| 298 |
+
.rank-badge.rank-1 {{ background: #fef3c7; color: #92400e; }}
|
| 299 |
+
.engine-name {{ font-weight: 600; }}
|
| 300 |
+
.engine-version {{ color: var(--text-muted); font-size: .78rem; margin-left: .3rem; }}
|
| 301 |
+
.cer-badge {{
|
| 302 |
+
display: inline-block;
|
| 303 |
+
padding: .15rem .5rem; border-radius: 4px;
|
| 304 |
+
font-weight: 600; font-size: .82rem;
|
| 305 |
+
}}
|
| 306 |
+
.bar {{
|
| 307 |
+
display: inline-block; height: 8px; border-radius: 4px;
|
| 308 |
+
vertical-align: middle; margin-right: .4rem;
|
| 309 |
+
}}
|
| 310 |
+
|
| 311 |
+
/* ── Gallery ──────────────────────────────────────────────────────── */
|
| 312 |
+
.gallery-controls {{
|
| 313 |
+
display: flex; align-items: center; gap: .75rem;
|
| 314 |
+
margin-bottom: 1rem; flex-wrap: wrap;
|
| 315 |
+
}}
|
| 316 |
+
.gallery-controls label {{ font-size: .82rem; color: var(--text-muted); }}
|
| 317 |
+
.gallery-controls input[type=range] {{ width: 120px; }}
|
| 318 |
+
.gallery-grid {{
|
| 319 |
+
display: grid;
|
| 320 |
+
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
| 321 |
+
gap: 1rem;
|
| 322 |
+
}}
|
| 323 |
+
.gallery-card {{
|
| 324 |
+
background: var(--surface);
|
| 325 |
+
border: 1px solid var(--border);
|
| 326 |
+
border-radius: var(--radius);
|
| 327 |
+
overflow: hidden;
|
| 328 |
+
cursor: pointer;
|
| 329 |
+
transition: transform .15s, box-shadow .15s;
|
| 330 |
+
}}
|
| 331 |
+
.gallery-card:hover {{
|
| 332 |
+
transform: translateY(-2px);
|
| 333 |
+
box-shadow: 0 4px 12px rgba(0,0,0,.12);
|
| 334 |
+
border-color: var(--primary);
|
| 335 |
+
}}
|
| 336 |
+
.gallery-card img, .gallery-card .img-placeholder {{
|
| 337 |
+
width: 100%; aspect-ratio: 4/3; object-fit: cover;
|
| 338 |
+
display: block; background: #e8e0d4;
|
| 339 |
+
}}
|
| 340 |
+
.img-placeholder {{
|
| 341 |
+
display: flex; align-items: center; justify-content: center;
|
| 342 |
+
font-size: 2rem; color: #94a3b8;
|
| 343 |
+
}}
|
| 344 |
+
.gallery-card-body {{
|
| 345 |
+
padding: .6rem .75rem;
|
| 346 |
+
}}
|
| 347 |
+
.gallery-card-title {{
|
| 348 |
+
font-size: .8rem; font-weight: 600; margin-bottom: .35rem;
|
| 349 |
+
white-space: nowrap; overflow: hidden; text-overflow: ellipsis;
|
| 350 |
+
}}
|
| 351 |
+
.gallery-card-badges {{
|
| 352 |
+
display: flex; gap: .3rem; flex-wrap: wrap;
|
| 353 |
+
}}
|
| 354 |
+
.engine-cer-badge {{
|
| 355 |
+
font-size: .7rem; font-weight: 700;
|
| 356 |
+
padding: .1rem .35rem; border-radius: 3px;
|
| 357 |
+
}}
|
| 358 |
+
|
| 359 |
+
/* ── Document detail ──────────────────────────────────────────────── */
|
| 360 |
+
.doc-layout {{
|
| 361 |
+
display: grid;
|
| 362 |
+
grid-template-columns: 220px 1fr;
|
| 363 |
+
gap: 1rem;
|
| 364 |
+
align-items: start;
|
| 365 |
+
}}
|
| 366 |
+
@media (max-width: 768px) {{
|
| 367 |
+
.doc-layout {{ grid-template-columns: 1fr; }}
|
| 368 |
+
}}
|
| 369 |
+
.doc-sidebar {{
|
| 370 |
+
background: var(--surface);
|
| 371 |
+
border: 1px solid var(--border);
|
| 372 |
+
border-radius: var(--radius);
|
| 373 |
+
max-height: calc(100vh - var(--nav-h) - 3rem);
|
| 374 |
+
overflow-y: auto;
|
| 375 |
+
position: sticky;
|
| 376 |
+
top: calc(var(--nav-h) + 1.5rem);
|
| 377 |
+
}}
|
| 378 |
+
.doc-sidebar-header {{
|
| 379 |
+
padding: .6rem .75rem;
|
| 380 |
+
font-size: .8rem; font-weight: 700; color: var(--text-muted);
|
| 381 |
+
text-transform: uppercase; letter-spacing: .05em;
|
| 382 |
+
border-bottom: 1px solid var(--border);
|
| 383 |
+
position: sticky; top: 0; background: var(--surface);
|
| 384 |
+
}}
|
| 385 |
+
.doc-list-item {{
|
| 386 |
+
padding: .5rem .75rem;
|
| 387 |
+
cursor: pointer;
|
| 388 |
+
border-bottom: 1px solid var(--border);
|
| 389 |
+
display: flex; align-items: center; justify-content: space-between;
|
| 390 |
+
gap: .5rem;
|
| 391 |
+
transition: background .1s;
|
| 392 |
+
}}
|
| 393 |
+
.doc-list-item:last-child {{ border-bottom: none; }}
|
| 394 |
+
.doc-list-item:hover {{ background: var(--bg); }}
|
| 395 |
+
.doc-list-item.active {{ background: var(--primary-lt); }}
|
| 396 |
+
.doc-list-label {{ font-size: .82rem; font-weight: 500; }}
|
| 397 |
+
.doc-list-cer {{
|
| 398 |
+
font-size: .72rem; font-weight: 700;
|
| 399 |
+
padding: .1rem .3rem; border-radius: 3px;
|
| 400 |
+
flex-shrink: 0;
|
| 401 |
+
}}
|
| 402 |
+
|
| 403 |
+
/* Image zone */
|
| 404 |
+
.doc-image-wrap {{
|
| 405 |
+
position: relative; overflow: hidden;
|
| 406 |
+
border: 1px solid var(--border); border-radius: var(--radius);
|
| 407 |
+
background: #e8e0d4; cursor: zoom-in;
|
| 408 |
+
aspect-ratio: 4/3;
|
| 409 |
+
}}
|
| 410 |
+
.doc-image-wrap img {{
|
| 411 |
+
width: 100%; height: 100%; object-fit: contain;
|
| 412 |
+
transform-origin: center center;
|
| 413 |
+
transition: transform .2s;
|
| 414 |
+
user-select: none;
|
| 415 |
+
}}
|
| 416 |
+
.doc-image-placeholder {{
|
| 417 |
+
width: 100%; height: 100%;
|
| 418 |
+
display: flex; align-items: center; justify-content: center;
|
| 419 |
+
flex-direction: column; gap: .5rem; color: #94a3b8;
|
| 420 |
+
font-size: .9rem;
|
| 421 |
+
}}
|
| 422 |
+
.zoom-controls {{
|
| 423 |
+
position: absolute; bottom: .5rem; right: .5rem;
|
| 424 |
+
display: flex; gap: .3rem;
|
| 425 |
+
}}
|
| 426 |
+
.zoom-btn {{
|
| 427 |
+
background: rgba(0,0,0,.5); color: #fff;
|
| 428 |
+
border: none; border-radius: 4px; cursor: pointer;
|
| 429 |
+
width: 28px; height: 28px; font-size: .9rem;
|
| 430 |
+
display: flex; align-items: center; justify-content: center;
|
| 431 |
+
transition: background .1s;
|
| 432 |
+
}}
|
| 433 |
+
.zoom-btn:hover {{ background: rgba(0,0,0,.75); }}
|
| 434 |
+
|
| 435 |
+
/* Diff panels */
|
| 436 |
+
.diff-panels {{
|
| 437 |
+
display: grid;
|
| 438 |
+
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
| 439 |
+
gap: .75rem;
|
| 440 |
+
margin-top: .75rem;
|
| 441 |
+
}}
|
| 442 |
+
.diff-panel {{
|
| 443 |
+
background: var(--surface);
|
| 444 |
+
border: 1px solid var(--border);
|
| 445 |
+
border-radius: var(--radius);
|
| 446 |
+
overflow: hidden;
|
| 447 |
+
}}
|
| 448 |
+
.diff-panel-header {{
|
| 449 |
+
padding: .5rem .75rem;
|
| 450 |
+
background: var(--bg);
|
| 451 |
+
border-bottom: 1px solid var(--border);
|
| 452 |
+
display: flex; align-items: center; justify-content: space-between;
|
| 453 |
+
}}
|
| 454 |
+
.diff-panel-title {{ font-size: .83rem; font-weight: 700; }}
|
| 455 |
+
.diff-panel-metrics {{
|
| 456 |
+
display: flex; gap: .4rem;
|
| 457 |
+
font-size: .72rem;
|
| 458 |
+
}}
|
| 459 |
+
.diff-panel-body {{
|
| 460 |
+
padding: .75rem; font-size: .82rem; line-height: 1.7;
|
| 461 |
+
font-family: 'Georgia', serif;
|
| 462 |
+
max-height: 260px; overflow-y: auto;
|
| 463 |
+
}}
|
| 464 |
+
/* Diff spans */
|
| 465 |
+
.d-eq {{ color: var(--text); }}
|
| 466 |
+
.d-ins {{ color: var(--ins); background: var(--ins-bg); border-radius: 2px; padding: 0 1px; }}
|
| 467 |
+
.d-del {{ color: var(--del); background: var(--del-bg); border-radius: 2px; padding: 0 1px; text-decoration: line-through; }}
|
| 468 |
+
.d-rep-old {{ color: var(--del); background: var(--del-bg); border-radius: 2px 0 0 2px; padding: 0 1px; text-decoration: line-through; }}
|
| 469 |
+
.d-rep-new {{ color: var(--rep); background: var(--rep-bg); border-radius: 0 2px 2px 0; padding: 0 1px; }}
|
| 470 |
+
|
| 471 |
+
/* GT panel */
|
| 472 |
+
.gt-panel {{
|
| 473 |
+
background: var(--surface);
|
| 474 |
+
border: 1px solid var(--border);
|
| 475 |
+
border-radius: var(--radius);
|
| 476 |
+
overflow: hidden;
|
| 477 |
+
}}
|
| 478 |
+
.gt-panel-header {{
|
| 479 |
+
padding: .5rem .75rem;
|
| 480 |
+
background: #f0fdf4;
|
| 481 |
+
border-bottom: 1px solid #bbf7d0;
|
| 482 |
+
font-size: .83rem; font-weight: 700; color: #15803d;
|
| 483 |
+
}}
|
| 484 |
+
.gt-panel-body {{
|
| 485 |
+
padding: .75rem; font-size: .82rem; line-height: 1.7;
|
| 486 |
+
font-family: 'Georgia', serif;
|
| 487 |
+
max-height: 260px; overflow-y: auto;
|
| 488 |
+
color: var(--text);
|
| 489 |
+
}}
|
| 490 |
+
|
| 491 |
+
/* ── Analyses ─────────────────────────────────────────────────────── */
|
| 492 |
+
.charts-grid {{
|
| 493 |
+
display: grid;
|
| 494 |
+
grid-template-columns: repeat(auto-fit, minmax(380px, 1fr));
|
| 495 |
+
gap: 1rem;
|
| 496 |
+
}}
|
| 497 |
+
.chart-card {{
|
| 498 |
+
background: var(--surface);
|
| 499 |
+
border: 1px solid var(--border);
|
| 500 |
+
border-radius: var(--radius);
|
| 501 |
+
padding: 1rem;
|
| 502 |
+
}}
|
| 503 |
+
.chart-canvas-wrap {{ position: relative; height: 280px; }}
|
| 504 |
+
|
| 505 |
+
/* ── Misc ─────────────────────────────────────────────────────────── */
|
| 506 |
+
.badge {{
|
| 507 |
+
display: inline-block; padding: .15rem .45rem;
|
| 508 |
+
border-radius: 4px; font-size: .72rem; font-weight: 700;
|
| 509 |
+
}}
|
| 510 |
+
.pill {{
|
| 511 |
+
display: inline-block; padding: .1rem .4rem;
|
| 512 |
+
border-radius: 12px; font-size: .72rem;
|
| 513 |
+
background: var(--primary-lt); color: var(--primary);
|
| 514 |
+
}}
|
| 515 |
+
.empty-state {{
|
| 516 |
+
text-align: center; padding: 3rem 1rem;
|
| 517 |
+
color: var(--text-muted); font-size: .9rem;
|
| 518 |
+
}}
|
| 519 |
+
.legend-dot {{
|
| 520 |
+
display: inline-block; width: 8px; height: 8px;
|
| 521 |
+
border-radius: 50%; margin-right: .3rem;
|
| 522 |
+
}}
|
| 523 |
+
.legend-row {{
|
| 524 |
+
display: flex; align-items: center; gap: .4rem;
|
| 525 |
+
font-size: .78rem; color: var(--text-muted);
|
| 526 |
+
}}
|
| 527 |
+
footer {{
|
| 528 |
+
text-align: center; padding: 1.5rem;
|
| 529 |
+
color: var(--text-muted); font-size: .75rem;
|
| 530 |
+
border-top: 1px solid var(--border); margin-top: 2rem;
|
| 531 |
+
}}
|
| 532 |
+
.stat-row {{
|
| 533 |
+
display: flex; gap: 1.5rem; flex-wrap: wrap; margin-bottom: .75rem;
|
| 534 |
+
}}
|
| 535 |
+
.stat {{
|
| 536 |
+
background: var(--bg); border-radius: 6px; padding: .4rem .75rem;
|
| 537 |
+
font-size: .8rem;
|
| 538 |
+
}}
|
| 539 |
+
.stat b {{ color: var(--primary); }}
|
| 540 |
+
</style>
|
| 541 |
+
</head>
|
| 542 |
+
|
| 543 |
+
<body>
|
| 544 |
+
|
| 545 |
+
<!-- ── Navigation ─────────────────────────────────────────────────── -->
|
| 546 |
+
<nav>
|
| 547 |
+
<div class="brand">
|
| 548 |
+
Picarones
|
| 549 |
+
<span>| rapport OCR</span>
|
| 550 |
+
</div>
|
| 551 |
+
<div class="tabs">
|
| 552 |
+
<button class="tab-btn active" onclick="showView('ranking')">Classement</button>
|
| 553 |
+
<button class="tab-btn" onclick="showView('gallery')">Galerie</button>
|
| 554 |
+
<button class="tab-btn" onclick="showView('document')">Document</button>
|
| 555 |
+
<button class="tab-btn" onclick="showView('analyses')">Analyses</button>
|
| 556 |
+
</div>
|
| 557 |
+
<div class="meta" id="nav-meta">—</div>
|
| 558 |
+
</nav>
|
| 559 |
+
|
| 560 |
+
<!-- ── Main ───────────────────────────────────────────────────────── -->
|
| 561 |
+
<main>
|
| 562 |
+
|
| 563 |
+
<!-- ════ Vue 1 : Classement ════════════════════════════════════════ -->
|
| 564 |
+
<div id="view-ranking" class="view active">
|
| 565 |
+
<div class="card">
|
| 566 |
+
<h2>Classement des moteurs</h2>
|
| 567 |
+
<div class="stat-row" id="ranking-stats"></div>
|
| 568 |
+
<div class="table-wrap">
|
| 569 |
+
<table id="ranking-table">
|
| 570 |
+
<thead>
|
| 571 |
+
<tr>
|
| 572 |
+
<th data-col="rank" class="sortable sorted" data-dir="asc">#<i class="sort-icon">↑</i></th>
|
| 573 |
+
<th data-col="name" class="sortable">Moteur<i class="sort-icon">↕</i></th>
|
| 574 |
+
<th data-col="cer" class="sortable">CER<i class="sort-icon">↕</i></th>
|
| 575 |
+
<th data-col="wer" class="sortable">WER<i class="sort-icon">↕</i></th>
|
| 576 |
+
<th data-col="mer" class="sortable">MER<i class="sort-icon">↕</i></th>
|
| 577 |
+
<th data-col="wil" class="sortable">WIL<i class="sort-icon">↕</i></th>
|
| 578 |
+
<th>CER médian</th>
|
| 579 |
+
<th>CER min</th>
|
| 580 |
+
<th>CER max</th>
|
| 581 |
+
<th>Docs</th>
|
| 582 |
+
</tr>
|
| 583 |
+
</thead>
|
| 584 |
+
<tbody id="ranking-tbody"></tbody>
|
| 585 |
+
</table>
|
| 586 |
+
</div>
|
| 587 |
+
<div class="stat-row" style="margin-top:.75rem">
|
| 588 |
+
<div class="legend-row">
|
| 589 |
+
<span class="legend-dot" style="background:#16a34a"></span>CER < 5 %
|
| 590 |
+
</div>
|
| 591 |
+
<div class="legend-row">
|
| 592 |
+
<span class="legend-dot" style="background:#ca8a04"></span>5–15 %
|
| 593 |
+
</div>
|
| 594 |
+
<div class="legend-row">
|
| 595 |
+
<span class="legend-dot" style="background:#ea580c"></span>15–30 %
|
| 596 |
+
</div>
|
| 597 |
+
<div class="legend-row">
|
| 598 |
+
<span class="legend-dot" style="background:#dc2626"></span>> 30 %
|
| 599 |
+
</div>
|
| 600 |
+
</div>
|
| 601 |
+
</div>
|
| 602 |
+
</div>
|
| 603 |
+
|
| 604 |
+
<!-- ════ Vue 2 : Galerie ═══════════════════════════════════════════ -->
|
| 605 |
+
<div id="view-gallery" class="view">
|
| 606 |
+
<div class="card">
|
| 607 |
+
<h2>Galerie des documents</h2>
|
| 608 |
+
<div class="gallery-controls">
|
| 609 |
+
<label>Trier par :
|
| 610 |
+
<select id="gallery-sort" onchange="renderGallery()">
|
| 611 |
+
<option value="doc_id">Identifiant</option>
|
| 612 |
+
<option value="mean_cer">CER moyen</option>
|
| 613 |
+
<option value="best_engine">Meilleur moteur</option>
|
| 614 |
+
</select>
|
| 615 |
+
</label>
|
| 616 |
+
<label>Filtrer CER >
|
| 617 |
+
<input type="number" id="gallery-filter-cer" min="0" max="100" value="0" step="1"
|
| 618 |
+
style="width:60px" onchange="renderGallery()"> %
|
| 619 |
+
</label>
|
| 620 |
+
<label>Moteur :
|
| 621 |
+
<select id="gallery-engine-select" onchange="renderGallery()">
|
| 622 |
+
<option value="">Tous</option>
|
| 623 |
+
</select>
|
| 624 |
+
</label>
|
| 625 |
+
</div>
|
| 626 |
+
<div id="gallery-grid" class="gallery-grid"></div>
|
| 627 |
+
<div id="gallery-empty" class="empty-state" style="display:none">
|
| 628 |
+
Aucun document ne correspond aux filtres.
|
| 629 |
+
</div>
|
| 630 |
+
</div>
|
| 631 |
+
</div>
|
| 632 |
+
|
| 633 |
+
<!-- ════ Vue 3 : Document ══════════════════════════════════════════ -->
|
| 634 |
+
<div id="view-document" class="view">
|
| 635 |
+
<div class="doc-layout">
|
| 636 |
+
<!-- Sidebar -->
|
| 637 |
+
<aside class="doc-sidebar">
|
| 638 |
+
<div class="doc-sidebar-header">Documents</div>
|
| 639 |
+
<div id="doc-list"></div>
|
| 640 |
+
</aside>
|
| 641 |
+
|
| 642 |
+
<!-- Contenu principal -->
|
| 643 |
+
<div>
|
| 644 |
+
<div class="card" id="doc-detail-header">
|
| 645 |
+
<div style="display:flex; align-items:baseline; justify-content:space-between; flex-wrap:wrap; gap:.5rem">
|
| 646 |
+
<h2 id="doc-detail-title">Sélectionner un document</h2>
|
| 647 |
+
<div class="stat-row" id="doc-detail-metrics"></div>
|
| 648 |
+
</div>
|
| 649 |
+
</div>
|
| 650 |
+
|
| 651 |
+
<!-- Image zoomable -->
|
| 652 |
+
<div class="card">
|
| 653 |
+
<h3>Image originale</h3>
|
| 654 |
+
<div class="doc-image-wrap" id="doc-image-wrap"
|
| 655 |
+
onwheel="handleZoom(event)"
|
| 656 |
+
onmousedown="startDrag(event)"
|
| 657 |
+
onmousemove="doDrag(event)"
|
| 658 |
+
onmouseup="endDrag()"
|
| 659 |
+
onmouseleave="endDrag()">
|
| 660 |
+
<div class="doc-image-placeholder" id="doc-image-placeholder">
|
| 661 |
+
<span style="font-size:2rem">🖼</span>
|
| 662 |
+
<span>Sélectionnez un document</span>
|
| 663 |
+
</div>
|
| 664 |
+
<img id="doc-image" src="" alt="Image du document" style="display:none">
|
| 665 |
+
<div class="zoom-controls">
|
| 666 |
+
<button class="zoom-btn" onclick="zoom(1.25)" title="Zoom +">+</button>
|
| 667 |
+
<button class="zoom-btn" onclick="zoom(0.8)" title="Zoom −">−</button>
|
| 668 |
+
<button class="zoom-btn" onclick="resetZoom()" title="Réinitialiser">↺</button>
|
| 669 |
+
</div>
|
| 670 |
+
</div>
|
| 671 |
+
</div>
|
| 672 |
+
|
| 673 |
+
<!-- Vérité terrain -->
|
| 674 |
+
<div class="card">
|
| 675 |
+
<h3>Vérité terrain (GT)</h3>
|
| 676 |
+
<div class="gt-panel">
|
| 677 |
+
<div class="gt-panel-header">✓ Ground Truth</div>
|
| 678 |
+
<div class="gt-panel-body" id="doc-gt-text">—</div>
|
| 679 |
+
</div>
|
| 680 |
+
</div>
|
| 681 |
+
|
| 682 |
+
<!-- Diffs par moteur -->
|
| 683 |
+
<div class="card">
|
| 684 |
+
<h3>Sorties OCR — diff par moteur</h3>
|
| 685 |
+
<div class="diff-panels" id="doc-diff-panels"></div>
|
| 686 |
+
</div>
|
| 687 |
+
</div>
|
| 688 |
+
</div>
|
| 689 |
+
</div>
|
| 690 |
+
|
| 691 |
+
<!-- ════ Vue 4 : Analyses ══════════════════════════════════════════ -->
|
| 692 |
+
<div id="view-analyses" class="view">
|
| 693 |
+
<div class="charts-grid">
|
| 694 |
+
|
| 695 |
+
<div class="chart-card">
|
| 696 |
+
<h3>Distribution du CER par moteur</h3>
|
| 697 |
+
<div class="chart-canvas-wrap">
|
| 698 |
+
<canvas id="chart-cer-hist"></canvas>
|
| 699 |
+
</div>
|
| 700 |
+
</div>
|
| 701 |
+
|
| 702 |
+
<div class="chart-card">
|
| 703 |
+
<h3>Profil des moteurs (radar)</h3>
|
| 704 |
+
<div class="chart-canvas-wrap">
|
| 705 |
+
<canvas id="chart-radar"></canvas>
|
| 706 |
+
</div>
|
| 707 |
+
<div style="font-size:.72rem;color:var(--text-muted);margin-top:.5rem">
|
| 708 |
+
Axe radar : CER, WER, MER, WIL — valeurs inversées (plus c'est haut, meilleur est le moteur).
|
| 709 |
+
</div>
|
| 710 |
+
</div>
|
| 711 |
+
|
| 712 |
+
<div class="chart-card">
|
| 713 |
+
<h3>CER par document (tous moteurs)</h3>
|
| 714 |
+
<div class="chart-canvas-wrap">
|
| 715 |
+
<canvas id="chart-cer-doc"></canvas>
|
| 716 |
+
</div>
|
| 717 |
+
</div>
|
| 718 |
+
|
| 719 |
+
<div class="chart-card">
|
| 720 |
+
<h3>Temps d'exécution moyen (secondes/document)</h3>
|
| 721 |
+
<div class="chart-canvas-wrap">
|
| 722 |
+
<canvas id="chart-duration"></canvas>
|
| 723 |
+
</div>
|
| 724 |
+
</div>
|
| 725 |
+
|
| 726 |
+
</div>
|
| 727 |
+
</div>
|
| 728 |
+
|
| 729 |
+
</main>
|
| 730 |
+
|
| 731 |
+
<footer>
|
| 732 |
+
Généré par <strong>Picarones</strong> v{picarones_version}
|
| 733 |
+
— BnF, Département numérique
|
| 734 |
+
— <span id="footer-date"></span>
|
| 735 |
+
</footer>
|
| 736 |
+
|
| 737 |
+
<!-- ── Données embarquées ──────────────────────────────────────────── -->
|
| 738 |
+
<script>
|
| 739 |
+
const DATA = {report_data_json};
|
| 740 |
+
</script>
|
| 741 |
+
|
| 742 |
+
<!-- ── Application ────────────────────────────────────────────────── -->
|
| 743 |
+
<script>
|
| 744 |
+
'use strict';
|
| 745 |
+
|
| 746 |
+
// ── Palette couleurs par moteur ──────────────────────────────────
|
| 747 |
+
const PALETTE = [
|
| 748 |
+
'#2563eb','#dc2626','#16a34a','#ca8a04','#7c3aed',
|
| 749 |
+
'#0891b2','#c2410c','#0f766e','#9333ea','#b45309',
|
| 750 |
+
];
|
| 751 |
+
function engineColor(idx) {{ return PALETTE[idx % PALETTE.length]; }}
|
| 752 |
+
|
| 753 |
+
// ── Navigation ──────────────────────────────────────────────────
|
| 754 |
+
let currentView = 'ranking';
|
| 755 |
+
function showView(name) {{
|
| 756 |
+
document.querySelectorAll('.view').forEach(v => v.classList.remove('active'));
|
| 757 |
+
document.querySelectorAll('.tab-btn').forEach(b => b.classList.remove('active'));
|
| 758 |
+
document.getElementById('view-' + name).classList.add('active');
|
| 759 |
+
document.querySelectorAll('.tab-btn').forEach(b => {{
|
| 760 |
+
if (b.textContent.toLowerCase().startsWith(
|
| 761 |
+
{{ranking:'c',gallery:'g',document:'d',analyses:'a'}}[name]
|
| 762 |
+
)) b.classList.add('active');
|
| 763 |
+
}});
|
| 764 |
+
currentView = name;
|
| 765 |
+
if (name === 'analyses' && !chartsBuilt) buildCharts();
|
| 766 |
+
}}
|
| 767 |
+
|
| 768 |
+
// ── Formatage ───────────────────────────────────────────────────
|
| 769 |
+
function pct(v, d=2) {{
|
| 770 |
+
if (v === null || v === undefined) return '—';
|
| 771 |
+
return (v * 100).toFixed(d) + ' %';
|
| 772 |
+
}}
|
| 773 |
+
function cerColor(v) {{
|
| 774 |
+
if (v < 0.05) return '#16a34a';
|
| 775 |
+
if (v < 0.15) return '#ca8a04';
|
| 776 |
+
if (v < 0.30) return '#ea580c';
|
| 777 |
+
return '#dc2626';
|
| 778 |
+
}}
|
| 779 |
+
function cerBg(v) {{
|
| 780 |
+
if (v < 0.05) return '#dcfce7';
|
| 781 |
+
if (v < 0.15) return '#fef9c3';
|
| 782 |
+
if (v < 0.30) return '#ffedd5';
|
| 783 |
+
return '#fee2e2';
|
| 784 |
+
}}
|
| 785 |
+
function esc(s) {{
|
| 786 |
+
return String(s)
|
| 787 |
+
.replace(/&/g,'&').replace(/</g,'<')
|
| 788 |
+
.replace(/>/g,'>').replace(/"/g,'"');
|
| 789 |
+
}}
|
| 790 |
+
|
| 791 |
+
// ── Diff renderer ──────────────────────────────────────────────
|
| 792 |
+
function renderDiff(ops) {{
|
| 793 |
+
if (!ops || !ops.length) return '<em style="color:var(--text-muted)">— aucune sortie —</em>';
|
| 794 |
+
return ops.map(op => {{
|
| 795 |
+
if (op.op === 'equal')
|
| 796 |
+
return '<span class="d-eq">' + esc(op.text) + '</span>';
|
| 797 |
+
if (op.op === 'insert')
|
| 798 |
+
return '<span class="d-ins" title="Insertion OCR">' + esc(op.text) + '</span>';
|
| 799 |
+
if (op.op === 'delete')
|
| 800 |
+
return '<span class="d-del" title="Suppression (présent GT)">' + esc(op.text) + '</span>';
|
| 801 |
+
if (op.op === 'replace')
|
| 802 |
+
return '<span class="d-rep-old" title="Remplacement">' + esc(op.old) + '</span>'
|
| 803 |
+
+ '<span class="d-rep-new">' + esc(op.new) + '</span>';
|
| 804 |
+
return '';
|
| 805 |
+
}}).join(' ');
|
| 806 |
+
}}
|
| 807 |
+
|
| 808 |
+
// ── Vue Classement ──────────────────────────────────────────────
|
| 809 |
+
let rankingSort = {{ col: 'cer', dir: 'asc' }};
|
| 810 |
+
|
| 811 |
+
function renderRanking() {{
|
| 812 |
+
const engines = [...DATA.engines];
|
| 813 |
+
// Trier
|
| 814 |
+
engines.sort((a, b) => {{
|
| 815 |
+
let va = a[rankingSort.col], vb = b[rankingSort.col];
|
| 816 |
+
if (typeof va === 'string') va = va.toLowerCase();
|
| 817 |
+
if (typeof vb === 'string') vb = vb.toLowerCase();
|
| 818 |
+
if (va === null) va = Infinity;
|
| 819 |
+
if (vb === null) vb = Infinity;
|
| 820 |
+
return rankingSort.dir === 'asc' ? (va > vb ? 1 : -1) : (va < vb ? 1 : -1);
|
| 821 |
+
}});
|
| 822 |
+
|
| 823 |
+
const tbody = document.getElementById('ranking-tbody');
|
| 824 |
+
tbody.innerHTML = engines.map((e, i) => {{
|
| 825 |
+
const rank = i + 1;
|
| 826 |
+
const badgeClass = rank === 1 ? 'rank-badge rank-1' : 'rank-badge';
|
| 827 |
+
const cerC = cerColor(e.cer); const cerB = cerBg(e.cer);
|
| 828 |
+
const barW = Math.min(100, e.cer * 100 * 3);
|
| 829 |
+
return `<tr>
|
| 830 |
+
<td><span class="${{badgeClass}}">${{rank}}</span></td>
|
| 831 |
+
<td>
|
| 832 |
+
<span class="engine-name">${{esc(e.name)}}</span>
|
| 833 |
+
<span class="engine-version">v${{esc(e.version)}}</span>
|
| 834 |
+
</td>
|
| 835 |
+
<td>
|
| 836 |
+
<span class="bar" style="width:${{barW}}px;background:${{cerC}}"></span>
|
| 837 |
+
<span class="cer-badge" style="color:${{cerC}};background:${{cerB}}">${{pct(e.cer)}}</span>
|
| 838 |
+
</td>
|
| 839 |
+
<td>${{pct(e.wer)}}</td>
|
| 840 |
+
<td>${{pct(e.mer)}}</td>
|
| 841 |
+
<td>${{pct(e.wil)}}</td>
|
| 842 |
+
<td style="color:var(--text-muted)">${{pct(e.cer_median)}}</td>
|
| 843 |
+
<td style="color:var(--text-muted)">${{pct(e.cer_min)}}</td>
|
| 844 |
+
<td style="color:var(--text-muted)">${{pct(e.cer_max)}}</td>
|
| 845 |
+
<td><span class="pill">${{e.doc_count}}</span></td>
|
| 846 |
+
</tr>`;
|
| 847 |
+
}}).join('');
|
| 848 |
+
|
| 849 |
+
// Stats globales
|
| 850 |
+
const stats = document.getElementById('ranking-stats');
|
| 851 |
+
stats.innerHTML = `
|
| 852 |
+
<div class="stat">Corpus <b>${{esc(DATA.meta.corpus_name)}}</b></div>
|
| 853 |
+
<div class="stat">Documents <b>${{DATA.meta.document_count}}</b></div>
|
| 854 |
+
<div class="stat">Moteurs <b>${{DATA.engines.length}}</b></div>
|
| 855 |
+
`;
|
| 856 |
+
}}
|
| 857 |
+
|
| 858 |
+
// Tri au clic sur en-tête
|
| 859 |
+
document.querySelectorAll('#ranking-table th.sortable').forEach(th => {{
|
| 860 |
+
th.addEventListener('click', () => {{
|
| 861 |
+
const col = th.dataset.col;
|
| 862 |
+
if (rankingSort.col === col) {{
|
| 863 |
+
rankingSort.dir = rankingSort.dir === 'asc' ? 'desc' : 'asc';
|
| 864 |
+
}} else {{
|
| 865 |
+
rankingSort.col = col;
|
| 866 |
+
rankingSort.dir = 'asc';
|
| 867 |
+
}}
|
| 868 |
+
document.querySelectorAll('#ranking-table th').forEach(t => {{
|
| 869 |
+
t.classList.remove('sorted');
|
| 870 |
+
const icon = t.querySelector('.sort-icon');
|
| 871 |
+
if (icon) icon.textContent = '↕';
|
| 872 |
+
}});
|
| 873 |
+
th.classList.add('sorted');
|
| 874 |
+
const icon = th.querySelector('.sort-icon');
|
| 875 |
+
if (icon) icon.textContent = rankingSort.dir === 'asc' ? '↑' : '↓';
|
| 876 |
+
renderRanking();
|
| 877 |
+
}});
|
| 878 |
+
}});
|
| 879 |
+
|
| 880 |
+
// ── Vue Galerie ─────────────────────────────────────────────────
|
| 881 |
+
function renderGallery() {{
|
| 882 |
+
const sortKey = document.getElementById('gallery-sort').value;
|
| 883 |
+
const filterCer = parseFloat(document.getElementById('gallery-filter-cer').value) / 100 || 0;
|
| 884 |
+
const filterEngine = document.getElementById('gallery-engine-select').value;
|
| 885 |
+
|
| 886 |
+
let docs = [...DATA.documents];
|
| 887 |
+
|
| 888 |
+
// Filtre CER
|
| 889 |
+
if (filterCer > 0) {{
|
| 890 |
+
docs = docs.filter(d => {{
|
| 891 |
+
if (filterEngine) {{
|
| 892 |
+
const er = d.engine_results.find(r => r.engine === filterEngine);
|
| 893 |
+
return er && er.cer >= filterCer;
|
| 894 |
+
}}
|
| 895 |
+
return d.mean_cer >= filterCer;
|
| 896 |
+
}});
|
| 897 |
+
}}
|
| 898 |
+
|
| 899 |
+
// Tri
|
| 900 |
+
docs.sort((a, b) => {{
|
| 901 |
+
if (sortKey === 'mean_cer') return a.mean_cer - b.mean_cer;
|
| 902 |
+
if (sortKey === 'best_engine') return a.best_engine.localeCompare(b.best_engine);
|
| 903 |
+
return a.doc_id.localeCompare(b.doc_id);
|
| 904 |
+
}});
|
| 905 |
+
|
| 906 |
+
const grid = document.getElementById('gallery-grid');
|
| 907 |
+
const empty = document.getElementById('gallery-empty');
|
| 908 |
+
|
| 909 |
+
if (!docs.length) {{
|
| 910 |
+
grid.innerHTML = '';
|
| 911 |
+
empty.style.display = '';
|
| 912 |
+
return;
|
| 913 |
+
}}
|
| 914 |
+
empty.style.display = 'none';
|
| 915 |
+
|
| 916 |
+
grid.innerHTML = docs.map(doc => {{
|
| 917 |
+
const imgTag = doc.image_b64
|
| 918 |
+
? `<img src="${{doc.image_b64}}" alt="${{esc(doc.doc_id)}}" loading="lazy">`
|
| 919 |
+
: `<div class="img-placeholder">🖹</div>`;
|
| 920 |
+
|
| 921 |
+
const badges = doc.engine_results.map(er => {{
|
| 922 |
+
const c = cerColor(er.cer); const bg = cerBg(er.cer);
|
| 923 |
+
return `<span class="engine-cer-badge" style="color:${{c}};background:${{bg}}"
|
| 924 |
+
title="${{esc(er.engine)}}">${{esc(er.engine.slice(0,6))}} ${{pct(er.cer,1)}}</span>`;
|
| 925 |
+
}}).join('');
|
| 926 |
+
|
| 927 |
+
return `<div class="gallery-card" onclick="openDocument('${{esc(doc.doc_id)}}')">
|
| 928 |
+
${{imgTag}}
|
| 929 |
+
<div class="gallery-card-body">
|
| 930 |
+
<div class="gallery-card-title">${{esc(doc.doc_id)}}</div>
|
| 931 |
+
<div class="gallery-card-badges">${{badges}}</div>
|
| 932 |
+
</div>
|
| 933 |
+
</div>`;
|
| 934 |
+
}}).join('');
|
| 935 |
+
}}
|
| 936 |
+
|
| 937 |
+
// ── Vue Document ────────────────────────────────────────────────
|
| 938 |
+
let currentDocId = null;
|
| 939 |
+
let zoomLevel = 1;
|
| 940 |
+
let dragStart = null;
|
| 941 |
+
let imgOffset = {{ x: 0, y: 0 }};
|
| 942 |
+
|
| 943 |
+
function openDocument(docId) {{
|
| 944 |
+
showView('document');
|
| 945 |
+
loadDocument(docId);
|
| 946 |
+
}}
|
| 947 |
+
|
| 948 |
+
function loadDocument(docId) {{
|
| 949 |
+
const doc = DATA.documents.find(d => d.doc_id === docId);
|
| 950 |
+
if (!doc) return;
|
| 951 |
+
currentDocId = docId;
|
| 952 |
+
|
| 953 |
+
// Sidebar : highlight
|
| 954 |
+
document.querySelectorAll('.doc-list-item').forEach(el => {{
|
| 955 |
+
el.classList.toggle('active', el.dataset.docId === docId);
|
| 956 |
+
}});
|
| 957 |
+
|
| 958 |
+
// Titre
|
| 959 |
+
document.getElementById('doc-detail-title').textContent = doc.doc_id;
|
| 960 |
+
|
| 961 |
+
// Métriques
|
| 962 |
+
const metricsDiv = document.getElementById('doc-detail-metrics');
|
| 963 |
+
const cer = doc.mean_cer;
|
| 964 |
+
metricsDiv.innerHTML = `<div class="stat">CER moyen <b style="color:${{cerColor(cer)}}">${{pct(cer)}}</b></div>
|
| 965 |
+
<div class="stat">Meilleur moteur <b>${{esc(doc.best_engine)}}</b></div>`;
|
| 966 |
+
|
| 967 |
+
// Image
|
| 968 |
+
resetZoom();
|
| 969 |
+
const img = document.getElementById('doc-image');
|
| 970 |
+
const placeholder = document.getElementById('doc-image-placeholder');
|
| 971 |
+
if (doc.image_b64) {{
|
| 972 |
+
img.src = doc.image_b64;
|
| 973 |
+
img.style.display = '';
|
| 974 |
+
placeholder.style.display = 'none';
|
| 975 |
+
}} else {{
|
| 976 |
+
img.style.display = 'none';
|
| 977 |
+
placeholder.style.display = '';
|
| 978 |
+
placeholder.innerHTML = `<span style="font-size:2rem">🖹</span><span>${{esc(doc.image_path)}}</span>`;
|
| 979 |
+
}}
|
| 980 |
+
|
| 981 |
+
// GT
|
| 982 |
+
document.getElementById('doc-gt-text').textContent = doc.ground_truth;
|
| 983 |
+
|
| 984 |
+
// Diffs
|
| 985 |
+
const panels = document.getElementById('doc-diff-panels');
|
| 986 |
+
panels.innerHTML = doc.engine_results.map((er, i) => {{
|
| 987 |
+
const c = cerColor(er.cer); const bg = cerBg(er.cer);
|
| 988 |
+
const diffHtml = renderDiff(er.diff);
|
| 989 |
+
const errBadge = er.error ? `<span class="badge" style="background:#fee2e2;color:#dc2626">Erreur</span>` : '';
|
| 990 |
+
return `<div class="diff-panel">
|
| 991 |
+
<div class="diff-panel-header">
|
| 992 |
+
<span class="diff-panel-title">${{esc(er.engine)}}</span>
|
| 993 |
+
<span class="diff-panel-metrics">
|
| 994 |
+
<span class="cer-badge" style="color:${{c}};background:${{bg}}">${{pct(er.cer)}}</span>
|
| 995 |
+
<span class="badge" style="background:#f1f5f9">WER ${{pct(er.wer)}}</span>
|
| 996 |
+
${{errBadge}}
|
| 997 |
+
</span>
|
| 998 |
+
</div>
|
| 999 |
+
<div class="diff-panel-body">${{diffHtml || '<em style="color:var(--text-muted)">Aucune sortie</em>'}}</div>
|
| 1000 |
+
</div>`;
|
| 1001 |
+
}}).join('');
|
| 1002 |
+
}}
|
| 1003 |
+
|
| 1004 |
+
function buildDocList() {{
|
| 1005 |
+
const list = document.getElementById('doc-list');
|
| 1006 |
+
list.innerHTML = DATA.documents.map(doc => {{
|
| 1007 |
+
const c = cerColor(doc.mean_cer); const bg = cerBg(doc.mean_cer);
|
| 1008 |
+
return `<div class="doc-list-item" data-doc-id="${{esc(doc.doc_id)}}"
|
| 1009 |
+
onclick="loadDocument('${{esc(doc.doc_id)}}')">
|
| 1010 |
+
<span class="doc-list-label">${{esc(doc.doc_id)}}</span>
|
| 1011 |
+
<span class="doc-list-cer" style="color:${{c}};background:${{bg}}">${{pct(doc.mean_cer,1)}}</span>
|
| 1012 |
+
</div>`;
|
| 1013 |
+
}}).join('');
|
| 1014 |
+
if (DATA.documents.length) loadDocument(DATA.documents[0].doc_id);
|
| 1015 |
+
}}
|
| 1016 |
+
|
| 1017 |
+
// Zoom
|
| 1018 |
+
function handleZoom(e) {{
|
| 1019 |
+
e.preventDefault();
|
| 1020 |
+
zoom(e.deltaY < 0 ? 1.15 : 0.87);
|
| 1021 |
+
}}
|
| 1022 |
+
function zoom(factor) {{
|
| 1023 |
+
zoomLevel = Math.max(0.5, Math.min(5, zoomLevel * factor));
|
| 1024 |
+
applyZoom();
|
| 1025 |
+
}}
|
| 1026 |
+
function resetZoom() {{
|
| 1027 |
+
zoomLevel = 1; imgOffset = {{ x: 0, y: 0 }};
|
| 1028 |
+
applyZoom();
|
| 1029 |
+
}}
|
| 1030 |
+
function applyZoom() {{
|
| 1031 |
+
const img = document.getElementById('doc-image');
|
| 1032 |
+
img.style.transform = `scale(${{zoomLevel}}) translate(${{imgOffset.x}}px, ${{imgOffset.y}}px)`;
|
| 1033 |
+
}}
|
| 1034 |
+
function startDrag(e) {{
|
| 1035 |
+
if (zoomLevel <= 1) return;
|
| 1036 |
+
dragStart = {{ x: e.clientX - imgOffset.x * zoomLevel, y: e.clientY - imgOffset.y * zoomLevel }};
|
| 1037 |
+
document.getElementById('doc-image-wrap').style.cursor = 'grabbing';
|
| 1038 |
+
}}
|
| 1039 |
+
function doDrag(e) {{
|
| 1040 |
+
if (!dragStart) return;
|
| 1041 |
+
imgOffset.x = (e.clientX - dragStart.x) / zoomLevel;
|
| 1042 |
+
imgOffset.y = (e.clientY - dragStart.y) / zoomLevel;
|
| 1043 |
+
applyZoom();
|
| 1044 |
+
}}
|
| 1045 |
+
function endDrag() {{
|
| 1046 |
+
dragStart = null;
|
| 1047 |
+
document.getElementById('doc-image-wrap').style.cursor = zoomLevel > 1 ? 'grab' : 'zoom-in';
|
| 1048 |
+
}}
|
| 1049 |
+
|
| 1050 |
+
// ── Graphiques ──────────────────────────────────────────────────
|
| 1051 |
+
let chartsBuilt = false;
|
| 1052 |
+
let chartInstances = {{}};
|
| 1053 |
+
|
| 1054 |
+
function destroyChart(id) {{
|
| 1055 |
+
if (chartInstances[id]) {{ chartInstances[id].destroy(); delete chartInstances[id]; }}
|
| 1056 |
+
}}
|
| 1057 |
+
|
| 1058 |
+
function buildCharts() {{
|
| 1059 |
+
if (chartsBuilt) return;
|
| 1060 |
+
chartsBuilt = true;
|
| 1061 |
+
buildCerHistogram();
|
| 1062 |
+
buildRadar();
|
| 1063 |
+
buildCerPerDoc();
|
| 1064 |
+
buildDurationChart();
|
| 1065 |
+
}}
|
| 1066 |
+
|
| 1067 |
+
function buildCerHistogram() {{
|
| 1068 |
+
destroyChart('cer-hist');
|
| 1069 |
+
const ctx = document.getElementById('chart-cer-hist').getContext('2d');
|
| 1070 |
+
// Construire histogramme à bins fixes [0-5, 5-10, 10-20, 20-30, 30-50, 50+]
|
| 1071 |
+
const bins = [0, 0.05, 0.10, 0.20, 0.30, 0.50, 1.01];
|
| 1072 |
+
const labels = ['0–5%', '5–10%', '10–20%', '20–30%', '30–50%', '>50%'];
|
| 1073 |
+
const colors = ['#16a34a','#65a30d','#ca8a04','#ea580c','#dc2626','#9f1239'];
|
| 1074 |
+
|
| 1075 |
+
const datasets = DATA.engines.map((e, ei) => {{
|
| 1076 |
+
const counts = new Array(labels.length).fill(0);
|
| 1077 |
+
e.cer_values.forEach(v => {{
|
| 1078 |
+
for (let i = 0; i < bins.length - 1; i++) {{
|
| 1079 |
+
if (v >= bins[i] && v < bins[i+1]) {{ counts[i]++; break; }}
|
| 1080 |
+
}}
|
| 1081 |
+
}});
|
| 1082 |
+
return {{
|
| 1083 |
+
label: e.name, data: counts,
|
| 1084 |
+
backgroundColor: engineColor(ei) + 'aa',
|
| 1085 |
+
borderColor: engineColor(ei),
|
| 1086 |
+
borderWidth: 1,
|
| 1087 |
+
}};
|
| 1088 |
+
}});
|
| 1089 |
+
|
| 1090 |
+
chartInstances['cer-hist'] = new Chart(ctx, {{
|
| 1091 |
+
type: 'bar',
|
| 1092 |
+
data: {{ labels, datasets }},
|
| 1093 |
+
options: {{
|
| 1094 |
+
responsive: true, maintainAspectRatio: false,
|
| 1095 |
+
plugins: {{ legend: {{ position: 'top', labels: {{ font: {{ size: 11 }} }} }} }},
|
| 1096 |
+
scales: {{
|
| 1097 |
+
x: {{ title: {{ display: true, text: 'Plage CER', font: {{ size: 11 }} }} }},
|
| 1098 |
+
y: {{ title: {{ display: true, text: 'Nombre de documents', font: {{ size: 11 }} }},
|
| 1099 |
+
ticks: {{ stepSize: 1 }} }},
|
| 1100 |
+
}},
|
| 1101 |
+
}},
|
| 1102 |
+
}});
|
| 1103 |
+
}}
|
| 1104 |
+
|
| 1105 |
+
function buildRadar() {{
|
| 1106 |
+
destroyChart('radar');
|
| 1107 |
+
const ctx = document.getElementById('chart-radar').getContext('2d');
|
| 1108 |
+
// Axes : CER, WER, MER, WIL inversés (1 - valeur → plus c'est élevé, mieux c'est)
|
| 1109 |
+
const metrics = ['CER', 'WER', 'MER', 'WIL'];
|
| 1110 |
+
const keys = ['cer', 'wer', 'mer', 'wil'];
|
| 1111 |
+
const datasets = DATA.engines.map((e, i) => {{
|
| 1112 |
+
const data = keys.map(k => Math.max(0, (1 - (e[k] || 0)) * 100));
|
| 1113 |
+
return {{
|
| 1114 |
+
label: e.name, data,
|
| 1115 |
+
backgroundColor: engineColor(i) + '33',
|
| 1116 |
+
borderColor: engineColor(i),
|
| 1117 |
+
borderWidth: 2,
|
| 1118 |
+
pointRadius: 4,
|
| 1119 |
+
pointHoverRadius: 6,
|
| 1120 |
+
}};
|
| 1121 |
+
}});
|
| 1122 |
+
|
| 1123 |
+
chartInstances['radar'] = new Chart(ctx, {{
|
| 1124 |
+
type: 'radar',
|
| 1125 |
+
data: {{ labels: metrics, datasets }},
|
| 1126 |
+
options: {{
|
| 1127 |
+
responsive: true, maintainAspectRatio: false,
|
| 1128 |
+
plugins: {{ legend: {{ position: 'top', labels: {{ font: {{ size: 11 }} }} }} }},
|
| 1129 |
+
scales: {{
|
| 1130 |
+
r: {{
|
| 1131 |
+
min: 0, max: 100,
|
| 1132 |
+
ticks: {{ stepSize: 20, font: {{ size: 10 }} }},
|
| 1133 |
+
pointLabels: {{ font: {{ size: 12, weight: 'bold' }} }},
|
| 1134 |
+
}},
|
| 1135 |
+
}},
|
| 1136 |
+
}},
|
| 1137 |
+
}});
|
| 1138 |
+
}}
|
| 1139 |
+
|
| 1140 |
+
function buildCerPerDoc() {{
|
| 1141 |
+
destroyChart('cer-doc');
|
| 1142 |
+
const ctx = document.getElementById('chart-cer-doc').getContext('2d');
|
| 1143 |
+
const labels = DATA.documents.map(d => d.doc_id);
|
| 1144 |
+
const datasets = DATA.engines.map((e, ei) => {{
|
| 1145 |
+
const data = DATA.documents.map(doc => {{
|
| 1146 |
+
const er = doc.engine_results.find(r => r.engine === e.name);
|
| 1147 |
+
return er ? er.cer * 100 : null;
|
| 1148 |
+
}});
|
| 1149 |
+
return {{
|
| 1150 |
+
label: e.name, data,
|
| 1151 |
+
borderColor: engineColor(ei),
|
| 1152 |
+
backgroundColor: engineColor(ei) + '22',
|
| 1153 |
+
tension: 0.3, fill: false,
|
| 1154 |
+
pointRadius: 3, pointHoverRadius: 5,
|
| 1155 |
+
}};
|
| 1156 |
+
}});
|
| 1157 |
+
|
| 1158 |
+
chartInstances['cer-doc'] = new Chart(ctx, {{
|
| 1159 |
+
type: 'line',
|
| 1160 |
+
data: {{ labels, datasets }},
|
| 1161 |
+
options: {{
|
| 1162 |
+
responsive: true, maintainAspectRatio: false,
|
| 1163 |
+
plugins: {{ legend: {{ position: 'top', labels: {{ font: {{ size: 11 }} }} }} }},
|
| 1164 |
+
scales: {{
|
| 1165 |
+
x: {{ ticks: {{ maxRotation: 45, font: {{ size: 10 }} }} }},
|
| 1166 |
+
y: {{ title: {{ display: true, text: 'CER (%)', font: {{ size: 11 }} }}, min: 0 }},
|
| 1167 |
+
}},
|
| 1168 |
+
}},
|
| 1169 |
+
}});
|
| 1170 |
+
}}
|
| 1171 |
+
|
| 1172 |
+
function buildDurationChart() {{
|
| 1173 |
+
destroyChart('duration');
|
| 1174 |
+
const ctx = document.getElementById('chart-duration').getContext('2d');
|
| 1175 |
+
|
| 1176 |
+
const labels = DATA.engines.map(e => e.name);
|
| 1177 |
+
const data = DATA.engines.map(e => {{
|
| 1178 |
+
const docs = DATA.documents;
|
| 1179 |
+
const durs = docs.flatMap(d => d.engine_results
|
| 1180 |
+
.filter(r => r.engine === e.name)
|
| 1181 |
+
.map(r => r.duration));
|
| 1182 |
+
const mean = durs.length ? durs.reduce((a,b) => a+b, 0) / durs.length : 0;
|
| 1183 |
+
return parseFloat(mean.toFixed(3));
|
| 1184 |
+
}});
|
| 1185 |
+
|
| 1186 |
+
chartInstances['duration'] = new Chart(ctx, {{
|
| 1187 |
+
type: 'bar',
|
| 1188 |
+
data: {{
|
| 1189 |
+
labels,
|
| 1190 |
+
datasets: [{{
|
| 1191 |
+
label: 'Durée moy. (s)',
|
| 1192 |
+
data,
|
| 1193 |
+
backgroundColor: DATA.engines.map((_, i) => engineColor(i) + 'aa'),
|
| 1194 |
+
borderColor: DATA.engines.map((_, i) => engineColor(i)),
|
| 1195 |
+
borderWidth: 1,
|
| 1196 |
+
}}],
|
| 1197 |
+
}},
|
| 1198 |
+
options: {{
|
| 1199 |
+
responsive: true, maintainAspectRatio: false,
|
| 1200 |
+
plugins: {{ legend: {{ display: false }} }},
|
| 1201 |
+
scales: {{
|
| 1202 |
+
y: {{ title: {{ display: true, text: 'Secondes', font: {{ size: 11 }} }}, min: 0 }},
|
| 1203 |
+
}},
|
| 1204 |
+
}},
|
| 1205 |
+
}});
|
| 1206 |
+
}}
|
| 1207 |
+
|
| 1208 |
+
// ── Init ────────────────────────────────────────────────────────
|
| 1209 |
+
function init() {{
|
| 1210 |
+
// Méta nav
|
| 1211 |
+
const d = new Date(DATA.meta.run_date);
|
| 1212 |
+
const fmt = d.toLocaleDateString('fr-FR', {{ year:'numeric', month:'short', day:'numeric' }});
|
| 1213 |
+
document.getElementById('nav-meta').textContent =
|
| 1214 |
+
DATA.meta.corpus_name + ' · ' + fmt;
|
| 1215 |
+
document.getElementById('footer-date').textContent =
|
| 1216 |
+
'Rapport généré le ' + fmt;
|
| 1217 |
+
|
| 1218 |
+
// Sélecteur moteur galerie
|
| 1219 |
+
const sel = document.getElementById('gallery-engine-select');
|
| 1220 |
+
DATA.engines.forEach(e => {{
|
| 1221 |
+
const opt = document.createElement('option');
|
| 1222 |
+
opt.value = e.name; opt.textContent = e.name;
|
| 1223 |
+
sel.appendChild(opt);
|
| 1224 |
+
}});
|
| 1225 |
+
|
| 1226 |
+
renderRanking();
|
| 1227 |
+
renderGallery();
|
| 1228 |
+
buildDocList();
|
| 1229 |
+
}}
|
| 1230 |
+
|
| 1231 |
+
document.addEventListener('DOMContentLoaded', init);
|
| 1232 |
+
</script>
|
| 1233 |
+
</body>
|
| 1234 |
+
</html>
|
| 1235 |
+
"""
|
| 1236 |
+
|
| 1237 |
+
|
| 1238 |
+
# ---------------------------------------------------------------------------
|
| 1239 |
+
# Classe principale
|
| 1240 |
+
# ---------------------------------------------------------------------------
|
| 1241 |
+
|
| 1242 |
+
class ReportGenerator:
    """Generate an interactive, self-contained HTML report from a BenchmarkResult.

    Usage
    -----
    >>> from picarones.report import ReportGenerator
    >>> gen = ReportGenerator(benchmark_result)
    >>> path = gen.generate("rapport.html")
    """

    def __init__(
        self,
        benchmark: BenchmarkResult,
        images_b64: Optional[dict[str, str]] = None,
    ) -> None:
        """
        Parameters
        ----------
        benchmark:
            Benchmark result to visualise.
        images_b64:
            Mapping ``{doc_id: base64 data-URI}`` of the document images.
            When omitted or empty, the generator falls back to
            ``benchmark.metadata["_images_b64"]``.
        """
        self.benchmark = benchmark
        self.images_b64: dict[str, str] = images_b64 or {}

        # Fall back to images embedded in the benchmark metadata (fixtures).
        if not self.images_b64:
            self.images_b64 = benchmark.metadata.get("_images_b64", {})  # type: ignore[assignment]

    def generate(self, output_path: str | Path) -> Path:
        """Render the HTML report and write it to disk.

        The report data is serialized to compact JSON and substituted into
        the module-level HTML template together with the corpus name and the
        Picarones version. Missing parent directories are created.

        Parameters
        ----------
        output_path:
            Path of the HTML file to write.

        Returns
        -------
        Path
            Absolute path of the generated file.
        """
        # Local import: keeps this block self-contained without touching the
        # file-level import section. Aliased because ``html`` is used below
        # as a local variable name.
        from html import escape as _escape_html

        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        report_data = _build_report_data(self.benchmark, self.images_b64)
        report_json = json.dumps(report_data, ensure_ascii=False, separators=(",", ":"))
        # The JSON is embedded in an inline <script>. A "</script>" or "<!--"
        # inside any string value (ground truth, OCR output, ...) would end or
        # corrupt the script block. "<" can only occur inside JSON strings, so
        # replacing it with its \u escape keeps the payload valid JSON and
        # valid JavaScript while making it inert for the HTML parser.
        report_json = report_json.replace("<", "\\u003c")

        # NOTE(review): assumes the template places corpus_name and
        # picarones_version in HTML text/attribute context — confirm against
        # _HTML_TEMPLATE.
        html = _HTML_TEMPLATE.format(
            corpus_name=_escape_html(str(self.benchmark.corpus_name)),
            picarones_version=_escape_html(str(self.benchmark.picarones_version)),
            report_data_json=report_json,
        )

        output_path.write_text(html, encoding="utf-8")
        return output_path.resolve()

    @classmethod
    def from_json(cls, json_path: str | Path, **kwargs) -> "ReportGenerator":
        """Build a generator from a JSON results file.

        Compatible with the files produced by ``BenchmarkResult.to_json()``.
        Base64 images must be passed through ``kwargs["images_b64"]`` when
        they are not stored in the JSON itself.

        Parameters
        ----------
        json_path:
            Path of the UTF-8 encoded JSON results file.
        **kwargs:
            Forwarded to the class constructor; ``images_b64`` is extracted
            and passed explicitly.

        Raises
        ------
        KeyError
            If a document result lacks one of the required fields (metric
            values, ``doc_id``, ``image_path``, ``ground_truth``,
            ``hypothesis``) or an engine report lacks ``engine_name``.
        """
        data = json.loads(Path(json_path).read_text(encoding="utf-8"))

        # Minimal reconstruction of a BenchmarkResult from the parsed dict.
        from picarones.core.metrics import MetricsResult
        from picarones.core.results import DocumentResult, EngineReport

        engine_reports = []
        for er_data in data.get("engine_reports", []):
            doc_results = []
            for dr_data in er_data.get("document_results", []):
                m = dr_data["metrics"]
                metrics = MetricsResult(
                    cer=m["cer"],
                    cer_nfc=m["cer_nfc"],
                    cer_caseless=m["cer_caseless"],
                    wer=m["wer"],
                    wer_normalized=m["wer_normalized"],
                    mer=m["mer"],
                    wil=m["wil"],
                    reference_length=m["reference_length"],
                    hypothesis_length=m["hypothesis_length"],
                    error=m.get("error"),
                )
                doc_results.append(DocumentResult(
                    doc_id=dr_data["doc_id"],
                    image_path=dr_data["image_path"],
                    ground_truth=dr_data["ground_truth"],
                    hypothesis=dr_data["hypothesis"],
                    metrics=metrics,
                    duration_seconds=dr_data.get("duration_seconds", 0.0),
                    engine_error=dr_data.get("engine_error"),
                ))
            engine_reports.append(EngineReport(
                engine_name=er_data["engine_name"],
                engine_version=er_data.get("engine_version", "unknown"),
                engine_config=er_data.get("engine_config", {}),
                document_results=doc_results,
            ))

        corpus_info = data.get("corpus", {})
        bm = BenchmarkResult(
            corpus_name=corpus_info.get("name", "Corpus"),
            corpus_source=corpus_info.get("source"),
            document_count=corpus_info.get("document_count", 0),
            engine_reports=engine_reports,
            run_date=data.get("run_date", ""),
            picarones_version=data.get("picarones_version", ""),
            metadata=data.get("metadata", {}),
        )

        # Pop so that images_b64 is not passed twice to the constructor.
        images_b64 = kwargs.pop("images_b64", {})
        return cls(bm, images_b64=images_b64, **kwargs)
|
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests pour picarones.report.diff_utils."""
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
from picarones.report.diff_utils import compute_word_diff, compute_char_diff, diff_stats
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class TestComputeWordDiff:
    """Word-level diff: operation kinds, edge cases and output shape."""

    def test_equal_texts(self):
        """Identical inputs yield a single ``equal`` operation with the text."""
        result = compute_word_diff("bonjour monde", "bonjour monde")
        assert len(result) == 1
        only = result[0]
        assert only["op"] == "equal"
        assert only["text"] == "bonjour monde"

    def test_empty_reference(self):
        """An empty reference produces at least one ``insert``."""
        result = compute_word_diff("", "bonjour")
        inserts = (entry for entry in result if entry["op"] == "insert")
        assert next(inserts, None) is not None

    def test_empty_hypothesis(self):
        """An empty hypothesis produces at least one ``delete``."""
        result = compute_word_diff("bonjour", "")
        deletes = (entry for entry in result if entry["op"] == "delete")
        assert next(deletes, None) is not None

    def test_both_empty(self):
        """Two empty strings produce an empty operation list."""
        assert compute_word_diff("", "") == []

    def test_insertion(self):
        """A word added by the hypothesis shows up in an ``insert``."""
        result = compute_word_diff("le chat", "le grand chat")
        hits = (
            entry for entry in result
            if entry["op"] == "insert" and "grand" in entry["text"]
        )
        assert next(hits, None) is not None

    def test_deletion(self):
        """A word missing from the hypothesis shows up in a ``delete``."""
        result = compute_word_diff("le grand chat", "le chat")
        hits = (
            entry for entry in result
            if entry["op"] == "delete" and "grand" in entry["text"]
        )
        assert next(hits, None) is not None

    def test_replacement(self):
        """A substituted word shows up in a ``replace`` with old and new."""
        result = compute_word_diff("le chat dort", "le chien dort")
        hits = (
            entry for entry in result
            if entry["op"] == "replace"
            and "chat" in entry["old"]
            and "chien" in entry["new"]
        )
        assert next(hits, None) is not None

    def test_ops_cover_all_words(self):
        """The reference can be rebuilt from equal/delete text and replace.old."""
        reference = "Bonjour monde médiéval"
        hypothesis = "Bonjour univers médiéval"

        def reference_side(entry):
            # Text contributed to the reference by one operation; inserts
            # (and anything else) contribute nothing.
            kind = entry["op"]
            if kind in ("equal", "delete"):
                return entry["text"]
            if kind == "replace":
                return entry["old"]
            return ""

        rebuilt = [
            word
            for entry in compute_word_diff(reference, hypothesis)
            for word in reference_side(entry).split()
        ]
        assert rebuilt == reference.split()

    def test_result_is_list_of_dicts(self):
        """The result is a list whose items are all dicts."""
        result = compute_word_diff("texte", "text")
        assert isinstance(result, list)
        for entry in result:
            assert isinstance(entry, dict)

    def test_all_ops_have_op_key(self):
        """Every operation carries an ``op`` key."""
        for entry in compute_word_diff("abc def ghi", "abc xyz ghi"):
            assert "op" in entry

    def test_valid_op_types(self):
        """Only the four known operation kinds are emitted."""
        allowed = {"equal", "insert", "delete", "replace"}
        seen = {entry["op"] for entry in compute_word_diff("un deux trois", "un trois quatre")}
        assert seen <= allowed
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
class TestComputeCharDiff:
    """Character-level diff on short tokens."""

    def test_equal(self):
        """Identical tokens yield a single ``equal`` operation."""
        result = compute_char_diff("abc", "abc")
        assert len(result) == 1
        first = result[0]
        assert first["op"] == "equal"

    def test_single_char_replace(self):
        """One substituted character is reported as ``replace`` b -> x."""
        result = compute_char_diff("abc", "axc")
        hits = (
            entry for entry in result
            if entry["op"] == "replace"
            and entry["old"] == "b"
            and entry["new"] == "x"
        )
        assert next(hits, None) is not None

    def test_empty_strings(self):
        """Two empty strings produce an empty operation list."""
        result = compute_char_diff("", "")
        assert result == []
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class TestDiffStats:
    """Per-kind operation counting."""

    def test_empty(self):
        """An empty operation list yields zero for every kind."""
        zeroes = dict.fromkeys(("equal", "insert", "delete", "replace"), 0)
        assert diff_stats([]) == zeroes

    def test_counts(self):
        """Each kind is counted once per operation of that kind."""
        sample = [
            {"op": "equal", "text": "a"},
            {"op": "insert", "text": "b"},
            {"op": "delete", "text": "c"},
            {"op": "replace", "old": "d", "new": "e"},
            {"op": "equal", "text": "f"},
        ]
        counted = diff_stats(sample)
        expected = {"equal": 2, "insert": 1, "delete": 1, "replace": 1}
        for kind, total in expected.items():
            assert counted[kind] == total
|
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests pour picarones.report (generator + fixtures)."""
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import pytest
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
from picarones.fixtures import generate_sample_benchmark
|
| 8 |
+
from picarones.report.generator import ReportGenerator, _build_report_data, _cer_color, _cer_bg
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# ---------------------------------------------------------------------------
|
| 12 |
+
# Fixtures Python (données de test)
|
| 13 |
+
# ---------------------------------------------------------------------------
|
| 14 |
+
|
| 15 |
+
@pytest.fixture
def sample_benchmark():
    """Sample benchmark built with 3 documents, seed 0 and images included."""
    options = {"n_docs": 3, "seed": 0, "include_images": True}
    return generate_sample_benchmark(**options)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@pytest.fixture
def sample_generator(sample_benchmark):
    """ReportGenerator built from the ``sample_benchmark`` fixture."""
    generator = ReportGenerator(sample_benchmark)
    return generator
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# ---------------------------------------------------------------------------
|
| 26 |
+
# Tests generate_sample_benchmark
|
| 27 |
+
# ---------------------------------------------------------------------------
|
| 28 |
+
|
| 29 |
+
class TestGenerateSampleBenchmark:
    """Checks on the benchmark returned by ``generate_sample_benchmark``."""

    def test_returns_benchmark_result(self, sample_benchmark):
        """The generated object is a ``BenchmarkResult``."""
        from picarones.core.results import BenchmarkResult

        is_benchmark = isinstance(sample_benchmark, BenchmarkResult)
        assert is_benchmark

    def test_correct_engine_count(self, sample_benchmark):
        """Three engine reports are present."""
        reports = sample_benchmark.engine_reports
        assert len(reports) == 3

    def test_correct_doc_count(self, sample_benchmark):
        """The benchmark and every engine report cover three documents."""
        assert sample_benchmark.document_count == 3
        per_engine_counts = [
            len(report.document_results)
            for report in sample_benchmark.engine_reports
        ]
        assert all(count == 3 for count in per_engine_counts)

    def test_engine_names(self, sample_benchmark):
        """Both ``tesseract`` and ``pero_ocr`` appear among the engine names."""
        engine_names = set()
        for report in sample_benchmark.engine_reports:
            engine_names.add(report.engine_name)
        assert "tesseract" in engine_names
        assert "pero_ocr" in engine_names

    def test_images_in_metadata(self, sample_benchmark):
        """Metadata carries three images, each encoded as a PNG data URI."""
        encoded_images = sample_benchmark.metadata.get("_images_b64", {})
        assert len(encoded_images) == 3
        expected_prefix = "data:image/png;base64,"
        assert all(
            uri.startswith(expected_prefix) for uri in encoded_images.values()
        )

    def test_reproducible_with_seed(self):
        """Two runs with the same seed give the same first CER value."""

        def first_cer():
            benchmark = generate_sample_benchmark(n_docs=3, seed=42)
            return benchmark.engine_reports[0].document_results[0].metrics.cer

        # Same seed, same CER.
        cer_first_run = first_cer()
        cer_second_run = first_cer()
        assert cer_first_run == pytest.approx(cer_second_run)

    def test_without_images(self):
        """With ``include_images=False`` no image map ends up in the metadata."""
        benchmark = generate_sample_benchmark(n_docs=2, include_images=False)
        images = benchmark.metadata.get("_images_b64", {})
        assert images == {}

    def test_metrics_computed(self, sample_benchmark):
        """CER and WER are non-negative for every document of every engine."""
        for report in sample_benchmark.engine_reports:
            for result in report.document_results:
                metrics = result.metrics
                assert metrics.cer >= 0.0
                assert metrics.wer >= 0.0

    def test_aggregated_metrics(self, sample_benchmark):
        """Each engine report exposes an aggregated ``cer`` entry with a ``mean``."""
        for report in sample_benchmark.engine_reports:
            aggregated = report.aggregated_metrics
            assert "cer" in aggregated
            assert "mean" in aggregated["cer"]
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# ---------------------------------------------------------------------------
|
| 78 |
+
# Tests _build_report_data
|
| 79 |
+
# ---------------------------------------------------------------------------
|
| 80 |
+
|
| 81 |
+
class TestBuildReportData:
    """Checks on the dictionary returned by ``_build_report_data``."""

    def test_structure(self, sample_benchmark):
        """The four top-level sections are present."""
        report_data = _build_report_data(sample_benchmark, {})
        for section in ("meta", "ranking", "engines", "documents"):
            assert section in report_data

    def test_engines_count(self, sample_benchmark):
        """Three engines are listed."""
        engines = _build_report_data(sample_benchmark, {})["engines"]
        assert len(engines) == 3

    def test_engine_fields(self, sample_benchmark):
        """Every engine entry carries the expected keys."""
        expected_keys = ("name", "version", "cer", "wer", "mer", "wil", "cer_values")
        report_data = _build_report_data(sample_benchmark, {})
        for engine in report_data["engines"]:
            for key in expected_keys:
                assert key in engine

    def test_documents_count(self, sample_benchmark):
        """Three documents are listed."""
        documents = _build_report_data(sample_benchmark, {})["documents"]
        assert len(documents) == 3

    def test_document_fields(self, sample_benchmark):
        """Every document entry carries the expected keys."""
        expected_keys = (
            "doc_id",
            "image_path",
            "ground_truth",
            "mean_cer",
            "best_engine",
            "engine_results",
        )
        report_data = _build_report_data(sample_benchmark, {})
        for document in report_data["documents"]:
            for key in expected_keys:
                assert key in document

    def test_diff_computed(self, sample_benchmark):
        """Each per-engine result of each document holds a ``diff`` list."""
        report_data = _build_report_data(sample_benchmark, {})
        for document in report_data["documents"]:
            for engine_result in document["engine_results"]:
                assert "diff" in engine_result
                assert isinstance(engine_result["diff"], list)

    def test_images_attached(self, sample_benchmark):
        """Images passed to the builder show up on the documents."""
        encoded_images = sample_benchmark.metadata.get("_images_b64", {})
        report_data = _build_report_data(sample_benchmark, encoded_images)
        # At least one document must carry a base64 image.
        any_with_image = any(
            document["image_b64"] for document in report_data["documents"]
        )
        assert any_with_image

    def test_cer_values_list(self, sample_benchmark):
        """``cer_values`` holds three floats for every engine."""
        report_data = _build_report_data(sample_benchmark, {})
        for engine in report_data["engines"]:
            cer_values = engine["cer_values"]
            assert len(cer_values) == 3
            assert all(isinstance(value, float) for value in cer_values)
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# ---------------------------------------------------------------------------
|
| 132 |
+
# Tests ReportGenerator.generate
|
| 133 |
+
# ---------------------------------------------------------------------------
|
| 134 |
+
|
| 135 |
+
class TestReportGenerator:
    """Checks on the HTML file written by ``ReportGenerator.generate``."""

    @staticmethod
    def _render(generator, tmp_path):
        """Generate the report under ``tmp_path`` and return its HTML text.

        Not collected by pytest (no ``test`` prefix); it only factors out the
        generate-then-read steps shared by the content tests below.
        """
        out = tmp_path / "rapport.html"
        generator.generate(out)
        return out.read_text(encoding="utf-8")

    def test_generates_file(self, sample_generator, tmp_path):
        """``generate`` returns the path of an existing ``.html`` file."""
        out = tmp_path / "rapport.html"
        path = sample_generator.generate(out)
        assert path.exists()
        assert path.suffix == ".html"

    def test_file_not_empty(self, sample_generator, tmp_path):
        """The generated file is more than 5000 characters long."""
        content = self._render(sample_generator, tmp_path)
        assert len(content) > 5000  # substantial file

    def test_html_structure(self, sample_generator, tmp_path):
        """The output has a doctype and opening/closing ``html`` tags."""
        html = self._render(sample_generator, tmp_path)
        assert "<!DOCTYPE html>" in html
        assert "<html" in html
        assert "</html>" in html

    def test_contains_chart_js(self, sample_generator, tmp_path):
        """The output mentions Chart.js (case-insensitive)."""
        html = self._render(sample_generator, tmp_path)
        # The lowercase check already covers the "Chart.js" spelling, so no
        # second case-sensitive test is needed.
        assert "chart.js" in html.lower()

    def test_contains_diff2html(self, sample_generator, tmp_path):
        """The output mentions diff2html (case-insensitive)."""
        html = self._render(sample_generator, tmp_path)
        assert "diff2html" in html.lower()

    def test_data_embedded(self, sample_generator, tmp_path):
        """The output declares the inline ``const DATA`` payload."""
        html = self._render(sample_generator, tmp_path)
        assert "const DATA" in html

    def test_engine_names_in_html(self, sample_generator, tmp_path):
        """Engine names appear in the output."""
        html = self._render(sample_generator, tmp_path)
        assert "tesseract" in html
        assert "pero_ocr" in html

    def test_corpus_name_in_html(self, sample_generator, tmp_path):
        """The output contains the word ``chroniques`` (case-insensitive)."""
        html = self._render(sample_generator, tmp_path)
        assert "chroniques" in html.lower()

    def test_creates_parent_dirs(self, sample_generator, tmp_path):
        """Missing parent directories of the output path are created."""
        out = tmp_path / "deep" / "nested" / "rapport.html"
        sample_generator.generate(out)
        assert out.exists()

    def test_returns_absolute_path(self, sample_generator, tmp_path):
        """``generate`` returns an absolute path."""
        out = tmp_path / "rapport.html"
        result = sample_generator.generate(out)
        assert result.is_absolute()

    def test_json_roundtrip(self, sample_benchmark, tmp_path):
        """Check that the report can be generated from a saved JSON file."""
        json_path = tmp_path / "results.json"
        sample_benchmark.to_json(json_path)

        gen = ReportGenerator.from_json(json_path)
        html_path = tmp_path / "rapport.html"
        gen.generate(html_path)
        assert html_path.exists()
        html = html_path.read_text(encoding="utf-8")
        assert "const DATA" in html

    def test_embedded_json_valid(self, sample_generator, tmp_path):
        """Extract and parse the JSON embedded in the HTML to check it is valid."""
        html = self._render(sample_generator, tmp_path)

        # Locate the object literal that follows "const DATA = ".
        marker = "const DATA = "
        start = html.find(marker + "{")
        assert start != -1, "Bloc const DATA non trouvé dans le HTML"

        # raw_decode parses exactly one JSON value starting at the given index
        # and ignores whatever follows it. Unlike a non-greedy regex that
        # stops at the first "};", it is not fooled by that sequence
        # occurring inside a JSON string.
        data, _end = json.JSONDecoder().raw_decode(html, start + len(marker))
        assert "engines" in data
        assert "documents" in data
        assert len(data["engines"]) == 3
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
# ---------------------------------------------------------------------------
|
| 226 |
+
# Tests helpers de couleur
|
| 227 |
+
# ---------------------------------------------------------------------------
|
| 228 |
+
|
| 229 |
+
class TestCerColor:
    """Checks on the color returned by ``_cer_color`` for sample CER values."""

    def test_green_below_5pct(self):
        """A CER of 0.04 maps to the green code."""
        color = _cer_color(0.04)
        assert color == "#16a34a"

    def test_yellow_5_to_15pct(self):
        """A CER of 0.10 maps to the yellow code."""
        color = _cer_color(0.10)
        assert color == "#ca8a04"

    def test_orange_15_to_30pct(self):
        """A CER of 0.20 maps to the orange code."""
        color = _cer_color(0.20)
        assert color == "#ea580c"

    def test_red_above_30pct(self):
        """A CER of 0.50 maps to the red code."""
        color = _cer_color(0.50)
        assert color == "#dc2626"

    def test_boundary_exactly_5pct(self):
        """A CER of exactly 0.05 maps to the yellow code."""
        # 0.05 falls in the yellow band (>= 0.05).
        assert _cer_color(0.05) == "#ca8a04"
|