Spaces:
Sleeping
Sleeping
Claude
chantier4: workflows CLI dédiés + propagation fix Sprint 15 LLM + fusion Gallica→IIIF
36694e1 unverified | """Interface en ligne de commande Picarones (Click). | |
| Commandes disponibles | |
| --------------------- | |
| picarones run — Lance un benchmark complet | |
| picarones report — Génère le rapport HTML depuis un JSON de résultats | |
| picarones demo — Génère un rapport de démonstration avec données fictives | |
| picarones metrics — Calcule CER/WER entre deux fichiers texte | |
| picarones engines — Liste les moteurs disponibles | |
| picarones info — Informations de version | |
| picarones history — Consulte l'historique des benchmarks (suivi longitudinal) | |
| picarones robustness — Lance une analyse de robustesse sur un corpus | |
| Exemples d'usage | |
| ---------------- | |
| picarones run --corpus ./corpus/ --engines tesseract --output results.json | |
| picarones metrics --reference gt.txt --hypothesis ocr.txt | |
| picarones history --engine tesseract | |
| picarones robustness --corpus ./gt/ --engine tesseract | |
| picarones engines | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import logging | |
| import sys | |
| from pathlib import Path | |
| from typing import TYPE_CHECKING | |
| import click | |
| from picarones import __version__ | |
| if TYPE_CHECKING: | |
| from picarones.engines.base import BaseOCREngine | |
| # --------------------------------------------------------------------------- | |
| # Helpers | |
| # --------------------------------------------------------------------------- | |
def _setup_logging(verbose: bool) -> None:
    """Configure the root logger for one CLI invocation.

    Args:
        verbose: When true the root level is DEBUG, otherwise INFO.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )
def _engine_from_name(engine_name: str, lang: str, psm: int) -> "BaseOCREngine":
    """Instantiate an OCR engine from its command-line name.

    Args:
        engine_name: ``tesseract``/``tess`` or ``pero_ocr``/``pero``.
        lang: Forwarded to the Tesseract engine as config key ``lang``.
        psm: Forwarded to the Tesseract engine as config key ``psm``.

    Returns:
        The engine instance matching ``engine_name``.

    Raises:
        click.BadParameter: If the name is unknown, or if importing or
            building the requested backend raises ``ImportError``.
    """
    import_failure = None
    try:
        # Each backend is imported only when it is actually requested, so a
        # missing Tesseract dependency no longer prevents using Pero OCR
        # (and vice versa).
        if engine_name in {"tesseract", "tess"}:
            from picarones.engines.tesseract import TesseractEngine

            return TesseractEngine(config={"lang": lang, "psm": psm})
        if engine_name in {"pero_ocr", "pero"}:
            from picarones.engines.pero_ocr import PeroOCREngine

            return PeroOCREngine(config={"name": "pero_ocr"})
    except ImportError as exc:
        # Reported below as "unknown or unavailable"; the original error is
        # kept as the cause for debugging.
        import_failure = exc

    raise click.BadParameter(
        f"Moteur inconnu ou non disponible : '{engine_name}'. "
        "Moteurs supportés : tesseract, pero_ocr"
    ) from import_failure
| # --------------------------------------------------------------------------- | |
| # Groupe principal | |
| # --------------------------------------------------------------------------- | |
def cli() -> None:
    """Picarones — Plateforme de comparaison de moteurs OCR pour documents patrimoniaux."""
    # Intentionally empty body. Presumably registered as the root Click group
    # by a decorator that is not visible in this view, in which case the
    # docstring above is the top-level help text — TODO confirm.
| # --------------------------------------------------------------------------- | |
| # picarones run | |
| # --------------------------------------------------------------------------- | |
def run_cmd(
    corpus: str,
    engines: str,
    output: str,
    lang: str,
    psm: int,
    no_progress: bool,
    verbose: bool,
    fail_if_cer_above: float | None,
    profile: str,
) -> None:
    """Lance un benchmark OCR sur un corpus de documents.

    Le corpus doit être un dossier contenant des paires
    <image>.<ext> + <image>.gt.txt (vérité terrain).
    """
    # The docstring is left verbatim: it is presumably rendered as the Click
    # help text of the command (decorators are not visible here).
    #
    # Flow: load corpus -> build engines -> run the benchmark -> print the
    # ranking -> optional CER gate. Every failure path exits with status 1.
    _setup_logging(verbose)

    from picarones.core.corpus import load_corpus_from_directory
    from picarones.core.runner import run_benchmark

    def _as_percent(ratio):
        # Ratios are stored in [0, 1]-style units; None means "no value".
        return "N/A" if ratio is None else f"{ratio * 100:.2f}%"

    # --- Corpus -----------------------------------------------------------
    try:
        loaded_corpus = load_corpus_from_directory(corpus)
    except (FileNotFoundError, ValueError) as exc:
        click.echo(f"Erreur corpus : {exc}", err=True)
        sys.exit(1)
    click.echo(f"Corpus '{loaded_corpus.name}' — {len(loaded_corpus)} documents chargés.")

    # --- Engines ----------------------------------------------------------
    # Comma-separated list; blank items are ignored. The first engine that
    # cannot be built aborts the whole command.
    requested = [item.strip() for item in engines.split(",") if item.strip()]
    try:
        ocr_engines = [
            _engine_from_name(engine_id, lang=lang, psm=psm)
            for engine_id in requested
        ]
    except click.BadParameter as exc:
        click.echo(f"Erreur moteur : {exc}", err=True)
        sys.exit(1)

    if not ocr_engines:
        click.echo("Aucun moteur valide spécifié.", err=True)
        sys.exit(1)

    engine_list = ", ".join(e.name for e in ocr_engines)
    click.echo(f"Moteurs : {engine_list}")
    click.echo(f"Profil de métriques : {profile}")

    # --- Benchmark --------------------------------------------------------
    benchmark = run_benchmark(
        corpus=loaded_corpus,
        engines=ocr_engines,
        output_json=output,
        show_progress=not no_progress,
        profile=profile,
    )

    # --- Ranking ----------------------------------------------------------
    click.echo("\n── Classement ──────────────────────────────────")
    for position, row in enumerate(benchmark.ranking(), start=1):
        cer_text = _as_percent(row["mean_cer"])
        wer_text = _as_percent(row["mean_wer"])
        failure_count = row["failed"]
        suffix = f" ({failure_count} erreur(s))" if failure_count else ""
        click.echo(
            f" {position}. {row['engine']:<20} "
            f"CER={cer_text:<8} WER={wer_text}{suffix}"
        )
    click.echo(f"\nRésultats écrits dans : {output}")

    # --- CI/CD gate -------------------------------------------------------
    # The threshold is expressed in percent; the first engine whose mean CER
    # exceeds it makes the process exit with a non-zero status.
    if fail_if_cer_above is None:
        return
    for row in benchmark.ranking():
        mean_cer = row["mean_cer"]
        if mean_cer is None:
            continue
        if mean_cer * 100 > fail_if_cer_above:
            click.echo(
                f"\nECHEC : {row['engine']} CER={mean_cer * 100:.2f}% "
                f"> seuil {fail_if_cer_above:.2f}%",
                err=True,
            )
            sys.exit(1)
| # --------------------------------------------------------------------------- | |
| # Workflows CLI dédiés (chantier 4 post-Sprint 97) | |
| # --------------------------------------------------------------------------- | |
| # | |
| # Chaque commande spécialisée fixe un profil de calcul (chantier 2) et | |
| # émet un message identifiant la famille avant de déléguer au runner. | |
| # L'option ``--profile`` reste disponible mais le défaut change pour | |
| # chaque commande. | |
def _run_workflow(
    *,
    corpus: str,
    engines: str,
    output: str,
    lang: str,
    psm: int,
    no_progress: bool,
    verbose: bool,
    profile: str,
    workflow_label: str,
) -> None:
    """Shared implementation behind the dedicated workflow commands.

    The steps are: load the corpus, instantiate the engines, call
    ``run_benchmark(profile=...)`` and print the ranking. Callers differ only
    in the profile they pass and in the label shown in the header line.

    Args:
        corpus: Corpus directory handed to ``load_corpus_from_directory``.
        engines: Comma-separated engine names; blank items are ignored.
        output: Passed to ``run_benchmark`` as ``output_json``.
        lang: Language forwarded to ``_engine_from_name``.
        psm: Page segmentation mode forwarded to ``_engine_from_name``.
        no_progress: When true, ``show_progress`` is disabled.
        verbose: Enables DEBUG logging.
        profile: Metrics profile forwarded to ``run_benchmark``.
        workflow_label: Tag printed in brackets before the corpus summary.

    Exits with status 1 when the corpus cannot be loaded, an engine name is
    rejected, or no engine remains.
    """
    _setup_logging(verbose)

    from picarones.core.corpus import load_corpus_from_directory
    from picarones.core.runner import run_benchmark

    def _as_percent(ratio):
        return f"{ratio * 100:.2f}%" if ratio is not None else "N/A"

    try:
        loaded_corpus = load_corpus_from_directory(corpus)
    except (FileNotFoundError, ValueError) as exc:
        click.echo(f"Erreur corpus : {exc}", err=True)
        sys.exit(1)
    click.echo(
        f"[{workflow_label}] Corpus '{loaded_corpus.name}' — "
        f"{len(loaded_corpus)} documents chargés."
    )

    # The first engine name that cannot be resolved aborts the command.
    requested = [item.strip() for item in engines.split(",") if item.strip()]
    try:
        ocr_engines = [
            _engine_from_name(engine_id, lang=lang, psm=psm)
            for engine_id in requested
        ]
    except click.BadParameter as exc:
        click.echo(f"Erreur moteur : {exc}", err=True)
        sys.exit(1)

    if not ocr_engines:
        click.echo("Aucun moteur valide spécifié.", err=True)
        sys.exit(1)

    click.echo(f"Moteurs : {', '.join(e.name for e in ocr_engines)}")
    click.echo(f"Profil de métriques : {profile}")

    benchmark = run_benchmark(
        corpus=loaded_corpus,
        engines=ocr_engines,
        output_json=output,
        show_progress=not no_progress,
        profile=profile,
    )

    click.echo("\n── Classement ──────────────────────────────────")
    for position, row in enumerate(benchmark.ranking(), start=1):
        cer_text = _as_percent(row["mean_cer"])
        wer_text = _as_percent(row["mean_wer"])
        failure_count = row["failed"]
        suffix = f" ({failure_count} erreur(s))" if failure_count else ""
        click.echo(
            f" {position}. {row['engine']:<20} "
            f"CER={cer_text:<8} WER={wer_text}{suffix}"
        )
    click.echo(f"\nRésultats écrits dans : {output}")
def diagnose_cmd(
    corpus: str, engines: str, output: str, lang: str, psm: int,
    no_progress: bool, verbose: bool,
) -> None:
    """Workflow diagnostic : bench + leviers d'amélioration + image_predictive.

    Active le profil ``diagnostics`` (chantier 2) qui calcule les
    métriques nécessaires à la vue HTML « Diagnostic approfondi »
    (chantier 3) : leviers, profil d'image, baseline, longitudinal.
    Idéal pour comprendre *pourquoi* un moteur produit ces résultats
    sur ce corpus, pas seulement *quel CER*.
    """
    # Docstring kept verbatim (presumably the Click help text).
    # Thin wrapper: the shared workflow runs with the profile pinned to
    # "diagnostics" and the header label "diagnose".
    shared_options = {
        "corpus": corpus,
        "engines": engines,
        "output": output,
        "lang": lang,
        "psm": psm,
        "no_progress": no_progress,
        "verbose": verbose,
    }
    _run_workflow(
        profile="diagnostics",
        workflow_label="diagnose",
        **shared_options,
    )
def economics_cmd(
    corpus: str, engines: str, output: str, lang: str, psm: int,
    no_progress: bool, verbose: bool,
) -> None:
    """Workflow économique : bench + throughput effectif + (cost projection).

    Active le profil ``economics`` (chantier 2) qui se concentre sur
    les métriques de décision budget : pages/h utilisable (intégrant
    la correction humaine HTR-United à 5 s/erreur), coût marginal par
    erreur évitée. La vue HTML « Coût et performance » (chantier 3)
    est ensuite branchée.
    """
    # Docstring kept verbatim (presumably the Click help text).
    # Thin wrapper: the shared workflow runs with the profile pinned to
    # "economics" and the header label "economics".
    shared_options = {
        "corpus": corpus,
        "engines": engines,
        "output": output,
        "lang": lang,
        "psm": psm,
        "no_progress": no_progress,
        "verbose": verbose,
    }
    _run_workflow(
        profile="economics",
        workflow_label="economics",
        **shared_options,
    )
def edition_cmd(
    corpus: str, engines: str, output: str, lang: str, psm: int,
    no_progress: bool, verbose: bool,
) -> None:
    """Workflow édition critique : bench + métriques philologiques.

    Active le profil ``philological`` (chantier 2) qui inclut les
    modules philologiques (unicode_blocks, abbreviations, MUFI,
    early_modern_typography, modern_archives, roman_numerals) et la
    vue HTML « Taxonomie avancée » (chantier 3) avec comparaison
    miroir leader vs runner-up. Cible : éditeurs de chartes,
    paléographes, archivistes.
    """
    # Docstring kept verbatim (presumably the Click help text).
    # Thin wrapper: the shared workflow runs with the profile pinned to
    # "philological" and the header label "edition".
    shared_options = {
        "corpus": corpus,
        "engines": engines,
        "output": output,
        "lang": lang,
        "psm": psm,
        "no_progress": no_progress,
        "verbose": verbose,
    }
    _run_workflow(
        profile="philological",
        workflow_label="edition",
        **shared_options,
    )
| # --------------------------------------------------------------------------- | |
| # picarones metrics | |
| # --------------------------------------------------------------------------- | |
def metrics_cmd(reference: str, hypothesis: str, json_output: bool) -> None:
    """Calcule CER et WER entre deux fichiers texte."""
    # Docstring kept verbatim (presumably the Click help text).
    from picarones.core.metrics import compute_metrics

    def _load(text_path: str) -> str:
        # Both inputs are read as UTF-8 and stripped of surrounding whitespace.
        return Path(text_path).read_text(encoding="utf-8").strip()

    ref_text = _load(reference)
    hyp_text = _load(hypothesis)
    result = compute_metrics(ref_text, hyp_text)

    if json_output:
        # Machine-readable mode: dump the metrics dictionary and stop.
        click.echo(json.dumps(result.as_dict(), ensure_ascii=False, indent=2))
        return

    # Human-readable mode. Ratio-valued fields are scaled to percent here;
    # cer_percent / wer_percent are printed as provided.
    report_lines = [
        f"CER : {result.cer_percent:.2f}%",
        f"CER (NFC) : {result.cer_nfc * 100:.2f}%",
        f"CER (caseless) : {result.cer_caseless * 100:.2f}%",
        f"WER : {result.wer_percent:.2f}%",
        f"WER (normalisé): {result.wer_normalized * 100:.2f}%",
        f"MER : {result.mer * 100:.2f}%",
        f"WIL : {result.wil * 100:.2f}%",
        f"Longueur GT : {result.reference_length} chars",
        f"Longueur OCR : {result.hypothesis_length} chars",
    ]
    for line in report_lines:
        click.echo(line)

    # NOTE(review): the flattened source does not show whether this check sat
    # inside the text-mode branch; text-mode only is assumed here — confirm.
    if result.error:
        click.echo(f"Erreur : {result.error}", err=True)
| # --------------------------------------------------------------------------- | |
| # picarones engines | |
| # --------------------------------------------------------------------------- | |
def engines_cmd() -> None:
    """Liste les moteurs OCR disponibles et vérifie leur installation."""
    # Docstring kept verbatim (presumably the Click help text).
    # Each row is (engine id, display label, importable module name).
    catalogue = (
        ("tesseract", "Tesseract 5 (pytesseract)", "pytesseract"),
        ("pero_ocr", "Pero OCR", "pero_ocr"),
    )

    click.echo("Moteurs OCR disponibles :\n")
    for engine_id, label, module in catalogue:
        # An engine counts as available when its Python module imports.
        try:
            __import__(module)
        except ImportError:
            status = click.style("✗ non installé", fg="red")
        else:
            status = click.style("✓ disponible", fg="green")
        click.echo(f" {engine_id:<15} {label:<35} {status}")

    install_hint = (
        "\nPour installer un moteur manquant :\n"
        " pip install pytesseract\n"
        " pip install pero-ocr"
    )
    click.echo(install_hint)
| # --------------------------------------------------------------------------- | |
| # picarones info | |
| # --------------------------------------------------------------------------- | |
def info_cmd() -> None:
    """Affiche les informations de version de Picarones et de ses dépendances."""
    # Docstring kept verbatim (presumably the Click help text).
    from importlib import metadata

    click.echo(f"Picarones v{__version__}")
    click.echo("")

    # Each row is (distribution name as installed by pip, importable module).
    deps = [
        ("click", "click"),
        ("jiwer", "jiwer"),
        ("Pillow", "PIL"),
        ("pytesseract", "pytesseract"),
        ("tqdm", "tqdm"),
        ("numpy", "numpy"),
        ("pyyaml", "yaml"),
    ]

    click.echo("Dépendances :")
    for name, module in deps:
        try:
            mod = __import__(module)
        except ImportError:
            status = click.style("non installé", fg="red")
        else:
            # Prefer the installed distribution metadata: not every package
            # exposes ``__version__``. Fall back to the attribute when the
            # module comes from a differently named distribution.
            try:
                version = metadata.version(name)
            except metadata.PackageNotFoundError:
                version = getattr(mod, "__version__", None)
            # Without a known version print plain "installé"; the previous
            # code prefixed the fallback with "v" and showed "vinstallé".
            label = f"v{version}" if version else "installé"
            status = click.style(label, fg="green")
        click.echo(f" {name:<15} {status}")
| # --------------------------------------------------------------------------- | |
| # picarones report | |
| # --------------------------------------------------------------------------- | |
def report_cmd(results: str, output: str, verbose: bool) -> None:
    """Génère le rapport HTML interactif depuis un fichier JSON de résultats.

    Le rapport est un fichier HTML auto-contenu, lisible hors-ligne,
    avec tableau de classement, galerie, vue document et graphiques.
    """
    # Docstring kept verbatim (presumably the Click help text).
    _setup_logging(verbose)

    from picarones.report.generator import ReportGenerator

    click.echo(f"Chargement des résultats : {results}")
    try:
        gen = ReportGenerator.from_json(results)
    except Exception as exc:
        # Command-line boundary: any loading failure becomes a readable
        # message and exit status 1 instead of a traceback.
        click.echo(f"Erreur lors du chargement : {exc}", err=True)
        sys.exit(1)

    click.echo("Génération du rapport HTML…")
    path = gen.generate(output)
    click.echo(f"Rapport généré : {path}")

    # Build a real file URI: "file://" + a relative or Windows path is not a
    # valid URL, so resolve to an absolute path and let pathlib format it.
    report_uri = Path(path).resolve().as_uri()
    click.echo(f"Ouvrez-le dans un navigateur : {report_uri}")
| # --------------------------------------------------------------------------- | |
| # picarones compare (Sprint 28) | |
| # --------------------------------------------------------------------------- | |
def compare_cmd(
    run_a: str,
    run_b: str,
    output: str,
    threshold: float,
    label_a: str,
    label_b: str,
    json_only: bool,
    verbose: bool,
) -> None:
    """Compare deux runs de benchmark JSON et signale les régressions.

    Convention : un Δ CER positif signifie que ``B`` est moins bon que
    ``A``. Un moteur dont |Δ CER| > ``--threshold`` est marqué comme
    régression ou amélioration.

    \b
    Exemples :
      picarones compare run_v1.json run_v2.json -o diff.html
      picarones compare run_v1.json run_v2.json --json
      picarones compare run_v1.json run_v2.json --threshold 0.01 --label-a v1 --label-b v2
    """
    # Docstring kept verbatim (presumably the Click help text).
    # Exit status 2 signals "regression detected" in both output modes, so
    # the command can be used as a CI gate.
    _setup_logging(verbose)

    from picarones.report.comparison import (
        compare_benchmarks,
        detect_regressions,
        render_comparison_html,
    )

    diff = compare_benchmarks(
        run_a,
        run_b,
        threshold=threshold,
        label_a=label_a,
        label_b=label_b,
    )
    regressions = detect_regressions(diff)

    if json_only:
        # JSON mode: print the diff and skip the HTML report entirely.
        click.echo(json.dumps(diff.as_dict(), ensure_ascii=False, indent=2))
        if regressions:
            sys.exit(2)
        return

    html_path = render_comparison_html(diff, output)
    improvement_count = sum(1 for delta in diff.deltas if delta.is_improvement)
    click.echo(f"Rapport de comparaison : {html_path}")
    click.echo(f"Moteurs comparés : {len(diff.deltas)}")
    click.echo(f"Régressions : {len(regressions)}")
    click.echo(f"Améliorations : {improvement_count}")

    if not regressions:
        return

    click.echo("\n— Régressions détectées —")
    for delta in regressions:
        click.echo(
            f" ⚠ {delta.engine} : "
            f"{delta.cer_a:.3f} → {delta.cer_b:.3f} (Δ +{delta.delta_cer:.3f})"
        )
    sys.exit(2)
| # --------------------------------------------------------------------------- | |
| # picarones demo | |
| # --------------------------------------------------------------------------- | |
def demo_cmd(
    output: str,
    docs: int,
    json_output: str | None,
    with_history: bool,
    with_robustness: bool,
    lang: str,
) -> None:
    """Génère un rapport de démonstration avec des données fictives réalistes.

    Utile pour tester le rendu HTML sans installer Tesseract ni Pero OCR.

    \b
    Exemples :
      picarones demo
      picarones demo --lang en
      picarones demo --with-history
      picarones demo --with-robustness
      picarones demo --with-history --with-robustness --docs 8
    """
    # Docstring kept verbatim (presumably the Click help text).
    from picarones.fixtures import generate_sample_benchmark
    from picarones.report.generator import ReportGenerator

    click.echo(f"Génération des données fictives ({docs} documents, 3 moteurs)…")
    benchmark = generate_sample_benchmark(n_docs=docs)

    # Optionally persist the synthetic benchmark next to the HTML report.
    if json_output:
        bm_path = benchmark.to_json(json_output)
        click.echo(f"Résultats JSON : {bm_path}")

    gen = ReportGenerator(benchmark, lang=lang)
    path = gen.generate(output)
    click.echo(f"Rapport de démonstration : {path}")

    # Build a real file URI: "file://" + a relative or Windows path is not a
    # valid URL, so resolve to an absolute path and let pathlib format it.
    report_uri = Path(path).resolve().as_uri()
    click.echo(f"Ouvrez-le dans un navigateur : {report_uri}")

    # --- Longitudinal tracking demo ---------------------------------------
    if with_history:
        click.echo("\n── Démonstration suivi longitudinal ──────────────")
        from picarones.core.history import BenchmarkHistory, generate_demo_history

        # ":memory:" is handed to BenchmarkHistory as the database location;
        # presumably an in-memory store so nothing is written — TODO confirm.
        history = BenchmarkHistory(":memory:")
        generate_demo_history(history, n_runs=8)
        entries = history.query(engine="tesseract")
        click.echo(f" {history.count()} entrées générées (8 runs, 3 moteurs).")
        click.echo("\n Évolution du CER — tesseract :")
        for e in entries:
            cer_str = f"{e.cer_percent:.2f}%" if e.cer_percent is not None else "N/A"
            # Text bar chart: two blocks per CER percentage point.
            bar = "█" * int((e.cer_percent or 0) * 2)
            click.echo(f" {e.timestamp[:10]} {cer_str:<8} {bar}")

        regression = history.detect_regression("tesseract", threshold=0.01)
        if regression and regression.is_regression:
            click.echo(
                click.style(
                    f"\n RÉGRESSION détectée ! delta CER = +{regression.delta_cer * 100:.2f}%",
                    fg="red",
                )
            )
        else:
            click.echo(click.style("\n Aucune régression détectée.", fg="green"))

    # --- Robustness demo --------------------------------------------------
    if with_robustness:
        click.echo("\n── Démonstration analyse de robustesse ───────────")
        from picarones.core.robustness import generate_demo_robustness_report

        report = generate_demo_robustness_report(
            engine_names=["tesseract", "pero_ocr"]
        )
        # Only the "noise" curves are shown to keep the demo output short.
        # NOTE(review): nesting of the critical-level line under the noise
        # filter is reconstructed from a flattened source — confirm.
        for curve in report.curves:
            if curve.degradation_type != "noise":
                continue
            click.echo(f"\n {curve.engine_name} / bruit gaussien :")
            for label, cer in zip(curve.labels, curve.cer_values):
                cer_pct = f"{(cer or 0) * 100:.1f}%"
                # Bar length: 40 blocks for a CER of 1.0.
                bar = "█" * int((cer or 0) * 40)
                click.echo(f" {label:<12} {cer_pct:<8} {bar}")
            if curve.critical_threshold_level is not None:
                click.echo(
                    click.style(
                        f" Niveau critique (CER>20%) : σ={curve.critical_threshold_level}",
                        fg="yellow",
                    )
                )
| # --------------------------------------------------------------------------- | |
| # picarones import (groupe de sous-commandes) | |
| # --------------------------------------------------------------------------- | |
def import_group() -> None:
    """Importe un corpus depuis une source distante (IIIF, HuggingFace…)."""
    # Intentionally empty body. Presumably a Click sub-group that only hosts
    # the ``import`` sub-commands (decorators are not visible in this view);
    # the docstring above would then be its help text — TODO confirm.
def import_iiif_cmd(
    manifest_url: str,
    pages: str,
    output: str,
    max_resolution: int,
    no_progress: bool,
    verbose: bool,
) -> None:
    """Importe un corpus depuis un manifeste IIIF (v2 ou v3).

    MANIFEST_URL : URL du manifeste IIIF (Gallica, Bodleian, BL, BSB…)

    Exemples :

    \b
      picarones import iiif https://gallica.bnf.fr/ark:/12148/xxx/manifest.json
      picarones import iiif https://gallica.bnf.fr/ark:/12148/xxx/manifest.json --pages 1-10
      picarones import iiif https://gallica.bnf.fr/ark:/12148/xxx/manifest.json --pages 1,3,5-8 --output ./mon_corpus/

    Les images sont téléchargées dans le dossier de sortie.
    Des fichiers .gt.txt vides (ou remplis si le manifeste contient des annotations
    de transcription) sont créés à côté de chaque image.
    """
    # Docstring kept verbatim (presumably the Click help text).
    _setup_logging(verbose)

    from picarones.importers.iiif import IIIFImporter

    click.echo(f"Manifeste IIIF : {manifest_url}")
    try:
        # Loading, page selection and download share one error boundary:
        # any ValueError / RuntimeError along the way aborts with status 1.
        importer = IIIFImporter(manifest_url, max_resolution=max_resolution)
        importer.load()

        manifest = importer.parser
        all_canvases = manifest.canvases()
        click.echo(
            f"Manifeste IIIF v{importer.parser.version} — "
            f"titre : {importer.parser.label} — "
            f"{len(all_canvases)} canvas disponibles"
        )

        selected = importer.list_canvases(pages)
        click.echo(f"Pages sélectionnées : {len(selected)} sur {len(all_canvases)}")

        corpus = importer.import_corpus(
            pages=pages,
            output_dir=output,
            show_progress=not no_progress,
        )
    except (ValueError, RuntimeError) as exc:
        click.echo(f"Erreur import IIIF : {exc}", err=True)
        sys.exit(1)

    total = len(corpus)
    click.echo(f"\n{total} documents importés dans : {output}")

    # Summary: count documents whose ground truth is not blank.
    gt_filled = len([doc for doc in corpus.documents if doc.ground_truth.strip()])
    if not gt_filled:
        click.echo(
            "Aucune transcription dans le manifeste — "
            "les fichiers .gt.txt sont vides (à remplir manuellement ou via OCR)."
        )
    else:
        click.echo(f"Transcriptions trouvées dans le manifeste : {gt_filled}/{len(corpus)}")

    click.echo("\nPour lancer un benchmark sur ce corpus :")
    click.echo(f" picarones run --corpus {output} --engines tesseract")
| # --------------------------------------------------------------------------- | |
| # picarones serve | |
| # --------------------------------------------------------------------------- | |
def serve_cmd(host: str, port: int, reload: bool, verbose: bool) -> None:
    """Lance l'interface web locale Picarones sur localhost.

    Accessible dans le navigateur à l'adresse : http://HOST:PORT

    \b
    Exemples :
      picarones serve
      picarones serve --port 8080
      picarones serve --host 0.0.0.0 --port 8000
    """
    # Docstring kept verbatim (presumably the Click help text).
    _setup_logging(verbose)

    # uvicorn is imported lazily so its absence only affects this command;
    # the message suggests the "web" extra.
    try:
        import uvicorn
    except ImportError:
        missing_dependency_help = (
            "uvicorn n'est pas installé. Installez-le avec :\n"
            " pip install uvicorn[standard]\n"
            "ou :\n"
            " pip install picarones[web]"
        )
        click.echo(missing_dependency_help, err=True)
        sys.exit(1)

    click.echo("Picarones — Interface web locale")
    click.echo(f"Démarrage du serveur sur http://{host}:{port}")
    click.echo("Appuyez sur Ctrl+C pour arrêter.\n")

    # The application is referenced by import string rather than by object.
    uvicorn.run(
        "picarones.web.app:app",
        host=host,
        port=port,
        reload=reload,
        log_level="debug" if verbose else "info",
    )
| # --------------------------------------------------------------------------- | |
| # picarones history | |
| # --------------------------------------------------------------------------- | |
def history_cmd(
    db: str,
    engine: str | None,
    corpus: str | None,
    since: str | None,
    limit: int,
    regression: bool,
    regression_threshold: float,
    export_json: str | None,
    demo: bool,
    verbose: bool,
) -> None:
    """Consulte l'historique des benchmarks (suivi longitudinal).

    Affiche l'évolution du CER dans le temps pour chaque moteur et corpus.
    Permet de détecter automatiquement les régressions entre deux runs.

    \b
    Exemples :
      picarones history
      picarones history --engine tesseract --corpus "Chroniques médiévales"
      picarones history --regression --regression-threshold 0.02
      picarones history --demo # données fictives de démonstration
      picarones history --export-json historique.json
    """
    # Docstring kept verbatim (presumably the Click help text).
    _setup_logging(verbose)

    from picarones.core.history import BenchmarkHistory, generate_demo_history

    history = BenchmarkHistory(db)

    # Demo mode seeds the opened history with synthetic runs before querying.
    if demo:
        click.echo("Insertion de données fictives de démonstration dans l'historique…")
        generate_demo_history(history, n_runs=8)
        click.echo(f" {history.count()} entrées insérées.")

    # Export mode short-circuits: nothing else is displayed.
    if export_json:
        exported_to = history.export_json(export_json)
        click.echo(f"Historique exporté : {exported_to}")
        return

    entries = history.query(engine=engine, corpus=corpus, since=since, limit=limit)
    if not entries:
        click.echo("Aucun benchmark dans l'historique.")
        click.echo(
            "\nPour enregistrer automatiquement les runs, utilisez :\n"
            " picarones run --corpus ./gt/ --engines tesseract --save-history\n"
            "\nOu pour tester avec des données fictives :\n"
            " picarones history --demo"
        )
        return

    # Group entries per engine, keeping the order returned by the query.
    by_engine: dict[str, list] = {}
    for record in entries:
        if record.engine_name not in by_engine:
            by_engine[record.engine_name] = []
        by_engine[record.engine_name].append(record)

    scope = "filtré" if (engine or corpus) else "tous"
    click.echo(f"\n── Historique des benchmarks ({scope}) ──")
    click.echo(f" Base : {history.db_path}")
    click.echo(f" Total entrées : {len(entries)}\n")

    for eng_name, eng_entries in by_engine.items():
        click.echo(click.style(f" Moteur : {eng_name}", bold=True))
        for record in eng_entries:
            if record.cer_percent is None:
                cer_str = "N/A"
            else:
                cer_str = f"{record.cer_percent:.2f}%"
            if record.wer_mean is None:
                wer_str = "N/A"
            else:
                wer_str = f"{record.wer_mean * 100:.2f}%"
            day = record.timestamp[:10]  # date part only
            click.echo(
                f" {day} CER={cer_str:<8} WER={wer_str:<8} "
                f"docs={record.doc_count} corpus={record.corpus_name}"
            )
        click.echo()  # blank line between engines

    # --- Regression detection ---------------------------------------------
    if not regression:
        return

    click.echo("── Détection de régressions ──────────────────────")
    regressions = history.detect_all_regressions(threshold=regression_threshold)
    if not regressions:
        click.echo(
            click.style(
                f" Aucune régression détectée (seuil={regression_threshold*100:.1f}%)",
                fg="green",
            )
        )
        return

    for found in regressions:
        # A missing or zero delta is displayed as "N/A".
        delta_str = f"+{found.delta_cer * 100:.2f}%" if found.delta_cer else "N/A"
        click.echo(
            click.style(
                f" RÉGRESSION {found.engine_name} / {found.corpus_name} : "
                f"delta CER={delta_str} "
                f"({found.baseline_timestamp[:10]} → {found.current_timestamp[:10]})",
                fg="red",
            )
        )
| # --------------------------------------------------------------------------- | |
| # picarones robustness | |
| # --------------------------------------------------------------------------- | |
def robustness_cmd(
    corpus: str,
    engine: str,
    degradations: str,
    cer_threshold: float,
    max_docs: int,
    output_json: str | None,
    lang: str,
    no_progress: bool,
    demo: bool,
    verbose: bool,
) -> None:
    """Lance une analyse de robustesse d'un moteur OCR face aux dégradations d'image.

    Génère des versions dégradées des images (bruit, flou, rotation,
    réduction de résolution, binarisation) et mesure le CER à chaque niveau.

    \b
    Exemples :
      picarones robustness --corpus ./gt/ --engine tesseract
      picarones robustness --corpus ./gt/ --engine pero_ocr --degradations noise,blur
      picarones robustness --corpus ./gt/ --engine tesseract --output-json robustness.json
      picarones robustness --corpus ./gt/ --engine tesseract --demo
    """
    # Docstring kept verbatim (presumably the Click help text).
    _setup_logging(verbose)

    # Single import of everything needed from the robustness module (the
    # previous code imported RobustnessAnalyzer a second time further down).
    from picarones.core.robustness import (
        ALL_DEGRADATION_TYPES,
        RobustnessAnalyzer,
        generate_demo_robustness_report,
    )

    deg_types = [d.strip() for d in degradations.split(",") if d.strip()]

    # Reject unknown degradation names before doing any work.
    invalid = [d for d in deg_types if d not in ALL_DEGRADATION_TYPES]
    if invalid:
        click.echo(
            f"Types de dégradation invalides : {', '.join(invalid)}\n"
            f"Types valides : {', '.join(ALL_DEGRADATION_TYPES)}",
            err=True,
        )
        sys.exit(1)

    click.echo(f"Corpus : {corpus}")
    click.echo(f"Moteur : {engine}")
    click.echo(f"Dégradations : {', '.join(deg_types)}")
    click.echo(f"Seuil CER : {cer_threshold * 100:.0f}%")

    if demo:
        # Demo mode: synthetic report, neither corpus nor engine is loaded.
        click.echo("\nMode démo : génération d'un rapport fictif réaliste…")
        report = generate_demo_robustness_report(engine_names=[engine])
    else:
        from picarones.core.corpus import load_corpus_from_directory

        try:
            corp = load_corpus_from_directory(corpus)
        except (FileNotFoundError, ValueError) as exc:
            click.echo(f"Erreur corpus : {exc}", err=True)
            sys.exit(1)
        click.echo(f"\n{len(corp)} documents chargés. Début de l'analyse…\n")

        try:
            # psm is fixed to 6 here; this command exposes no psm parameter.
            ocr_engine = _engine_from_name(engine, lang=lang, psm=6)
        except click.BadParameter as exc:
            click.echo(f"Erreur moteur : {exc}", err=True)
            sys.exit(1)

        analyzer = RobustnessAnalyzer(
            engines=[ocr_engine],
            degradation_types=deg_types,
            cer_threshold=cer_threshold,
        )
        report = analyzer.analyze(
            corpus=corp,
            show_progress=not no_progress,
            max_docs=max_docs,
        )

    # --- Per-curve results --------------------------------------------------
    click.echo("\n── Résultats de robustesse ──────────────────────────")
    for curve in report.curves:
        click.echo(f"\n {curve.engine_name} / {curve.degradation_type}")
        for idx, (label, cer) in enumerate(zip(curve.labels, curve.cer_values)):
            if cer is None:
                continue
            # Bar length: 40 blocks for a CER of 1.0.
            bar = "█" * int(cer * 40)
            cer_pct = f"{cer * 100:.1f}%"
            # Use the row position to look up the level: searching the label
            # list returned the first match, which is wrong when two levels
            # share a label.
            is_critical = (
                curve.critical_threshold_level is not None
                and curve.levels[idx] == curve.critical_threshold_level
            )
            threshold_marker = " ← CRITIQUE" if is_critical else ""
            click.echo(f" {label:<12} {cer_pct:<8} {bar}{threshold_marker}")

        if curve.critical_threshold_level is not None:
            click.echo(
                click.style(
                    f" Niveau critique (CER>{cer_threshold*100:.0f}%) : {curve.critical_threshold_level}",
                    fg="yellow",
                )
            )
        else:
            click.echo(click.style(" Robuste jusqu'au niveau max.", fg="green"))

    # --- Summary ------------------------------------------------------------
    click.echo("\n── Résumé ──────────────────────────────────────────")
    for key, val in report.summary.items():
        if key.startswith("most_robust_"):
            deg = key.replace("most_robust_", "")
            click.echo(f" Moteur le plus robuste ({deg}) : {val}")

    # --- Optional JSON export -----------------------------------------------
    if output_json:
        report_dict = report.as_dict()
        Path(output_json).write_text(
            json.dumps(report_dict, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
        click.echo(f"\nRapport JSON exporté : {output_json}")
| # --------------------------------------------------------------------------- | |
| # Mise à jour de picarones demo pour illustrer suivi longitudinal + robustesse | |
| # --------------------------------------------------------------------------- | |
| # --------------------------------------------------------------------------- | |
| # Sprint 70 — sous-groupe `pipeline` : runner et compare de pipelines | |
| # composées (axe B), pilotables depuis des fichiers YAML déclaratifs. | |
| # --------------------------------------------------------------------------- | |
def pipeline_group() -> None:
    """Banc d'essai de pipelines composées (modules tiers)."""
    # NOTE(review): the docstring is intentionally left in French and
    # unchanged — presumably Click surfaces it as the group's --help text
    # (the registering decorator is not visible in this chunk; confirm).
    #
    # The group itself performs no work; it only exists as a namespace for
    # the pipeline sub-commands defined below.
    return None
def pipeline_run_cmd(
    spec_path: Path,
    corpus_dir: Path,
    output_json: Path | None,
    output_html: Path | None,
    lang: str,
) -> None:
    """Exécute la pipeline décrite dans SPEC_PATH sur un corpus."""
    # NOTE(review): docstring kept verbatim (French) — presumably Click
    # surfaces it as the command's --help text; confirm against decorators.
    #
    # Steps performed:
    #   1. load the pipeline spec from the YAML file at ``spec_path``;
    #   2. load the corpus found under ``corpus_dir``;
    #   3. run the benchmark and echo a global + per-step summary;
    #   4. optionally dump the aggregates to ``output_json``;
    #   5. optionally render an HTML report to ``output_html`` using ``lang``.
    import json as _json

    from picarones.core.corpus import load_corpus_from_directory
    from picarones.core.pipeline_benchmark import run_pipeline_benchmark
    from picarones.core.pipeline_spec_loader import load_pipeline_spec_from_yaml

    # Attribute names copied as-is into the JSON export (key == attribute).
    bench_fields = (
        "pipeline_name",
        "corpus_name",
        "n_docs",
        "n_pipelines_succeeded",
        "n_pipelines_failed",
        "total_duration_seconds",
    )
    step_fields = (
        "step_name",
        "n_docs",
        "n_succeeded",
        "n_failed",
        "duration_seconds_mean",
        "duration_seconds_median",
        "junction_metrics",
        "error_breakdown",
        "failing_doc_ids",
    )

    pipeline_spec = load_pipeline_spec_from_yaml(spec_path)
    corpus = load_corpus_from_directory(str(corpus_dir))

    banner = f"Pipeline {pipeline_spec.name!r} sur {corpus.name!r} "
    banner += f"({len(list(corpus.documents))} docs)"
    click.echo(banner)

    bench = run_pipeline_benchmark(pipeline_spec, corpus)

    outcome = f"Terminé : {bench.n_pipelines_succeeded}/{bench.n_docs} succès "
    outcome += f"en {bench.total_duration_seconds:.2f}s"
    click.echo(outcome)

    # One summary line per pipeline step.
    for step in bench.per_step_aggregates:
        step_line = f" {step.step_name}: succès={step.n_succeeded}/{step.n_docs} "
        step_line += f"({step.success_rate * 100:.0f}%)"
        click.echo(step_line)

    if output_json is not None:
        # Top-level benchmark fields first, per-step records appended last so
        # the key order of the exported document stays the same.
        payload = {field: getattr(bench, field) for field in bench_fields}
        payload["per_step_aggregates"] = [
            {field: getattr(step, field) for field in step_fields}
            for step in bench.per_step_aggregates
        ]
        serialized = _json.dumps(payload, ensure_ascii=False, indent=2)
        Path(output_json).write_text(serialized, encoding="utf-8")
        click.echo(f"JSON exporté : {output_json}")

    if output_html is None:
        return

    # The HTML renderer is only imported when an HTML report is requested.
    from picarones.report.pipeline_render import build_pipeline_report_html

    html_document = build_pipeline_report_html(bench, lang=lang)
    Path(output_html).write_text(html_document, encoding="utf-8")
    click.echo(f"HTML exporté : {output_html}")
def pipeline_compare_cmd(
    specs_path: Path,
    corpus_dir: Path,
    output_html: Path | None,
    baseline: str | None,
    lang: str,
) -> None:
    """Compare N pipelines décrites dans SPECS_PATH sur le même corpus."""
    # NOTE(review): docstring kept verbatim (French) — presumably Click
    # surfaces it as the command's --help text; confirm against decorators.
    #
    # Steps performed:
    #   1. load the pipeline specs (plus any extra YAML data) from
    #      ``specs_path`` and the corpus from ``corpus_dir``;
    #   2. run the comparison and echo the ranking on the TEXT/"cer" metric;
    #   3. when ``output_html`` is given, build the ranking specs (from the
    #      "rankings" entry of the extras, else a single CER ranking) and
    #      write the HTML comparison report.
    from picarones.core.corpus import load_corpus_from_directory
    from picarones.core.modules import ArtifactType
    from picarones.core.pipeline_comparison import compare_pipelines
    from picarones.core.pipeline_spec_loader import (
        load_comparison_specs_from_yaml,
    )

    specs, extras = load_comparison_specs_from_yaml(specs_path)
    corpus = load_corpus_from_directory(str(corpus_dir))

    banner = f"Comparaison de {len(specs)} pipelines sur {corpus.name!r} "
    banner += f"({len(list(corpus.documents))} docs)"
    click.echo(banner)

    comparison = compare_pipelines(specs, corpus)
    click.echo(f"Terminé en {comparison.total_duration_seconds:.2f}s")

    ranked = comparison.ranking_by_final_metric(ArtifactType.TEXT, "cer")
    if ranked:
        click.echo("\nClassement par CER (TEXT) :")
        for position, (pipeline_name, score) in enumerate(ranked, start=1):
            # A pipeline without a value for the metric is shown as "N/A".
            if score is None:
                shown = "N/A"
            else:
                shown = f"{score:.4f}"
            click.echo(f" {position}. {pipeline_name}: {shown}")

    if output_html is None:
        return

    # Report-only dependencies are imported lazily, once HTML is requested.
    from picarones.report.pipeline_render import (
        RankingSpec,
        build_pipeline_comparison_report_html,
    )

    if isinstance(extras, dict):
        rankings_yaml = extras.get("rankings")
    else:
        rankings_yaml = None

    ranking_specs: list[RankingSpec] = []
    if rankings_yaml and isinstance(rankings_yaml, list):
        for entry in rankings_yaml:
            # Entries that are not mappings are ignored.
            if not isinstance(entry, dict):
                continue
            # Entries with a missing or unknown artifact type are ignored too.
            try:
                artifact_type = ArtifactType(entry["artifact_type"])
            except (KeyError, ValueError):
                continue
            ranking_specs.append(
                RankingSpec(
                    artifact_type=artifact_type,
                    metric_name=entry.get("metric", "cer"),
                    higher_is_better=bool(entry.get("higher_is_better", False)),
                    label=entry.get("label"),
                )
            )

    # Fall back to a single CER ranking when nothing usable was declared.
    if not ranking_specs:
        ranking_specs = [RankingSpec(ArtifactType.TEXT, "cer", label="CER")]

    html_document = build_pipeline_comparison_report_html(
        comparison,
        ranking_specs=ranking_specs,
        baseline_pipeline=baseline,
        lang=lang,
    )
    Path(output_html).write_text(html_document, encoding="utf-8")
    click.echo(f"\nHTML exporté : {output_html}")
# Script entry point: when this module is executed directly (rather than
# imported), hand control to `cli()`, which is defined elsewhere in this file
# (not visible in this chunk).
if __name__ == "__main__":
    cli()