Spaces:
Sleeping
Sleeping
| """Construction de la liste ``documents`` (vue galerie + vue détail). | |
| Pour chaque document du corpus, agrège les hypothèses de tous les | |
| moteurs avec leurs métriques, le diff caractère par caractère, et | |
| les champs spécifiques aux pipelines OCR+LLM (intermédiaire, mode, | |
| sur-normalisation). | |
| :func:`annotate_documents_with_difficulty` enrichit ensuite chaque | |
| document avec son score de difficulté intrinsèque (Sprint 7). | |
| """ | |
| from __future__ import annotations | |
| from typing import TYPE_CHECKING | |
| from picarones.core.diff_utils import compute_char_diff, compute_word_diff | |
| from picarones.measurements.difficulty import ( | |
| compute_all_difficulties, | |
| difficulty_label, | |
| ) | |
| from picarones.report.report_data._helpers import safe_round | |
| if TYPE_CHECKING: | |
| from picarones.core.results import BenchmarkResult | |
def build_documents(
    benchmark: "BenchmarkResult", images_b64: dict[str, str],
) -> list[dict]:
    """Return the ordered list of documents ready for the report template.

    Documents appear in order of first occurrence while walking
    ``benchmark.engine_reports``: the documents of the first engine come
    first, followed by documents that only later engines cover.

    Args:
        benchmark: Benchmark result; each item of ``engine_reports`` exposes
            ``engine_name`` and ``document_results``.
        images_b64: Mapping ``doc_id -> image payload`` (presumably base64,
            judging by the name). Documents without an entry get ``""``.

    Returns:
        One dict per document with the keys ``doc_id``, ``image_path``,
        ``image_b64``, ``ground_truth``, ``mean_cer``, ``best_engine``,
        ``engine_results`` and ``script_type``.

    Notes:
        * ``mean_cer`` averages only engines that ran without error and have
          a CER value; it defaults to ``1.0`` when there is none.
        * ``best_engine`` is chosen among those same successful engines, so a
          failed engine is never preferred over a successful one. If every
          engine failed, the lowest-CER entry overall is used.
        * ``script_type`` comes from the first engine (in report order) whose
          result carries image-quality data for this document.
    """
    engine_names = [report.engine_name for report in benchmark.engine_reports]

    # Cross index: doc_id -> {engine_name -> DocumentResult}.
    # Dict insertion order doubles as the first-appearance document order.
    doc_engine_map: dict[str, dict] = {}
    for report in benchmark.engine_reports:
        for dr in report.document_results:
            doc_engine_map.setdefault(dr.doc_id, {})[report.engine_name] = dr

    documents: list[dict] = []
    for doc_id, results_by_engine in doc_engine_map.items():
        engine_results: list[dict] = []
        gt = ""
        image_path = ""
        script_type = ""
        script_type_found = False

        for engine_name in engine_names:
            dr = results_by_engine.get(engine_name)
            if dr is None:
                # This engine did not process the document.
                continue
            gt = dr.ground_truth
            image_path = dr.image_path
            # Script type (from per-document metadata when available): take
            # it from the first engine that actually has image-quality data,
            # so documents missing from the first engine are still labelled.
            if not script_type_found and dr.image_quality:
                script_type = dr.image_quality.get("script_type", "")
                script_type_found = True
            engine_results.append(_build_engine_result_entry(engine_name, dr))

        # Entries usable for ranking: no engine error and an actual CER.
        # NOTE(review): assumes safe_round may yield None — confirm.
        scored = [
            er for er in engine_results
            if er["error"] is None and er["cer"] is not None
        ]

        # Mean CER on this document (used for the gallery badge).
        if scored:
            mean_cer = sum(er["cer"] for er in scored) / len(scored)
        else:
            mean_cer = 1.0

        # Prefer successful engines; only fall back to failed ones when
        # nothing succeeded, which keeps the previous degenerate behaviour.
        candidates = scored or [
            er for er in engine_results if er["cer"] is not None
        ]
        best_engine = min(candidates, key=lambda er: er["cer"], default=None)

        documents.append({
            "doc_id": doc_id,
            "image_path": image_path,
            "image_b64": images_b64.get(doc_id, ""),
            "ground_truth": gt,
            "mean_cer": safe_round(mean_cer),
            "best_engine": best_engine["engine"] if best_engine else "",
            "engine_results": engine_results,
            "script_type": script_type,
        })
    return documents
def _build_engine_result_entry(engine_name: str, dr) -> dict:
    """Build the entry describing one engine's output for one document.

    Args:
        engine_name: Name stored under the ``"engine"`` key.
        dr: Per-document result object; this function reads its
            ``ground_truth``, ``hypothesis``, ``metrics``,
            ``duration_seconds``, ``engine_error`` and the optional
            attributes handled below.

    Returns:
        A dict that always contains ``engine``, ``hypothesis``, ``cer``,
        ``cer_diplomatic``, ``wer``, ``mer``, ``wil``, ``duration``,
        ``error`` and ``diff``. Optional keys are added only when the
        matching attribute of ``dr`` is set.
    """
    char_diff = compute_char_diff(dr.ground_truth, dr.hypothesis)
    metrics = dr.metrics
    diplomatic = metrics.cer_diplomatic

    entry: dict = {"engine": engine_name, "hypothesis": dr.hypothesis}
    entry["cer"] = safe_round(metrics.cer)
    # The diplomatic CER is optional; a missing value stays None.
    entry["cer_diplomatic"] = None if diplomatic is None else safe_round(diplomatic)
    for metric_name in ("wer", "mer", "wil"):
        entry[metric_name] = safe_round(getattr(metrics, metric_name))
    entry["duration"] = dr.duration_seconds
    entry["error"] = dr.engine_error
    entry["diff"] = char_diff

    # Fields specific to OCR+LLM pipelines.
    intermediate = dr.ocr_intermediate
    if intermediate is not None:
        entry.update(
            ocr_intermediate=intermediate,
            ocr_diff=compute_word_diff(dr.ground_truth, intermediate),
            llm_correction_diff=compute_word_diff(intermediate, dr.hypothesis),
        )

    pipeline_meta = dr.pipeline_metadata
    if pipeline_meta:
        over_normalization = pipeline_meta.get("over_normalization")
        if over_normalization is not None:
            entry["over_normalization"] = over_normalization
        # The mode is recorded whenever metadata exists (it may be None).
        entry["pipeline_mode"] = pipeline_meta.get("pipeline_mode")

    # Sprint 5 — advanced per-document metrics.
    char_scores = dr.char_scores
    if char_scores is not None:
        score_keys = (
            ("ligature_score", "ligature"),
            ("diacritic_score", "diacritic"),
        )
        for entry_key, family in score_keys:
            entry[entry_key] = safe_round(char_scores.get(family, {}).get("score"))

    # Attributes copied through unchanged when present
    # (the last two were introduced in Sprint 10).
    passthrough = (
        "taxonomy",
        "structure",
        "image_quality",
        "line_metrics",
        "hallucination_metrics",
    )
    for field_name in passthrough:
        value = getattr(dr, field_name)
        if value is not None:
            entry[field_name] = value

    return entry
def annotate_documents_with_difficulty(
    benchmark: "BenchmarkResult", documents: list[dict],
) -> None:
    """Attach a difficulty score and label to every document (Sprint 7).

    ``documents`` is modified in place: each dict receives the keys
    ``difficulty_score`` and ``difficulty_label``. When no usable score is
    returned for a document, the defaults ``0.5`` / ``"Modéré"`` are used.

    Args:
        benchmark: Benchmark result whose engine reports supply the
            per-engine CER and, when present, the image quality score.
        documents: Document dicts; each must have ``doc_id`` and
            ``ground_truth``.
    """
    ordered_ids: list[str] = []
    ground_truths: dict = {}
    cer_by_doc: dict[str, dict[str, float]] = {}
    for doc in documents:
        doc_id = doc["doc_id"]
        ordered_ids.append(doc_id)
        ground_truths[doc_id] = doc["ground_truth"]
        cer_by_doc[doc_id] = {}

    quality_by_doc: dict[str, float] = {}
    for report in benchmark.engine_reports:
        for dr in report.document_results:
            rounded_cer = safe_round(dr.metrics.cer)
            cer_by_doc.setdefault(dr.doc_id, {})[report.engine_name] = rounded_cer
            quality = dr.image_quality
            # A later report overwrites an earlier quality score for the
            # same document.
            if quality and "quality_score" in quality:
                quality_by_doc[dr.doc_id] = quality["quality_score"]

    scores = compute_all_difficulties(
        doc_ids=ordered_ids,
        ground_truths=ground_truths,
        cer_map=cer_by_doc,
        # An empty quality map is passed as None.
        image_quality_map=quality_by_doc or None,
    )

    for doc in documents:
        ds = scores.get(doc["doc_id"])
        if not ds:
            # No usable score for this document: apply the defaults.
            doc["difficulty_score"] = 0.5
            doc["difficulty_label"] = "Modéré"
            continue
        doc["difficulty_score"] = safe_round(ds.score)
        doc["difficulty_label"] = difficulty_label(ds.score)
# Names exported by a star-import of this module; the underscore-prefixed
# helper ``_build_engine_result_entry`` is not listed.
__all__ = ["build_documents", "annotate_documents_with_difficulty"]