``).
"""
result = self.load_run_result(run_dir)
return self.render(result)
# ──────────────────────────────────────────────────────────────────
# Loader (statique, utilisable hors instance)
# ──────────────────────────────────────────────────────────────────
@staticmethod
def load_run_result(run_dir: Path | str) -> RunResult:
    """Rebuild a ``RunResult`` from the files of a persisted run directory.

    Reads ``run_manifest.json``, ``pipeline_results.jsonl`` and
    ``view_results.jsonl`` from ``run_dir``, plus ``artifacts_index.jsonl``
    when it exists (the files written by ``BenchmarkService.persist``,
    S41).

    Parameters
    ----------
    run_dir
        Directory that holds the persisted run files.

    Returns
    -------
    RunResult
        The manifest plus one ``RunDocumentResult`` per document id found
        in the pipeline results or the view results, sorted by document id.

    Raises
    ------
    FileNotFoundError
        If one of the mandatory files (manifest, pipeline_results,
        view_results) is missing. ``artifacts_index.jsonl`` is optional
        so that runs persisted before S41 remain loadable.
    """
    base = Path(run_dir)
    manifest_path = base / "run_manifest.json"
    pipelines_path = base / "pipeline_results.jsonl"
    views_path = base / "view_results.jsonl"
    artifacts_index_path = base / "artifacts_index.jsonl"

    def iter_records(path):
        """Yield one decoded JSON object per non-blank line of ``path``."""
        with path.open("r", encoding="utf-8") as handle:
            for line in handle:
                if line.strip():
                    yield json.loads(line)

    # All mandatory files are checked up front, before anything is parsed,
    # in the order manifest -> pipeline results -> view results.
    for required in (manifest_path, pipelines_path, views_path):
        if not required.exists():
            raise FileNotFoundError(
                f"{required.name} absent du dossier : {base!r}",
            )

    manifest = RunManifest.model_validate_json(
        manifest_path.read_text(encoding="utf-8"),
    )

    # S41 — the artifact index is stored separately from
    # pipeline_results.jsonl. It is read FIRST so the artifacts can be
    # re-attached to each pipeline result while those are rebuilt.
    artifacts_by_pipeline: dict[tuple[str, str], list[dict]] = {}
    if artifacts_index_path.exists():
        for record in iter_records(artifacts_index_path):
            # `pipeline_name` is only an index (grouping) field: it is
            # removed before the record is re-validated as an Artifact
            # (which has `extra="forbid"`). `document_id` belongs to the
            # Artifact itself and must be kept for pydantic validation.
            pipeline_name = record.pop("pipeline_name")
            index_key = (record["document_id"], pipeline_name)
            artifacts_by_pipeline.setdefault(index_key, []).append(record)

    # Rebuild the pipeline results, grouped by document.
    pipeline_results_by_doc: dict[str, list[PipelineResult]] = {}
    for payload in iter_records(pipelines_path):
        doc_id = payload["document_id"]
        # Re-attach artifacts from the S41 index, but never override
        # artifacts that are already embedded in the payload.
        lookup_key = (doc_id, payload.get("pipeline_name", ""))
        if lookup_key in artifacts_by_pipeline and "artifacts" not in payload:
            payload["artifacts"] = artifacts_by_pipeline[lookup_key]
        pipeline_results_by_doc.setdefault(doc_id, []).append(
            PipelineResult.model_validate(payload),
        )

    # Rebuild the view results, grouped by document. `document_id` is
    # removed from the payload before validation.
    view_results_by_doc: dict[str, list[ViewResult]] = {}
    for payload in iter_records(views_path):
        doc_id = payload.pop("document_id")
        view_results_by_doc.setdefault(doc_id, []).append(
            ViewResult.model_validate(payload),
        )

    # A document appears in the result as soon as it has either kind of
    # record; sorting keeps the output order deterministic.
    all_doc_ids = sorted(
        set(pipeline_results_by_doc) | set(view_results_by_doc),
    )
    document_results = tuple(
        RunDocumentResult(
            document_id=doc_id,
            pipeline_results=tuple(pipeline_results_by_doc.get(doc_id, [])),
            view_results=tuple(view_results_by_doc.get(doc_id, [])),
        )
        for doc_id in all_doc_ids
    )
    return RunResult(manifest=manifest, document_results=document_results)
# ──────────────────────────────────────────────────────────────────
# Helpers de rendu
# ──────────────────────────────────────────────────────────────────
def _render_head(self, manifest: RunManifest) -> str:
# Build the leading fragment of the report as a single string.
# The only dynamic part visible here is the title line: the ``title``
# label and the manifest's corpus name joined by an em dash, HTML-escaped
# as a whole before being interpolated.
# NOTE(review): the other literals below contain only a newline (or
# nothing) as seen here — the markup they carried may have been lost;
# confirm against the canonical file before relying on this output.
title = html.escape(
f"{self._labels['title']} — {manifest.corpus_name}",
)
return (
f'\n'
f'\n'
f'\n'
f'\n'
f'{title}\n'
f'\n'
f'\n'
f''
)
def _render_header_block(self, manifest: RunManifest) -> str:
# Build the run summary fragment: a sequence of "label : value" entries
# taken from the manifest, in this order — title, corpus name, run id,
# code version, start / completion timestamps (``isoformat()``) with the
# duration in seconds (3 decimals), and the number of documents.
# Captions come from ``self._labels``; captions and textual values go
# through ``html.escape``, while ``duration_seconds`` and ``n_documents``
# are interpolated as-is.
# NOTE(review): several literals below are split across lines as seen
# here — the markup between them may have been lost; confirm against the
# canonical file.
L = self._labels
return (
f'\n'
f'{html.escape(L["title"])}
\n'
f'{html.escape(L["corpus"])} : '
f'{html.escape(manifest.corpus_name)}
\n'
f'{html.escape(L["run_id"])} : '
f'{html.escape(manifest.run_id)}
\n'
f'{html.escape(L["code_version"])} : '
f'{html.escape(manifest.code_version)}
\n'
f'{html.escape(L["started_at"])} : '
f'{html.escape(manifest.started_at.isoformat())} • '
f'{html.escape(L["completed_at"])} : '
f'{html.escape(manifest.completed_at.isoformat())} • '
f'{html.escape(L["duration_seconds"])} : '
f'{manifest.duration_seconds:.3f}
\n'
f'{html.escape(L["n_documents"])} : '
f'{manifest.n_documents}
\n'
f''
)
def _render_pipelines_overview(
self,
pipeline_names: tuple[str, ...],
summaries: dict[str, "_PipelineSummary"],
) -> str:
# Build the pipelines overview: one row per entry of ``pipeline_names``
# (in the given order) showing the pipeline name, its success count, its
# failure count and its total duration with 3 decimals.
# A name without an entry in ``summaries`` still gets a row, with zero
# counts and an em dash instead of a duration. When ``pipeline_names`` is
# empty, a single placeholder row holding an em dash is used.
# Column captions come from ``self._labels``; names and captions go
# through ``html.escape``.
# NOTE(review): several literals below are empty or split across lines as
# seen here — the table markup may have been lost; confirm against the
# canonical file.
L = self._labels
rows = []
for name in pipeline_names:
s = summaries.get(name)
if s is None:
# Pipeline listed in the manifest but with no result at all (degenerate case).
rows.append(
f'| {html.escape(name)} | '
f'0 | '
f'0 | '
f'— |
',
)
continue
rows.append(
f''
f'| {html.escape(name)} | '
f'{s.n_succeeded} | '
f'{s.n_failed} | '
f'{s.duration_total:.3f} | '
f'
',
)
rows_html = "\n".join(rows) if rows else (
'| — |
'
)
return (
f'\n'
f'{html.escape(L["pipelines_overview"])}
\n'
f'\n'
f''
f'| {html.escape(L["pipeline"])} | '
f'{html.escape(L["n_succeeded"])} | '
f'{html.escape(L["n_failed"])} | '
f'{html.escape(L["duration_total"])} | '
f'
\n'
f'\n{rows_html}\n\n'
f'
\n'
f''
)
def _render_view(
self,
*,
view: EvaluationView,
view_results: tuple[ViewResult, ...],
pipeline_names: tuple[str, ...],
artifact_to_pipeline: dict[str, str],
) -> str:
# Build the report section for one evaluation view.
#
# Steps visible in this body:
#   1. aggregate ``view_results`` per pipeline and per metric with
#      ``_aggregate_view_by_pipeline``, restricted to ``view.metric_names``;
#   2. build a warnings fragment when ``view.warnings`` is non-empty;
#   3. build the header cells: the pipeline caption, one cell per metric
#      name, then the observation-count caption;
#   4. build one row per entry of ``pipeline_names``: a pipeline without
#      any aggregate gets a single ``_OMITTED_MARKER`` cell, otherwise each
#      metric shows its mean with 4 decimals (or an em dash when that
#      metric has no aggregate), followed by the largest ``n`` found among
#      the pipeline's metrics;
#   5. use the table only when at least one pipeline has aggregated data,
#      otherwise the ``no_data_for_view`` caption;
#   6. append the comma-joined ``view.ignored_dimensions`` when non-empty.
#
# Captions, names, warnings and the description go through
# ``html.escape``; numbers and ``_OMITTED_MARKER`` are interpolated as-is.
# NOTE(review): several literals below are empty or split across lines as
# seen here — the markup (which may also be where ``view_id`` and ``items``
# are consumed) may have been lost; confirm against the canonical file.
L = self._labels
view_id = html.escape(view.name)
per_pipeline = _aggregate_view_by_pipeline(
view_results=view_results,
artifact_to_pipeline=artifact_to_pipeline,
metric_names=view.metric_names,
)
warnings_html = ""
if view.warnings:
items = "\n".join(
f'{html.escape(w)}' for w in view.warnings
)
warnings_html = (
f''
)
# Header: Pipeline | metric_a | metric_b | ... | n
header_cells = [
f'{html.escape(L["pipeline"])} | ',
]
for m in view.metric_names:
header_cells.append(f'{html.escape(m)} | ')
header_cells.append(
f'{html.escape(L["n_observations"])} | ',
)
# Rows: one per pipeline of the manifest.
body_rows: list[str] = []
any_data = bool(per_pipeline)
for pipeline_name in pipeline_names:
cells = [f'{html.escape(pipeline_name)} | ']
agg = per_pipeline.get(pipeline_name)
if agg is None:
# OMITTED — rendered merged across all metric columns + n.
cells.append(
f''
f'{_OMITTED_MARKER}'
f' | ',
)
else:
# One cell per metric + the n column.
# n = max(n_observations) among the computed metrics
# (typically identical for all the metrics of a
# same view).
for m in view.metric_names:
metric_agg = agg.get(m)
if metric_agg is None:
cells.append('— | ')
else:
cells.append(
f'{metric_agg.mean:.4f} | ',
)
ns = [a.n for a in agg.values() if a is not None]
n = max(ns) if ns else 0
cells.append(f'{n} | ')
body_rows.append(f'{"".join(cells)}
')
if any_data:
table_html = (
f'{html.escape(L["results_per_pipeline"])}
\n'
f'\n'
f'{"".join(header_cells)}
\n'
f'\n' + "\n".join(body_rows) + '\n\n'
'
'
)
else:
table_html = (
f''
f'{html.escape(L["no_data_for_view"])}
'
)
ignored_html = ""
if view.ignored_dimensions:
ignored_html = (
f''
f'{html.escape(L["ignored_dimensions"])} : '
f'{html.escape(", ".join(view.ignored_dimensions))}'
f'
'
)
return (
f'\n'
f'{html.escape(L["view"])} : '
f'{html.escape(view.name)}
\n'
f''
f'{html.escape(view.description or "")}
\n'
f'{warnings_html}\n'
f'{table_html}\n'
f'{ignored_html}\n'
f''
)
def _render_footer(self, manifest: RunManifest) -> str:
# Build the trailing fragment of the report as a single string.
# NOTE(review): as seen here the literals hold only newlines and
# ``manifest`` is not referenced — the markup (and possibly a use of
# ``manifest``) may have been lost; confirm against the canonical file.
return (
f'\n'
f'\n'
f''
)
# ──────────────────────────────────────────────────────────────────────
# Helpers d'agrégation (purs, testables sans rendu)
# ──────────────────────────────────────────────────────────────────────
@dataclass(frozen=True)
class _PipelineSummary:
    """Immutable per-pipeline totals produced by ``_summarize_pipelines``."""

    # Number of pipeline results whose ``succeeded`` flag was truthy.
    n_succeeded: int
    # Number of pipeline results whose ``succeeded`` flag was falsy.
    n_failed: int
    # Sum of ``duration_seconds`` over every result of the pipeline.
    duration_total: float


def _summarize_pipelines(
    document_results: Iterable[RunDocumentResult],
) -> dict[str, _PipelineSummary]:
    """Aggregate success count, failure count and duration per pipeline.

    Parameters
    ----------
    document_results
        Per-document results; every entry of each ``pipeline_results`` is
        counted under its ``pipeline_name``.

    Returns
    -------
    dict
        ``{pipeline_name: _PipelineSummary}``. Keys appear in the order the
        pipelines are first encountered, so the result is deterministic
        for a given input (it no longer goes through a ``set``, whose
        iteration order for strings varies between interpreter runs).
    """
    # name -> [n_succeeded, n_failed, duration_total]
    tallies: dict[str, list] = {}
    for doc_result in document_results:
        for pr in doc_result.pipeline_results:
            tally = tallies.setdefault(pr.pipeline_name, [0, 0, 0.0])
            tally[0 if pr.succeeded else 1] += 1
            # Duration is accumulated for failed runs as well.
            tally[2] += pr.duration_seconds
    return {
        name: _PipelineSummary(
            n_succeeded=n_ok,
            n_failed=n_fail,
            duration_total=total,
        )
        for name, (n_ok, n_fail, total) in tallies.items()
    }
def _build_artifact_to_pipeline_map(
    document_results: Iterable[RunDocumentResult],
) -> dict[str, str]:
    """Map every artifact id to the name of the pipeline that holds it.

    Walks the ``artifacts`` of each ``PipelineResult`` of each document
    and returns ``{artifact_id: pipeline_name}``. The mapping is what lets
    a ``ViewResult.candidate_artifact_id`` be traced back to its pipeline.
    When the same artifact id shows up more than once, the last pipeline
    encountered wins.
    """
    owner_by_artifact: dict[str, str] = {}
    for document in document_results:
        for result in document.pipeline_results:
            owner_by_artifact.update(
                (artifact.id, result.pipeline_name)
                for artifact in result.artifacts
            )
    return owner_by_artifact
def _aggregate_view_by_pipeline(
    *,
    view_results: tuple[ViewResult, ...],
    artifact_to_pipeline: dict[str, str],
    metric_names: tuple[str, ...],
) -> dict[str, dict[str, _Aggregate]]:
    """Average the metric values of ``view_results`` per (pipeline, metric).

    Each ``ViewResult`` is attributed to a pipeline through its
    ``candidate_artifact_id``. A metric value is counted only when its
    name is in ``metric_names``, it is not ``None`` and ``float()``
    accepts it; everything else is skipped silently.

    Returns
    -------
    dict
        ``{pipeline_name: {metric_name: _Aggregate(mean, n)}}``.
        A pipeline that contributed no usable value is absent from the
        result (it is omitted from the view).
    """
    # pipeline -> metric -> [running sum, number of observations]
    running: dict[str, dict[str, list]] = {}
    for view_result in view_results:
        owner = artifact_to_pipeline.get(view_result.candidate_artifact_id)
        if owner is None:
            # Orphan artifact: ignored silently (odd case that should not
            # come out of BenchmarkService).
            continue
        for metric, raw in view_result.metric_values.items():
            if metric not in metric_names or raw is None:
                continue
            try:
                number = float(raw)
            except (TypeError, ValueError):
                continue
            cell = running.setdefault(owner, {}).setdefault(metric, [0.0, 0])
            cell[0] += number
            cell[1] += 1
    return {
        owner: {
            metric: _Aggregate(mean=total / count, n=count)
            for metric, (total, count) in cells.items()
        }
        for owner, cells in running.items()
    }
# Names exported by a star-import of this module: only the renderer class.
__all__ = [
    "HtmlReportRenderer",
]