"""Sections statistiques du rapport (Sprint 7 + Sprint 17).

Construit les blocs :

- ``pairwise_wilcoxon`` — tests de Wilcoxon par paire de moteurs.
- ``bootstrap_cis`` — intervalles de confiance bootstrap par moteur.
- ``friedman`` + ``nemenyi`` — Sprint 17, multi-moteurs.
- ``reliability_curves`` — courbes de fiabilité par moteur.
- ``venn_data`` — diagramme de Venn des erreurs communes/exclusives.
- ``error_clusters`` — clustering des patterns d'erreurs.
- ``correlation_per_engine`` — matrice de corrélation par moteur.
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Optional

from picarones.core.diff_utils import compute_word_diff
from picarones.measurements.statistics import (
    bootstrap_ci,
    cluster_errors,
    compute_correlation_matrix,
    compute_pairwise_stats,
    compute_reliability_curve,
    compute_venn_data,
    friedman_test,
    nemenyi_posthoc,
)
from picarones.report.report_data._helpers import safe_round

if TYPE_CHECKING:
    from picarones.core.results import BenchmarkResult


def _engine_cer_values(benchmark: "BenchmarkResult") -> dict[str, list[float]]:
    """Collect the per-document CER values of each engine.

    Documents whose metrics carry an error are ignored, and an engine
    left without any usable document is omitted from the mapping.

    Returns
    -------
    dict
        ``engine_name → [CER of each error-free document]``, every value
        being passed through ``safe_round``.
    """
    cer_by_engine: dict[str, list[float]] = {}
    for engine_report in benchmark.engine_reports:
        usable_cers: list[float] = []
        for doc_result in engine_report.document_results:
            if doc_result.metrics.error is not None:
                continue
            usable_cers.append(safe_round(doc_result.metrics.cer))
        # Engines without a single valid document get no entry at all.
        if not usable_cers:
            continue
        cer_by_engine[engine_report.engine_name] = usable_cers
    return cer_by_engine


def build_pairwise_wilcoxon(benchmark: "BenchmarkResult") -> list[dict]:
    """Build the ``pairwise_wilcoxon`` block of the report (Sprint 7).

    Gathers the error-free CER values of every engine and hands them to
    ``compute_pairwise_stats``, whose result is returned unchanged.
    """
    cer_by_engine = _engine_cer_values(benchmark)
    return compute_pairwise_stats(cer_by_engine)


def build_bootstrap_cis(benchmark: "BenchmarkResult") -> list[dict]:
    """Build the ``bootstrap_cis`` block of the report (Sprint 7).

    Returns
    -------
    list[dict]
        One entry per engine that has at least one error-free document,
        with keys ``engine``, ``mean``, ``ci_lower`` and ``ci_upper``.
        The bounds come from ``bootstrap_ci`` applied to the engine's CER
        values; all three numbers are passed through ``safe_round``.
    """

    def _ci_row(engine_name: str, cer_values: list[float]) -> dict:
        lower, upper = bootstrap_ci(cer_values)
        if cer_values:
            average = sum(cer_values) / len(cer_values)
        else:
            average = 0.0
        return {
            "engine": engine_name,
            "mean": safe_round(average),
            "ci_lower": safe_round(lower),
            "ci_upper": safe_round(upper),
        }

    return [
        _ci_row(engine_name, cer_values)
        for engine_name, cer_values in _engine_cer_values(benchmark).items()
    ]


def build_friedman_and_nemenyi(benchmark: "BenchmarkResult") -> dict:
    """Build the Friedman test and Nemenyi post-hoc blocks (Sprint 17).

    Both tests receive one CER list per engine, aligned on the same
    documents in the same order. Only documents that every engine
    processed without error are kept, ordered by first appearance across
    the engine reports.

    When no such common document exists (which includes the case of a
    benchmark without engine reports), neither test is run and placeholder
    results flagged ``"error": "no_common_documents"`` are returned.

    Returns
    -------
    dict
        ``{"friedman": {...}, "nemenyi": {...}}``, meant to be merged into
        the ``statistics`` section of the report.
    """
    # Every doc_id, deduplicated, in order of first appearance.
    doc_ids_ordered = list(dict.fromkeys(
        dr.doc_id
        for report in benchmark.engine_reports
        for dr in report.document_results
    ))

    # Per engine, index ONLY the error-free results. The common-document
    # set below is derived from this very lookup, so a doc_id that occurs
    # twice in a report (one valid entry, one failed entry) can never make
    # the failed entry's CER leak into the aligned series.
    valid_by_engine: list[tuple[str, dict]] = [
        (
            report.engine_name,
            {
                dr.doc_id: dr
                for dr in report.document_results
                if dr.metrics.error is None
            },
        )
        for report in benchmark.engine_reports
    ]

    # Documents successfully processed by every engine (None = no engine).
    common_doc_ids: Optional[set[str]] = None
    for _, valid_results in valid_by_engine:
        valid_ids = set(valid_results)
        common_doc_ids = (
            valid_ids if common_doc_ids is None else common_doc_ids & valid_ids
        )

    engine_cer_aligned: dict[str, list[float]] = {}
    if common_doc_ids:
        ordered_common = [d for d in doc_ids_ordered if d in common_doc_ids]
        for engine_name, valid_results in valid_by_engine:
            engine_cer_aligned[engine_name] = [
                safe_round(valid_results[d].metrics.cer) for d in ordered_common
            ]

    if engine_cer_aligned:
        friedman = friedman_test(engine_cer_aligned)
        nemenyi = nemenyi_posthoc(engine_cer_aligned)
    else:
        # Nothing to compare: emit neutral placeholders with the same keys
        # so that consumers of the report do not have to special-case it.
        friedman = {
            "statistic": 0.0, "p_value": 1.0, "significant": False,
            "df": 0, "n_blocks": 0, "n_engines": 0, "mean_ranks": {},
            "interpretation": "Test de Friedman non calculé — aucun document commun.",
            "error": "no_common_documents",
        }
        nemenyi = {
            "alpha": 0.05, "critical_distance": 0.0, "q_alpha": 0.0,
            "n_blocks": 0, "n_engines": 0, "mean_ranks": {},
            "engines_sorted": [], "significant_matrix": [], "tied_groups": [],
            "error": "no_common_documents",
        }
    return {"friedman": friedman, "nemenyi": nemenyi}


def build_reliability_curves(benchmark: "BenchmarkResult") -> list[dict]:
    """Build the ``reliability_curves`` block of the report (Sprint 7).

    Returns
    -------
    list[dict]
        One ``{"engine": ..., "points": ...}`` entry per engine report,
        where ``points`` is the output of ``compute_reliability_curve`` on
        the CER values of the engine's error-free documents. Engines with
        no valid document are still listed (the curve is then computed
        from an empty list).
    """
    curves: list[dict] = []
    for engine_report in benchmark.engine_reports:
        points = compute_reliability_curve([
            safe_round(doc_result.metrics.cer)
            for doc_result in engine_report.document_results
            if doc_result.metrics.error is None
        ])
        curves.append(dict(engine=engine_report.engine_name, points=points))
    return curves


def build_venn_data(benchmark: "BenchmarkResult") -> dict:
    """Build the ``venn_data`` block: shared vs. exclusive errors (Sprint 7).

    For every engine, the word-level diff between ground truth and
    hypothesis of each document is scanned and every ``replace``,
    ``delete`` or ``insert`` operation becomes one string key of the form
    ``"doc_id:gt_tok:hyp_tok"``. The resulting
    ``{engine → set(keys)}`` mapping is handed to ``compute_venn_data``.
    """
    errors_by_engine: dict[str, set[str]] = {}
    for engine_report in benchmark.engine_reports:
        engine_errors: set[str] = set()
        for doc_result in engine_report.document_results:
            diff_ops = compute_word_diff(doc_result.ground_truth, doc_result.hypothesis)
            for diff_op in diff_ops:
                if diff_op["op"] not in ("replace", "delete", "insert"):
                    continue
                # Operations without "old"/"new" fall back to their "text".
                fallback_tok = diff_op.get("text", "")
                gt_tok = diff_op.get("old", fallback_tok)
                hyp_tok = diff_op.get("new", fallback_tok)
                engine_errors.add(f"{doc_result.doc_id}:{gt_tok}:{hyp_tok}")
        errors_by_engine[engine_report.engine_name] = engine_errors
    return compute_venn_data(errors_by_engine)


def build_error_clusters(benchmark: "BenchmarkResult") -> list[dict]:
    """Build the ``error_clusters`` block of the report (Sprint 7).

    Every document result of every engine contributes one
    ``{"engine", "gt", "hypothesis"}`` sample; the samples are clustered
    by ``cluster_errors`` (at most 8 clusters requested) and each cluster
    is serialised through its ``as_dict()`` method.
    """
    samples: list[dict] = [
        {
            "engine": engine_report.engine_name,
            "gt": doc_result.ground_truth,
            "hypothesis": doc_result.hypothesis,
        }
        for engine_report in benchmark.engine_reports
        for doc_result in engine_report.document_results
    ]
    clusters = cluster_errors(samples, max_clusters=8)
    return [cluster.as_dict() for cluster in clusters]


def build_correlation_per_engine(benchmark: "BenchmarkResult") -> list[dict]:
    """Build the ``correlation_per_engine`` block of the report (Sprint 7).

    For each engine, one row of metrics is assembled per error-free
    document and the rows are passed to ``compute_correlation_matrix``.

    Returns
    -------
    list[dict]
        One entry per engine having at least one error-free document:
        ``{"engine": name, **correlation_result}``.
    """

    def _metrics_row(doc_result) -> dict[str, float]:
        metrics = doc_result.metrics
        row: dict[str, float] = {
            "cer": safe_round(metrics.cer),
            "wer": safe_round(metrics.wer),
            "mer": safe_round(metrics.mer),
            "wil": safe_round(metrics.wil),
        }
        # Optional columns: only present when the document carries the
        # corresponding data; a missing individual score defaults to 0.5.
        quality = doc_result.image_quality
        if quality:
            row["quality_score"] = safe_round(quality.get("quality_score", 0.5))
            row["sharpness"] = safe_round(quality.get("sharpness_score", 0.5))
        char_scores = doc_result.char_scores
        if char_scores:
            row["ligature"] = safe_round(char_scores.get("ligature", {}).get("score", 0.5))
            row["diacritic"] = safe_round(char_scores.get("diacritic", {}).get("score", 0.5))
        return row

    matrices: list[dict] = []
    for engine_report in benchmark.engine_reports:
        rows = [
            _metrics_row(doc_result)
            for doc_result in engine_report.document_results
            if doc_result.metrics.error is None
        ]
        if not rows:
            continue
        matrix = compute_correlation_matrix(rows)
        matrices.append({"engine": engine_report.engine_name, **matrix})
    return matrices


# Public API: one ``build_*`` entry point per statistics block of the report.
# The ``_engine_cer_values`` helper is deliberately not exported.
__all__ = [
    "build_pairwise_wilcoxon",
    "build_bootstrap_cis",
    "build_friedman_and_nemenyi",
    "build_reliability_curves",
    "build_venn_data",
    "build_error_clusters",
    "build_correlation_per_engine",
]