"""Calcul des métriques CER et WER via jiwer.

Métriques implémentées
----------------------
- CER brut                : distance d'édition caractère / longueur GT
- CER normalisé NFC       : après normalisation Unicode NFC
- CER sans casse          : insensible aux majuscules/minuscules
- CER diplomatique        : après application d'une table de correspondances
                            historiques (ſ=s, u=v, i=j…) — configurable
- WER brut                : word error rate standard
- WER normalisé           : après normalisation des espaces
- MER                     : Match Error Rate (jiwer)
- WIL                     : Word Information Lost (jiwer)
"""

from __future__ import annotations

import logging
import unicodedata
from dataclasses import dataclass
from typing import Optional

logger = logging.getLogger(__name__)

try:
    import jiwer

    _JIWER_AVAILABLE = True
except ImportError:
    _JIWER_AVAILABLE = False


# ---------------------------------------------------------------------------
# Transformations / normalisations
# ---------------------------------------------------------------------------

def _normalize_nfc(text: str) -> str:
    return unicodedata.normalize("NFC", text)


def _normalize_caseless(text: str) -> str:
    return unicodedata.normalize("NFC", text).casefold()


def _normalize_whitespace(text: str) -> str:
    return " ".join(text.split())


# jiwer transformation pipeline for WER (light whitespace normalisation).
# Left as None when jiwer could not be imported.
if _JIWER_AVAILABLE:
    _WER_TRANSFORM = jiwer.transforms.Compose(
        [
            jiwer.transforms.RemoveMultipleSpaces(),
            jiwer.transforms.Strip(),
            jiwer.transforms.ReduceToListOfListOfWords(),
        ]
    )
else:
    _WER_TRANSFORM = None


def _cer_from_strings(reference: str, hypothesis: str) -> float:
    """CER brut : distance d'édition sur les caractères."""
    if not reference:
        return 0.0 if not hypothesis else 1.0
    return jiwer.cer(reference, hypothesis)


# ---------------------------------------------------------------------------
# Résultat structuré
# ---------------------------------------------------------------------------

@dataclass
class MetricsResult:
    """All metrics computed for one (reference, hypothesis) pair.

    Rates are stored unrounded; ``as_dict`` rounds them to 6 decimals and
    the ``*_percent`` properties expose CER/WER as percentages rounded to
    2 decimals.
    """

    cer: float
    cer_nfc: float
    cer_caseless: float
    wer: float
    wer_normalized: float
    mer: float
    wil: float
    reference_length: int
    hypothesis_length: int
    # Error message attached to the result, or None.
    error: Optional[str] = None
    # CER after diplomatic normalisation (ſ=s, u=v, i=j…); None when no
    # diplomatic CER was computed.
    cer_diplomatic: Optional[float] = None
    # Name of the diplomatic normalisation profile that was used.
    diplomatic_profile_name: Optional[str] = None

    def as_dict(self) -> dict:
        """Return the result as a plain dict with rates rounded to 6 decimals.

        The ``cer_diplomatic`` and ``diplomatic_profile_name`` keys are only
        present when ``cer_diplomatic`` is not None.
        """
        rate_fields = (
            "cer",
            "cer_nfc",
            "cer_caseless",
            "wer",
            "wer_normalized",
            "mer",
            "wil",
        )
        payload = {name: round(getattr(self, name), 6) for name in rate_fields}
        payload["reference_length"] = self.reference_length
        payload["hypothesis_length"] = self.hypothesis_length
        payload["error"] = self.error
        if self.cer_diplomatic is None:
            return payload
        payload["cer_diplomatic"] = round(self.cer_diplomatic, 6)
        payload["diplomatic_profile_name"] = self.diplomatic_profile_name
        return payload

    @property
    def cer_percent(self) -> float:
        """CER expressed as a percentage, rounded to 2 decimals."""
        percentage = self.cer * 100
        return round(percentage, 2)

    @property
    def wer_percent(self) -> float:
        """WER expressed as a percentage, rounded to 2 decimals."""
        percentage = self.wer * 100
        return round(percentage, 2)


def compute_metrics(
    reference: str,
    hypothesis: str,
    normalization_profile: "Optional[NormalizationProfile]" = None,  # noqa: F821
    char_exclude: "Optional[frozenset]" = None,
) -> MetricsResult:
    """Compute every CER/WER metric for one (reference, hypothesis) pair.

    Parameters
    ----------
    reference:
        Ground-truth text.
    hypothesis:
        Text produced by the OCR engine. ``None`` is treated as an empty
        string.
    normalization_profile:
        Diplomatic normalisation profile used to compute ``cer_diplomatic``.
        When omitted (or falsy), ``DEFAULT_DIPLOMATIC_PROFILE`` from
        ``picarones.core.normalization`` is used instead.
    char_exclude:
        Characters removed from both texts before anything else: the
        empty-hypothesis check, every metric (CER, WER, MER, WIL, diplomatic
        CER) and the reported lengths all see the filtered texts.

    Returns
    -------
    MetricsResult
        The computed metrics. If jiwer is not installed, or if the metric
        computation raises, all rates are 0.0 and ``error`` carries the
        message. If the reference has content but the hypothesis is empty or
        whitespace-only, all rates are 1.0. A failure limited to the
        diplomatic CER is logged and leaves ``cer_diplomatic`` set to None.
    """
    # The emptiness guard has always tolerated a missing hypothesis; coerce it
    # once so the fallback paths (which call len()) are safe as well.
    if hypothesis is None:
        hypothesis = ""

    if not _JIWER_AVAILABLE:
        return MetricsResult(
            cer=0.0, cer_nfc=0.0, cer_caseless=0.0,
            wer=0.0, wer_normalized=0.0, mer=0.0, wil=0.0,
            reference_length=len(reference),
            hypothesis_length=len(hypothesis),
            error="jiwer n'est pas installé (pip install jiwer)",
        )

    try:
        # Character exclusion comes first so that a hypothesis made only of
        # excluded characters is caught by the emptiness guard below.
        if char_exclude:
            reference = "".join(c for c in reference if c not in char_exclude)
            hypothesis = "".join(c for c in hypothesis if c not in char_exclude)

        # Non-empty reference with an empty hypothesis is a total error; it is
        # short-circuited here rather than handed to jiwer with empty input.
        if reference.strip() and not hypothesis.strip():
            return MetricsResult(
                cer=1.0, cer_nfc=1.0, cer_caseless=1.0,
                wer=1.0, wer_normalized=1.0, mer=1.0, wil=1.0,
                reference_length=len(reference),
                hypothesis_length=0,
            )

        # CER variants
        cer_raw = _cer_from_strings(reference, hypothesis)
        cer_nfc = _cer_from_strings(
            _normalize_nfc(reference), _normalize_nfc(hypothesis)
        )
        cer_caseless = _cer_from_strings(
            _normalize_caseless(reference), _normalize_caseless(hypothesis)
        )

        # Word-level metrics; the "normalized" WER collapses all whitespace
        # runs to single spaces first.
        ref_norm = _normalize_whitespace(reference)
        hyp_norm = _normalize_whitespace(hypothesis)

        wer_raw = jiwer.wer(reference, hypothesis)
        wer_normalized = jiwer.wer(ref_norm, hyp_norm)
        mer = jiwer.mer(reference, hypothesis)
        wil = jiwer.wil(reference, hypothesis)

        # Diplomatic CER is best-effort: any failure is logged and leaves the
        # standard metrics untouched.
        cer_diplomatic: Optional[float] = None
        diplomatic_profile_name: Optional[str] = None
        try:
            profile = normalization_profile
            if not profile:
                # Only import the default profile when it is actually needed,
                # so a caller-supplied profile does not depend on it.
                from picarones.core.normalization import DEFAULT_DIPLOMATIC_PROFILE
                profile = DEFAULT_DIPLOMATIC_PROFILE
            ref_diplo = profile.normalize(reference)
            hyp_diplo = profile.normalize(hypothesis)
            cer_diplomatic = _cer_from_strings(ref_diplo, hyp_diplo)
            diplomatic_profile_name = profile.name
        except Exception as e:  # noqa: BLE001
            logger.warning("[metrics] CER diplomatique dégradé : %s", e)

        return MetricsResult(
            cer=cer_raw,
            cer_nfc=cer_nfc,
            cer_caseless=cer_caseless,
            wer=wer_raw,
            wer_normalized=wer_normalized,
            mer=mer,
            wil=wil,
            reference_length=len(reference),
            hypothesis_length=len(hypothesis),
            cer_diplomatic=cer_diplomatic,
            diplomatic_profile_name=diplomatic_profile_name,
        )

    except Exception as exc:  # noqa: BLE001
        # Deliberate catch-all: one failing document must not abort a batch.
        logger.warning("[metrics] calcul métriques échoué : %s", exc)
        return MetricsResult(
            cer=0.0, cer_nfc=0.0, cer_caseless=0.0,
            wer=0.0, wer_normalized=0.0, mer=0.0, wil=0.0,
            reference_length=len(reference),
            hypothesis_length=len(hypothesis),
            error=str(exc),
        )


def aggregate_metrics(results: list[MetricsResult]) -> dict:
    """Aggregate per-document results into corpus-level statistics.

    Parameters
    ----------
    results:
        One MetricsResult per document.

    Returns
    -------
    dict
        Empty when *results* is empty. Otherwise, for each metric, a dict
        with ``mean``, ``median``, ``min``, ``max`` and ``stdev`` (rounded to
        6 decimals) computed over the results whose ``error`` is None, plus
        ``document_count`` and ``failed_count``. A ``cer_diplomatic`` entry
        is added only when at least one successful result carries a
        diplomatic CER.
    """
    import statistics

    if not results:
        return {}

    def _summarize(sample: list[float]) -> dict:
        # No usable value: report an empty summary instead of raising.
        if not sample:
            return {}
        if len(sample) > 1:
            spread = round(statistics.stdev(sample), 6)
        else:
            # Sample standard deviation is undefined for a single value.
            spread = 0.0
        return {
            "mean": round(statistics.mean(sample), 6),
            "median": round(statistics.median(sample), 6),
            "min": round(min(sample), 6),
            "max": round(max(sample), 6),
            "stdev": spread,
        }

    # Only results without an error contribute to the statistics.
    successful = [item for item in results if item.error is None]

    summary: dict = {}
    for name in (
        "cer",
        "cer_nfc",
        "cer_caseless",
        "wer",
        "wer_normalized",
        "mer",
        "wil",
    ):
        summary[name] = _summarize([getattr(item, name) for item in successful])

    # Diplomatic CER is optional: included only if some result carries it.
    diplomatic = [
        item.cer_diplomatic
        for item in successful
        if item.cer_diplomatic is not None
    ]
    if diplomatic:
        summary["cer_diplomatic"] = _summarize(diplomatic)
        # Report the first non-empty profile name found in the results.
        first_profile = None
        for item in results:
            if item.diplomatic_profile_name:
                first_profile = item.diplomatic_profile_name
                break
        if first_profile:
            summary["cer_diplomatic"]["profile"] = first_profile

    summary["document_count"] = len(results)
    summary["failed_count"] = len(results) - len(successful)

    return summary


# Type-checking-only import: never executed at runtime, which avoids a circular import
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from picarones.core.normalization import NormalizationProfile