# app.py — Traductor Español ↔ Neoíbero (BI-ONLY 1:1 estricto, determinista)
# UI completa + CSS "íbero" + TTS + Línea ibérica (codificación appOld)
# Requiere un ÚNICO CSV con superficies exactas (UTF-8) y columnas:
#   - source_es (o es/es_surface)
#   - target_ni (o ni/ni_surface)
#   - pair_id (opcional)
#
# El motor mantiene 1:1 exacto por superficie, pero aplica heurísticas ligeras ES→NI para desambiguar homógrafos nombre/verbo e infinitivos aislados.
# Puntuación y números pasan tal cual. Desconocidos -> [SIN-LEX:...] / [?:...]
# Determinismo NI→ES: entradas NI duplicadas (ambiguas) quedan bloqueadas y se muestran como [AMB-NI:...]
#
# v94 (2026-05-09): Revertido el sistema simple v93 a favor del sistema enclítico
# convencional. Se asume que el lex contiene las formas con clíticos pegados al
# verbo (parche 059 con convención corta: "buŕdam-ka-nei", "onkot-ka-iki", etc.).
# El motor respeta las palabras enteras del lex y solo descompone si NO están.

import gradio as gr
import os, csv, re, base64, unicodedata, gzip
import torch
from transformers import AutoProcessor, VitsModel
import numpy as np
from html import escape

# ====== cache ======
# Default the Hugging Face cache locations to /tmp, keeping any value the
# environment already provides.
# NOTE(review): these are set after `transformers` has been imported above;
# confirm the library still honours values assigned this late.
os.environ.setdefault('TRANSFORMERS_CACHE', '/tmp/cache')
os.environ.setdefault('HF_HOME', '/tmp/hf')

DEBUG_MODE = False


def debug_print(msg):
    """Print *msg* prefixed with ``[DEBUG]`` when ``DEBUG_MODE`` is truthy."""
    if not DEBUG_MODE:
        return
    print(f"[DEBUG] {msg}")

# ====== util ======
def _open_maybe_gzip(path):
    if str(path).endswith(".gz"):
        # CSV debe venir en UTF-8 (evita mojibake)
        return gzip.open(path, "rt", encoding="utf-8", newline="")
    return open(path, "r", encoding="utf-8", newline="")

def norm(x):
    """Return ``str(x)`` stripped of surrounding whitespace; ``None`` gives ``""``."""
    if x is None:
        return ""
    return str(x).strip()


def lower(x):
    """Return the lowercased ``norm(x)``."""
    return norm(x).lower()
def fold(s:str)->str:
    """Return *s* with combining marks removed after NFD decomposition.

    A falsy *s* (``None`` or ``""``) yields ``""``. Characters whose Unicode
    category is ``Mn`` are dropped; everything else is kept as decomposed.
    """
    decomposed = unicodedata.normalize('NFD', s or "")
    kept = filter(lambda ch: unicodedata.category(ch) != "Mn", decomposed)
    return ''.join(kept)

# ====== rutas ======
def _cand(*names):
    for n in names:
        if os.path.exists(n): return n
        p = os.path.join("salida", n)
        if os.path.exists(p): return p
    return names[0]  # último recurso para mensajes

# Lexicon file candidates in priority order: newest "master/surface-ready"
# files first, then backward-compatible ones.
_LEXICON_CANDIDATES = (
    "LEXICON_v152_IBERIAN.csv.gz",
    "LEXICON_v86_IBERIAN.csv.gz",
    "LEXICON_v85_IBERIAN.csv.gz",
    "LEXICON_v84_IBERIAN.csv.gz",
    "LEXICON_v83_IBERIAN.csv.gz",
    "LEXICON_v82_IBERIAN.csv.gz",
    "LEXICON_v81_IBERIAN.csv.gz",
    "LEXICON_v80_IBERIAN.csv.gz",
    "LEXICON_v79_IBERIAN.csv.gz",
    "LEXICON_v78_IBERIAN.csv.gz",
    "LEXICON_v77_IBERIAN.csv.gz",
    "LEXICON_v76_IBERIAN.csv.gz",
    "LEXICON_v75_IBERIAN.csv.gz",
    "LEXICON_v74_IBERIAN.csv.gz",
    "LEXICON_v73_IBERIAN.csv.gz",
    "LEXICON_v72_IBERIAN.csv.gz",
    "LEXICON_v71_IBERIAN.csv.gz",
    "LEXICON_v70_IBERIAN.csv.gz",
    "LEXICON_v68_IBERIAN.csv.gz",
    "LEXICON_v67_IBERIAN.csv.gz",
    "LEXICON_v66_IBERIAN.csv.gz",
    "LEXICON_v65_IBERIAN.csv.gz",
    "LEXICON_v64_IBERIAN.csv.gz",
    "LEXICON_v63_IBERIAN.csv.gz",
    "LEXICON_v60_FINAL.csv.gz",
    "LEXICON_v59_PATCHED.csv.gz",
    "LEXICON_UNICO_1a1_v43_all_verbs.csv.gz",
    "LEXICON_UNICO_1a1_v42_verbs_fix.csv.gz",
    "LEXICON_UNICO_1a1_v41_family_fix.csv.gz",
    "LEXICON_UNICO_1a1_v40_accent_fix.csv.gz",
    "MASTER_SURFACE_READY.csv.gz",
    "MASTER_REEXTENDED.csv.gz",
    "BI_SURFACE_READY.csv.gz",
    "HF_Pairs_BI_REEXTENDED.csv.gz",
    "HF_Pairs_BI_EXPANDED1_EXTENDED_FILLED.csv.gz",
    "HF_Pairs_BI_EXPANDED1.csv.gz",
)

# Path of the first candidate that exists (or the first name when none does).
CSV_BI = _cand(*_LEXICON_CANDIDATES)

# ====== strict BI structures ======
# Key = exact surface form in lowercase. Value = (original opposite-side surface, pair_id)
ES2NI = {}   # es_surface_lower -> (ni_surface, pair_id)
NI2ES = {}   # ni_surface_lower -> (es_surface, pair_id)
ES2NI_VERB = {}  # verbal alternative used when ES2NI holds a noun
ES2NI_POS = {}   # es_surface_lower -> POS of the main entry in ES2NI
ES2NI_MORPH = {} # es_surface_lower -> morphology (PRS, PST, IMP, FUT, etc.)

# N-grams / phrases:
ESPHRASE2NI = {}
NIPHRASE2ES = {}
MAX_NGRAM = 3

# fold -> canonical maps (filled in load_bi_strict_and_diagnose)
ES_FOLD = {}   # fold("carne") -> "carne" (but also fold("carné") -> "carne")
NI_FOLD = {}

# ====== signos / tokenización mínima ======
# v99: internal marker that preserves the user's line breaks without letting
# them split sentences during processing. It is treated as transparent
# punctuation (it is neither a SENT_END nor a CLAUSE_BREAK), so no subsystem
# (modality, capitalisation, context) reads it as a closing mark. At the end
# of the translator it is converted back to \n.
NEWLINE_TOK = "⏎"

# Characters treated as visible punctuation tokens, plus the newline marker.
VISIBLE_PUNCT = {*",.;:!?¡¿…()[]{}\"'«»—–“”‘’", NEWLINE_TOK}
# Digits, optionally followed by one ".", "," or ":" separator and more digits.
_num_re = re.compile(r"^\d+([.,:]\d+)?$")


def is_number(tok:str)->bool:
    """Return True when *tok* is entirely a number such as 12, 3.14, 12,50 or 18:30."""
    return _num_re.fullmatch(tok or "") is not None

# --- digits to Neo-Iberian numerals (vigesimal base) ---
_NI_UNITS = {0:'',1:'ban',2:'bi',3:'irur',4:'laur',5:'borste',
             6:'sei',7:'sisbi',8:'sorse',9:'bedar',10:'abar'}
_NI_TWENTIES = {1:'orkei',2:'binorkei',3:'irurokei',4:'laurokei'}

def digit_to_ni(tok:str)->str:
    """Convert an integer given as a string to its Neo-Iberian numeral.

    Values from 1 to 999 are spelled out; anything else (non-integers,
    zero, negatives, values above 999) is returned unchanged.
    """
    try:
        value = int(tok)
    except (ValueError, TypeError):
        return tok
    if value <= 0 or value > 999:
        return tok  # out of the supported range: pass through

    if value >= 100:
        hundreds, rest = divmod(value, 100)
        head = "atun" if hundreds == 1 else f"{_NI_UNITS[hundreds]}-atun"
        if rest == 0:
            return head
        return f"{head}-ke-{digit_to_ni(str(rest))}"

    if value <= 10:
        return _NI_UNITS[value]
    if value < 20:
        return f"abar-ke-{_NI_UNITS[value - 10]}"

    # 20..99: count in twenties, then add the 0..19 remainder.
    score, rest = divmod(value, 20)
    head = _NI_TWENTIES.get(score, tok)
    if rest == 0:
        return head
    if rest < 10:
        return f"{head}-ke-{_NI_UNITS[rest]}"
    if rest == 10:
        return f"{head}-abar"
    return f"{head}-abar-ke-{_NI_UNITS[rest - 10]}"

# --- clause separators + atomic placeholders ---
CLAUSE_BREAKS = {",", ";", "—", "–", ":"}
# A whole token of the form "[...]" with at least one character inside.
PLACEHOLDER_RE = re.compile(r"^\[[^\]]+\]$")


def is_placeholder(tok: str) -> bool:
    """Return True when *tok* is a single bracketed placeholder such as ``[SIN-LEX:x]``."""
    return PLACEHOLDER_RE.match(tok or "") is not None

def _restore_brk(tok, protected):
    m = re.fullmatch(r"__BRK(\d+)__(?:-(na|ba))?", tok or "")
    if not m: return tok
    idx = int(m.group(1))
    suf = m.group(2)
    base = protected[idx] if 0 <= idx < len(protected) else tok
    return base + (f"-{suf}" if suf else "")

def simple_tokenize(text:str):
    """Minimal tokenizer that keeps ``[ ... ]`` and ``[ ... ]-na/-ba`` intact.

    Bracketed spans and decimal/time numbers (``12,50``, ``3.14``, ``18:30``)
    are swapped for ``__BRKn__`` markers so the punctuation pass cannot split
    them, then swapped back. Every marker is restored wherever it ends up
    inside a token, so text glued to a protected span (``18:30h``, ``v1.2``,
    ``[X]-na``) comes back whole instead of leaking a raw marker.

    Returns a list of token strings; a falsy *text* gives ``[]``.
    """
    if not text:
        return []
    protected = []

    def _protect(m):
        protected.append(m.group(0))
        return f"__BRK{len(protected) - 1}__"

    def _unprotect(m):
        idx = int(m.group(1))
        # A marker we did not create (literal user text) is left untouched.
        return protected[idx] if idx < len(protected) else m.group(0)

    t = re.sub(r"\[[^\]]*\]", _protect, (text or "").strip())
    # Protect decimal numbers (12,50 / 3.14) and times (18:30) as atomic
    # tokens so they are not broken when punctuation gets padded with spaces.
    t = re.sub(r"\d+[.,:]\d+", _protect, t)
    t = re.sub(r"\s+"," ", t)
    t = re.sub(r"([,.;:!?¡¿…()\[\]{}\"'«»—–“”‘’])", r" \1 ", t)
    return [
        re.sub(r"__BRK(\d+)__", _unprotect, tok) if "__BRK" in tok else tok
        for tok in t.split()
    ]

# Pronombres enclíticos del español (ordenados de más largo a más corto
# para evitar que "lo" haga match antes que "los").
_ENCLITICS = ('los','las','les','nos','me','te','lo','la','le','se','os')
_ACCENTED_VOWELS = str.maketrans('áéíóú', 'aeiou')

def _strip_accents(s):
    return s.translate(_ACCENTED_VOWELS)

def expand_enclitics(toks):
    """
    Split verb + enclitic-pronoun forms into separate tokens when ES2NI does
    NOT contain the combined form. If the whole form IS in the lexicon
    (thanks to patches 044/053/059), it is respected and left intact, which
    preserves the strict 1:1 mapping those patches promise.

    Splitting is only a fallback for rare words that did not make it into
    the enclitic patches. In that case two (or three) plain tokens are
    produced, without special markers, for the engine to translate
    separately.

    Fallback examples given by the original author (actual results depend
    on the loaded lexicon):
      'ayudarme'  → ['ayudar', 'me']
      'tocarlo'   → ['tocar', 'lo']
      'ayudándome'→ ['ayudando', 'me']
      'dárselo'   → ['darse', 'lo']

    Returns *toks* itself when it is empty or when ES2NI / ES2NI_POS are
    empty; otherwise a new list.
    """
    if not toks:
        return toks
    if not ES2NI or not ES2NI_POS:
        return toks

    def _stem_is_verb_or_has_infinitive(stem):
        """Is this stem a known verb, or does it have an infinitive in the lexicon?"""
        if stem not in ES2NI:
            pass
        elif ES2NI_POS.get(stem, "") == "V":
            return True
        # Try completing the stem to an infinitive.
        for suf in ("r", "er", "ir"):
            inf_candidate = stem + suf
            if inf_candidate in ES2NI and ES2NI_POS.get(inf_candidate, "") == "V":
                return True
        # Undo the last ue→o / ie→e diphthong, drop a final a/e, and retry.
        for diph, base in (("ue", "o"), ("ie", "e")):
            idx = stem.rfind(diph)
            if idx < 0:
                continue
            stem_undiph = stem[:idx] + base + stem[idx+2:]
            stem_root = stem_undiph
            if stem_root and stem_root[-1] in "ae":
                stem_root = stem_root[:-1]
            for suf in ("ar", "er", "ir"):
                inf_candidate = stem_root + suf
                if inf_candidate in ES2NI and ES2NI_POS.get(inf_candidate, "") == "V":
                    return True
        return False

    out = []
    for tok in toks:
        tok_l = tok.lower()
        # v94 — If the whole word is in the lexicon, respect it.
        # Splitting is not forced (unlike v93).
        if not tok_l.isalpha() or tok_l in ES2NI:
            out.append(tok)
            continue
        # Same check with the acute accents removed.
        tok_noacc = _strip_accents(tok_l)
        if tok_noacc != tok_l and tok_noacc in ES2NI:
            out.append(tok)
            continue

        # split becomes (stem, first_clitic, second_clitic_or_None) on success.
        split = None
        for clit in _ENCLITICS:
            if not tok_l.endswith(clit):
                continue
            stem = tok_l[:-len(clit)]
            if len(stem) < 2:
                continue
            if len(stem) == 2 and stem not in ES2NI:
                continue
            # v124b: if the stem matches an existing word that is NOT a verb
            # (N, ADJ, DET, PRON, etc.), do NOT split. Pathological case:
            #   "estela" → stem="este" (demonstrative DET) + "la"
            #   "comprobandolas" carried the same risk if "comprobando" were N (it is not)
            # Only split when the stem is CLEARLY a verb (POS=V).
            if stem in ES2NI:
                stem_pos = ES2NI_POS.get(stem, "")
                if stem_pos and stem_pos != "V":
                    # stem is N/ADJ/DET/PRON/etc. Do NOT split.
                    continue
            # Try the stem as is
            if _stem_is_verb_or_has_infinitive(stem):
                split = (stem, clit, None)
                break
            # Try the stem without accents
            stem_noacc = _strip_accents(stem)
            if stem_noacc != stem and _stem_is_verb_or_has_infinitive(stem_noacc):
                split = (stem_noacc, clit, None)
                break
            # Try a double clitic: the stem itself also ends in a clitic.
            # v124: block stems shorter than 3 characters to avoid the critical bug
            # "estela" → stem="este" (.endswith "te") → stem2="es" (V "ser") → ('es','te','la')
            # Previously stem2="es" was allowed if it was in ES2NI. Now >=3 chars.
            for clit2 in _ENCLITICS:
                if not stem.endswith(clit2):
                    continue
                stem2 = stem[:-len(clit2)]
                if len(stem2) < 3:
                    continue
                if _stem_is_verb_or_has_infinitive(stem2):
                    split = (stem2, clit2, clit)
                    break
                stem2_noacc = _strip_accents(stem2)
                if stem2_noacc != stem2 and _stem_is_verb_or_has_infinitive(stem2_noacc):
                    split = (stem2_noacc, clit2, clit)
                    break
            if split:
                break

        if split:
            stem, clit1, clit2 = split
            # The stem was taken from the lowercased token; restore an
            # initial capital if the original token had one.
            if tok[0].isupper():
                stem = stem[0].upper() + stem[1:]
            # v94 — Plain tokens, no special markers.
            # The engine translates them separately through the lexicon.
            out.append(stem)
            out.append(clit1)
            if clit2:
                out.append(clit2)
        else:
            out.append(tok)
    return out

def detokenize(tokens):
    """Join *tokens* with spaces and tidy the spacing around punctuation.

    - no space before ``, . ; : ! ?``
    - no space before an ellipsis that directly follows a word
    - no space after ``¿ ¡``
    - no space just inside parentheses
    - no space after the openers ``« “ ‘ [`` nor before the closers
      ``» ” ’ ]`` (v110; the straight quote is ambiguous and left alone)

    Returns the resulting string, stripped and with runs of whitespace
    collapsed.
    """
    s = " ".join(tokens)
    s = re.sub(r"\s+([,.;:!?])", r"\1", s)
    # The tokenizer emits "…" as its own token; glue it back onto the word
    # it follows. An ellipsis that follows other punctuation is left as is.
    s = re.sub(r"(?<=\w)\s+…", "…", s)
    s = re.sub(r"([¿¡])\s+", r"\1", s)
    s = re.sub(r"\(\s+", "(", s)
    s = re.sub(r"\s+\)", ")", s)
    s = re.sub(r"([«“‘\[])\s+", r"\1", s)
    s = re.sub(r"\s+([»”’\]])", r"\1", s)
    s = re.sub(r"\s{2,}", " ", s).strip()
    return s

# Unambiguous verb + enclitic pairs. After NI→ES, "ayudarme" arrives as
# "ayudar me" (separated), because the token was split when expanding
# enclitics in ES→NI. These patterns are used to re-join unambiguous forms.
# Clitics that cannot be confused with an article.
_SAFE_CLITICS_RE = r"(me|te|nos|os|se)"
# Clitics that are also articles/pronouns; fused only in a safe right context.
_AMBIG_CLITICS_RE = r"(lo|la|le|los|las|les)"
# Right-hand context (used inside a lookahead) after which an ambiguous
# clitic is considered safe to fuse.
_SAFE_AFTER_AMBIG = (
    r"(?:\s*[.,;:!?)\]»\"”—–]"
    r"|\s+(?:y|o|pero|sino|aunque|mientras|porque|si|cuando|donde|que|"
    r"también|tampoco|ya|no|todavía|después|antes|ahora|luego|aquí|allí|"
    r"ahí|así|sólo|solo|nunca|jamás|siempre|"
    r"bien|mal|mucho|poco|muy|más|menos|tan|todo|todos|nada|algo|"
    r"hoy|ayer|mañana|pronto|tarde|"
    r"fuerte|fuertemente|suavemente|fijamente|atentamente|"
    # v124: definite articles ("acariciándole el pelo")
    r"el|la|los|las|"
    # v124: common prepositions ("perderlo de vista")
    r"de|en|a|por|para|con|sin|sobre|bajo|tras|entre|hasta|hacia|desde|"
    r"un|una|algún|alguna)\b"
    # v128: generic adverb in -mente. Case "abrazando los efusivamente"
    # → "abrazándolos efusivamente". The previous pattern only covered 4
    # specific adverbs; this one catches any of them.
    r"|\s+[a-záéíóúñü]+mente\b"
    r"|\s+otra\s+vez\b"
    r"|\s+otras\s+veces\b"
    # v124: placeholder [SIN-LEX:...] / [AMB-NI:...] / [?:...]
    r"|\s+\["
    r"|\s+(?:cada|todos\s+los|todas\s+las)\s+(?:\d+\s+|(?:dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|dieciséis|diecisiete|dieciocho|diecinueve|veinte|treinta|cuarenta|cincuenta|sesenta|setenta|ochenta|noventa|cien|mil|pocos|pocas|muchos|muchas|tantos|tantas|varios|varias|cuantos|cuantas|algunos|algunas)\s+)?(?:día|días|mañana|mañanas|tarde|tardes|noche|noches|mes|meses|año|años|semana|semanas|hora|horas|momento|momentos|vez|veces)\b"
    r"|\s*$|\s*\n)"
)
# Infinitive (-ar/-er/-ir) followed by a safe clitic.
_FUSE_INF_SAFE_RE = re.compile(
    r"\b([a-záéíóúñü]+(?:ar|er|ir))\s+" + _SAFE_CLITICS_RE + r"\b",
    re.IGNORECASE)
# Infinitive followed by an ambiguous clitic in a safe right context.
_FUSE_INF_AMBIG_RE = re.compile(
    r"\b([a-záéíóúñü]+(?:ar|er|ir))\s+" + _AMBIG_CLITICS_RE + r"(?=" + _SAFE_AFTER_AMBIG + r")",
    re.IGNORECASE)
# Gerund (-ando/-iendo/-yendo) followed by a safe clitic.
_FUSE_GER_SAFE_RE = re.compile(
    r"\b([a-záéíóúñü]+(?:ando|iendo|yendo))\s+" + _SAFE_CLITICS_RE + r"\b",
    re.IGNORECASE)
# Gerund followed by an ambiguous clitic in a safe right context.
_FUSE_GER_AMBIG_RE = re.compile(
    r"\b([a-záéíóúñü]+(?:ando|iendo|yendo))\s+" + _AMBIG_CLITICS_RE + r"(?=" + _SAFE_AFTER_AMBIG + r")",
    re.IGNORECASE)
# Any word followed by a safe clitic.
_FUSE_VERB_SAFE_RE = re.compile(
    r"\b([a-záéíóúñü]+)\s+" + _SAFE_CLITICS_RE + r"\b",
    re.IGNORECASE)
# Any word followed by an ambiguous clitic in a safe right context.
_FUSE_VERB_AMBIG_RE = re.compile(
    r"\b([a-záéíóúñü]+)\s+" + _AMBIG_CLITICS_RE + r"(?=" + _SAFE_AFTER_AMBIG + r")",
    re.IGNORECASE)
# Gerund ending -> the same ending carrying the written accent it needs
# once a clitic is attached.
_GER_ACCENT_MAP = (("ando","ándo"), ("iendo","iéndo"), ("yendo","yéndo"))
# v114: PRS removed from the list. Present-tense verbs do NOT take enclitics in Spanish
# ("es lo más bonito", NOT "eslo"). Only IMP (imperative: "guárdalo"), INF ("contar+lo→contarlo")
# and GER ("contando+lo→contándolo") take clitics.
_MORPH_ADMITS_ENCLITIC = {"IMP", "INF", "GER"}

def _verb_admits_enclitic(verb):
    """Decide whether *verb* may have an enclitic pronoun fused onto it.

    The checks run in this order and the first match wins:

    1. Vetoes (return False): the word is itself a clitic; it is one of a
       fixed set of function words/participles; or it is on the v134
       blacklist of frequent nouns/adjectives whose homograph verb is
       marginal (without it "nada me sorprende" would fuse as "nádame").
    2. v134b whitelist (return True): subjunctive forms used as the polite
       imperative ("diga me" → "dígame"). Forms with a frequent
       noun/adjective homograph (cante, saque, toque, viva, meta, baile,
       salga, lea, crea) are deliberately excluded.
    3. The word is a lexicon verb whose morphology is in
       ``_MORPH_ADMITS_ENCLITIC``.
    4. v126b: the word ends in a/e/i/í and adding -r/-er/-ir gives a lexicon
       verb ("come" → "comer"; "porta" is N but "portar" is V).
    5. v142: same, after undoing the last ue→o / ie→e diphthong
       ("cuenta" → "contar", "vuelve" → "volver").

    Anything else returns False. The earlier version carried a duplicate of
    rule 4 inside rule 3, plus a trailing "final filter" and fallback that
    could only ever return False; they were removed with no change in
    results.
    """
    v = (verb or "").lower()
    if v in {"me","te","se","nos","os","lo","la","le","los","las","les"}:
        return False
    if v in {"como", "mientras", "para", "sobre", "luego", "casi", "según",
             "salvo", "bajo", "sin", "pasada", "vista", "puesto", "dada",
             "siendo", "habiendo"}:
        return False
    # v134: blacklist of frequent nouns/adjectives with a marginal homograph
    # verb. Only words where a wrong fusion is more likely than a missed
    # legitimate imperative are listed.
    if v in {"agua", "cinta", "clara", "copa", "fina", "goma", "justa",
             "maja", "manga", "mata", "misa", "mosca", "nada", "novela",
             "novia", "obra", "pena", "perla", "punta", "raya", "recta",
             "rima", "rosa", "sopa", "tabla", "taza", "tela", "tinta",
             "trenza", "tribu", "tumba", "vaca", "venta", "visa"}:
        return False
    # v134b: whitelist of SBJ forms that act as the polite (usted/ustedes)
    # imperative.
    if v in {"abra","abran","aprenda","aprendan","arregle","arreglen",
             "ayude","ayuden","baje","bajen","beba","beban","cierre","cierren",
             "coma","coman","continúe","continúen","cuente","cuenten",
             "decida","decidan","deje","dejen","diga","digan",
             "disculpe","disculpen","empiece","empiecen","encuentre","encuentren",
             "envíe","envíen","escriba","escriban","escuche","escuchen",
             "espere","esperen","firme","firmen","hable","hablen",
             "haga","hagan","intente","intenten","limpie","limpien",
             "llame","llamen","llegue","lleguen","llene","llenen",
             "mire","miren","muestre","muestren","oiga","oigan",
             "pase","pasen","perdone","perdonen","permita","permitan",
             "piense","piensen","pinte","pinten","ponga","pongan",
             "pregunte","pregunten","prepare","preparen","prometa","prometan",
             "pruebe","prueben","quede","queden","reciba","reciban",
             "recuerde","recuerden","responda","respondan","sea","sean",
             "sepa","sepan","siga","sigan","sirva","sirvan",
             "suba","suban","tenga","tengan","termine","terminen",
             "tome","tomen","traiga","traigan","use","usen",
             "vaya","vayan","vea","vean","venga","vengan",
             "vuelva","vuelvan"}:
        return True

    def _is_lex_verb(form):
        return form in ES2NI and ES2NI_POS.get(form, "") == "V"

    # Main case: a verb whose morphology admits enclitics (IMP/INF/GER).
    if _is_lex_verb(v) and ES2NI_MORPH.get(v, "") in _MORPH_ADMITS_ENCLITIC:
        return True

    ends_like_finite = bool(v) and v[-1] in 'aeií'

    # v126/v126b: a regular infinitive exists for this form, whatever POS the
    # form itself has in the lexicon (nouns do not take enclitics, so a
    # following clitic implies the verbal reading).
    if len(v) >= 3 and ends_like_finite:
        if any(_is_lex_verb(v + suf) for suf in ('r', 'er', 'ir')):
            return True

    # v142: diphthongising homographs (ue→o, ie→e).
    if len(v) >= 4 and ends_like_finite:
        for diph, base in (("ue","o"),("ie","e")):
            idx = v.rfind(diph)
            if idx < 0:
                continue
            stem_undiph = v[:idx] + base + v[idx+2:]
            # Drop the thematic vowel before adding the infinitive suffix.
            stem_root = stem_undiph[:-1] if stem_undiph[-1] in 'ae' else stem_undiph
            if any(_is_lex_verb(stem_root + suf) for suf in ('ar', 'er', 'ir')):
                return True
    return False

def _fuse_imp_with_accent(verb, clit):
    VOWELS = "aeiouáéíóú"
    fused = verb + clit
    positions = [i for i,c in enumerate(fused) if c.lower() in VOWELS]
    if len(positions) < 3:
        return fused
    verb_positions = [i for i,c in enumerate(verb) if c.lower() in VOWELS]
    if not verb_positions:
        return fused
    if len(verb_positions) == 1:
        tonic_idx = verb_positions[0]
    else:
        tonic_idx = verb_positions[-2]
    if fused[tonic_idx] in 'áéíóú':
        return fused
    vowels_after = sum(1 for p in positions if p > tonic_idx)
    if vowels_after >= 2:
        accent_map = {'a':'á','e':'é','i':'í','o':'ó','u':'ú'}
        ch = fused[tonic_idx].lower()
        if ch in accent_map:
            new_ch = accent_map[ch]
            if fused[tonic_idx].isupper():
                new_ch = new_ch.upper()
            return fused[:tonic_idx] + new_ch + fused[tonic_idx+1:]
    return fused

def _is_real_infinitive(word):
    """Return True when *word* (case-insensitive) is in ES2NI with POS ``"V"``.

    Despite the name, only the part of speech is checked, not the
    morphology; the callers in this file use it for gerunds as well.
    """
    key = (word or "").lower()
    return key in ES2NI and ES2NI_POS.get(key, "") == "V"

def fuse_enclitics_es(es_text):
    """Re-attach enclitic pronouns that appear separated from their verb.

    Runs a fixed sequence of regex substitutions over *es_text*:
    infinitives, then gerunds, then any other word accepted by
    ``_verb_admits_enclitic``, and finally a second ambiguous clitic after a
    word that already ends in me/te/se/nos/os. Within each stage the "safe"
    clitics are handled before the ambiguous ones, which are only fused in a
    safe right-hand context.

    Returns the rewritten text; a falsy *es_text* is returned unchanged.
    """
    if not es_text:
        return es_text

    # Helper: detect whether the ambiguous clitic "lo/la/los/las" is
    # immediately followed by "que", which signals a relative pronoun and
    # NOT a direct-object clitic. In that case it must NOT be fused.
    # Examples:
    #   "Pregúntame lo que quieras" → do NOT fuse (lo que = relative)
    #   "decir lo que pienso"        → do NOT fuse (lo que = relative)
    #   "decirle que venga"          → DO fuse (le = indirect object, rule does not apply)
    #   "llévalo de aquí"            → DO fuse (no "que" follows)
    def _is_relative_lo_que(clit, m):
        if (clit or "").lower() not in ("lo","la","los","las"):
            return False
        rest = m.string[m.end():]
        return bool(re.match(r"\s+que\b", rest, re.IGNORECASE))

    # Infinitive + safe clitic: fuse only when the infinitive is a lexicon verb.
    def _inf_safe(m):
        verb, clit = m.group(1), m.group(2)
        if _is_real_infinitive(verb):
            return verb + clit
        return m.group(0)
    # Infinitive + ambiguous clitic: additionally skip the "lo que" relative.
    def _inf_ambig(m):
        verb, clit = m.group(1), m.group(2)
        if _is_real_infinitive(verb):
            if _is_relative_lo_que(clit, m):
                return m.group(0)
            return verb + clit
        return m.group(0)

    # Fuse a gerund with its clitic, swapping the ending for its accented
    # form; if no known ending matches, the pair is returned still separated.
    def _ger_accent(verb, clit):
        for plain, accented in _GER_ACCENT_MAP:
            if verb.lower().endswith(plain):
                return verb[:-len(plain)] + accented + clit
        return verb + " " + clit
    def _ger_safe(m):
        verb, clit = m.group(1), m.group(2)
        if not _is_real_infinitive(verb):
            return m.group(0)
        return _ger_accent(verb, clit)
    def _ger_ambig(m):
        verb, clit = m.group(1), m.group(2)
        if not _is_real_infinitive(verb):
            return m.group(0)
        if _is_relative_lo_que(clit, m):
            return m.group(0)
        return _ger_accent(verb, clit)

    es_text = _FUSE_INF_SAFE_RE.sub(_inf_safe, es_text)
    es_text = _FUSE_INF_AMBIG_RE.sub(_inf_ambig, es_text)
    es_text = _FUSE_GER_SAFE_RE.sub(_ger_safe, es_text)
    es_text = _FUSE_GER_AMBIG_RE.sub(_ger_ambig, es_text)

    # Generic word + clitic. Words ending like an infinitive or gerund are
    # skipped here because the passes above already dealt with them.
    def _verb_safe(m):
        verb, clit = m.group(1), m.group(2)
        v = verb.lower()
        if v.endswith(("ar","er","ir","ando","iendo","yendo")):
            return m.group(0)
        if not _verb_admits_enclitic(v):
            return m.group(0)
        return _fuse_imp_with_accent(verb, clit)
    def _verb_ambig(m):
        verb, clit = m.group(1), m.group(2)
        v = verb.lower()
        if v.endswith(("ar","er","ir","ando","iendo","yendo")):
            return m.group(0)
        if not _verb_admits_enclitic(v):
            return m.group(0)
        if _is_relative_lo_que(clit, m):
            return m.group(0)
        return _fuse_imp_with_accent(verb, clit)

    es_text = _FUSE_VERB_SAFE_RE.sub(_verb_safe, es_text)
    es_text = _FUSE_VERB_AMBIG_RE.sub(_verb_ambig, es_text)

    # Safe context after an ambiguous clitic in a DOUBLE fusion: like the
    # simple context (_SAFE_AFTER_AMBIG) BUT excluding "que", because
    # "pronominal verb + lo/la/los/las + que" usually introduces a
    # relative pronoun ("lo que pienso", "la que quiero"), not a direct
    # object of the verb. Example: "Pregúntame lo que quieras" must NOT be
    # fused into "Pregúntámelo que quieras". Simple fusion keeps using the
    # context with "que" (e.g. "decirle que venga" does fuse).
    _SAFE_AFTER_AMBIG_DOUBLE = (
        r"(?:\s*[.,;:!?)\]»\"”—–]"     # punctuation
        r"|\s+(?:y|o|pero|sino|aunque|mientras|porque|si|cuando|donde|"
        r"también|tampoco|ya|no|todavía|después|antes|ahora|luego|aquí|allí|"
        r"ahí|así|sólo|solo|nunca|jamás|siempre|"
        r"bien|mal|mucho|poco|muy|más|menos|tan|todo|todos|nada|algo|"
        r"hoy|ayer|mañana|pronto|tarde|"
        r"fuerte|fuertemente|suavemente|fijamente|atentamente|"
    r"un|una|algún|alguna)\b"
        r"|\s+[a-záéíóúñü]+mente\b"
        r"|\s+otra\s+vez\b"
        r"|\s+otras\s+veces\b"
        r"|\s+(?:cada|todos\s+los|todas\s+las)\s+(?:día|días|mañana|mañanas|tarde|tardes|noche|noches|mes|meses|año|años|semana|semanas|hora|horas|momento|momentos|vez|veces)\b"
        r"|\s*$|\s*\n)"
    )
    # A word ending in me/te/se/nos/os followed by an ambiguous clitic.
    _DOUBLE_CLIT_RE = re.compile(
        r"\b([a-záéíóúñü]+(?:me|te|se|nos|os))\s+" + _AMBIG_CLITICS_RE +
        r"(?=" + _SAFE_AFTER_AMBIG_DOUBLE + r")",
        re.IGNORECASE)
    def _double_clit(m):
        word, clit2 = m.group(1), m.group(2)
        clit1_endings = ("me","te","se","nos","os")
        verb_orig = None
        # Peel the first clitic off and look the remainder up in the
        # lexicon, with and without acute accents.
        for end in clit1_endings:
            if word.lower().endswith(end):
                cand = word[:-len(end)]
                cand_noacc = (cand.replace('á','a').replace('é','e')
                                  .replace('í','i').replace('ó','o').replace('ú','u'))
                if (cand.lower() in ES2NI or cand_noacc.lower() in ES2NI):
                    verb_orig = cand_noacc
                    break
        if not verb_orig:
            return m.group(0)
        if not _verb_admits_enclitic(verb_orig):
            return m.group(0)
        if _is_relative_lo_que(clit2, m):
            return m.group(0)
        return _fuse_imp_with_accent(word, clit2)

    es_text = _DOUBLE_CLIT_RE.sub(_double_clit, es_text)
    return es_text

# ====== Basque-like modality (-na / -ba) ======
# Master switch checked by add_modal_suffixes_es2ni / strip_modal_suffixes_ni.
MODAL_SUFFIX_ENABLE = True
MODAL_ONLY_ON_FINITE = True
# When set, add_modal_suffixes_es2ni drops the closing "?" / "!" tokens.
MODAL_STRIP_QE_IN_NI = True

SENT_END = set(".!?…")
OPEN_FOR = dict([("?", "¿"), ("!", "¡")])
# v103: "—" and "–" added so that ¿/¡ are inserted after the dialogue dash.
WRAP_PREFIX = set("«“‘([{\"'—–")
PERS_ENDINGS = ("-n","-śe","-ek","-śek","-k")
TAM_FINITE   = ("-ke","-bo","-ta","-ni","-ir")

def looks_like_finite_ni(tok:str)->bool:
    """Return True when *tok* ends in a TAM_FINITE suffix, optionally followed by a PERS_ENDINGS suffix.

    The check is case-insensitive and ignores one trailing ``-na`` / ``-ba``.
    Empty tokens and tokens starting with ``[`` are never finite.
    """
    word = (tok or "").lower()
    if not word or word.startswith("["):
        return False
    stem = re.sub(r"-(na|ba)$","", word)
    finite_endings = tuple(TAM_FINITE) + tuple(
        tam + person for tam in TAM_FINITE for person in PERS_ENDINGS
    )
    return stem.endswith(finite_endings)

def last_content_index(tokens, start, end_exclusive):
    """Return the index of the last token in ``[start, end_exclusive)`` that is not in VISIBLE_PUNCT.

    Returns -1 when every token in the range is punctuation or the range is
    empty.
    """
    for idx in range(end_exclusive - 1, start - 1, -1):
        if tokens[idx] not in VISIBLE_PUNCT:
            return idx
    return -1

def strip_qe_punct(tokens):
    """Return a new list with every ``?`` and ``!`` token removed.

    v131: only the closing marks are removed, because their information is
    carried by the ``-na`` / ``-ba`` modal suffix added in
    ``add_modal_suffixes_es2ni``. The opening ``¿`` / ``¡`` are kept exactly
    where the user wrote them, so they do not have to be re-invented on the
    way back to Spanish.
    """
    kept = []
    for tok in tokens:
        if tok == "?" or tok == "!":
            continue
        kept.append(tok)
    return kept

def _is_numeric_comma(tokens, i):
    """Return True when ``tokens[i]`` is a "," with a number token on each side."""
    if not 0 < i < len(tokens) - 1:
        return False
    if tokens[i] != ",":
        return False
    return is_number(tokens[i-1]) and is_number(tokens[i+1])

def _is_time_colon(tokens, i):
    """Return True when ``tokens[i]`` is a ":" with a number token on each side."""
    if not 0 < i < len(tokens) - 1:
        return False
    if tokens[i] != ":":
        return False
    return is_number(tokens[i-1]) and is_number(tokens[i+1])

def _is_true_clause_break(tokens, i):
    """Return True when ``tokens[i]`` is a clause separator that is not part of a number or a time."""
    return (
        tokens[i] in CLAUSE_BREAKS
        and not _is_numeric_comma(tokens, i)
        and not _is_time_colon(tokens, i)
    )

def add_modal_suffixes_es2ni(tokens):
    """Mark questions and exclamations with ``-na`` / ``-ba`` on their last content token.

    For each ``?`` (or ``!``) token, the last non-punctuation token since the
    start of the current sentence gets ``-na`` (or ``-ba``) appended. When
    ``MODAL_STRIP_QE_IN_NI`` is set, the ``?`` / ``!`` tokens are then
    removed. Returns *tokens* itself when ``MODAL_SUFFIX_ENABLE`` is off,
    otherwise a new list.
    """
    if not MODAL_SUFFIX_ENABLE:
        return tokens
    marked = list(tokens)
    sentence_from = 0
    for pos, tok in enumerate(tokens):
        if tok == "?" or tok == "!":
            host = last_content_index(marked, sentence_from, pos)
            if host != -1:
                # v125: ALWAYS append the modal suffix, even when the word
                # already ends in a lexical `-na`/`-ba` (e.g. "tarde" →
                # `mirgaŕ-na`). Skipping it lost the question mark on the
                # round trip: "¿Tan tarde?" came back as "Tan tarde.". The
                # inverse, `strip_modal_suffixes_ni`, peels off only the last
                # `-na`, so `mirgaŕ-na-na` still resolves to "tarde" + "?".
                suffix = "na" if tok == "?" else "ba"
                marked[host] = marked[host] + "-" + suffix
            sentence_from = pos + 1
        elif tok in SENT_END:
            sentence_from = pos + 1
    if MODAL_STRIP_QE_IN_NI:
        return strip_qe_punct(marked)
    return marked

def strip_modal_suffixes_ni(tokens):
    """Undo the modal suffixes on an NI token list and restore closing punctuation.

    Tokens are accumulated in a buffer and flushed sentence by sentence. A
    token ending in ``-na`` / ``-ba`` that is not itself a key of ``NI2ES``
    loses that suffix and closes the sentence with ``?`` / ``!``. Opening
    marks ``¿`` / ``¡`` found in the input are passed through and set the
    modality of the sentence that follows.

    Returns ``tokens`` unchanged when ``MODAL_SUFFIX_ENABLE`` is off,
    otherwise a new list.
    """
    if not MODAL_SUFFIX_ENABLE:
        return tokens

    out = []            # final token stream
    buf = []            # tokens of the sentence currently being collected
    pending_end = None  # explicit closing token seen in the input, if any
    mode = None         # "?" or "!" once a modality has been detected

    def _emit(end_override=None, also_append=None):
        """Flush ``buf`` into ``out``, adding a closing token when appropriate."""
        nonlocal buf, mode, pending_end, out
        # Question/exclamation marks never travel inside the buffer content.
        local = [t for t in buf if t not in ("¿","?","¡","!")]
        if local:
            # Closing-token priority: explicit override, then modality, then
            # the closing token seen in the input, then a plain period.
            end_tok = end_override or ("?" if mode == "?" else "!" if mode == "!" else pending_end or ".")
            out.extend(local)
            # Do not add end_tok when the last token is already a closing sign
            # (. ! ? … : ;) — avoids output such as "Una vez le pregunté:.".
            # (An older rule also skipped the closer after placeholders; that
            # rule was dropped in v115, see below.)
            last = local[-1]
            is_punct_already = last in {".", "!", "?", "…", ":", ";"}
            # v131: since the original `¿/¡` are preserved, _emit() can run in
            # the middle of a sentence with a buffer holding only transparent
            # tokens (NEWLINE_TOK, dialogue dash, quotes) and no alphabetic
            # content. No closer must be added then, otherwise spurious "."
            # would appear between the preceding context and the new `¿/¡`
            # (e.g. "recibido. ⏎ — . ¿Tan tarde?").
            has_alpha = any(any(c.isalpha() for c in t) for t in local)
            if is_punct_already or not has_alpha:
                pass  # already closed, or there is no real content to close
            else:
                # v115: ALWAYS add the closer (also after a placeholder).
                # It used to be omitted to avoid a spurious "Aitor.", but that
                # glued consecutive sentences together whenever a proper noun
                # ended one of them:
                # "decía Pablo era..." → "decía Pablo. era..."
                # A doubled final period is removed by the ".", "." -> "."
                # collapse at the end of this function (the original comment
                # called it "line 673"; presumably an old line number).
                out.append(end_tok)
        # Reset the per-sentence state regardless of whether anything was emitted.
        buf.clear(); mode = None; pending_end = None
        if also_append:
            out.append(also_append)

    # v96: only add a sentinel "." when the input does not already end in a
    # sentence-end token, so the last sentence is always flushed.
    if tokens and tokens[-1] in SENT_END:
        toks = list(tokens)
    else:
        toks = tokens + ["."]
    for i, t in enumerate(toks):
        if t in ("¿", "¡"):
            # v131: the user's original `¿/¡` reaches this point preserved
            # (thanks to the modified strip_qe_punct). Dump the accumulated
            # buffer into `out` WITHOUT a closer — what precedes the question
            # is not a full sentence but a preamble (dialogue dash, a vocative
            # followed by a comma, a conjunction...). Previously a spurious
            # "." was inserted between that context and the `¿`.
            local = [x for x in buf if x not in ("¿","?","¡","!")]
            out.extend(local)
            buf.clear()
            out.append(t)
            mode = "?" if t == "¿" else "!"
            pending_end = None
            continue
        # Explicit closers and sentence ends flush the current sentence.
        if t in ("?", "!"):
            pending_end = t; _emit(); continue
        if t in SENT_END:
            pending_end = t; _emit(); continue

        # Inside an open question/exclamation, clause breaks stay in the sentence.
        if t in CLAUSE_BREAKS and mode in ("?","!"):
            buf.append(t)
            continue

        m = re.search(r"-(na|ba)$", (t or "").lower())
        if m:
            # A form that exists verbatim in the lexicon keeps its suffix
            # (it is lexical, not modal).
            if (t or "").lower() in NI2ES:
                buf.append(t)
                continue
            # Otherwise peel the suffix off and close the sentence with the
            # modality it encodes.
            t = t[:-len(m.group(0))]
            if t: buf.append(t)
            mode = "?" if m.group(1) == "na" else "!"
            _emit()
            continue

        if t:
            buf.append(t)

    # Collapse a doubled final period (e.g. real closer followed by the sentinel).
    if len(out) >= 2 and out[-1] == "." and out[-2] == ".": out.pop()
    return out


# Diacritic accents in questions.
# _DIACR_ALWAYS: unaccented form -> accented interrogative form; candidates for
# accenting anywhere inside a `¿ … ?` span (see apply_interrogative_tildes).
_DIACR_ALWAYS = {
    "cuando":"cuándo", "donde":"dónde", "como":"cómo",
    "cuanto":"cuánto", "cuanta":"cuánta",
    "cuantos":"cuántos", "cuantas":"cuántas",
    "cuan":"cuán", "cual":"cuál", "cuales":"cuáles",
    "adonde":"adónde",
}
# _DIACR_HEAD_ONLY: forms accented only when they are the first alphabetic word
# of the question or the first one after a `,` `;` `:` inside it.
_DIACR_HEAD_ONLY = {"que":"qué", "quien":"quién", "quienes":"quiénes"}

def apply_interrogative_tildes(tokens):
    """Restore interrogative accents (qué, cuándo, ...) inside `¿ … ?` spans.

    Works on a copy of ``tokens``. For every ``¿`` that has a matching ``?``
    before the next sentence end, words listed in ``_DIACR_ALWAYS`` and, in
    head position, ``_DIACR_HEAD_ONLY`` are replaced by their accented form,
    preserving an initial capital. Returns the new list.
    """
    out = list(tokens)
    i = 0
    n = len(out)
    while i < n:
        if out[i] != "¿":
            i += 1
            continue
        # Find the `?` that closes this `¿`, tracking nested `¿ … ?` pairs.
        j = i + 1
        depth = 0
        end = -1
        while j < n:
            t = out[j]
            if t == "¿":
                depth += 1
            elif t == "?":
                if depth == 0:
                    end = j
                    break
                depth -= 1
            elif t in SENT_END:
                # Another sentence end before the closer: give up on this opener.
                break
            j += 1
        if end < 0:
            i += 1
            continue
        # head_pending is True while the next alphabetic token would be the
        # first word of the question or of a clause after , ; :
        head_pending = True
        for k in range(i+1, end):
            tok = out[k]
            if not tok or not tok[0].isalpha():
                if tok in (",", ";", ":"):
                    head_pending = True
                continue
            tok_l = tok.lower()
            replacement = None
            if tok_l in _DIACR_ALWAYS:
                replacement = _DIACR_ALWAYS[tok_l]
            elif head_pending and tok_l in _DIACR_HEAD_ONLY:
                replacement = _DIACR_HEAD_ONLY[tok_l]
            # v115: for 'cuando/donde/como' (NOT 'qué/quién'), if the nearby
            # verb is subjunctive the clause is a temporal/conditional
            # subordinate, not an indirect question. Do not accent it.
            # E.g. "¿Lo recordarán cuando les contemos?" - 'contemos' SBJ → no accent
            #      "¿Sabes cuándo viene?" - 'viene' PRS → accent
            if replacement and tok_l in ('cuando','donde','como','adonde'):
                # v117: only cancel the accent when this is NOT the first
                # alphabetic word of the question. A leading 'cuando' is a
                # plain direct question and must be accented. If words precede
                # it (usually a main verb of doubt/asking) it may be a
                # subordinate clause → possible cancellation.
                is_first_alpha = True
                for prev_k in range(i+1, k):
                    pt = out[prev_k]
                    if pt and pt[0].isalpha():
                        is_first_alpha = False
                        break
                if not is_first_alpha:
                    # Inspect up to three following tokens; the first one that
                    # is a key of ES2NI decides and ends the lookahead.
                    for lookahead in range(1, 4):
                        if k + lookahead >= end: break
                        next_tok = out[k + lookahead]
                        if not next_tok or not next_tok[0].isalpha(): continue
                        next_l = next_tok.lower()
                        if next_l in ES2NI:
                            morph = ES2NI_MORPH.get(next_l, '')
                            pos = ES2NI_POS.get(next_l, '')
                            # cuando + SBJ/IPFV/PST verb → temporal subordinate, no accent
                            if pos == 'V' and morph in ('SBJ', 'IPFV', 'PST'):
                                replacement = None
                            break
            if replacement is not None:
                # Keep the capitalisation of the original token.
                if tok and tok[0].isupper():
                    replacement = replacement[0].upper() + replacement[1:]
                out[k] = replacement
            head_pending = False
        # Resume scanning after the closing `?`.
        i = end + 1
    return out

def add_inverted_openers(tokens):
    """Insert the opening `¿` / `¡` for every `?` / `!` that lacks one.

    Works on a copy of ``tokens``. For each closer the function determines the
    scope of the question/exclamation, looks for a question word to decide
    where the clause starts, skips leading wrapper tokens, and inserts the
    opener from ``OPEN_FOR`` unless one is already present since the last
    sentence end / `;` / `:`. Returns the new list.
    """
    out = tokens[:]
    # NOTE(review): START_BREAKS is not referenced anywhere else in this function.
    START_BREAKS = SENT_END | CLAUSE_BREAKS

    # v103: ACCENTED q-words (unambiguous: interrogative only) are kept apart
    # from UNACCENTED ones (ambiguous: may be conjunctions/relatives).
    # In questions/exclamations the first accented word of the span wins.
    # If there is none, the unaccented one closest to the closer (?/!) is used.
    EXCL_ACCENTED = {'qué','cuán','cuánto','cuánta','cuántos','cuántas','cómo'}
    EXCL_PLAIN    = {'que','cuan','cuanto','cuanta','cuantos','cuantas','como'}
    INTERR_ACCENTED = {'qué','quién','quiénes','cuándo','dónde','cómo','cuál',
                       'cuáles','cuán','cuánto','cuánta','cuántos','cuántas'}
    # v124: 'por' removed so that 'Por cierto, ¿sabes...?' is not treated as a
    # pure interrogative (which moved '¿' to the absolute start, before 'Por').
    INTERR_PLAIN    = {'que','quien','quienes','cuando','donde','como','cual',
                       'cuales','cuan','cuanto','cuanta','cuantos','cuantas'}

    # NOTE(review): this helper is defined but never called in this function.
    def _is_true_start_break(idx):
        if out[idx] in SENT_END: return True
        if out[idx] == NEWLINE_TOK: return True  # v103: a line break is a break
        if out[idx] in CLAUSE_BREAKS: return _is_true_clause_break(out, idx)
        return False

    # v103: when looking for the scope of a question/exclamation we also stop
    # at `:` (unless it is a time such as `\d:\d`). This prevents a lead-in
    # like "Dime una cosa: ¿sigues escribiendo?" from being handled as a single
    # question with the `¿` ending up before "Dime".
    def _is_scope_break(idx):
        if out[idx] in SENT_END: return True
        if out[idx] == NEWLINE_TOK: return True
        if out[idx] == ':' and not _is_time_colon(out, idx): return True
        if out[idx] == ';': return True
        return False

    i = 0
    while i < len(out):
        if out[i] in ("?", "!"):
            closer = out[i]; opener = OPEN_FOR[closer]
            accented_set = EXCL_ACCENTED if closer == "!" else INTERR_ACCENTED
            plain_set    = EXCL_PLAIN    if closer == "!" else INTERR_PLAIN

            # Walk back to the scope break (SENT_END/NEWLINE_TOK/`:`/`;`).
            j = i - 1
            while j >= 0:
                if _is_scope_break(j):
                    break
                j -= 1
            scope_start = j + 1

            # Look for a q-word in [scope_start, i):
            #   1) FIRST accented one (natural opening of the question)
            #   2) if none is accented, LAST unaccented one (closest to the closer)
            qword_pos = -1
            for k in range(scope_start, i):
                if out[k].lower() in accented_set:
                    qword_pos = k
                    break
            if qword_pos == -1:
                for k in range(i - 1, scope_start - 1, -1):
                    if out[k].lower() in plain_set:
                        qword_pos = k
                        break

            # v129 (historical): when no q-word was found and the break was a
            # NEWLINE_TOK, the scope used to be extended backwards through
            # NEWLINE_TOKs up to a "real" break (SENT_END / : / ;) so that
            # questions split by \n ("¿O la consideras\ndemasiado X?") got the
            # ¿ before 'O' rather than before 'demasiado'.
            # v131: ROLLBACK of v129+v130. That scope extension created more
            # bugs than it fixed: it placed orphan `¿` before line breaks,
            # inside quotation marks, etc. The right philosophy is to PRESERVE
            # the `¿/¡` of the original text (changes in strip_qe_punct and
            # strip_modal_suffixes_ni). This function remains only as a safety
            # net for careless texts that come without opening marks.

            if qword_pos != -1:
                # Place ¿/¡ right after the last break (CLAUSE/SENT) that
                # precedes the q-word.
                cl_start = scope_start
                for k in range(qword_pos - 1, scope_start - 1, -1):
                    if out[k] in CLAUSE_BREAKS:
                        if _is_true_clause_break(out, k):
                            cl_start = k + 1
                            break
                    if out[k] in SENT_END or out[k] == NEWLINE_TOK:
                        cl_start = k + 1
                        break
                start = cl_start
            else:
                # Yes/no question or exclamation: ¿/¡ goes at the scope start.
                start = scope_start

            # Skip leading WRAP_PREFIX tokens (quotes, parentheses, —, …).
            k = start
            while k < i and out[k] in WRAP_PREFIX:
                k += 1
            # v131: if an opener already exists between the last real syntactic
            # break (. ; : ! ? …) and `i` (the `¿/¡` of the original text,
            # which is now preserved), do NOT insert another one. The backward
            # search does NOT stop at NEWLINE_TOK or CLAUSE_BREAKS because the
            # user may have written `¿X⏎Y?` or `¿X, Y?` with the `¿` far from
            # the `?`. Note that here every ':' counts as a break, including a
            # colon between numbers.
            anti_dup_start = 0
            for jj in range(i - 1, -1, -1):
                if out[jj] in SENT_END or out[jj] in (';', ':'):
                    anti_dup_start = jj + 1
                    break
            has_opener_already = any(out[kk] == opener for kk in range(anti_dup_start, i))
            if not has_opener_already:
                # The insertion shifts the closer one slot to the right, hence
                # the extra increment.
                out.insert(k, opener); i += 1
        i += 1
    return out

# ====== EXPANSIONES (deterministas, sólo ES→NI) ======
# Master switch for the flag-driven expansions applied by the CSV loader.
EXPANSION_ENABLE = True
# Candidate header names for the flags column; the loader uses the first one
# present in the CSV.
FLAG_COLNAMES = ("flags","FLAGS","expand","EXPAND","tags","TAGS","morph","MORPH")
# Flag markers that request a derived plural form.
FLAG_PLURAL = ("S",)
# Flag markers that request a derived third-person-plural form.
FLAG_3PL    = ("3","V3")

# Characters treated as vowels by _pluralize_es_form (both cases, accented too).
VOWELS = "aeiouáéíóúüAEIOUÁÉÍÓÚÜ"

def _has_flag(cell:str, wanted:tuple)->bool:
    c = (cell or "")
    return any(w in c for w in wanted)

def _pluralize_es_form(s: str) -> str:
    """Build a plural for a Spanish surface form with three simple rules.

    * final ``z``            -> drop it and add ``ces``
    * final non-vowel        -> add ``es``
    * final vowel (VOWELS)   -> add ``s``

    The ending is upper-cased when the last character of ``s`` is not
    lowercase. Empty or None input is returned unchanged.
    """
    if not s:
        return s
    final_char = s[-1]
    if s.lower().endswith("z"):
        stem, ending = s[:-1], "ces"
    elif final_char in VOWELS:
        stem, ending = s, "s"
    else:
        stem, ending = s, "es"
    if not final_char.islower():
        ending = ending.upper()
    return stem + ending

def _present_3pl_from_3sg(s: str) -> str:
    if not s: return s
    return s + ("n" if s[-1].islower() else "N")

# ====== TTS (appOld) ======
# Optional text-to-speech model, loaded once at import time.
print("Cargando modelo de voz (opcional)…")
# Prefer the GPU when torch reports one.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Both stay None when loading fails; synthesize_speech checks for that.
processor = model = None
try:
    processor = AutoProcessor.from_pretrained("facebook/mms-tts-spa")
    model = VitsModel.from_pretrained("facebook/mms-tts-spa").to(device)
    print("Modelo de voz cargado.")
except Exception as e:
    # Best effort: any loading failure is reported and TTS stays disabled.
    print(f"AVISO TTS: {e}")

def add_reading_pauses(text: str, level:int=3) -> str:
    """Lengthen pauses for TTS by doubling commas and periods.

    Every comma becomes ``", , "`` and every period ``". . "``; runs of
    whitespace are then collapsed and the result is stripped.

    Args:
        text: Text to be spoken.
        level: Pause level; values of 1 or lower return ``text`` unchanged.

    Returns:
        The text with the extra pause markers.
    """
    if level <= 1: return text
    t = re.sub(r",\s*", ", , ", text)
    # Chain on `t`, not `text`: the previous code restarted from the original
    # string here and silently discarded the comma pauses.
    t = re.sub(r"\.\s*", ". . ", t)
    return re.sub(r'\s+',' ',t).strip()

def hispanize_for_tts(ni_text: str) -> str:
    """Rewrite NI text with Spanish-readable spelling for the TTS model.

    The text is lower-cased and NFC-normalised, NI-specific letters and
    hyphens are replaced, bracketed tags are removed, whitespace is collapsed,
    and reading pauses are added at level 3.
    """
    spoken = unicodedata.normalize('NFC', (ni_text or "").lower())
    # Applied in order; the 'eś' rule can no longer match once 'ś' has been
    # replaced, but it is kept as in the original.
    substitutions = (('ŕ', 'rr'), ('ś', 's'), ('eś', 'es'), ('-', ' '))
    for old, new in substitutions:
        spoken = spoken.replace(old, new)
    spoken = re.sub(r'\[.*?\]', '', spoken)
    spoken = re.sub(r'\s+', ' ', spoken).strip()
    return add_reading_pauses(spoken, 3)

def synthesize_speech(text):
    """Synthesize ``text`` with the module-level TTS model.

    Returns ``(16000, samples)`` where ``samples`` is a float32 NumPy array
    peak-normalised to 0.9, or None when the text is blank, the model or
    processor is not loaded, or synthesis fails.
    """
    tts_unavailable = model is None or processor is None
    if not text or not text.strip() or tts_unavailable:
        return None
    try:
        spoken = hispanize_for_tts(text)
        inputs = processor(text=spoken, return_tensors="pt").to(device)
        with torch.no_grad():
            waveform = model(**inputs).waveform
        samples = waveform.cpu().numpy().squeeze()
        # Scale so that the largest absolute sample lands at 0.9.
        peak = max(abs(samples.min()), abs(samples.max()))
        if peak > 0:
            samples = samples / peak * 0.9
        return (16000, samples.astype(np.float32))
    except Exception as e:
        # Best effort: report the failure and produce no audio.
        print(f"Error TTS: {e}")
        return None

# ====== Línea ibérica (appOld) ======
# Vowel order shared by every row of SYL_FOR.
V = "aeiou"
# Syllabic signs: stop consonant -> one sign per vowel, in the order of V.
SYL_FOR = {
    "b": ["‹BA›", "‹BE›", "‹BI›", "‹BO›", "‹BU›"],
    "d": ["‹DA›", "‹DE›", "‹DI›", "‹DO›", "‹DU›"],
    "t": ["‹TA›", "‹TE›", "‹TI›", "‹TO›", "‹TU›"],
    "g": ["‹GA›", "‹GE›", "‹GI›", "‹GO›", "‹GU›"],
    "k": ["‹KA›", "‹KE›", "‹KI›", "‹KO›", "‹KU›"],
}
# Alphabetic signs for vowels and continuants.
ALPHA_FOR = {
    "a": "‹A›", "e": "‹E›", "i": "‹I›", "o": "‹O›", "u": "‹U›",
    "s": "‹S›", "ś": "‹Ś›",
    "l": "‹L›", "r": "‹R›", "ŕ": "‹Ŕ›",
    "n": "‹N›", "m": "‹M›",
}
# Signs appended when a consonant directly follows a syllabic sign.
CODA_FOR = {
    "": "",
    "n": "‹N›", "s": "‹S›", "ś": "‹Ś›",
    "r": "‹R›", "ŕ": "‹Ŕ›",
    "l": "‹L›", "m": "‹M›",
    "k": "‹K›", "t": "‹T›",
}


def tokens_from_latin(ni: str) -> str:
    """Encode a Latin-script NI string as a sequence of ``‹…›`` sign tokens.

    The input is lower-cased and NFC-normalised. ``p`` is read as ``b``, a
    hyphen becomes an em dash, a stop followed by a vowel becomes one syllabic
    sign (plus a coda sign when the next character is in ``CODA_FOR``), and
    every other character maps through ``ALPHA_FOR`` or is upper-cased as is.
    """
    text = unicodedata.normalize('NFC', (ni or "").lower())
    size = len(text)
    signs = []
    pos = 0
    while pos < size:
        ch = text[pos]
        if ch == "p":
            ch = "b"
        if ch == "-":
            signs.append("—")
            pos += 1
            continue
        opens_syllable = ch in SYL_FOR and pos + 1 < size and text[pos + 1] in V
        if not opens_syllable:
            # Vowels, continuants and unknown characters: one sign each.
            signs.append(ALPHA_FOR.get(ch, ch.upper()))
            pos += 1
            continue
        sign = SYL_FOR[ch][V.index(text[pos + 1])]
        coda = text[pos + 2] if pos + 2 < size else ""
        # NOTE(review): the coda is consumed even when a vowel follows it, so
        # "bata" yields ‹BA›‹T›‹A› rather than ‹BA›‹TA›; confirm this is the
        # intended legacy encoding.
        if coda != "" and coda in CODA_FOR:
            sign += CODA_FOR[coda]
            pos += 3
        else:
            pos += 2
        signs.append(sign)
    return "".join(signs)

# "compact" emits one key per sign; any other value keeps the full sign text.
KEYS_MODE = "full"
# Optional per-word overrides: lower-cased NFC word -> ready-made key string.
KEYS_OVERRIDE = {}


def georgeos_keys(token_str: str, ni_plain: str) -> str:
    """Convert a ``‹…›`` sign string into the key sequence for the glyph font.

    ``ni_plain`` is only used to look up ``KEYS_OVERRIDE``; when the word has
    an override it is returned directly. Otherwise each sign found in
    ``token_str`` is mapped according to ``KEYS_MODE``; characters outside
    ``‹…›`` are ignored.
    """
    plain = unicodedata.normalize('NFC', (ni_plain or "").lower())
    if plain in KEYS_OVERRIDE:
        return KEYS_OVERRIDE[plain]

    compact = KEYS_MODE == "compact"
    keys = []
    for sign in re.findall(r"‹(.*?)›", token_str):
        is_syllabic = len(sign) == 2 and sign[0] in "BDTGK"
        if is_syllabic:
            # Compact mode keeps only the consonant of a syllabic sign.
            keys.append(sign[0] if compact else sign)
        elif compact and sign in ("A", "E", "I", "O", "U"):
            keys.append(sign)
        elif sign == "Ś":
            keys.append("X")
        elif sign == "Ŕ":
            keys.append("r")
        elif compact:
            keys.append(sign[0].upper())
        else:
            keys.append(sign)
    return "".join(keys)

# Separator placed between two consecutive word tokens.
TRIDOT = "|"


def render_ib_with_tridots(ib_toks):
    """Join Iberian-line tokens, separating adjacent words with ``TRIDOT``.

    Tokens found in ``VISIBLE_PUNCT`` are padded with a space on each side and
    reset the word sequence, so no separator is placed next to them. The
    joined string is stripped before being returned.
    """
    pieces = []
    after_word = False
    for token in ib_toks:
        if token in VISIBLE_PUNCT:
            pieces.append(" " + token + " ")
            after_word = False
            continue
        if after_word:
            pieces.append(" " + TRIDOT + " ")
        pieces.append(token)
        after_word = True
    return "".join(pieces).strip()

# ====== BI loader + diagnóstico ======

# When True, an NI form that maps to conflicting ES entries is removed from
# NI2ES by the loader instead of keeping one of them.
STRICT_BI_ENFORCE = True
# Lower-cased NI form -> set of ES surfaces that collided on it.
AMBIG_NI = {}
# HTML diagnostic summary; overwritten by load_bi_strict_and_diagnose().
BI_DIAG_HTML = "<em>Sin CSV cargado.</em>"

def load_bi_strict_and_diagnose():
    """Load the bilingual CSV into the global lookup tables and build a report.

    Clears and refills ES2NI, NI2ES, ESPHRASE2NI, NIPHRASE2ES, AMBIG_NI,
    ES2NI_VERB, ES2NI_POS, ES2NI_MORPH, ES_FOLD and NI_FOLD, prints a summary
    and stores an HTML diagnostic in ``BI_DIAG_HTML``.

    Returns:
        False when ``CSV_BI`` does not exist or reading it raises; otherwise
        True if at least one usable row was loaded.
    """
    global BI_DIAG_HTML
    ES2NI.clear(); NI2ES.clear(); ESPHRASE2NI.clear(); NIPHRASE2ES.clear()
    AMBIG_NI.clear(); ES2NI_VERB.clear()
    ES2NI_POS.clear()
    ES2NI_MORPH.clear()
    # Local only: lemma of the ES entry currently stored for each NI key.
    NI2ES_LEMMA = {}

    if not os.path.exists(CSV_BI):
        msg=f"[ERROR] No se encontró el CSV bilingüe: {CSV_BI}"
        print(msg); BI_DIAG_HTML=f"<b>Error:</b> {escape(msg)}"
        return False

    # Counters reported in the diagnostic.
    rows=0; dup_es=0; dup_ni=0; empty_pid=0
    mismatch_backmap = 0
    mismatch_samples = []
    pid_seen=set()

    print(f"Detectado CSV bilingüe: {CSV_BI}")
    try:
        with _open_maybe_gzip(CSV_BI) as f:
            rd = csv.DictReader(f)
            flds=set(rd.fieldnames or [])
            # Resolve column names from the accepted aliases; the last alias is
            # the fallback even when it is absent from the header.
            ES_COL = "source_es" if "source_es" in flds else "es_surface" if "es_surface" in flds else "es"
            NI_COL = "target_ni" if "target_ni" in flds else "ni_surface" if "ni_surface" in flds else "ni"
            IDCOL  = "pair_id"   if "pair_id"   in flds else "id" if "id" in flds else None
            FLAGCOL = None
            for cand in FLAG_COLNAMES:
                if cand in flds:
                    FLAGCOL = cand; break
            POS_COL = "pos_es" if "pos_es" in flds else "pos" if "pos" in flds else None
            LEMMA_COL = "es_lemma" if "es_lemma" in flds else "lemma" if "lemma" in flds else None
            MORPH_COL = "es_morph" if "es_morph" in flds else "morph" if "morph" in flds else None

            base_rows = []
            for r in rd:
                es_orig = (r.get(ES_COL) or "").strip()
                ni_orig = (r.get(NI_COL) or "").strip()
                # Rows missing either side are ignored and not counted.
                if not (es_orig and ni_orig): continue
                pid = (r.get(IDCOL) or "").strip() if IDCOL else ""
                if not pid: empty_pid += 1
                else: pid_seen.add(pid)
                flags = (r.get(FLAGCOL) or "") if FLAGCOL else ""

                # Lookup keys are the stripped, lower-cased surfaces.
                es = lower(es_orig)
                ni = lower(ni_orig)

                # Multi-word entries also go to the phrase tables; first wins.
                if " " in es:
                    if es not in ESPHRASE2NI:
                        ESPHRASE2NI[es] = (ni_orig, pid)
                if " " in ni:
                    if ni not in NIPHRASE2ES:
                        NIPHRASE2ES[ni] = (es_orig, pid)

                pos = (r.get(POS_COL) or "").strip() if POS_COL else ""
                morph = (r.get(MORPH_COL) or "").strip() if MORPH_COL else ""
                # Higher number wins when two rows share the same ES key.
                _MORPH_PRIO = {"PRS":10,"PST":9,"IPFV":8,"FUT":7,"COND":6,
                               "INF":5,"GER":4,"PART":3,"SBJ":2,"SBJ_IPFV":1,"IMP":0}
                _POS_PRIO = {"ADJ":3, "N":2, "V":1}
                if es in ES2NI:
                    dup_es += 1
                    old_pos = ES2NI_POS.get(es, "")
                    old_morph = ES2NI_MORPH.get(es, "")
                    replace = False
                    new_p = _POS_PRIO.get(pos, 0)
                    old_p = _POS_PRIO.get(old_pos, 0)
                    if new_p > old_p:
                        # A higher-priority POS takes over; a displaced verb
                        # reading is kept in ES2NI_VERB.
                        if old_pos == "V":
                            ES2NI_VERB[es] = ES2NI[es]
                        replace = True
                    elif pos == "V" and old_pos == "V":
                        # Verb vs verb: decide by morphology priority.
                        new_m = _MORPH_PRIO.get(morph, -1)
                        old_m = _MORPH_PRIO.get(old_morph, -1)
                        if new_m > old_m:
                            ES2NI_VERB[es] = ES2NI[es]
                            replace = True
                    elif pos == "V" and old_pos in ("N", "ADJ"):
                        # The noun/adjective stays primary; the verb reading is
                        # stored on the side.
                        ES2NI_VERB[es] = (ni_orig, pid)
                    if replace:
                        ES2NI[es] = (ni_orig, pid)
                        ES2NI_POS[es] = pos
                        ES2NI_MORPH[es] = morph
                else:
                    ES2NI[es] = (ni_orig, pid)
                    ES2NI_POS[es] = pos
                    ES2NI_MORPH[es] = morph

                lemma = (r.get(LEMMA_COL) or "").strip().lower() if LEMMA_COL else ""
                if ni in NI2ES:
                    dup_ni += 1
                    old_lemma = NI2ES_LEMMA.get(ni, "")
                    if lemma and old_lemma and lemma == old_lemma:
                        # Same lemma: not a real ambiguity, the latest row wins.
                        NI2ES[ni] = (es_orig, pid)
                    else:
                        # Record both ES candidates; in strict mode the NI key
                        # is removed from the reverse table.
                        s = AMBIG_NI.get(ni, set())
                        s.add(NI2ES[ni][0]); s.add(es_orig)
                        AMBIG_NI[ni] = s
                        if STRICT_BI_ENFORCE:
                            NI2ES.pop(ni, None)
                else:
                    # An NI key already marked ambiguous stays blocked in strict mode.
                    if STRICT_BI_ENFORCE and ni in AMBIG_NI:
                        pass
                    else:
                        NI2ES[ni] = (es_orig, pid)
                        NI2ES_LEMMA[ni] = lemma

                base_rows.append((es_orig, ni_orig, pid, flags))
                rows += 1

        # Flag-driven expansions only add ES keys that are still absent.
        if EXPANSION_ENABLE:
            for es_orig, ni_orig, pid, flags in base_rows:
                if not flags: continue
                if _has_flag(flags, FLAG_PLURAL):
                    pl = _pluralize_es_form(es_orig)
                    pl_key = lower(pl)
                    if pl_key not in ES2NI:
                        ES2NI[pl_key] = (ni_orig, pid)
                if _has_flag(flags, FLAG_3PL):
                    p3 = _present_3pl_from_3sg(es_orig)
                    p3_key = lower(p3)
                    if p3_key not in ES2NI:
                        ES2NI[p3_key] = (ni_orig, pid)

        # Count ES entries whose NI form maps back to a different ES surface;
        # keep at most 10 samples for the report.
        for es_low, (ni_surf, _) in ES2NI.items():
            ni_low = lower(ni_surf)
            back = NI2ES.get(ni_low)
            if back and lower(back[0]) != es_low:
                mismatch_backmap += 1
                if len(mismatch_samples) < 10:
                    mismatch_samples.append((es_low, ni_low, lower(back[0])))

    except Exception as e:
        msg=f"[ERROR] Al leer {CSV_BI}: {e}"
        print(msg); BI_DIAG_HTML=f"<b>Error:</b> {escape(msg)}"
        return False

    # Accent-insensitive indexes: folded key -> original key, stored only when
    # folding changes the key; the first key seen wins.
    ES_FOLD.clear(); NI_FOLD.clear()
    for es_key in ES2NI:
        fk = fold(es_key)
        if fk != es_key and fk not in ES_FOLD:
            ES_FOLD[fk] = es_key
    for ni_key in NI2ES:
        fk = fold(ni_key)
        if fk != ni_key and fk not in NI_FOLD:
            NI_FOLD[fk] = ni_key
    debug_print(f"Fold maps: ES_FOLD={len(ES_FOLD)}, NI_FOLD={len(NI_FOLD)}")

    es_unique = len(ES2NI)
    ni_unique = len(NI2ES)
    pid_unique = len(pid_seen)

    print(f"✓ BI-ONLY ESTRICTO cargado: {rows:,} filas.")
    # NOTE(review): the message says the first entry was used, but the
    # duplicate handling above can replace it according to POS/morph priority.
    if dup_es: print(f"[AVISO] {dup_es:,} duplicados ES (se usó la primera).")
    if dup_ni: print(f"[AVISO] {dup_ni:,} duplicados NI (bloqueados en modo estricto).")
    if empty_pid: print(f"[AVISO] {empty_pid:,} filas sin pair_id.")
    if mismatch_backmap:
        print(f"[ALERTA] {mismatch_backmap:,} asimetrías ES↔NI (misma NI apunta a otro ES).")

    # Collapsible list of mismatch samples; every value is HTML-escaped.
    sam_html = ""
    if mismatch_samples:
        sam_rows = "".join(
            f"<li><code>{escape(es)}</code> → <code>{escape(ni)}</code> → <code>{escape(es2)}</code></li>"
            for es,ni,es2 in mismatch_samples 
        )
        sam_html = f"<details><summary>Muestras</summary><ul>{sam_rows}</ul></details>"

    # Number of NI keys with more than one ES candidate, plus up to five examples.
    ambN = sum(len(v) > 1 for v in AMBIG_NI.values())
    ambList = ", ".join(f"{k}→{sorted(list(v))[:3]}" for k,v in list(AMBIG_NI.items())[:5])

    BI_DIAG_HTML = f"""
    <div style="font-family:Georgia,serif">
      <b>Diagnóstico del CSV BI</b><br>
      Archivo: <b>{escape(CSV_BI)}</b><br>
      Filas base (CSV): <b>{rows:,}</b><br>
      ES únicas (tras expansiones): <b>{es_unique:,}</b> &nbsp;|&nbsp; NI únicas: <b>{ni_unique:,}</b> &nbsp;|&nbsp; pair_id únicos: <b>{pid_unique:,}</b><br>
      Duplicados ES: <b>{dup_es:,}</b> &nbsp;|&nbsp; Duplicados NI: <b>{dup_ni:,}</b> (bloqueados en estricto) &nbsp;|&nbsp; Sin pair_id: <b>{empty_pid:,}</b><br>
      Asimetrías ES↔NI: <b>{mismatch_backmap:,}</b>
      {sam_html}
      <hr style="border:0;border-top:1px solid #caa">
      <small>NI ambiguas bloqueadas: <b>{ambN:,}</b>{(' · ej.: ' + escape(ambList)) if ambN else ''}</small><br>
      <small>Regla: el motor usa <b>sólo</b> tablas 1:1; NI duplicadas se bloquean y se muestran como <code>[AMB-NI:...]</code>.</small>
    </div>
    """
    return rows > 0

# Import-time side effect: load the bilingual lexicon once at start-up. The
# boolean result of the loader is not checked here.
print("Cargando léxico/pares (BI-estricto)…")
load_bi_strict_and_diagnose()

# =====================================================================
# Sistema de parches NO DESTRUCTIVO
# =====================================================================
import glob, re as _re_patches

# Patch files are named with three digits, an underscore, any text, then
# ".csv" with an optional ".gz" suffix.
LEX_PATCH_PATTERN = _re_patches.compile(r"^\d{3}_.+\.csv(\.gz)?$")
# NOTE(review): presumably filled while patches are applied — confirm in apply_lex_patches.
LEX_PATCH_LOG = []
# Directory that receives the "graveyard" CSVs and holds the legacy-orphans file.
LEX_DEPRECATED_DIR = "deprecated"
# NOTE(review): not referenced in the visible helpers — confirm where it is consulted.
STRICT_PATCHES = False

# (es, ni) lower-cased pairs for which a delete without replacement is accepted.
_KNOWN_LEGACY_ORPHANS = set()
_LEGACY_ORPHANS_FILE = os.path.join(LEX_DEPRECATED_DIR, "_known_legacy_orphans.csv")

def _load_known_legacy_orphans():
    """Fill ``_KNOWN_LEGACY_ORPHANS`` from the legacy-orphans CSV, if present.

    Each row contributes a lower-cased ``(source_es, target_ni)`` pair; rows
    with either field empty are skipped. A missing file is a no-op, and read
    errors are only reported through ``debug_print``.
    """
    if not os.path.exists(_LEGACY_ORPHANS_FILE):
        return
    try:
        with open(_LEGACY_ORPHANS_FILE, "r", encoding="utf-8", newline="") as handle:
            reader = csv.DictReader(handle)
            for record in reader:
                source = (record.get("source_es") or "").strip().lower()
                target = (record.get("target_ni") or "").strip().lower()
                if not (source and target):
                    continue
                _KNOWN_LEGACY_ORPHANS.add((source, target))
        debug_print(f"[PATCH] Legacy orphans aceptados: {len(_KNOWN_LEGACY_ORPHANS)}")
    except Exception as e:
        debug_print(f"[PATCH] No se pudo leer {_LEGACY_ORPHANS_FILE}: {e}")

def _append_to_graveyard(patch_name, row_data):
    """Append one retired entry to ``LEX_DEPRECATED_DIR/<patch_name>``.

    The directory is created on demand and a header row is written when the
    file is new. Only the fixed set of columns is stored; keys missing from
    ``row_data`` become empty strings. Failures are only reported through
    ``debug_print``.
    """
    columns = ["source_es","target_ni","pos_es","es_morph","pair_id","reason"]
    try:
        os.makedirs(LEX_DEPRECATED_DIR, exist_ok=True)
        target_path = os.path.join(LEX_DEPRECATED_DIR, patch_name)
        needs_header = not os.path.exists(target_path)
        with open(target_path, "a", encoding="utf-8", newline="") as handle:
            writer = csv.DictWriter(handle, fieldnames=columns)
            if needs_header:
                writer.writeheader()
            record = {column: row_data.get(column, "") for column in columns}
            writer.writerow(record)
    except Exception as e:
        debug_print(f"[PATCH] No se pudo escribir cementerio: {e}")

def _patch_pre_lint(rows, patch_name, future_targets=None, future_es=None):
    """Check the rows of one patch before they are applied.

    Args:
        rows: Patch rows as dicts with ``op``, ``source_es``, ``target_ni``
            and ``reason`` keys.
        patch_name: Name of the patch; not used by the checks themselves.
        future_targets: NI forms introduced by later patches.
        future_es: ES forms introduced by later patches.

    Returns:
        ``(errors, warns)``, two lists of ``(code, es, ni, message)`` tuples.
    """
    future_targets = future_targets or set()
    future_es = future_es or set()

    def _op_es_ni(row):
        # Normalised (op, es, ni) triple of a patch row.
        return (
            (row.get("op") or "").strip().lower(),
            (row.get("source_es") or "").strip().lower(),
            (row.get("target_ni") or "").strip().lower(),
        )

    # First pass: what this very patch (re)introduces.
    es_added = {}
    ni_targets = {}
    for row in rows:
        op, es, ni = _op_es_ni(row)
        if op in ("add", "alias", "replace") and es and ni:
            es_added[es] = ni
            ni_targets.setdefault(ni, set()).add(es)

    # Second pass: validate each operation.
    errors = []
    warns = []
    for row in rows:
        op, es, ni = _op_es_ni(row)
        reason = (row.get("reason") or "").strip()

        if op == "delete":
            if es not in ES2NI:
                continue
            entry = ES2NI[es]
            old_ni = entry[0].lower() if isinstance(entry, tuple) else entry
            # The deletion is harmless when this patch re-targets the NI form
            # or re-adds the ES form.
            if (old_ni in ni_targets) or (es in es_added):
                continue
            if (es, old_ni) in _KNOWN_LEGACY_ORPHANS:
                warns.append(("delete-orphan-known", es, old_ni,
                              "huérfano legacy aceptado"))
            elif old_ni in future_targets or es in future_es:
                warns.append(("delete-orphan-deferred-rescue", es, old_ni,
                              "rescatado en parche posterior"))
            else:
                errors.append(("delete-orphan-ni", es, old_ni,
                               f"NI {old_ni!r} quedaría huérfano sin reemplazo"))
        elif op == "retire":
            if not reason:
                errors.append(("retire-no-reason", es, ni,
                               "retire requiere campo reason"))
        elif op == "replace":
            if not (es and ni):
                errors.append(("replace-incomplete", es, ni,
                               "replace requiere ES y NI"))

    return errors, warns

def _cleanup_ambig_after_remove(removed_es_l, old_ni_surf):
    """Re-evaluate an NI ambiguity after an ES entry has been removed (v102).

    After a delete/retire/replace, if the NI form was blocked in ``AMBIG_NI``
    because of a collision with the removed ES, the candidate set is
    recomputed. When at most one candidate remains, the NI form leaves
    ``AMBIG_NI`` and, if the survivor is still a key of ``ES2NI``, it is
    restored in ``NI2ES`` so that the reverse lookup works again.

    Args:
        removed_es_l: Lower-cased ES surface that was removed.
        old_ni_surf: NI surface the removed entry pointed to (may be empty).
    """
    if not old_ni_surf:
        return
    ni_key = old_ni_surf.lower()
    if ni_key not in AMBIG_NI:
        return

    remaining = set()
    for candidate in AMBIG_NI[ni_key]:
        if candidate.lower() != removed_es_l:
            remaining.add(candidate)
    AMBIG_NI[ni_key] = remaining
    if len(remaining) > 1:
        return  # still ambiguous

    survivors = AMBIG_NI.pop(ni_key, set())
    if not survivors or ni_key in NI2ES:
        return
    survivor_es = next(iter(survivors))
    if survivor_es.lower() in ES2NI:
        # The pair_id of the survivor is not known here; store it empty.
        NI2ES[ni_key] = (survivor_es, "")

def apply_lex_patches():
    """Apply every lexicon patch file found in the current working directory.

    Patch files are the directory entries whose name matches
    LEX_PATCH_PATTERN; they are applied in sorted filename order. When both
    ``X.csv`` and ``X.csv.gz`` exist, only the compressed file is used. Each
    file is read as UTF-8 CSV with a header row; the columns consulted are
    ``op``, ``source_es``, ``target_ni``, ``pos_es``, ``es_morph``,
    ``pair_id`` and ``reason``.

    Before a patch is applied it is linted with ``_patch_pre_lint``, which is
    also given the ES/NI values that later patches add, alias or replace.
    Lint findings are recorded in LEX_PATCH_LOG.

    Supported ``op`` values:
        add      -- register ES->NI if ES is new; for an existing ES only a
                    noun/verb homograph reading may be added.
        override -- unconditionally set ES->NI and NI->ES.
        alias    -- register ES->NI only (no inverse) if ES is new.
        delete   -- move the entry to the graveyard and remove it.
        replace  -- move the old entry to the graveyard and set the new one;
                    refused when the new NI already belongs to another ES.
        retire   -- like delete, but a ``reason`` is mandatory.
    Any other value is logged and skipped.

    After all patches, ES_FOLD and NI_FOLD are rebuilt from the current
    ES2NI / NI2ES keys if at least one operation was applied.

    Returns:
        None. The lexicon dictionaries and LEX_PATCH_LOG are mutated in
        place; progress is printed to stdout.

    Raises:
        RuntimeError: if STRICT_PATCHES is truthy and the linter reports
            errors for a patch.
    """
    _load_known_legacy_orphans()

    try:
        all_files = os.listdir(".")
    except Exception as e:
        debug_print(f"[PATCH] No se pudo listar el directorio: {e}")
        return

    patch_files = sorted([f for f in all_files if LEX_PATCH_PATTERN.match(f)])
    # If both X.csv and X.csv.gz exist, prefer the compressed file and ignore
    # the plain one, so a migration never applies the same operations twice.
    _gz_bases = {f[:-3] for f in patch_files if f.endswith(".csv.gz")}  # strip ".gz"
    _shadowed = [f for f in patch_files if f.endswith(".csv") and f in _gz_bases]
    if _shadowed:
        patch_files = [f for f in patch_files if f not in _shadowed]
        debug_print(f"[PATCH] Ignorando {len(_shadowed)} .csv sombreados por su .csv.gz")
    if not patch_files:
        debug_print("[PATCH] No se encontraron archivos NNN_*.csv")
        return

    print(f"[PATCH] Aplicando {len(patch_files)} archivo(s) de parche...")
    totals = {"add":0,"override":0,"alias":0,"delete":0,"replace":0,"retire":0,"skipped":0}

    # Parse every patch up front: the linter of patch i needs to look ahead
    # at what patches i+1.. will add.
    parsed_patches = []
    read_errors = {}
    for pf in patch_files:
        try:
            if pf.endswith(".gz"):
                _f = gzip.open(pf, "rt", encoding="utf-8", newline="")
            else:
                _f = open(pf, "r", encoding="utf-8", newline="")
            with _f as f:
                parsed_patches.append((pf, list(csv.DictReader(f))))
        except Exception as e:
            # Best-effort: an unreadable patch is skipped, but the cause is
            # kept so it can be reported instead of being silently lost.
            read_errors[pf] = e
            parsed_patches.append((pf, None))

    for idx, (patch_path, rows) in enumerate(parsed_patches):
        patch_name = patch_path
        if rows is None:
            print(f"[PATCH] Error leyendo {patch_name}: {read_errors.get(patch_name)}")
            continue
        # Collect the ES / NI values that later patches (re)introduce; the
        # linter uses them to downgrade "orphan" errors to deferred rescues.
        future_ni = set()
        future_es = set()
        for fp, frows in parsed_patches[idx+1:]:
            if frows is None: continue
            for r in frows:
                op = (r.get("op") or "").strip().lower()
                es = (r.get("source_es") or "").strip().lower()
                ni = (r.get("target_ni") or "").strip().lower()
                if op in ("add", "alias", "replace") and es and ni:
                    future_ni.add(ni)
                    future_es.add(es)

        ops = {"add":0,"override":0,"alias":0,"delete":0,"replace":0,"retire":0,"skipped":0}

        errors, warns = _patch_pre_lint(rows, patch_name,
                                        future_targets=future_ni,
                                        future_es=future_es)
        if errors:
            print(f"[PATCH] {patch_name}: {len(errors)} error(es) destructivo(s) detectado(s):")
            for kind, es, ni, msg in errors[:5]:
                print(f"          ✗ [{kind}] {es!r}: {msg}")
            if len(errors) > 5:
                print(f"          ... y {len(errors)-5} más")
            if STRICT_PATCHES:
                raise RuntimeError(
                    f"Parche {patch_name} no pasa el linter (STRICT_PATCHES=True). "
                    "Documenta en deprecated/_known_legacy_orphans.csv o usa op=replace.")
            for kind, es, ni, msg in errors:
                LEX_PATCH_LOG.append((patch_name, kind, es, ni, "lint-error", msg))
        if warns:
            for kind, es, ni, msg in warns:
                LEX_PATCH_LOG.append((patch_name, kind, es, ni, "lint-warn", msg))

        for row in rows:
            op = (row.get("op") or "").strip().lower()
            es = (row.get("source_es") or "").strip()
            ni = (row.get("target_ni") or "").strip()
            pos = (row.get("pos_es") or "").strip()
            morph = (row.get("es_morph") or "").strip()
            pid = (row.get("pair_id") or "").strip() or f"patch::{patch_name}"
            reason = (row.get("reason") or "").strip()

            es_l = es.lower()
            ni_l = ni.lower()

            if op == "add":
                if es_l in ES2NI:
                    # v108: if the new POS is V and the existing entry is a
                    # higher-priority ADJ/N, add the new NI as an alternative
                    # verbal reading (fill ES2NI_VERB, leave ES2NI alone).
                    # This mirrors how the base CSV is loaded for ambiguous
                    # words such as "despierto" (ADJ + V/PRS-1S).
                    old_pos = ES2NI_POS.get(es_l, "")
                    # v144: inverse case -- the new entry is N/ADJ and the
                    # existing one is V. Some common nouns only entered the
                    # lexicon as a homograph verb form (e.g. "bala" as
                    # darsitok-ke = balar PRS-3S). Promote the noun to ES2NI
                    # (contextual default) and keep the verbal reading in
                    # ES2NI_VERB; the translation engine picks by context
                    # (NOUN_CTX -> noun, VERB_CTX -> verb).
                    if pos in ("N", "ADJ") and old_pos == "V" and es_l not in ES2NI_VERB:
                        # Move the existing verbal reading to ES2NI_VERB.
                        ES2NI_VERB[es_l] = ES2NI[es_l]
                        # Promote the new entry to be the main one.
                        ni_already = ni_l in NI2ES
                        ES2NI[es_l] = (ni, pid)
                        ES2NI_POS[es_l] = pos
                        if morph: ES2NI_MORPH[es_l] = morph
                        if not ni_already:
                            NI2ES[ni_l] = (es, pid)
                        if " " in es_l and es_l not in ESPHRASE2NI:
                            ESPHRASE2NI[es_l] = (ni, pid)
                        if " " in ni_l and ni_l not in NIPHRASE2ES and not ni_already:
                            NIPHRASE2ES[ni_l] = (es, pid)
                        ops["add"] += 1
                        LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok",
                            f"{pos} promovido (verbal preservado en ES2NI_VERB)"))
                        continue
                    if pos == "V" and old_pos in ("ADJ", "N") and es_l not in ES2NI_VERB:
                        ES2NI_VERB[es_l] = (ni, pid)
                        # v109: if NI2ES points at the NON-diphthongised
                        # spelling (the less correct variant), prefer the new
                        # diphthongised form as canonical inverse.
                        # E.g. NI2ES['titśuśdum-ke']='solta' -> 'suelta'.
                        # Heuristic: undo one diphthong (ie->e, ue->o) in the
                        # new ES; if that yields the current inverse, the
                        # current inverse is the variant to replace.
                        if ni_l in NI2ES:
                            old_inv = NI2ES[ni_l][0].lower()
                            if old_inv != es_l:
                                replaced = False
                                for diph, base in (('ie','e'), ('ue','o')):
                                    if diph in es_l:
                                        # "cut" (not "idx") so the patch-loop
                                        # index is not shadowed.
                                        for cut in range(len(es_l) - 1):
                                            if es_l[cut:cut+2] == diph:
                                                cand = es_l[:cut] + base + es_l[cut+2:]
                                                if cand == old_inv:
                                                    NI2ES[ni_l] = (es, pid)
                                                    replaced = True
                                                    break
                                        if replaced: break
                        else:
                            # v146: the alternative verbal NI is brand new in
                            # NI2ES. Without this the NI->ES inverse would be
                            # missing (e.g. "tú kamsistok-ke-śe" would not
                            # translate to "tú cuentas" after patch 172). The
                            # inverse is only registered when absent, so no
                            # earlier mapping is disturbed (analogous to the
                            # "not ni_already" guard of the v144 branch).
                            NI2ES[ni_l] = (es, pid)
                            if " " in ni_l and ni_l not in NIPHRASE2ES:
                                NIPHRASE2ES[ni_l] = (es, pid)
                        ops["add"] += 1
                        LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok",
                            f"lectura verbal alternativa (principal {old_pos} preservado)"))
                        continue
                    ops["skipped"] += 1
                    LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip", "ES ya existe"))
                    continue
                # Synonymy is allowed: several ES may map to the same NI.
                # ES->NI is always registered; NI->ES keeps the first
                # (canonical) entry so no reverse ambiguity is created.
                ni_already = ni_l in NI2ES
                ES2NI[es_l] = (ni, pid)
                if not ni_already:
                    NI2ES[ni_l] = (es, pid)
                if pos:   ES2NI_POS[es_l] = pos
                if morph: ES2NI_MORPH[es_l] = morph
                if " " in es_l and es_l not in ESPHRASE2NI:
                    ESPHRASE2NI[es_l] = (ni, pid)
                if " " in ni_l and ni_l not in NIPHRASE2ES and not ni_already:
                    NIPHRASE2ES[ni_l] = (es, pid)
                ops["add"] += 1
                note = "ok (sinónimo de NI existente)" if ni_already else "ok"
                LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", note))

            elif op == "override":
                prev = ES2NI.get(es_l)
                ES2NI[es_l] = (ni, pid)
                NI2ES[ni_l] = (es, pid)
                if pos:   ES2NI_POS[es_l] = pos
                if morph: ES2NI_MORPH[es_l] = morph
                if " " in es_l:
                    ESPHRASE2NI[es_l] = (ni, pid)
                if " " in ni_l:
                    NIPHRASE2ES[ni_l] = (es, pid)
                ops["override"] += 1
                prev_str = f"era {prev[0]}" if prev else "no existía"
                LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", prev_str))

            elif op == "alias":
                if es_l in ES2NI:
                    ops["skipped"] += 1
                    LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip", "ES ya existe"))
                    continue
                ES2NI[es_l] = (ni, pid)
                if pos:   ES2NI_POS[es_l] = pos
                if morph: ES2NI_MORPH[es_l] = morph
                if " " in es_l and es_l not in ESPHRASE2NI:
                    ESPHRASE2NI[es_l] = (ni, pid)
                ops["alias"] += 1
                LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", "alias ortográfico"))

            elif op == "delete":
                if es_l not in ES2NI:
                    ops["skipped"] += 1
                    LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip", "no existía"))
                    continue
                old_ni_surf, old_pid = ES2NI[es_l]
                old_pos = ES2NI_POS.get(es_l, "")
                old_morph = ES2NI_MORPH.get(es_l, "")
                _append_to_graveyard(patch_name, {
                    "source_es": es, "target_ni": old_ni_surf,
                    "pos_es": old_pos, "es_morph": old_morph,
                    "pair_id": old_pid,
                    "reason": reason or "delete sin reason (legacy)",
                })
                del ES2NI[es_l]
                ES2NI_POS.pop(es_l, None)
                ES2NI_MORPH.pop(es_l, None)
                # Keep the ES phrase index in sync: otherwise a deleted
                # multi-word ES would remain registered as a phrase.
                ESPHRASE2NI.pop(es_l, None)
                if old_ni_surf.lower() in NI2ES and \
                   NI2ES[old_ni_surf.lower()][0].lower() == es_l:
                    del NI2ES[old_ni_surf.lower()]
                _cleanup_ambig_after_remove(es_l, old_ni_surf)
                ops["delete"] += 1
                LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", "movido a deprecated/"))

            elif op == "replace":
                if not (es and ni):
                    ops["skipped"] += 1
                    LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip",
                                          "replace requiere ES y NI"))
                    continue
                # Refuse to steal an NI that already belongs to another ES.
                if ni_l in NI2ES and NI2ES[ni_l][0].lower() != es_l:
                    ops["skipped"] += 1
                    LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip",
                                          f"NI nuevo ya pertenece a {NI2ES[ni_l][0]!r}"))
                    continue
                if es_l in ES2NI:
                    old_ni_surf, old_pid = ES2NI[es_l]
                    old_pos = ES2NI_POS.get(es_l, "")
                    old_morph = ES2NI_MORPH.get(es_l, "")
                    _append_to_graveyard(patch_name, {
                        "source_es": es, "target_ni": old_ni_surf,
                        "pos_es": old_pos, "es_morph": old_morph,
                        "pair_id": old_pid,
                        "reason": reason or f"replaced by {ni}",
                    })
                    if old_ni_surf.lower() in NI2ES and \
                       NI2ES[old_ni_surf.lower()][0].lower() == es_l:
                        del NI2ES[old_ni_surf.lower()]
                ES2NI[es_l] = (ni, pid)
                NI2ES[ni_l] = (es, pid)
                if pos:   ES2NI_POS[es_l] = pos
                if morph: ES2NI_MORPH[es_l] = morph
                # Same phrase-map refresh as "override": without it a
                # multi-word ES keeps its old NI in ESPHRASE2NI.
                if " " in es_l:
                    ESPHRASE2NI[es_l] = (ni, pid)
                if " " in ni_l:
                    NIPHRASE2ES[ni_l] = (es, pid)
                ops["replace"] += 1
                LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", reason or ""))

            elif op == "retire":
                if not reason:
                    ops["skipped"] += 1
                    LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip",
                                          "retire requiere reason"))
                    continue
                if es_l not in ES2NI:
                    ops["skipped"] += 1
                    LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip",
                                          "no existía"))
                    continue
                old_ni_surf, old_pid = ES2NI[es_l]
                _append_to_graveyard(patch_name, {
                    "source_es": es, "target_ni": old_ni_surf,
                    "pos_es": ES2NI_POS.get(es_l,""),
                    "es_morph": ES2NI_MORPH.get(es_l,""),
                    "pair_id": old_pid, "reason": reason,
                })
                del ES2NI[es_l]
                ES2NI_POS.pop(es_l, None)
                ES2NI_MORPH.pop(es_l, None)
                # Keep the ES phrase index in sync (see "delete").
                ESPHRASE2NI.pop(es_l, None)
                if old_ni_surf.lower() in NI2ES and \
                   NI2ES[old_ni_surf.lower()][0].lower() == es_l:
                    del NI2ES[old_ni_surf.lower()]
                _cleanup_ambig_after_remove(es_l, old_ni_surf)
                ops["retire"] += 1
                LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", reason))

            else:
                ops["skipped"] += 1
                LEX_PATCH_LOG.append((patch_name, "?", es, ni, "skip",
                                      f"op desconocida: {op!r}"))

        summary = ", ".join(f"{k}={v}" for k,v in ops.items() if v)
        print(f"[PATCH]   {patch_name}: {summary or 'sin cambios'}")
        for k in totals:
            totals[k] += ops[k]

    print("[PATCH] Total: " + ", ".join(f"{k}={v}" for k,v in totals.items() if v))

    # Rebuild the accent-folded indexes whenever any operation changed the
    # lexicon. "alias" is included because it adds new ES2NI keys too.
    if any(totals[k] for k in ("add","override","alias","replace","delete","retire")):
        ES_FOLD.clear(); NI_FOLD.clear()
        for es_key in ES2NI:
            fk = fold(es_key)
            if fk != es_key and fk not in ES_FOLD:
                ES_FOLD[fk] = es_key
        for ni_key in NI2ES:
            fk = fold(ni_key)
            if fk != ni_key and fk not in NI_FOLD:
                NI_FOLD[fk] = ni_key

def _restore_orphan_ni_after_patches():
    """v112: give orphaned NI forms a reverse entry once patches are applied.

    Walks ES2NI and, for every entry whose lowercase NI surface is a key of
    neither NI2ES nor AMBIG_NI, registers ``NI2ES[ni] = (es_key, pair_id)``.
    Because NI2ES is filled while iterating, the first ES key encountered for
    a given NI wins and later ones find it already present.

    Typical case (from the original notes): deleting a spurious feminine form
    ("invierna") removes the NI2ES entry of its NI while "invierno" still
    maps to that NI. NI forms listed in AMBIG_NI are deliberately left alone.

    Returns:
        None. NI2ES is mutated in place; a debug line is emitted when at
        least one entry was restored.
    """
    orphan_count = 0
    for es_key, entry in ES2NI.items():
        ni_surface, pair_id = entry
        ni_key = ni_surface.lower()
        if ni_key in NI2ES or ni_key in AMBIG_NI:
            continue
        NI2ES[ni_key] = (es_key, pair_id)
        orphan_count += 1
    if orphan_count:
        debug_print(f"[PATCH] v112: restauradas {orphan_count} entradas NI huérfanas tras deletes")

# Import-time side effect: apply the patch files first, then run the
# orphan-restoration pass, which is written as a post-patch step and
# therefore has to come second.
apply_lex_patches()
_restore_orphan_ni_after_patches()

def _recompute_ambig_ni_after_patches():
    """v133/v140: reconcile AMBIG_NI and NI2ES with the current ES2NI.

    A reverse index (NI -> list of ``(es_key, pair_id)`` currently pointing
    at it in ES2NI) is built once, then three passes run:

    1. Ghost ambiguities. An AMBIG_NI entry whose NI now has at most one ES
       pointing at it is dropped; if exactly one ES remains and NI2ES has no
       entry for that NI, the inverse is restored with that survivor.
    2. Obsolete inverses. An NI2ES entry is obsolete when the ES it names is
       missing from ES2NI or no longer maps back to that NI (``override``
       writes NI2ES without checking this). It is repointed only when exactly
       one ES currently maps to the NI. With zero candidates the entry is
       kept as is (it may be an old NI spelling still present in saved
       texts); with several candidates it is kept too, so no new ambiguity
       block is introduced.
    3. Orphans. An NI that is in neither NI2ES nor AMBIG_NI and has exactly
       one ES pointing at it gets that ES as its inverse.

    Each pass is a single linear scan. The original notes report roughly 6 s
    at startup on the full lexicon, and that doing this clean-up inside every
    ``override`` instead was prohibitively slow.

    Returns:
        None. AMBIG_NI and NI2ES are mutated in place; a debug summary is
        emitted when anything changed.
    """
    # Reverse index of the live ES2NI state, keyed by lowercase NI.
    pointing_at = {}
    for es_key, entry in ES2NI.items():
        pointing_at.setdefault(entry[0].lower(), []).append((es_key, entry[1]))

    # Pass 1: drop ambiguity marks that no longer correspond to a collision.
    ghosts_dropped = 0
    ghosts_restored = 0
    for ni_key in tuple(AMBIG_NI):
        candidates = pointing_at.get(ni_key, [])
        if len(candidates) > 1:
            continue
        AMBIG_NI.pop(ni_key, None)
        ghosts_dropped += 1
        if len(candidates) == 1 and ni_key not in NI2ES:
            es_key, pair_id = candidates[0]
            NI2ES[ni_key] = (es_key, pair_id)
            ghosts_restored += 1

    # Pass 2: repoint obsolete inverses that have a single live candidate.
    # Only values of existing keys are reassigned, so iterating is safe.
    stale_fixed = 0
    for ni_key, current in NI2ES.items():
        owner = current[0].lower()
        still_valid = owner in ES2NI and ES2NI[owner][0].lower() == ni_key
        if still_valid:
            continue
        candidates = pointing_at.get(ni_key, [])
        if len(candidates) != 1:
            # 0 candidates: orphan, left untouched; >1: left untouched too.
            continue
        es_key, pair_id = candidates[0]
        NI2ES[ni_key] = (es_key, pair_id)
        stale_fixed += 1

    # Pass 3: an NI lost its inverse (e.g. its registered ES was deleted)
    # while exactly one other ES still maps to it -> restore with that ES.
    orphans_restored = 0
    for ni_key, candidates in pointing_at.items():
        if len(candidates) != 1:
            continue
        if ni_key in NI2ES or ni_key in AMBIG_NI:
            continue
        es_key, pair_id = candidates[0]
        NI2ES[ni_key] = (es_key, pair_id)
        orphans_restored += 1

    if ghosts_dropped or stale_fixed or orphans_restored:
        debug_print(f"[PATCH] v140: AMBIG limpiados={ghosts_dropped} (restaurados={ghosts_restored}), NI2ES obsoletos corregidos={stale_fixed}, huérfanos restaurados={orphans_restored}")

# Import-time side effect: run the AMBIG_NI / NI2ES reconciliation once, after
# the patch application and orphan-restoration calls earlier in the file.
_recompute_ambig_ni_after_patches()

def _register_ipfv_3s_reverse():
    """Register 3S reverse entries derived from the 1S forms in NI2ES.

    Every NI2ES key ending in one of the listed 1S suffixes yields a 3S key
    by dropping its last two characters (the trailing "-n"). When that 3S key
    is not yet in NI2ES it is added with the same value as the 1S form;
    existing 3S keys are never overwritten. New keys are inserted only after
    the scan, so they are not themselves scanned.

    v147 note: SBJ uses -ni-n and SBJ_IPFV uses -nibo-n (the dominant
    suffixes in the lexicon); the older -na-n / -nabo-n are residual
    (~93 entries) and are kept for compatibility.

    Returns:
        None. NI2ES is mutated in place and a debug summary is always emitted.
    """
    first_person_endings = ('-ska-n', '-tei-n', '-ni-n', '-nibo-n',
                            '-na-n', '-nabo-n')
    pending = {}
    already_present = 0
    for form_1s, target in NI2ES.items():
        if not form_1s.endswith(first_person_endings):
            continue
        form_3s = form_1s[:-2]
        if form_3s in NI2ES:
            already_present += 1
        else:
            pending[form_3s] = target
    registered = len(pending)
    NI2ES.update(pending)
    debug_print(f"3S reverse map (IPFV/COND/SBJ/SBJ_IPFV): {registered} formas registradas, {already_present} ya existían")

# Import-time side effect: derive the 3S reverse entries once, after the
# patch and reconciliation passes above have updated NI2ES.
_register_ipfv_3s_reverse()

# ====== VERSION MARKER ======
# v103 (2026-05-10): 
#   - Fix `:.` espurio: si el último token de buf en strip_modal_suffixes_ni
#     ya es signo cerrante (.,!?…:;), no añadir un punto extra al final.
#   - Fix punto espurio tras placeholder: si último de buf es [SIN-LEX:...]
#     o similar, tampoco añadir end_tok. Evita "Aitor." cuando input no tenía.
#   - Fix preservar formato hora `\d:\d`: en ES→NI, los dígitos en patrón
#     "número : número" no se convierten a numerales NI, así "6:30" sale
#     como "6 : 30" en NI y vuelve como "6:30" en el roundtrip.
#   - Fix fusión clítico ambiguo + número intermedio: el regex _SAFE_AFTER_AMBIG
#     ahora acepta cuantificador opcional entre "cada/todos los" y la palabra
#     temporal: "cada quince días", "todos los 15 años", etc.
#   - Fix ¡/¿ saltando \n\n: add_inverted_openers ahora trata NEWLINE_TOK como
#     break, así la apertura ¡/¿ se inserta DESPUÉS del salto de línea, no
#     ANTES.
#   - Fix ¡/¿ en preguntas sí/no: la heurística retrocede SIEMPRE hasta
#     SENT_END/NEWLINE_TOK (inicio absoluto), y luego si hay palabra-q en
#     el tramo, posiciona el ¿/¡ justo antes; si no hay palabra-q (preg.
#     sí/no), va al inicio absoluto. Evita "Has llegado bien, ¿Hijo?".
#   - Fix tilde olvidada (v101) anti-falso-positivo: la regla "el/tu + V → tónico"
#     ahora desactiva si la forma plural de la palabra siguiente existe como
#     sustantivo (POS=N), lo que delata ambigüedad sustantivo/verbo.
#     Cubre: "el destino" (destinos=N), "el dedo" (dedos=N), "el marco", etc.
#
# v102 (2026-05-10): Helper _cleanup_ambig_after_remove. Tras delete/retire,
# si el NI estaba bloqueado en AMBIG_NI por colisión con el ES borrado,
# recalcula: si queda un único candidato superviviente, lo restaura en
# NI2ES para que la inversa vuelva a funcionar. Antes, una colisión detectada
# en la carga inicial quedaba bloqueada aunque luego se borrase la
# entrada conflictiva.
#
# v101 (2026-05-09): Ampliada detección de tilde olvidada con "tu/el + V".
#
# v104 (2026-05-11): op "add" en patches admite SINÓNIMOS NI. Antes, si una
# entrada nueva proponía una NI ya ocupada, se descartaba por "ambigüedad".
# Ahora se permite la sinonimia (varias ES → 1 NI): se registra el mapeo
# ES2NI siempre; NI2ES conserva la primera entrada registrada (canónica),
# que será la que devuelva la inversa NI→ES. Esto desbloquea casos como
# librito/librillo (ambos diminutivos legítimos de "libro" → lundokbek-bdo)
# o florcita/florecita (variantes ortográficas del mismo diminutivo).
# v111 (2026-05-13): Regla 4b en _choose_es_to_ni - "det + N + homógrafo → V"
# para palabras con AMBAS entradas N y V en el lex (vía pos_es).
# Ej: "El gato araña el sofá" → araña = V; "La oveja bala fuerte" → bala = V.
# Parche 105 añade las entradas V para araña, bala, palma, turba, fresa.
#
# v112 (2026-05-13): Pasada lineal final _restore_orphan_ni_after_patches que
# restaura NI2ES huérfanos tras DELETE. Si una entrada ES→NI existe pero su NI
# no está en NI2ES ni en AMBIG_NI, restaurarla en NI2ES. NO actúa cuando hay
# AMBIG (sin criterio universal para elegir winner). Esto resuelve el caso
# donde, al borrar una fem espuria (invierna), el masc correspondiente
# (invierno) quedaba sin entrada inversa.
#
# v113 (2026-05-13): Fix bug en apply_apocope_es. Las funciones 
# _is_masc_sg_noun_candidate y _is_singular_noun_candidate ahora verifican que
# next_word sea realmente N o ADJ en el lex, no cualquier palabra que cumpla
# los filtros morfológicos. Esto evita que "banco grande, leía" se convierta
# en "banco gran, leía" (porque "leía" es V, no sustantivo).
# v114 (2026-05-14): Fix bug 'eslo'. Quitado PRS de _MORPH_ADMITS_ENCLITIC para 
# que verbos en presente NO admitan enclíticos. "es lo" no se fusiona a "eslo".
# v115 (2026-05-14): Fix bug 'punto faltante tras placeholder'. La lógica que
# evitaba "Aitor." al final del texto causaba que oraciones consecutivas con
# nombre propio al final de la primera se pegaran ("decía Pablo era..."). Ahora
# se añade siempre el punto; el centinela final se limpia por la línea siguiente.
# v116 (2026-05-14): (a) Añadido em/en dash a _SAFE_AFTER_AMBIG para que 
# "intentar lo — respondió" → "intentarlo — respondió". (b) Ampliada regla 
# tildes interrog: cuando+verbo SBJ/IPFV/PST es temporal subordinado, sin tilde.
# v117 (2026-05-14): (a) Apocope blacklist: añadidas palabras gramaticales
# que NUNCA permiten apocope aunque tengan POS=N en lex (porque, que, aunque,
# sino, etc.). Arregla "Grande porque" → "Gran porque". (b) Tildes interrog:
# solo cancelar tilde de cuando/donde/como si NO es la primera palabra
# alfabética de la interrog. "¿Cuándo fue eso?" mantiene tilde (es primera).
# v118 (2026-05-14): Fix apocope con puntuación pegada. Si el token a apocopar
# tiene una coma, punto u otro signo adherido (ej. "grande,"), NO se apocopa.
# Significa que el adjetivo está aislado por pausa, no atributivo de la 
# siguiente palabra. "es grande, espaciosa" mantiene "grande,".
# v119 (2026-05-14): Añadidos un/una/unos/unas/algún/alguna/algunos/algunas
# al _SAFE_AFTER_AMBIG. Permite fusionar "dando le un beso" → "dándole un beso".
# v120 (2026-05-14): Removidos plurales unos/unas/algunos/algunas del SAFE_AFTER.
# La fusión DOUBLE_CLIT disparaba mal en "recordarnos los unos a los otros"
# (expresión idiomática con 'unos' = pronombre) generando "recordárnoslos unos".
# Solo singulares un/una/algún/alguna son seguros como determinantes.
# v121 (2026-05-14): Capitalización tras cierre de cita. Caracteres »/"/"/'
# son ahora transparentes cuando start=True (igual que las aperturas), de modo
# que tras "...todos. »" la siguiente palabra ("Mi padre") se capitaliza
# correctamente en lugar de quedar "mi padre".
# v122 (2026-05-14): Fix bug "punto extra al final". El regex 4 de
# postprocess_spanish añadía espacio entre dos signos de puntuación pegados
# ("X.Y" → "X. Y"), incluso cuando Y era también puntuación. Esto causaba
# que ".⏎." (donde ⏎ es NEWLINE_TOK) se convirtiera en ". ⏎." y luego en
# ".\n." (un punto, salto, otro punto). Solución doble: (a) lookahead 
# excluye .,;:!? del salto de espacio; (b) limpieza final que elimina punto
# duplicado al final del texto.
# v124 (2026-05-14): tres fixes prioritarios sobre v123:
#  1) expand_enclitics: el fallback de doble-clítico ahora exige stem2 >= 3 chars
#     → arregla bug CRÍTICO "estela" → "es"+"te"+"la" (descomponía sustantivos
#     comunes como verbo+enclíticos por raíz de 2 chars).
#  2) _SAFE_AFTER_AMBIG ampliado con artículos (el/la/los/las), preposiciones
#     (de/en/a/por/...) y placeholders [SIN-LEX:...] → fusiona enclíticos en:
#     "abrazándolos efusivamente", "acariciándole el pelo",
#     "reprochándoles la ausencia", "perderlo de vista".
#  3) 'por' quitado de INTERR_PLAIN → arregla "Por cierto, ¿sabes...?" donde
#     el motor movía '¿' al inicio absoluto antes de 'Por'.
# v125 (2026-05-14): fix interrog perdido (siempre añade modal -na/-ba)
# v126 (2026-05-14): triada de fixes residuales:
#   #2 _verb_admits_enclitic admite verbos V con morph PRS si su infinitivo
#      existe (rescata "porta te" → "pórtate", PRS 3S homógrafa con IMP 2S).
#   #5 post-procesado regex que añade tilde diacrítica a formas
#      gerundio+clítico pegadas sin tilde ("abriendose" → "abriéndose").
#   #6 reordenar signo de pregunta/exclamación + comilla cerrante
#      ("? »" → "»?", "! »" → "»!").
# v126b (2026-05-14): rescate homógrafos N/V en _verb_admits_enclitic.
# El v126 (fix #2 aplicado mal en la sesión anterior por sobreescritura del
# archivo) solo activaba el rescate IMP si la palabra estaba como V en el lex.
# "porta" está catalogada como N (sustantivo "puerta") y aún así puede ser
# IMP 2S de "portar". El v126b añade un check independiente del POS principal:
# si v termina en a/e/i y su infinitivo regular existe como V, admitir el
# enclítico. El contexto sintáctico ("v + me/te/se/...") garantiza que es
# verbal: los sustantivos no toman enclíticos.
# v127 (2026-05-15): elimina espacio antes de comilla cerrante »
# tras todos los fixes anteriores. Caso: "bien. »" → "bien.»",
# "hielo. »" → "hielo.»". No interfiere con el fix #6 (?»/!») porque
# se aplica después. Postprocesado regex sencillo.
# v128 (2026-05-15): _SAFE_AFTER_AMBIG y _SAFE_AFTER_AMBIG_DOUBLE incluyen
# adverbios genéricos en -mente como contexto seguro tras clítico AMBIG.
# Caso: "abrazando los efusivamente" → "abrazándolos efusivamente".
# El patrón anterior solo cubría 4 adverbios específicos (fuertemente,
# suavemente, fijamente, atentamente); este captura cualquiera con [a-z]+mente.
# v133 (2026-05-16): ampliación del fix de motor de v132. Añade una segunda
# pasada que detecta y corrige entradas NI2ES OBSOLETAS: NI2ES[ni]=es donde
# ES2NI[es] ya no apunta a este ni. Esto pasa porque el motor en `override`
# sobrescribe NI2ES sin comprobar si la entrada vieja allí presente está
# obsoleta o si el NI está en AMBIG.
#
# Caso real desencadenante: NI2ES['bemuŕ-k']='gruesas' tras el parche 103,
# pero el parche 128 movió gruesa→ti-bemuŕ-k. El único ES que apunta a
# bemuŕ-k es ahora 'gruesos'. v132 no lo corregía porque solo restauraba
# NI2ES si estaba vacío. v133 corrige también cuando hay superviviente
# único.
#
# Opción conservadora: solo corregir cuando hay EXACTAMENTE 1 superviviente.
# No eliminar entradas huérfanas (0 candidatos), por si son versiones viejas
# del NI usadas en textos guardados. No promover multi-candidato a AMBIG.
#
# Coste medido: ~6 s al arranque (3 ejecuciones: 5.5, 5.5, 7.4 s). Una
# sola pasada al final, no afecta el bucle interno de override. Corrige
# ~328K entradas NI2ES (verificado en lex actual con 129 parches).
#
# v150 (2026-05-22): PRO-DROP para todas las personas. En ES→NI, cuando el
# traductor produce "pronombre sujeto + verbo finito" y el verbo ya marca esa
# persona con su sufijo (-n 1S, -śe 2S, ∅ 3S, -ek 1PL, -śe-k 2PL, -r 3PL), se
# ELIMINA el pronombre redundante. Implementado en drop_redundant_pronouns_ni
# (+ helper _ni_verb_person), llamado en translate_es_to_ni_bi tras el bucle y
# antes de la modalidad; ib_toks se regenera siempre desde el `out` final.
# Conserva pronombres tónicos (tras prep. ka/ta/tan/kin/kara) y sujetos
# coordinados. NO toca el léxico (los sufijos de persona del verbo se mantienen
# y son los que cargan la persona). Desactivable con PRODROP_ENABLE=False.
# Ejemplos: "yo vivía"->tetgan-ska-n, "él vive"->tetgan-ke, "ellos viven"->
# tetgan-ke-r. La inversa NI->ES no cambia: tetgan-ska-n y tetgan-ska siguen
# dando "vivía" (ambiguo, como el español).
# Identifier of the engine version described by the changelog above.
VERSION_MARKER = "v150_2026_05_22_prodrop"
# Startup banner. Best-effort only: any exception raised while printing
# (CSV_BI is defined elsewhere in the file -- presumably the path of the
# active lexicon CSV; confirm) is deliberately swallowed so that a logging
# problem never prevents the module from loading.
try:
    print(f"[Neoíbero translator] versión cargada: {VERSION_MARKER}", flush=True)
    print(f"[Neoíbero translator] léxico activo: {CSV_BI}", flush=True)
except Exception:
    pass

# ====== Utilidad n-grama (longest-match, BI-only) ======
def _longest_match(tokens, i, phrase_map):
    if not phrase_map: return (0, None)
    max_span = 0; surface = None
    for span in range(1, MAX_NGRAM+1):
        if i+span > len(tokens): break
        cand = " ".join(lower(t) for t in tokens[i:i+span])
        if cand in phrase_map:
            max_span = span
            surface = phrase_map[cand][0]
        else:
            fcand = " ".join(fold(lower(t)) for t in tokens[i:i+span])
            if fcand != cand and fcand in phrase_map:
                max_span = span
                surface = phrase_map[fcand][0]
    return (max_span, surface)

# ====== Post-proceso ES (espacios + mayúsculas de oración) ======
def sentence_case_spanish(s: str) -> str:
    """Upper-case the first letter of every sentence in ``s``.

    A sentence start is pending at the beginning of the text, after any of
    ``. ? ! …``, after an opening ``¿``/``¡``, and after a ``:`` that is
    followed by a line break (real ``\\n`` or ``NEWLINE_TOK``). While a start
    is pending, whitespace, the newline marker, opening wrappers and closing
    quotes are copied through without consuming it; the first alphabetic
    character is upper-cased; any other character cancels it.

    Text inside square brackets (placeholders such as ``[SIN-LEX:...]``) is
    copied verbatim and never changes the capitalisation state.
    """
    # v95: em dash and en dash count as openers (dialogue lines).
    openers = "¿¡\"'«(“‘[—–"
    # v121: closing quotes are transparent, the pending start survives them.
    closers = '»"”\''
    transparent = frozenset(openers + closers)
    restarters = frozenset(".?!…¿¡")

    pieces = []
    pending = True
    bracketed = False
    prev_visible = None  # v103: last character that is neither blank nor NEWLINE_TOK

    for ch in s:
        blank = ch.isspace() or ch == NEWLINE_TOK

        if ch == '[':
            bracketed = True

        if bracketed:
            # Placeholder content: copy as is, state untouched.
            pieces.append(ch)
        else:
            # v103: ":" + line break restarts capitalisation (letters, lists).
            if prev_visible == ':' and (ch == '\n' or ch == NEWLINE_TOK):
                pending = True

            if pending:
                if blank or ch in transparent:
                    # v100: NEWLINE_TOK behaves like whitespace here.
                    pieces.append(ch)
                elif ch.isalpha():
                    pieces.append(ch.upper())
                    pending = False
                else:
                    # Digits and other symbols consume the pending start.
                    # ("¿"/"¡" never reach this branch: they are openers.)
                    pieces.append(ch)
                    pending = False
            else:
                pieces.append(ch)
                if ch in restarters:
                    pending = True

        if ch == ']':
            bracketed = False

        if not blank:
            prev_visible = ch

    return "".join(pieces)

def postprocess_spanish(s: str) -> str:
    """Normalise spacing around punctuation in Spanish output, then apply
    sentence capitalisation via ``sentence_case_spanish``.

    The substitutions run in a fixed order; each one works on the result of
    the previous one.
    """
    rewrites = (
        # "12 : 30" -> "12:30"
        (r"(\d)\s*:\s*(\d)", r"\1:\2"),
        # "3 , 5" / "3 . 5" -> "3,5" / "3.5"
        (r"(\d)\s*([.,])\s*(\d)", r"\1\2\3"),
        # no blank before closing punctuation
        (r"\s+([,.;:!?])", r"\1"),
        # v122: one blank after ? . ! ; unless what follows is whitespace,
        # end of text, further punctuation or NEWLINE_TOK (so ".⏎." is not
        # turned into ". ⏎.").
        (rf"([?.!;])(?!\s|$|[.,;:!?]|{re.escape(NEWLINE_TOK)})([^\s])", r"\1 \2"),
        # no blank after an opening ¿ / ¡
        (r"([¿¡])\s+", r"\1"),
        # collapse whitespace runs
        (r"\s{2,}", " "),
    )
    for pattern, replacement in rewrites:
        s = re.sub(pattern, replacement, s)
    return sentence_case_spanish(s.strip())

# ====== Pro-drop (v150) — elisión del pronombre sujeto redundante ======
#
# QUÉ HACE:
#   En ES→NI, cuando el traductor produce "pronombre sujeto + verbo finito"
#   y el verbo ya marca la misma persona con su sufijo, se ELIMINA el pronombre
#   (queda solo el verbo). El verbo carga la persona; el pronombre era redundante.
#     yo vivía      nai tetgan-ska-n  -> tetgan-ska-n   (la -n ya dice "1S")
#     tú vives      śe tetgan-ke-śe   -> tetgan-ke-śe   (-śe = 2S)
#     él vive       nun tetgan-ke     -> tetgan-ke      (3S = sin sufijo)
#     nosotros..    eki tetgan-ke-ek  -> tetgan-ke-ek   (-ek = 1PL)
#     vosotros..    śek tetgan-ke-śe-k-> tetgan-ke-śe-k (-śe-k = 2PL)
#     ellos viven   nun-k tetgan-ke-r -> tetgan-ke-r    (-r = 3PL)
#
# POR QUÉ:
#   El neoíbero es pro-drop por diseño (el manual ya dice que los sujetos
#   pronominales se eliden cuando la persona queda clara por el sufijo verbal).
#   El motor insertaba pronombre Y sufijo a la vez, lo cual era redundante.
#   Esta función aplica la elisión que el sistema ya tenía conceptualmente.
#
# QUÉ NO TOCA / SEGURIDAD:
#   - NO elimina el pronombre si va precedido de preposición (ka/ta/tan/kin/
#     kara): ahí es pronombre TÓNICO, no sujeto ("ka nun" = "a él").
#   - NO elimina si el verbo no concuerda en persona con el pronombre.
#   - NO elimina si entre el pronombre y el verbo aparece otro pronombre sujeto
#     (sujetos coordinados: "nai ne śe" = "yo y tú").
#   - Solo actúa dentro de la misma cláusula (para en puntuación, CLAUSE_BREAKS,
#     SENT_END y NEWLINE_TOK).
#   - Para 3S el verbo no tiene sufijo de persona; se detecta como "TAM final
#     sin sufijo personal". Riesgo bajo de falso positivo con sustantivos que
#     casualmente tengan forma raíz-TAM; si apareciera, basta PRODROP_ENABLE=False.
#   - Se aplica ANTES de add_modal_suffixes_es2ni; la línea ibérica (ib_toks)
#     se regenera después desde el `out` final, así que queda sincronizada.
#
# Para desactivar el pro-drop por completo: PRODROP_ENABLE = False
# Master switch for pro-drop: when False, drop_redundant_pronouns_ni returns
# its input unchanged.
PRODROP_ENABLE = True

# NI subject pronouns -> grammatical person they encode.
# (he/she = nun/ti-nun share 3S; they masc./fem. = nun-k/ti-nun-k share 3PL)
_PRODROP_PRONOUNS = {
    "nai": "1S",
    "śe": "2S",
    "nun": "3S",
    "ti-nun": "3S",
    "eki": "1PL",
    "śek": "2PL",
    "nun-k": "3PL",
    "ti-nun-k": "3PL",
}

# Prepositions after which the "pronoun" is a stressed (tonic) form rather
# than a subject: never elide it there.
_PRODROP_PREP_BLOCK = frozenset({"ka", "ta", "tan", "kin", "kara"})

# Finite TAM markers that may carry a person suffix (excludes INF ∅, GER -min,
# PART -ir, IMP -ka, which take no part in subject-verb agreement).
_PRODROP_TAM_SET = frozenset({"ke", "bo", "ta", "ska", "tei", "ni", "nibo"})

def _ni_verb_person(tok):
    """Devuelve la persona ('1S','2S','3S','1PL','2PL','3PL') de un verbo
    finito NI según su sufijo, o None si no es un verbo finito reconocible.
    Requiere estructura con guiones (raíz-TAM[-persona])."""
    t = (tok or "").lower()
    if not t or t.startswith("["):
        return None
    # Quitar sufijo modal interrogativo/exclamativo -na/-ba si está presente.
    t = re.sub(r"-(na|ba)$", "", t)
    if "-" not in t:
        return None
    parts = t.split("-")
    if len(parts) < 2:
        return None
    last = parts[-1]
    # 2PL: ...-TAM-śe-k
    if last == "k":
        if len(parts) >= 3 and parts[-2] == "śe" and parts[-3] in _PRODROP_TAM_SET:
            return "2PL"
        return None
    # 1PL: ...-TAM-ek
    if last == "ek":
        if parts[-2] in _PRODROP_TAM_SET:
            return "1PL"
        return None
    # 2S: ...-TAM-śe
    if last == "śe":
        if parts[-2] in _PRODROP_TAM_SET:
            return "2S"
        return None
    # 3PL: ...-TAM-r
    if last == "r":
        if parts[-2] in _PRODROP_TAM_SET:
            return "3PL"
        return None
    # 1S: ...-TAM-n
    if last == "n":
        if parts[-2] in _PRODROP_TAM_SET:
            return "1S"
        return None
    # 3S: el último elemento es un TAM finito (sin sufijo de persona)
    if last in _PRODROP_TAM_SET:
        return "3S"
    return None

def drop_redundant_pronouns_ni(tokens):
    """Remove NI subject pronouns whose following finite verb already marks
    the same person.

    A pronoun is kept when it follows one of the blocking prepositions (tonic
    use), when another subject pronoun appears before any finite verb
    (coordinated subjects), when no finite verb is found before the clause
    ends, or when the verb's person differs.

    Returns ``tokens`` itself when pro-drop is disabled, the input is empty or
    nothing is dropped; otherwise a new list without the elided pronouns.
    """
    if not PRODROP_ENABLE or not tokens:
        return tokens

    total = len(tokens)

    def _follows_blocking_prep(pos):
        # Nearest non-punctuation token to the left decides.
        back = pos - 1
        while back >= 0 and tokens[back] in VISIBLE_PUNCT:
            back -= 1
        return back >= 0 and lower(tokens[back]) in _PRODROP_PREP_BLOCK

    def _person_of_clause_verb(pos):
        # First finite verb to the right inside the same clause, if any.
        for ahead in range(pos + 1, total):
            candidate = tokens[ahead]
            if candidate in SENT_END or candidate in CLAUSE_BREAKS or candidate == NEWLINE_TOK:
                return None
            if candidate in VISIBLE_PUNCT:
                continue
            if lower(candidate) in _PRODROP_PRONOUNS:
                # Another subject pronoun: coordinated subjects.
                return None
            marked = _ni_verb_person(candidate)
            if marked is not None:
                return marked
        return None

    redundant = set()
    for pos, tok in enumerate(tokens):
        person = _PRODROP_PRONOUNS.get(lower(tok))
        if person is None or _follows_blocking_prep(pos):
            continue
        if _person_of_clause_verb(pos) == person:
            redundant.add(pos)

    if not redundant:
        return tokens
    return [tok for pos, tok in enumerate(tokens) if pos not in redundant]

# ====== Traducción BI estricta ======
def translate_es_to_ni_bi(text:str):
    """Translate Spanish text to Neo-Iberian (ES→NI) with the strict BI lexicon.

    Pipeline, as implemented below:
      1. Normalise line endings and replace every newline by NEWLINE_TOK.
      2. Tokenise (simple_tokenize) and split enclitics (expand_enclitics).
      3. Walk the tokens: punctuation and placeholders pass through; multi-word
         phrases are matched through ESPHRASE2NI; capitalised words missing
         from the lexicon are kept verbatim; remaining words go through the
         noun/verb disambiguation heuristics of _choose_es_to_ni; numbers are
         converted with digit_to_ni (or kept as digits inside an h:mm pattern);
         anything else becomes "[SIN-LEX:<token>]".
      4. Apply pro-drop, then (if MODAL_SUFFIX_ENABLE) the modal suffixes.
      5. Rebuild the Iberian-script line from the final token list.

    Returns a tuple (ni_text, ib_html): the detokenised NI text with real
    newlines restored, and the HTML for the Iberian-script line.
    """
    # v99: the text is processed as a single block. The user's line breaks are
    # replaced by a transparent marker (NEWLINE_TOK) that travels through the
    # pipeline without triggering sentence closing, modality or
    # capitalisation; at the end they are turned back into \n.
    text = (text or "").replace("\r\n", "\n").replace("\r", "\n")
    text = text.replace("\n", f" {NEWLINE_TOK} ")

    toks = simple_tokenize(text)
    toks = expand_enclitics(toks)

    # Previous-word contexts that favour the nominal reading of a homograph.
    _NOUN_CTX = frozenset({'el','la','los','las','un','una','unos','unas','al','del',
                           'de','en','con','por','para','a','sin','sobre','entre',
                           'hacia','hasta','desde','contra','según','ante','bajo','tras',
                           'mi','tu','su','mis','tus','sus','nuestro','nuestra',
                           'nuestros','nuestras','vuestro','vuestra','vuestros','vuestras',
                           'este','esta','estos','estas','ese','esa','esos','esas',
                           'aquel','aquella','aquellos','aquellas','cada','otro','otra',
                           'mucho','mucha','muchos','muchas','poco','poca','pocos','pocas',
                           'todo','toda','todos','todas','algún','alguna','ningún','ninguna',
                           'buen','mal','gran','primer','tercer','qué','cuánto','cuánta'})
    # Previous-word contexts that favour the verbal reading of a homograph.
    _VERB_CTX = frozenset({'yo','tú','él','ella','nosotros','nosotras','vosotros','vosotras',
                           'ellos','ellas','usted','ustedes',
                           'se','me','te','nos','os','le','les','lo',
                           'no','ya','también','tampoco','nunca','siempre','aún','todavía',
                           'que','quien','quienes','donde','cuando','como','si',
                           # v148: forms of the auxiliary "haber" for compound tenses.
                           # When followed by a word with a double ADJ/V_PART reading,
                           # V_PART must be chosen (kamsistok-ir, not gusdenos-ir).
                           'he','has','ha','hemos','habéis','han',
                           'había','habías','habíamos','habíais','habían',
                           'hube','hubiste','hubo','hubimos','hubisteis','hubieron',
                           'habré','habrás','habrá','habremos','habréis','habrán',
                           'habría','habrías','habríamos','habríais','habrían',
                           'haya','hayas','hayamos','hayáis','hayan',
                           'hubiera','hubieras','hubiéramos','hubierais','hubieran',
                           'hubiese','hubieses','hubiésemos','hubieseis','hubiesen'})
    _INFINITIVE_ENDINGS = ('ar','er','ir')
    # Previous words after which an infinitive (or verbal reading) is preferred.
    _INFINITIVE_CTX = frozenset({'de','sin','para','por','al','antes','tras','hasta'})
    # Short forms that take the verbal reading whenever one exists.
    _VERB_ALWAYS = frozenset({'son','es','ha','he','era','fue','fui','van',
                              'dan','das','den','des','hay','doy','soy','voy',
                              'iba','di','haya'})

    # Time nouns and quantifiers used to detect temporal "hace ..." ("... ago").
    _TIME_WORDS = frozenset({'año','años','día','días','mes','meses',
                             'semana','semanas','hora','horas',
                             'minuto','minutos','segundo','segundos',
                             'tiempo','rato','siglo','siglos',
                             'década','décadas','momento','momentos',
                             'instante','instantes','jornada','jornadas',
                             'noche','noches','tarde','tardes','mañana','mañanas',
                             'milenio','milenios'})
    _TIME_QUANTIFIERS = frozenset({'mucho','muchos','mucha','muchas',
                                    'poco','pocos','poca','pocas',
                                    'tanto','tantos','tanta','tantas',
                                    'algunos','algunas','varios','varias',
                                    'demasiado','demasiados','demasiada','demasiadas',
                                    'un','una','unos','unas',
                                    'dos','tres','cuatro','cinco','seis','siete',
                                    'ocho','nueve','diez','once','doce','trece',
                                    'catorce','quince','veinte','treinta','cuarenta',
                                    'cincuenta','cien','mil','medio','media'})

    # Word lists used by _has_explicit_3s_subject.
    _FIRST_PERSON_SUBJECTS = frozenset({'yo'})
    _THIRD_SG_PRON_SUBJECTS = frozenset({'él','ella','ello','usted','esto','eso','aquello'})
    _SINGULAR_NP_DETS = frozenset({'el','la','un','una','este','esta','ese','esa','aquel','aquella',
                                    'mi','tu','su','nuestro','nuestra','vuestro','vuestra'})

    _PREP_CTX = frozenset({'a','ante','con','contra','de','desde','en','entre',
                           'hacia','hasta','para','por','según','sin','sobre','tras'})
    # Unaccented spelling -> accented counterpart (possessive/article vs. pronoun).
    _TILDE_MAP = {'mi': 'mí', 'el': 'él', 'si': 'sí', 'tu': 'tú'}
    _PHRASE_BREAK = frozenset({'y','o','e','u','ni','que','pero','sino','como',
                               'porque','cuando','donde','aunque','pues'})

    def _accented_lookup(key):
        """Return the NI surface of the accented counterpart of ``key`` (per
        _TILDE_MAP) if that accented form is in ES2NI, else None."""
        if key in _TILDE_MAP and _TILDE_MAP[key] in ES2NI:
            return ES2NI[_TILDE_MAP[key]][0]
        return None

    def _resolve_forms(raw_key:str):
        """Look up ``raw_key`` in the lexicon, first verbatim (lower-cased) and
        then accent-folded through ES_FOLD.

        Returns (lexicon_key, surface_from_ES2NI, surface_from_ES2NI_VERB);
        either surface is None when there is no such entry."""
        key = lower(raw_key)
        if key in ES2NI:
            return key, ES2NI[key][0], ES2NI_VERB.get(key, (None, None))[0]
        fkey = fold(key)
        if fkey in ES_FOLD:
            actual = ES_FOLD[fkey]
            return actual, ES2NI[actual][0], ES2NI_VERB.get(actual, (None, None))[0]
        return key, None, None

    def _choose_es_to_ni(current_tok:str, prev_key:str, next_key:str, next_next_key:str, sent_start:bool, prev_prev_key:str=""):
        """Pick the NI surface for one Spanish token, using its neighbours to
        choose between the nominal (ES2NI) and verbal (ES2NI_VERB) reading.

        The rules are tried in order and the first one that fires wins.
        Returns None when the token has no lexicon entry at all."""
        actual_key, ni_nom, ni_verb = _resolve_forms(current_tok)
        if ni_nom is None and ni_verb is None:
            return None

        key = lower(actual_key)

        # "hace" + time expression ("hace dos años") is rendered with the NI
        # surface of "atrás" when that entry exists.
        if key == 'hace':
            is_time_context = False
            if next_key in _TIME_WORDS:
                is_time_context = True
            elif (next_key in _TIME_QUANTIFIERS or is_number(next_key)) and next_next_key in _TIME_WORDS:
                is_time_context = True
            if is_time_context:
                if 'atrás' in ES2NI:
                    return ES2NI['atrás'][0]

        # Words ending like an infinitive take their ES2NI entry directly.
        if key.endswith(_INFINITIVE_ENDINGS):
            if key in ES2NI:
                return ES2NI[key][0]
            fkey = fold(key)
            if fkey in ES_FOLD:
                return ES2NI[ES_FOLD[fkey]][0]

        if key in _VERB_ALWAYS and ni_verb:
            return ni_verb

        if key in _TILDE_MAP:
            accented_ni = _accented_lookup(key)
            if accented_ni:
                # Case 1 (pre-existing): preposition + tilde pair + (end/conjunction/punctuation)
                # Covers "para mí", "con él", "para sí (mismo)", etc.
                if prev_key in _PREP_CTX:
                    if not next_key or next_key in VISIBLE_PUNCT or next_key in _PHRASE_BREAK:
                        return accented_ni
                # Case 2 (v101): "tu"/"el" + verb → stressed (subject) pronoun.
                # Covers the frequent mistake of dropping the accent on the subject pronoun:
                #   "tu vienes mañana" → "tú vienes mañana"
                #   "el dijo que sí"   → "él dijo que sí"
                # Not applied to "mi" (cannot be a subject) nor to "si"
                # (which before a verb is the conditional conjunction, not an affirmation).
                #
                # v103 false-positive guard: if the plural form of the next
                # word exists as a noun (POS=N), the word is noun/verb
                # ambiguous and the stressed pronoun must NOT be forced.
                # Covers: "el destino" (destinos=N), "el dedo" (dedos=N),
                # "el marco" (marcos=N), "el voto" (votos=N), etc.
                if key in ('tu', 'el') and next_key:
                    next_pos = ES2NI_POS.get(next_key, '')
                    if next_pos == 'V':
                        # v143 false-positive guard: if the following verb is
                        # subjunctive or imperative, it is almost always a
                        # homographic verbal noun (el viaje, el canto, el baile, el grito,
                        # el dibujo, el rezo, el pago...). Do NOT force the stressed pronoun.
                        next_morph = ES2NI_MORPH.get(next_key, '')
                        if next_morph not in ('SBJ', 'SBJ_IPFV', 'IMP'):
                            next_pl = _pluralize_es_form(next_key)
                            if ES2NI_POS.get(next_pl, '') != 'N':
                                return accented_ni
                # Case 3 (v101): "si" at sentence start + verb + closing
                # exclamation mark = emphatic affirmation "¡Sí, vengo!".
                # Left untouched: the typical "si vienes, dímelo" must stay a
                # conditional conjunction. Only the unambiguous short answer
                # is handled: "sí" directly before punctuation.
                # (Already covered by case 1 when there is a preposition;
                # otherwise it is left to the user.)

        if prev_key in _NOUN_CTX and ni_nom:
            return ni_nom

        if prev_key in _VERB_CTX and ni_verb:
            return ni_verb

        # v111: det + N(noun) + homograph → V (subject + verb pattern)
        # Only fires for words with BOTH an N and a V entry (N/V homographs).
        # v149: do NOT apply when the current word is ADJ. The pattern
        # "det + N + ADJ" is purely nominal (a phrase with a postnominal
        # adjective), not SVO. Without this guard, the V PART forms of 175
        # (kamsistok-ir, gu-lokdutmir-ir, benkoŕ-ir...) fired wrongly after
        # "el trabajo", "un campo", etc., yielding V where Spanish needs ADJ.
        if ni_verb and ni_nom and prev_prev_key in _NOUN_CTX:
            if ES2NI_POS.get(prev_key, "") == "N":
                if ES2NI_POS.get(key, "") != "ADJ":
                    return ni_verb

        if prev_key in _INFINITIVE_CTX:
            inf_key = lower(current_tok)
            if inf_key.endswith(_INFINITIVE_ENDINGS):
                if inf_key in ES2NI:
                    return ES2NI[inf_key][0]
                ff = fold(inf_key)
                if ff in ES_FOLD:
                    return ES2NI[ES_FOLD[ff]][0]
            if ni_verb:
                return ni_verb

        if sent_start:
            if key.endswith(_INFINITIVE_ENDINGS):
                if key in ES2NI:
                    return ES2NI[key][0]
                ff = fold(key)
                if ff in ES_FOLD:
                    return ES2NI[ES_FOLD[ff]][0]
            if ni_verb and not ni_nom:
                return ni_verb

        # A determiner/preposition right after the word suggests a verb
        # followed by its object, unless the word is tagged ADJ.
        if ni_verb and next_key in _NOUN_CTX:
            pos_nom = ES2NI_POS.get(key, "")
            if pos_nom != "ADJ":
                return ni_verb

        # Default: nominal reading first, verbal reading second.
        if ni_nom is not None:
            return ni_nom
        if ni_verb is not None:
            return ni_verb
        return None

    def _has_explicit_3s_subject(left_context):
        """Decide whether an ambiguous 1S/3S verb form should be read as 3S,
        looking at (up to) the last five tokens of ``left_context``."""
        # v98: for ambiguous 1S/3S forms (subjunctives, imperfect, conditional...),
        # assume 3S by default when there is no explicit "yo". 1S must be marked
        # with its pronoun ("yo sea breve"); without it, "Sea breve" / "que pague"
        # are read as 3S (courtesy form, subordinate clause with elided subject).
        ctx = [lower(x) for x in (left_context or []) if x]
        tail = ctx[-5:] if ctx else []

        # Firm 1S trigger: explicit "yo" in the nearby context
        if any(tok in _FIRST_PERSON_SUBJECTS for tok in tail):
            return False

        # Firm 3S triggers: explicit pronoun or determined NP
        if tail and tail[-1] in _THIRD_SG_PRON_SUBJECTS:
            return True

        for j in range(len(tail)-2, -1, -1):
            if tail[j] in _SINGULAR_NP_DETS:
                if j > 0 and tail[j-1] in _PREP_CTX:
                    return False  # locative phrase, not a subject
                if j < len(tail)-1:
                    return True

        # No explicit "yo" → 3S by default
        return True

    def _adjust_ipfv_ambiguous_person(ni, left_context):
        """For NI forms ending in a 1S suffix that is ambiguous with 3S in
        Spanish, strip the final "-n" when the context is read as 3S.
        Non-string or empty input is returned unchanged."""
        if not (ni and isinstance(ni, str)):
            return ni
        # v147: include the dominant SBJ suffixes -ni-n and -nibo-n. The older
        # -na-n and -nabo-n are kept for residual entries.
        ambiguous_suffixes = ("-ska-n", "-tei-n", "-ni-n", "-nibo-n",
                              "-na-n", "-nabo-n")
        if any(ni.endswith(suf) for suf in ambiguous_suffixes):
            if _has_explicit_3s_subject(left_context):
                # Drop the trailing two characters, i.e. the "-n" person suffix.
                return ni[:-2]
        return ni

    # NOTE(review): the ib_toks list filled inside this loop is rebuilt from
    # `out` after the loop, so the per-token ib_toks appends below are
    # overwritten before use.
    out=[]; ib_toks=[]
    i=0; prev_key=""
    sent_start = True
    left_context=[]
    while i < len(toks):
        t = toks[i]
        if t in VISIBLE_PUNCT:
            # Punctuation passes through and clears the previous-word context.
            out.append(t); ib_toks.append(t); prev_key=""; i+=1
            if t in SENT_END:
                sent_start = True
                left_context=[]
            elif t == NEWLINE_TOK:
                # v103: a line break restarts the sentence (prevents "Te"
                # after "Marco:\n" from being treated as a proper noun)
                sent_start = True
                left_context=[]
            elif t in CLAUSE_BREAKS:
                left_context=[]
            continue
        if is_placeholder(t):
            out.append(t); ib_toks.append(t); prev_key=""; i+=1
            sent_start = False
            left_context.append(t)
            continue

        # v94 — No special marks: every token goes through the normal flow.

        # Multi-word phrases take priority over single-word lookup.
        span, ni_surface = _longest_match(toks, i, ESPHRASE2NI)
        if span > 1:
            ni_surface = _adjust_ipfv_ambiguous_person(ni_surface, left_context)
            out.append(ni_surface)
            ib_toks.append(georgeos_keys(tokens_from_latin(ni_surface), ni_surface))
            prev_key = lower(toks[i+span-1]) if i+span-1 < len(toks) else ""
            for k_idx in range(i, i+span):
                left_context.append(toks[k_idx])
            i += span
            sent_start = False
            continue

        # Look ahead for the next one and two word tokens, skipping
        # punctuation but stopping at a sentence end.
        next_key = ""
        next_next_key = ""
        j = i + 1
        while j < len(toks):
            if toks[j] in VISIBLE_PUNCT:
                if toks[j] in SENT_END:
                    break
                j += 1
                continue
            next_key = lower(toks[j])
            break
        if next_key:
            k = j + 1
            while k < len(toks):
                if toks[k] in VISIBLE_PUNCT:
                    if toks[k] in SENT_END:
                        break
                    k += 1
                    continue
                next_next_key = lower(toks[k])
                break

        key = lower(t)
        # v104: detect PROPER NOUN / UNATTESTED PLACE NAME.
        # If the capitalised word is NOT in the lexicon, return it AS IS
        # (no bracket), because it may be a proper noun (Ana, Marco) or an
        # unattested place name (Madrid, Valencia). The lexicon only holds
        # attested place names, so anything found there is translated.
        key_in_lex = key in ES2NI or fold(key) in ES_FOLD
        is_proper_noun = (
            len(t) >= 2
            and t[0].isupper()
            and not t.isupper()
            and t.isalpha()
            and not key_in_lex
        )
        if is_proper_noun:
            out.append(t); ib_toks.append(t)
            prev_key = key
            left_context.append(t)
            i += 1
            sent_start = False
            continue

        prev_prev_key = lower(left_context[-2]) if len(left_context) >= 2 else ""
        ni = _choose_es_to_ni(t, prev_key, next_key, next_next_key, sent_start, prev_prev_key)
        ni = _adjust_ipfv_ambiguous_person(ni, left_context)

        if ni is not None:
            out.append(ni)
            ib_toks.append(georgeos_keys(tokens_from_latin(ni), ni))
        elif is_number(key):
            # v103: if the number is part of a time pattern (\d:\d), keep the
            # digits as they are so the format survives the round trip.
            # Detection: the number is adjacent to ":" between two numbers.
            is_hour_context = False
            if i+2 < len(toks) and toks[i+1] == ':' and is_number(toks[i+2]):
                is_hour_context = True
            elif i >= 2 and toks[i-1] == ':' and is_number(toks[i-2]):
                is_hour_context = True
            if is_hour_context:
                out.append(key); ib_toks.append(key)
            else:
                ni_num = digit_to_ni(key)
                out.append(ni_num); ib_toks.append(georgeos_keys(tokens_from_latin(ni_num), ni_num))
        else:
            # Unknown word: emit a visible placeholder.
            ph = f"[SIN-LEX:{t}]"
            out.append(ph); ib_toks.append(ph)

        prev_key = key
        left_context.append(t)
        i += 1
        sent_start = False

    # v150: pro-drop — remove redundant subject pronouns BEFORE the modality
    # step. If the following finite verb already marks the person, the pronoun
    # is superfluous. See drop_redundant_pronouns_ni / PRODROP_ENABLE.
    out = drop_redundant_pronouns_ni(out)

    if MODAL_SUFFIX_ENABLE:
        out = add_modal_suffixes_es2ni(out)

    # v150: ALWAYS rebuild ib_toks from the final `out` (after pro-drop and
    # modal), so the Iberian line stays in sync with the Latin one whether
    # MODAL_SUFFIX_ENABLE is on or off.
    ib_toks = []
    for tt in out:
        if tt in VISIBLE_PUNCT or tt.startswith("["):
            ib_toks.append(tt)
        else:
            ib_toks.append(georgeos_keys(tokens_from_latin(tt), tt))

    ni_text = detokenize(out)
    # v99: turn the transparent marker back into real line breaks
    ni_text = re.sub(rf"\s*{re.escape(NEWLINE_TOK)}\s*", "\n", ni_text)
    ib_html = "<div class='ib-line'>" + escape(render_ib_with_tridots(ib_toks)) + "</div>"
    ib_html = ib_html.replace(NEWLINE_TOK, "\n")
    return ni_text, ib_html


# ====== Apócope automática (v90+v91) ======
# Apocope rules as (full form, shortened form, gender flag). apply_apocope_es
# matches the full form case-sensitively, hence the capitalised duplicates.
# Flag 'M' requires a masculine-singular candidate after the word; flag 'X'
# only requires a singular candidate.
_APOCOPE_RULES = [
    ('ninguno', 'ningún', 'M'),
    ('Ninguno', 'Ningún', 'M'),
    ('alguno',  'algún',  'M'),
    ('Alguno',  'Algún',  'M'),
    ('bueno',   'buen',   'M'),
    ('Bueno',   'Buen',   'M'),
    ('malo',    'mal',    'M'),
    ('Malo',    'Mal',    'M'),
    ('primero', 'primer', 'M'),
    ('Primero', 'Primer', 'M'),
    ('tercero', 'tercer', 'M'),
    ('Tercero', 'Tercer', 'M'),
    ('grande',  'gran',   'X'),
    ('Grande',  'Gran',   'X'),
]

# v117: words that are NEVER a candidate for the following noun/adjective in
# apocope, even if they happen to be misclassified as N in the lexicon.
# Conjunctions, prepositions, adverbs, determiners, pronouns, etc.
_APOCOPE_BLACKLIST = {
    "porque","que","aunque","sino","si","como","cuando","donde","mientras","pero",
    "y","o","u","ni","ya","no","sí","muy","más","mas","menos","tan","tanto","tanta",
    "todo","toda","todos","todas","nada","algo","alguno","alguna","algunos","algunas",
    "este","esta","estos","estas","ese","esa","esos","esas","aquel","aquella",
    "mi","tu","su","mis","tus","sus","nuestro","nuestra","nuestros","nuestras",
    "vuestro","vuestra","vuestros","vuestras","de","del","en","a","al","por","para",
    "con","sin","sobre","bajo","tras","entre","hacia","hasta","desde","durante",
    "según","contra","mediante",
    "lo","la","le","los","las","les","me","te","se","nos","os",
    "fue","es","son","era","eran","fueron","será","serán","ha","han","había",
    "habían","habrá","habrán",
}

def _is_masc_sg_noun_candidate(word):
    if not word or not word[0].isalpha():
        return False
    if not word[0].islower():
        return False
    wl = word.lower()
    # v117: blacklist explícita de palabras gramaticales
    if wl in _APOCOPE_BLACKLIST:
        return False
    if len(wl) > 3 and wl.endswith('s'):
        return False
    if wl.endswith('a'):
        return False
    if wl.endswith(('dad', 'tad', 'ción', 'sión', 'tud', 'umbre', 'eza')):
        return False
    pos = ES2NI_POS.get(wl, "")
    if pos and pos not in ("N", "ADJ"):
        return False
    return True

def _is_singular_noun_candidate(word):
    if not word or not word[0].isalpha():
        return False
    if not word[0].islower():
        return False
    wl = word.lower()
    # v117: blacklist explícita
    if wl in _APOCOPE_BLACKLIST:
        return False
    if len(wl) > 3 and wl.endswith('s'):
        return False
    pos = ES2NI_POS.get(wl, "")
    if pos and pos not in ("N", "ADJ"):
        return False
    return True

def apply_apocope_es(text):
    """Apply Spanish apocope (bueno → buen, grande → gran, ...) to ``text``.

    The text is split into alternating runs of non-blank and blank characters,
    so the original spacing is preserved. A word is shortened only when:
      * it equals the full form of a rule exactly (no attached punctuation:
        v118, "es grande, espaciosa" keeps "grande,");
      * the previous word is not one of más/mas/menos/tan/muy;
      * the next non-blank token starts with a letter and, stripped of its
        trailing punctuation, is accepted by the candidate test that matches
        the rule's gender flag.

    Falsy input is returned unchanged.
    """
    if not text:
        return text

    pieces = re.findall(r"\S+|\s+", text)
    trailing_punct = re.compile(r'[.,;:!?"\']+$')
    degree_words = ('más', 'mas', 'menos', 'tan', 'muy')

    def _nearest_word(start, step):
        # Index of the closest non-blank piece in the given direction, or None.
        pos = start + step
        while 0 <= pos < len(pieces):
            if pieces[pos].strip() != '':
                return pos
            pos += step
        return None

    # The last two pieces can never be followed by "blank + word".
    for idx in range(len(pieces) - 2):
        current = pieces[idx]
        if not current or not current[0].isalpha():
            continue

        # Degree adverb before the word ("más grande"): leave it alone.
        before = _nearest_word(idx, -1)
        if before is not None and pieces[before][0].isalpha():
            prev_word = pieces[before].lower().rstrip('.,;:!?"\'')
            if prev_word in degree_words:
                continue

        after = _nearest_word(idx, +1)
        if after is None or not pieces[after][0].isalpha():
            continue
        following = trailing_punct.sub('', pieces[after])
        if not following:
            continue

        # A rule applies only when the raw token equals its full form, which
        # also excludes tokens with punctuation attached.
        rule = next((r for r in _APOCOPE_RULES if r[0] == current), None)
        if rule is None:
            continue
        _, short_form, gender = rule

        if gender == 'M':
            eligible = _is_masc_sg_noun_candidate(following)
        elif gender == 'X':
            eligible = _is_singular_noun_candidate(following)
        else:
            eligible = False
        if eligible:
            pieces[idx] = short_form

    return ''.join(pieces)


def translate_ni_to_es_bi(text:str):
    """Translate Neo-Iberian (NI) text into Spanish with the strict 1:1 lexicon.

    Steps, in order: mask user line breaks, tokenize, optionally strip modal
    suffixes, translate token by token (multi-token phrases first, then the
    exact lowercase key, then the accent-folded key), optionally run the
    modality post-passes, detokenize, and apply Spanish surface clean-ups.

    Args:
        text: NI input text. ``None`` is treated as the empty string.

    Returns:
        The Spanish rendering as one string, with the user's line breaks
        restored.
    """
    # v99: the text is processed as a single block. User line breaks are
    # swapped for a transparent marker (NEWLINE_TOK) that travels through the
    # pipeline without triggering sentence closing, modality or
    # capitalization; they are turned back into "\n" at the end.
    text = (text or "").replace("\r\n", "\n").replace("\r", "\n")
    text = text.replace("\n", f" {NEWLINE_TOK} ")

    toks = simple_tokenize(text)

    if MODAL_SUFFIX_ENABLE:
        toks = strip_modal_suffixes_ni(toks)

    # Spanish time nouns that make a preceding 'doge' read as temporal "hace".
    # Built once per call instead of once per 'doge' token.
    es_time_words = {'año','años','día','días','mes','meses','semana','semanas',
                     'hora','horas','minuto','minutos','segundo','segundos',
                     'momento','momentos','instante','instantes',
                     'rato','ratos','tiempo','siglo','siglos',
                     'década','décadas','milenio','milenios',
                     'jornada','jornadas','noche','noches',
                     'tarde','tardes','mañana','mañanas'}

    def _is_doge_hace_context(idx, tokens):
        """True when tokens[idx] is 'doge' and one of the next two tokens
        maps (via NI2ES) to a Spanish time noun."""
        if idx >= len(tokens) or lower(tokens[idx]) != 'doge':
            return False
        if idx+1 >= len(tokens):
            return False
        for probe in (idx+1, idx+2):
            if probe >= len(tokens):
                break
            es = (NI2ES.get(lower(tokens[probe]), (None,))[0] or "").lower()
            if es and es in es_time_words:
                return True
        return False

    def _is_at_sentence_start(idx, tokens):
        """True for the first token or a token right after a sentence closer."""
        if idx == 0:
            return True
        prev = tokens[idx-1]
        return prev in SENT_END or prev == "." or prev == "!" or prev == "?"

    out=[]
    i=0
    while i < len(toks):
        t = toks[i]
        if t in VISIBLE_PUNCT:
            out.append(t); i+=1; continue
        if is_placeholder(t):
            # v103: a proper-name placeholder (form [Name], no ":" inside the
            # brackets) is emitted without brackets. Placeholders such as
            # [SIN-LEX:...], [?:...], [AMB-NI:...] stay literal so the user
            # can see them.
            inner = t[1:-1]
            if ':' not in inner:
                out.append(inner)
            else:
                out.append(t)
            i += 1
            continue
        span, es_surface = _longest_match(toks, i, NIPHRASE2ES)
        if span > 1:
            out.append(es_surface); i += span; continue

        key = lower(t)
        fkey = fold(key)

        # 'doge' followed by a time expression is rendered as "hace".
        if key == 'doge' and _is_doge_hace_context(i, toks):
            if _is_at_sentence_start(i, toks):
                out.append('Hace')
            else:
                out.append('hace')
            i += 1
            continue

        # 'galbi-ke' before a token carrying the '-ir' mark is rendered "ha".
        if key == 'galbi-ke' and i+1 < len(toks):
            nxt = lower(toks[i+1])
            is_part = nxt.endswith('-ir') or '-ir-' in nxt
            if is_part:
                out.append('ha')
                i += 1
                continue

        if key in NI2ES:
            es = NI2ES[key][0] or ""
            out.append(es if es else t)  # v141: no brackets for unattested forms
        elif fkey in NI_FOLD:
            es = NI2ES[NI_FOLD[fkey]][0] or ""
            out.append(es if es else t)  # v141: no brackets for unattested forms
        elif (key in AMBIG_NI or fkey in AMBIG_NI) and STRICT_BI_ENFORCE:
            # Explicit parentheses: `and` binds tighter than `or`, so without
            # them the strictness flag only guarded the folded-key test.
            out.append(f"[AMB-NI:{t}]")
        else:
            # Numbers and unknown tokens pass through unchanged
            # (v141: no brackets; consistent with rule v104).
            out.append(t)
        i += 1

    if MODAL_SUFFIX_ENABLE:
        out = add_inverted_openers(out)
        out = apply_interrogative_tildes(out)

    es_text = detokenize(out)
    es_text = postprocess_spanish(es_text)
    es_text = apply_apocope_es(es_text)
    es_text = fuse_enclitics_es(es_text)
    # v126 FIX #5: add the written accent to gerunds with a fused clitic. When
    # the form is assembled without a space ("abriendose paso") the gerund
    # lacks its accent; the correct fused form is "abriéndose". Matches any
    # <stem>+ando|iendo|yendo immediately followed by
    # me/te/se/nos/os/le/les/lo/la/los/las and accents the suffix vowel.
    _GER_ACCENT = {"a": "á", "e": "é", "A": "Á", "E": "É"}
    def _add_ger_tilde(m):
        stem = m.group(1)
        ger = m.group(2)
        clitic = m.group(3)
        # The stressed vowel is always the 4th character from the end of the
        # suffix ("a|ndo", "i|e|ndo", "y|e|ndo"). Indexing instead of a
        # lowercase-keyed dict keeps the original casing and avoids a KeyError
        # on capitalized matches (the regex is case-insensitive).
        return stem + ger[:-4] + _GER_ACCENT[ger[-4]] + ger[-3:] + clitic
    es_text = re.sub(
        r"\b([a-záéíóúñü]*?)(ando|iendo|yendo)(me|te|se|nos|os|le|les|lo|la|los|las)\b",
        _add_ger_tilde, es_text, flags=re.IGNORECASE
    )
    # v126 FIX #6: reorder question/exclamation mark + closing guillemet. The
    # engine places `?` right after the modal word (`mirgaŕ-na»` → `tarde ?
    # »`), but the Spanish convention puts the closing mark OUTSIDE the quotes
    # when the quoted material is the whole question. Rewrite "? »" / "! »" as
    # "»?" / "»!".
    es_text = re.sub(r"([?!])\s*»", r"»\1", es_text)
    # v127 FIX #A: drop any stray whitespace before a closing guillemet. The
    # detokenizer treats `»` as a separate token and leaves a space before it
    # ("bien. »"). Runs after fix #6 so `?»`/`!»` are not affected.
    es_text = re.sub(r"\s+»", "»", es_text)
    # v99: turn the transparent marker back into real line breaks.
    es_text = re.sub(rf"\s*{re.escape(NEWLINE_TOK)}\s*", "\n", es_text)
    # v122: collapse a duplicated final period. The sentinel `.` clean-up
    # upstream misses the case where the two periods end up separated by
    # whitespace or a newline; this final regex catches it.
    es_text = re.sub(r"\.\s*\.\s*$", ".", es_text)
    return es_text

# ====== Diagnóstico ======
def diagnose_text(text, dir_label):
    """Build an HTML coverage report for *text* in the given direction.

    The report lists how many non-punctuation, non-numeric tokens are covered
    by the lexicon, which tokens are missing, which NI tokens are ambiguous
    (NI→ES only) and which entries do not round-trip to themselves.

    Args:
        text: Input text to diagnose.
        dir_label: Direction label; a value starting with "ES" selects ES→NI,
            anything else selects NI→ES.

    Returns:
        An HTML fragment (string).
    """
    if not text or not text.strip():
        return "<em>Introduce texto para diagnosticar.</em>"

    es_to_ni = dir_label.startswith("ES")
    toks = simple_tokenize(text)
    if es_to_ni:
        toks = expand_enclitics(toks)

    # Pick the tables for the chosen direction: phrase table, forward
    # lexicon, accent-folded index and reverse lexicon.
    if es_to_ni:
        head = "ES→NI"
        phrases, forward, folded, reverse = ESPHRASE2NI, ES2NI, ES_FOLD, NI2ES
    else:
        head = "NI→ES"
        phrases, forward, folded, reverse = NIPHRASE2ES, NI2ES, NI_FOLD, ES2NI

    unknown = set()
    asym = set()
    amb = set()
    total_tokens = 0
    covered = 0

    pos = 0
    while pos < len(toks):
        tok = toks[pos]
        if tok in VISIBLE_PUNCT or is_number(tok):
            pos += 1
            continue
        total_tokens += 1

        # A multi-token phrase counts as a single covered unit.
        span, _ = _longest_match(toks, pos, phrases)
        if span > 1:
            covered += 1
            pos += span
            continue

        pos += 1
        k = lower(tok)
        fk = fold(k)

        # Ambiguity is only tracked when reading NI input.
        if not es_to_ni and (k in AMBIG_NI or fk in AMBIG_NI):
            amb.add(tok)
            continue
        if k not in forward and fk not in folded:
            unknown.add(tok)
            continue
        if k not in forward:
            k = folded.get(fk, k)

        covered += 1
        target = forward[k][0]
        back = reverse.get(lower(target))
        # Asymmetry: the reverse lookup of the translation is not the key.
        if back and lower(back[0]) != k:
            asym.add(f"{tok} → {target} → {back[0]}")

    cov_pct = (covered/total_tokens*100) if total_tokens else 100.0
    cov_html = f"<div><b>Tokens (sin puntuación/numéricos):</b> {total_tokens} &nbsp;|&nbsp; <b>Cubiertos:</b> {covered} ({cov_pct:.1f}%)</div>"

    def _items(values):
        # One <li> per value, or a dash placeholder when there are none.
        return "".join(f"<li><code>{escape(v)}</code></li>" for v in values) or "<li><i>—</i></li>"

    unk_html = _items(sorted(unknown, key=lower))
    amb_html = _items(sorted(amb, key=lower))
    asy_html = _items(sorted(asym))

    return f"<b>Diagnóstico {head}</b>{cov_html}<b>Ambiguas (NI duplicada):</b><ul>{amb_html}</ul><b>Faltantes:</b><ul>{unk_html}</ul><b>Asimetrías:</b><ul>{asy_html}</ul>"

# ====== UI (CSS / acordeones / fuentes) ======
# UI strings per interface language ("ES" / "EN"). Both entries expose the
# same keys; "acc_titles" holds the documentation accordion titles in order.
LABELS = {
    "ES": {
        "title": "Traductor Español ↔ Neoíbero",
        "subtitle": "CSV estricto (BI-only 1:1; desambiguación ligera ES→NI; .gz) — determinista",
        # Input box label / placeholder, per translation direction.
        "in_label_es": "✏️ Entrada (Español)",
        "in_label_ni": "✏️ Entrada (Neoíbero)",
        "in_ph_es": "Escribe aquí. Ej.: Veo a Ana y doy pan a Marta.",
        "in_ph_ni": "Idatzi hemen. Adib.: nuker-ke ni etxe-ka.",
        # Output widgets.
        "out_lat_esni": "📜 Salida: Neoíbero (latín)",
        "out_lat_nies": "📜 Salida: Español",
        "out_ib": "🗿 Línea ibérica",
        "out_audio": "🔊 Locución (Audio)",
        # Controls.
        "btn": "🔄 Traducir",
        "combo": "🌍 Idioma (UI + explicación)",
        "dir": "🔁 Dirección",
        "dir_opts": ["ES → NI", "NI → ES"],
        # Documentation section.
        "doc_header": "📚 Documentación y Referencia",
        "acc_titles": [
            "🌍 ¿Qué es el neoíbero?",
            "🔤 Fonología y escritura",
            "📐 Sistema nominal: género, número y caso",
            "🔄 Sistema verbal: TAM, persona y clíticos",
            "🌿 Derivación y familias de palabras",
            "🔢 Sistema numérico vigesimal",
            "📝 Sintaxis básica y partículas",
            "❓ Modalidad vascoide (-na / -ba)",
            "⚙️ Pipeline del traductor (1:1 estricto)",
            "📚 Bibliografía y créditos",
            "🧾 Glosario técnico",
        ],
    },
    "EN": {
        "title": "Spanish ↔ Neo-Iberian Translator",
        "subtitle": "Strict BI-only (1:1 surfaces; light ES→NI disambiguation; .gz) — deterministic",
        # Input box label / placeholder, per translation direction.
        "in_label_es": "✏️ Input (Spanish)",
        "in_label_ni": "✏️ Input (Neo-Iberian)",
        "in_ph_es": "Type here. E.g., Veo a Ana y doy pan a Marta.",
        "in_ph_ni": "Type here. E.g., nuker-ke ni etxe-ka.",
        # Output widgets.
        "out_lat_esni": "📜 Output: Neo-Iberian (Latin)",
        "out_lat_nies": "📜 Output: Spanish",
        "out_ib": "🗿 Iberian line",
        "out_audio": "🔊 Speech (Audio)",
        # Controls.
        "btn": "🔄 Translate",
        "combo": "🌍 Language (UI + docs)",
        "dir": "🔁 Direction",
        "dir_opts": ["ES → NI", "NI → ES"],
        # Documentation section.
        "doc_header": "📚 Documentation & Reference",
        "acc_titles": [
            "🌍 What is Neo-Iberian?",
            "🔤 Phonology and writing",
            "📐 Nominal system: gender, number & case",
            "🔄 Verbal system: TAM, person & clitics",
            "🌿 Derivation and word families",
            "🔢 Vigesimal number system",
            "📝 Basic syntax and particles",
            "❓ Vascoid modality (-na / -ba)",
            "⚙️ Translator pipeline (strict 1:1)",
            "📚 Bibliography and credits",
            "🧾 Technical glossary",
        ],
    },
}

# ====== CSS + fuente ======
def build_css():
    """Return the stylesheet for the app.

    If "Iberia-Georgeos.ttf" exists in the working directory it is embedded
    as a base64 data URL for the 'IberiaGeorgeos' font face; otherwise the
    font face falls back to ``local('sans-serif')``.
    """
    font_path = "Iberia-Georgeos.ttf"
    encoded = None
    if os.path.exists(font_path):
        with open(font_path, "rb") as fh:
            encoded = base64.b64encode(fh.read()).decode("ascii")

    if encoded:
        font_src = f"url(data:font/ttf;base64,{encoded}) format('truetype')"
    else:
        font_src = "local('sans-serif')"

    # Doubled braces below are literal CSS braces inside the f-string.
    return f"""
@font-face {{
  font-family: 'IberiaGeorgeos';
  src: {font_src};
  font-weight: normal; font-style: normal;
}}
:root {{
  --iberian-clay:#8B4513; --iberian-ochre:#CC7722; --iberian-stone:#5C5C5C;
  --iberian-sand:#D2B48C; --iberian-rust:#A0522D; --iberian-bronze:#CD7F32;
}}
.gradio-container {{ background:transparent!important;
  font-family:'Georgia','Times New Roman',serif!important; }}
html, body {{ background: transparent !important; }}
.gradio-container h1, .gradio-container h2, .gradio-container h3 {{
  color:var(--iberian-clay)!important; text-shadow:2px 2px 4px rgba(139,69,19,.15)!important;
  border-bottom:3px solid var(--iberian-bronze)!important; padding-bottom:.5rem!important; letter-spacing:.5px!important;
}}
.gradio-container .gr-group {{ background:linear-gradient(to bottom,#f9f6f0,#ede6dc)!important;
  border:2px solid var(--iberian-sand)!important; border-radius:8px!important; box-shadow:0 4px 12px rgba(139,69,19,.2), inset 0 1px 0 rgba(255,255,255,.5)!important;
  padding:1.5rem!important; margin-bottom:0.2rem!important; }}
.gradio-container .gr-accordion {{ background:linear-gradient(145deg,#ebe3d5,#d9cec0)!important;
  border:2px solid var(--iberian-rust)!important; border-radius:6px!important; margin-bottom:.8rem!important; box-shadow:2px 2px 6px rgba(0,0,0,.15)!important; }}
.gradio-container .gr-accordion .label-wrap {{ background:linear-gradient(to right,var(--iberian-ochre),var(--iberian-rust))!important;
  color:#fff!important; font-weight:600!important; padding:.8rem 1rem!important; border-radius:4px!important; text-shadow:1px 1px 2px rgba(0,0,0,.3)!important; }}
.gradio-container .gr-textbox textarea, .gradio-container .gr-textbox input {{ background:linear-gradient(to bottom,#faf8f3,#f5f0e8)!important;
  border:2px solid var(--iberian-sand)!important; border-radius:6px!important; color:#000!important;
  font-family:'Georgia',serif!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1)!important; }}
.gradio-container .gr-textbox textarea:focus, .gradio-container .gr-textbox input:focus {{
  border-color:var(--iberian-bronze)!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1), 0 0 8px rgba(205,127,50,.3)!important; }}
.gradio-container .gr-button.gr-button-primary {{ background:linear-gradient(145deg,var(--iberian-bronze),var(--iberian-rust))!important;
  border:2px solid var(--iberian-clay)!important; color:#fff!important; font-weight:bold!important; text-shadow:1px 2px 2px rgba(0,0,0,.4)!important;
  box-shadow:0 4px 8px rgba(139,69,19,.3), inset 0 1px 0 rgba(255,255,255,.2)!important; border-radius:8px!important; padding:.8rem 1.5rem!important; transition:all .3s ease!important; }}
.gradio-container .gr-button.gr-button-primary:hover {{ background:linear-gradient(145deg,var(--iberian-rust),var(--iberian-bronze))!important;
  transform:translateY(-2px)!important; box-shadow:0 6px 12px rgba(139,69,19,.4)!important; }}
.gradio-container, gradio-app {{
  --button-primary-background-fill: linear-gradient(145deg,#CD7F32,#A0522D) !important;
  --button-primary-background-fill-hover: linear-gradient(145deg,#A0522D,#CD7F32) !important;
  --button-primary-text-color: #fff !important;
  --button-primary-border-color: #8B4513 !important;
  --checkbox-label-background-fill-selected: linear-gradient(145deg,#CD7F32,#A0522D) !important;
  --checkbox-label-text-color-selected: #fff !important;
  --checkbox-label-border-color-selected: #8B4513 !important;
  --block-label-text-color: #4a2e15 !important;
  --block-title-text-color: #4a2e15 !important;
  --block-info-text-color: #4a2e15 !important;
  --button-secondary-text-color: #4a2e15 !important;
  --checkbox-label-text-color: #4a2e15 !important;
  --input-placeholder-color: #8a6f4a !important;
  --input-text-color: #000 !important;
  --input-text-size: inherit !important;
}}
.gradio-container label.selected,
.gradio-container [data-testid$="-radio-label"].selected {{
  background: linear-gradient(145deg,#CD7F32,#A0522D) !important;
  color: #fff !important;
  border-color: #8B4513 !important;
}}
.gradio-container label.selected *,
.gradio-container [data-testid$="-radio-label"].selected * {{
  color: #fff !important;
}}
.gradio-container .gr-button.gr-button-secondary,
.gradio-container button.secondary {{
  color: #4a2e15 !important;
}}
.gradio-container span[data-testid="block-info"],
.gradio-container .block-title,
.gradio-container .block-label,
.gradio-container label > span:not(.selected),
.gradio-container .gr-form > label,
.gradio-container .gr-block label {{
  color: #4a2e15 !important;
}}
.ib-line {{ font-family:'IberiaGeorgeos',monospace,sans-serif!important; font-size:1.9rem!important; line-height:2.4rem!important; white-space:pre-wrap!important;
  background:linear-gradient(135deg,#e8dcc8 0%,#d4c4a8 50%,#c4b098 100%)!important; padding:24px!important; border-radius:10px!important;
  border:3px solid var(--iberian-rust)!important; border-left:6px solid var(--iberian-bronze)!important;
  box-shadow:0 4px 15px rgba(139,69,19,.25), inset 0 2px 4px rgba(0,0,0,.1)!important; color:var(--iberian-clay)!important; position:relative!important; }}
.ib-line::before {{ content:''!important; position:absolute!important; inset:0!important;
  background-image:repeating-linear-gradient(0deg,transparent,transparent 2px, rgba(139,69,19,.03) 2px, rgba(139,69,19,.03) 4px)!important;
  pointer-events:none!important; border-radius:10px!important; }}
@media (max-width:768px) {{
  .ib-line {{ font-size:1.5rem!important; line-height:2rem!important; padding:16px!important; }}
  .gradio-container .gr-group {{ padding:1rem!important; }}
  .gradio-container h1 {{ font-size:1.8rem!important; }}
}}
@media (max-width:480px) {{
  .ib-line {{ font-size:1.3rem!important; line-height:1.8rem!important; padding:12px!important; }}
  .gradio-container h1 {{ font-size:1.5rem!important; }}
}}
.gradio-container button[role="tab"] {{
  background:linear-gradient(145deg,#ebe3d5,#d9cec0)!important;
  border:2px solid var(--iberian-sand)!important;
  border-bottom:none!important;
  color:var(--iberian-clay)!important;
  font-weight:600!important;
  font-family:'Georgia','Times New Roman',serif!important;
  font-size:1.05rem!important;
  padding:0.8rem 2rem!important;
  margin:0 0.3rem 0 0!important;
  border-radius:8px 8px 0 0!important;
  transition:all .25s ease!important;
  box-shadow:2px 2px 6px rgba(0,0,0,.12)!important;
  text-shadow:1px 1px 2px rgba(139,69,19,.08)!important;
}}
.gradio-container button[role="tab"]:hover {{
  background:linear-gradient(145deg,var(--iberian-ochre),#CC7722)!important;
  color:#ffffff!important;
  transform:translateY(-3px)!important;
  box-shadow:0 5px 10px rgba(139,69,19,.25)!important;
  text-shadow:1px 1px 3px rgba(0,0,0,.3)!important;
}}
.gradio-container button[role="tab"][aria-selected="true"] {{
  background:linear-gradient(145deg,var(--iberian-bronze),var(--iberian-rust))!important;
  border:3px solid var(--iberian-clay)!important;
  border-bottom:none!important;
  color:#ffffff!important;
  font-weight:700!important;
  box-shadow:0 6px 12px rgba(139,69,19,.35), inset 0 1px 0 rgba(255,255,255,.25)!important;
  text-shadow:1px 2px 3px rgba(0,0,0,.45)!important;
  transform:translateY(0px)!important;
}}
.gradio-container div[role="tablist"] {{
  background:linear-gradient(145deg,#e8dcc8,#d9c4b0)!important;
  border-bottom:4px solid var(--iberian-bronze)!important;
  padding:0.5rem 1rem 0 1rem!important;
  border-radius:10px 10px 0 0!important;
  box-shadow:0 2px 8px rgba(139,69,19,.15)!important;
}}
"""

# Stylesheet computed once at import time.
CSS = build_css()

# ====== leer TU mapa HTML y embeber en iframe (sin tocar su contenido) ======
def _load_map_html() -> str:
    """Return the map page HTML.

    Reads the first of the two candidate files that exists (as UTF-8);
    when neither exists, returns a minimal built-in Leaflet page.
    """
    candidates = ("mapa_iberos_neoibero.html", "salida/mapa_iberos_neoibero.html")
    found = next((path for path in candidates if os.path.exists(path)), None)
    if found is not None:
        with open(found, "r", encoding="utf-8") as fh:
            return fh.read()
    # Fallback page (plain string, not an f-string: the braces are literal).
    return """<!doctype html><meta charset=utf-8>
<title>Mapa</title>
<style>html,body,#m{height:100%;margin:0}#m{height:100vh}</style>
<link rel=stylesheet href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css">
<div id=m></div>
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
<script>var map=L.map('m').setView([40,-2],6);
L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png',{maxZoom:18,attribution:'&copy; OpenStreetMap'}).addTo(map);
L.circle([39,-0.3],{radius:70000}).addTo(map);</script>"""

# Map HTML loaded once at import time, plus its base64 data-URL form.
MAP_SRC = _load_map_html()
_map_b64 = base64.b64encode(MAP_SRC.encode("utf-8")).decode("ascii")
MAP_DATA_URL = "data:text/html;base64," + _map_b64

# ====== Blocks UI ======
# Main Gradio UI: language/direction selectors, input box, action buttons,
# output column (text, copy/cut/clear, speech) and side column (Iberian
# line + diagnostics), followed by the event wiring.
with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="stone", secondary_hue="stone", neutral_hue="stone")) as demo:
    with gr.Group():
        with gr.Row():
            combo = gr.Dropdown(choices=["ES","EN"], value="ES", label=LABELS["ES"]["combo"])
            direction = gr.Radio(choices=LABELS["ES"]["dir_opts"], value="ES → NI", label=LABELS["ES"]["dir"])

    with gr.Group():
        es_in = gr.Textbox(label=LABELS["ES"]["in_label_es"], placeholder=LABELS["ES"]["in_ph_es"], lines=5, elem_id="ni_es_input")
        with gr.Row():
            btn_tr   = gr.Button(LABELS["ES"]["btn"], variant="primary")
            btn_diag = gr.Button("🔎 Diagnosticar BI con este texto", variant="secondary")
            btn_clear_in = gr.Button("🗑️ Borrar entrada", variant="secondary")
        with gr.Row():
            with gr.Column(scale=2):
                ni_out = gr.Textbox(label=LABELS["ES"]["out_lat_esni"], lines=5, interactive=False, elem_id="ni_es_output", show_copy_button=True)
                with gr.Row():
                    btn_copy_out  = gr.Button("📋 Copiar salida", variant="secondary", size="sm")
                    btn_cut_out   = gr.Button("✂️ Cortar salida", variant="secondary", size="sm")
                    btn_clear_out = gr.Button("🗑️ Borrar salida", variant="secondary", size="sm")
                loc_btn = gr.Button("🔊 Locutar", variant="secondary", visible=True)
                audio_out = gr.Audio(label=LABELS["ES"]["out_audio"], type="numpy")
            with gr.Column(scale=1):
                ib_out = gr.HTML(label=LABELS["ES"]["out_ib"])
                diag_out = gr.HTML(value="")

    def do_translate(text, dir_label, sel_lang="ES"):
        """Translate *text* in the selected direction.

        Returns updates for (ni_out, ib_out, loc_btn, audio_out, diag_out).
        The output label is taken from the currently selected UI language
        (*sel_lang*) so that translating does not reset it to Spanish;
        unknown languages fall back to "ES".
        """
        L = LABELS.get(sel_lang) or LABELS["ES"]
        if not text or not text.strip():
            # Empty input: clear everything and hide the speech button.
            return (gr.update(value=""),
                    gr.update(value="<div class='ib-line'></div>"),
                    gr.update(visible=False),
                    gr.update(value=None),
                    gr.update(value=""))
        if dir_label.startswith("ES"):
            latin, ib = translate_es_to_ni_bi(text)
            return (gr.update(label=L["out_lat_esni"], value=latin),
                    gr.update(value=ib),
                    gr.update(visible=True),
                    gr.update(value=None),
                    gr.update(value=""))
        else:
            # NI→ES has no Iberian line and no speech button.
            es_text = translate_ni_to_es_bi(text)
            return (gr.update(label=L["out_lat_nies"], value=es_text),
                    gr.update(value="<div class='ib-line'></div>"),
                    gr.update(visible=False),
                    gr.update(value=None),
                    gr.update(value=""))

    btn_tr.click(do_translate, [es_in, direction, combo], [ni_out, ib_out, loc_btn, audio_out, diag_out])

    def run_locution(latin_text, dir_label):
        """Synthesize speech for the output text; only in the ES→NI direction."""
        if dir_label.startswith("ES"):
            return synthesize_speech(latin_text)
        return None
    loc_btn.click(run_locution, [ni_out, direction], audio_out)

    def do_diagnose(text, dir_label):
        """Fill the diagnostics panel with the report for the input text."""
        return gr.update(value=diagnose_text(text, dir_label))
    btn_diag.click(do_diagnose, [es_in, direction], [diag_out])

    def switch_lang(sel_lang, dir_label):
        """Relabel the widgets for the chosen UI language, keeping values."""
        L=LABELS[sel_lang]
        in_label = L["in_label_es"] if dir_label.startswith("ES") else L["in_label_ni"]
        in_ph    = L["in_ph_es"]    if dir_label.startswith("ES") else L["in_ph_ni"]
        out_lab  = L["out_lat_esni"] if dir_label.startswith("ES") else L["out_lat_nies"]
        return (
            gr.update(label=L["combo"], value=sel_lang),
            gr.update(label=L["dir"], choices=L["dir_opts"], value=dir_label),
            gr.update(label=in_label, placeholder=in_ph),
            gr.update(label=out_lab),
            gr.update(label=L["out_ib"]),
            gr.update(label=L["out_audio"]),
            gr.update(value=L["btn"])
        )
    combo.change(
        switch_lang,
        [combo, direction],
        [combo, direction,
         es_in, ni_out, ib_out, audio_out, btn_tr]
    )

    def switch_direction(dir_label, sel_lang):
        """Relabel input/output for the new direction and clear all outputs."""
        L=LABELS[sel_lang]
        in_label = L["in_label_es"] if dir_label.startswith("ES") else L["in_label_ni"]
        in_ph    = L["in_ph_es"]    if dir_label.startswith("ES") else L["in_ph_ni"]
        out_lab  = L["out_lat_esni"] if dir_label.startswith("ES") else L["out_lat_nies"]
        # The speech button is only shown for ES→NI.
        loc_vis  = dir_label.startswith("ES")
        return (gr.update(label=in_label, placeholder=in_ph),
                gr.update(label=out_lab, value=""),
                gr.update(value="<div class='ib-line'></div>"),
                gr.update(visible=loc_vis),
                gr.update(value=None),
                gr.update(value=""))
    direction.change(
        switch_direction,
        [direction, combo],
        [es_in, ni_out, ib_out, loc_btn, audio_out, diag_out]
    )

    # ---- v123: UI utility buttons ----
    # Clear input (resets es_in)
    btn_clear_in.click(
        fn=lambda: "",
        inputs=None,
        outputs=[es_in],
    )

    # Clear output (resets ni_out, ib_out, audio_out, diag_out)
    def _clear_output_block():
        return ("", "<div class='ib-line'></div>", None, "")
    btn_clear_out.click(
        fn=_clear_output_block,
        inputs=None,
        outputs=[ni_out, ib_out, audio_out, diag_out],
    )

    # Copy output to the clipboard (JS only, no server-side function)
    btn_copy_out.click(
        fn=None,
        inputs=[ni_out],
        outputs=None,
        js="(text) => { if (text) { navigator.clipboard.writeText(text); } return []; }",
    )

    # Cut output (JS copies to the clipboard, then the outputs are cleared)
    def _cut_output_block(_text):
        return ("", "<div class='ib-line'></div>", None, "")
    btn_cut_out.click(
        fn=_cut_output_block,
        inputs=[ni_out],
        outputs=[ni_out, ib_out, audio_out, diag_out],
        js="(text) => { if (text) { navigator.clipboard.writeText(text); } return text; }",
    )

# ====== smoke opcional ======
def _symmetry_smoketest():
    """Print an NI→ES→NI round trip for a few fixed probe sentences.

    Each probe is passed to translate_ni_to_es_bi and the result is fed back
    through translate_es_to_ni_bi; nothing is asserted, output is printed
    for manual inspection.
    """
    print("\n[SMOKE] Prueba ES↔NI (BI-estricto, determinista)…")
    samples = (
        "nuker-ke ni etxe-ka ?",
        "¿Pagaste 12,75 en la cafetería?",
        "Marta llega a las 18:30.",
        "[SIN-LEX:Tomás]-na euŕak-ke !",
    )
    for sample in samples:
        spanish = translate_ni_to_es_bi(sample)
        # Only the Latin-script line of the ES→NI result is shown.
        neo_back = translate_es_to_ni_bi(spanish)[0]
        for tag, shown in (("IN", sample), ("ES", spanish), ("NI", neo_back)):
            print(f"  {tag}:", shown)
        print("---")

# Run the round-trip smoke test at import time, only when DEBUG_MODE is on.
if DEBUG_MODE:
    _symmetry_smoketest()

# Script entry point: queue and launch the Gradio app defined above.
if __name__ == "__main__":
    demo.queue().launch()