# app.py — Traductor Español ↔ Neoíbero v4.4.1 FIXED
# UI clásica (v2.3 LTS) + motor bidireccional v4.4, con Línea Ibérica y Locución
# 2025-01 – Actualizado para CSVs v4.4 ULTRA-DEFINITIVO + FIXES CRÍTICOS
# Cambios v4.4:
#  - Compatible con HF_Pairs_ES_NI_RICH_v4.csv (783K pares)
#  - Compatible con HF_Pairs_NI_ES_Translator_v4.csv (783K pares)
#  - Usa campos nuevos: ni_surface, ni_tam, ni_pn
#  - Números 1-100 invariables funcionando
#  - Subjuntivos irregulares corregidos (vengas ✅)
#  - RESPETA caracteres especiales (ŕ, ś) completamente
# Cambios v4.4.1 FIXED:
#  - FIX: Carga correcta de CSV NI→ES (índices y minúsculas corregidos)
#  - FIX: Sistema de conjugación con persona/número completo (1S,2S,3S,1P,2P,3P)
#  - FIX: Interrogativas y exclamativas (-na/-ba) mejoradas
#  - FIX: Preservación de nombres propios en traducción NI→ES

import gradio as gr
import os, csv, re, base64, unicodedata
import torch
from transformers import AutoProcessor, VitsModel
import numpy as np
from html import escape  # ← para escapar la línea ibérica en HTML

# Local cache directories: keep any value already present in the environment,
# otherwise fall back to a path under /tmp.
# NOTE(review): these are assigned after `transformers` has been imported
# above — confirm the library still honors them when set this late.
os.environ.setdefault('TRANSFORMERS_CACHE', '/tmp/cache')
os.environ.setdefault('HF_HOME', '/tmp/hf')

DEBUG_MODE = False


def debug_print(msg):
    """Print ``msg`` with a ``[DEBUG]`` prefix, but only while DEBUG_MODE is truthy."""
    if not DEBUG_MODE:
        return
    print(f"[DEBUG] {msg}")

# =========================
# LÉXICO Y ESTRUCTURAS v4.4
# =========================
# << RUTAS EN RAÍZ DEL REPO >>
# Candidate file names for the ES→NI lexicon CSV. The code that reads these
# lists is outside this section.
CSV_CANDIDATES = [
    "HF_Pairs_ES_NI_RICH_v4.csv",      # ← NEW in v4.4
    "HF_Pairs_ES_NI_RICH.csv",
    "HF_Pairs_ES_NI.csv",
    "Diccionario_ES_Neoibero.csv",
]
# Candidate file names for the NI→ES translator CSV.
CSV_NI_ES = [
    "HF_Pairs_NI_ES_Translator_v4.csv", # ← NEW in v4.4
    "HF_Pairs_NI_ES_Translator.csv",
]

# ES→NI lookup tables. They start empty; the code that fills them is outside
# this section.
SURF_RICH = {}   # (es_lower, tag) -> ni_surface
LEX_FORM  = {}   # es_form -> ni_lemma/surface
LEX_LEMMA = {}   # es_lemma -> ni_lemma
FOLD_FORM = {}   # es_form_no_diacritics -> ni_lemma
LEX_META  = {}   # es_form/lemma -> {"pos":..., "tam_ok":...}
FORCE_KEYS = set()

# NI→ES lookup tables (also start empty).
NI_TO_ES_SURF = {}   # (ni_surface, ni_tam) -> es_surface
NI_TO_ES_FORM = {}   # ni_form/root -> es_form
NI_TO_ES_LEMMA= {}   # ni_root -> es_lemma

# Neo-Iberian person suffix → person/number label used for Spanish
# (1S, 2S, 3S, 1P, 2P, 3P).
NI_PERSON_MAP = {
    "-mu": "1S",  # I
    "-su": "2S",  # you (singular)
    "-i":  "3S",  # he/she
    "-gu": "1P",  # we
    "-zu": "2P",  # you (plural)
    "-te": "3P",  # they
}

# =========================
# MORFOLOGÍA – ESPAÑOL
# =========================
# Gerund and participle endings, anchored at the end of the word and
# case-insensitive.
RE_GER   = re.compile(r"(ando|iendo|yendo)$", re.I)
RE_PART  = re.compile(r"(ado|ido|to|so|cho)$", re.I)

# Spanish verb-ending tables. They are used below through _strip_any() and
# through plain membership tests in _guess_class_from_ending().
# Several endings occur in more than one table (e.g. "é" in FUT_END and
# PRET_AR, "ía" in COND_END and IMPF_ERIR), so the order in which callers
# consult the tables decides the result.
FUT_END   = ("é","ás","á","emos","éis","án")
COND_END  = ("ía","ías","ía","íamos","íais","ían")
PRET_AR   = ("é","aste","ó","amos","asteis","aron")
PRET_ERIR = ("í","iste","ió","imos","isteis","ieron")
IMPF_AR   = ("aba","abas","ábamos","abais","aban")
IMPF_ERIR = ("ía","ías","íamos","íais","ían")
SUBJ_AR   = ("e","es","e","emos","éis","en")
SUBJ_ERIR = ("a","as","a","amos","áis","an")
SUBJ_PAST_AR   = ("ara","aras","ara","áramos","arais","aran","ase","ases","ase","ásemos","aseis","asen")
SUBJ_PAST_ERIR = ("iera","ieras","iera","iéramos","ierais","ieran","iese","ieses","iese","iésemos","ieseis","iesen")
PRS_AR    = ("o","as","a","amos","áis","an")
PRS_ER    = ("o","es","e","emos","éis","en")
PRS_IR    = ("o","es","e","imos","ís","en")

# ——— Patches: FUT/COND written without accent marks + irregular stems ———
RE_COND_NT_REG = re.compile(r"(?:ar|er|ir)(?:ia|ias|iamos|iais|ian)$", re.I)  # hablaria, comerias...
RE_COND_NT_IRR = re.compile(r"(tendr|vendr|pondr|saldr|valdr|podr|habr|sabr|cabr|querr|dir|har)(?:ia|ias|iamos|iais|ian)$", re.I)
RE_FUT_NT_IRR  = re.compile(r"(tendr|vendr|pondr|saldr|valdr|podr|habr|sabr|cabr|querr|dir|har)(?:re|ras|ra|remos|reis|ran)$", re.I)

def _strip_any(w, ends):
    for s in sorted(ends, key=len, reverse=True):
        if w.endswith(s):
            return w[:-len(s)], s
    return None, None

def _guess_class_from_ending(ending):
    """Return "ar" when ``ending`` appears in any -ar ending table, otherwise "er"."""
    ar_tables = (PRET_AR, IMPF_AR, SUBJ_AR, PRS_AR)
    return "ar" if any(ending in table for table in ar_tables) else "er"

# Irregular Spanish surface form -> infinitive. guess_infinitive_es() consults
# this table before any heuristic.
# NOTE(review): two keys appear twice in this literal. Python keeps the LAST
# value, so the effective entries are "di" -> "decir" (the earlier
# "di" -> "dar" is discarded) and "ven" -> "ver" (the earlier
# "ven" -> "venir" is discarded). Confirm this is the intended resolution.
IRREG_LEMMA = {
    "fui":"ir","fuiste":"ir","fue":"ir","fuimos":"ir","fuisteis":"ir","fueron":"ir",
    "voy":"ir","vas":"ir","va":"ir","vamos":"ir","vais":"ir","van":"ir",
    "soy":"ser","eres":"ser","es":"ser","somos":"ser","sois":"ser","son":"ser",
    "era":"ser","eras":"ser","éramos":"ser","erais":"ser","eran":"ser",
    "he":"haber","has":"haber","ha":"haber","hemos":"haber","habéis":"haber","han":"haber",
    "hube":"haber","hubo":"haber","hubimos":"haber","hubiste":"haber","hubisteis":"haber","hubieron":"haber",
    "estoy":"estar","estás":"estar","está":"estar","estamos":"estar","estáis":"estar","están":"estar",
    "estuve":"estar","estuviste":"estar","estuvo":"estar","estuvimos":"estar","estuvisteis":"estar","estuvieron":"estar",
    "estaba":"estar","estabas":"estar","estábamos":"estar","estabais":"estar","estaban":"estar",

    "tuve":"tener","tuviste":"tener","tuvo":"tener","tuvimos":"tener","tuvisteis":"tener","tuvieron":"tener",
    "vine":"venir","viniste":"venir","vino":"venir","vinimos":"venir","vinisteis":"venir","vinieron":"venir",
    "hice":"hacer","hiciste":"hacer","hizo":"hacer","hicimos":"hacer","hicisteis":"hacer","hicieron":"hacer",
    "puse":"poner","pusiste":"poner","puso":"poner","pusimos":"poner","pusisteis":"poner","pusieron":"poner",
    "pude":"poder","pudiste":"poder","pudo":"poder","pudimos":"poder","pudisteis":"poder","pudieron":"poder",
    "quise":"querer","quisiste":"querer","quiso":"querer","quisimos":"querer","quisisteis":"querer","quisieron":"querer",
    "supe":"saber","supiste":"saber","supo":"saber","supimos":"saber","supisteis":"saber","supieron":"saber",
    "traje":"traer","trajiste":"traer","trajo":"traer","trajimos":"traer","trajisteis":"traer","trajeron":"traer",
    "dije":"decir","dijiste":"decir","dijo":"decir","dijimos":"decir","dijisteis":"decir","dijeron":"decir",
    "conduje":"conducir","condujiste":"conducir","condujo":"conducir","condujimos":"conducir","condujisteis":"conducir","condujeron":"conducir",
    "anduve":"andar","anduviste":"andar","anduvo":"andar","anduvimos":"andar","anduvisteis":"andar","anduvieron":"andar",
    "cupe":"caber","cupiste":"caber","cupo":"caber","cupimos":"caber","cupisteis":"caber","cupieron":"caber",
    # "di" here is overridden by the imperative entry "di" -> "decir" further down.
    "di":"dar","diste":"dar","dio":"dar","dimos":"dar","disteis":"dar","dieron":"dar",
    "vi":"ver","viste":"ver","vio":"ver","vimos":"ver","visteis":"ver","vieron":"ver",

    "tengo":"tener","vengo":"venir","pongo":"poner","salgo":"salir","traigo":"traer","caigo":"caer",
    "hago":"hacer","oigo":"oír","digo":"decir","valgo":"valer","sigo":"seguir",

    "tienes":"tener","tiene":"tener","tienen":"tener",
    "vienes":"venir","viene":"venir","vienen":"venir",
    "pienso":"pensar","piensas":"pensar","piensa":"pensar","piensan":"pensar",
    "quiero":"querer","quieres":"querer","quiere":"querer","quieren":"querer",
    "prefiero":"preferir","prefieres":"preferir","prefiere":"preferir","prefieren":"preferir",

    "vaya":"ir","vayas":"ir","vayamos":"ir","vayáis":"ir","vayan":"ir",
    "sea":"ser","seas":"ser","seamos":"ser","seáis":"ser","sean":"ser",
    "haya":"haber","hayas":"haber","hayamos":"haber","hayáis":"haber","hayan":"haber",
    "dé":"dar","des":"dar","demos":"dar","deis":"dar","den":"dar",
    "esté":"estar","estés":"estar","estemos":"estar","estéis":"estar","estén":"estar",
    "tenga":"tener","tengas":"tener","tengamos":"tener","tengáis":"tener","tengan":"tener",
    "venga":"venir","vengas":"venir","vengamos":"venir","vengáis":"venir","vengan":"venir",  # ← FIX v4.4

    "ve":"ir","id":"ir",
    "sé":"ser","sed":"ser",
    "haz":"hacer","haced":"hacer",
    "pon":"poner","poned":"poner",
    # "ven" here is overridden by the present-tense entry "ven" -> "ver" further down.
    "ven":"venir","venid":"venir",
    "ten":"tener","tened":"tener",
    "sal":"salir","salid":"salir",
    "di":"decir","decid":"decir",

    "doy":"dar","das":"dar","da":"dar","damos":"dar","dais":"dar","dan":"dar",
    "veo":"ver","ves":"ver","vemos":"ver","veis":"ver","ven":"ver",
    "oí":"oír","oíste":"oír","oyó":"oír","oímos":"oír","oísteis":"oír","oyeron":"oír",
    "iba":"ir","ibas":"ir","íbamos":"ir","ibais":"ir","iban":"ir",
    "veía":"ver","veías":"ver","veíamos":"ver","veíais":"ver","veían":"ver",

    "vinieras":"venir","lloviera":"llover",
}
# NEW: lemmas for the archaic future subjunctive
IRREG_LEMMA.update({
    "viniere":"venir","vinieres":"venir","vinieren":"venir",
    "hiciere":"hacer","hicieres":"hacer","hicieren":"hacer",
    "tuviere":"tener","tuvieres":"tener","tuvieren":"tener",
})

# Irregular Spanish surface form -> TAM tag. es_morph_tag() consults this
# table before applying any ending-based rule.
IRREG_MORPH_TAGS = {
    # Subjunctives
    "vaya":"SBJV","vayas":"SBJV","vayamos":"SBJV","vayáis":"SBJV","vayan":"SBJV",
    "sea":"SBJV","seas":"SBJV","seamos":"SBJV","seáis":"SBJV","sean":"SBJV",
    "haya":"SBJV","hayas":"SBJV","hayamos":"SBJV","hayáis":"SBJV","hayan":"SBJV",
    "dé":"SBJV","des":"SBJV","demos":"SBJV","deis":"SBJV","den":"SBJV",
    "esté":"SBJV","estés":"SBJV","estemos":"SBJV","estéis":"SBJV","estén":"SBJV",
    "tenga":"SBJV","tengas":"SBJV","tengamos":"SBJV","tengáis":"SBJV","tengan":"SBJV",
    # FIX: this row listed "vayáis" a second time where "vengáis" belongs, so
    # "vengáis" was missing and fell through to the generic rules.
    "venga":"SBJV","vengas":"SBJV","vengamos":"SBJV","vengáis":"SBJV","vengan":"SBJV",
    "haga":"SBJV","hagas":"SBJV","hagamos":"SBJV","hagáis":"SBJV","hagan":"SBJV",
    "pueda":"SBJV","puedas":"SBJV","podamos":"SBJV","podáis":"SBJV","puedan":"SBJV",

    # Imperatives
    "id":"IMP","sed":"IMP",
    "haz":"IMP","haced":"IMP","pon":"IMP","poned":"IMP","ven":"IMP","venid":"IMP",
    "ten":"IMP","tened":"IMP","sal":"IMP","salid":"IMP","decid":"IMP",

    # Imperatives with clitics
    "llámame":"IMP","llámalo":"IMP","llámala":"IMP","llámanos":"IMP","llámalos":"IMP","llámalas":"IMP",
    "dime":"IMP","dímelo":"IMP","dinos":"IMP","dínoslo":"IMP",
    "hazme":"IMP","hazlo":"IMP","hazla":"IMP","haznos":"IMP",
    "ponme":"IMP","ponlo":"IMP","ponla":"IMP","ponnos":"IMP",
    "dame":"IMP","dámelo":"IMP","danos":"IMP","dánoslo":"IMP",
    "tráeme":"IMP","tráelo":"IMP","tráela":"IMP","tráenos":"IMP",
    "díselo":"IMP","pónselo":"IMP","házselo":"IMP",

    # Future subjunctive (archaic)
    "viniere":"FUT_SBJV","vinieres":"FUT_SBJV","vinieren":"FUT_SBJV",
    "hiciere":"FUT_SBJV","hicieres":"FUT_SBJV","hicieren":"FUT_SBJV",
    "fuere":"FUT_SBJV","fueres":"FUT_SBJV","fueren":"FUT_SBJV",
    "hubiere":"FUT_SBJV","hubieres":"FUT_SBJV","hubieren":"FUT_SBJV",

    # Preterites
    "creísteis":"PST","dijisteis":"PST","hicisteis":"PST","pusisteis":"PST",
    "supisteis":"PST","quisisteis":"PST","trajisteis":"PST",
    "vi":"PST","dio":"PST","fue":"PST","fui":"PST",

    # Imperfects
    "iba":"IPFV","ibas":"IPFV","íbamos":"IPFV","ibais":"IPFV","iban":"IPFV",
    "veía":"IPFV","veías":"IPFV","veíamos":"IPFV","veíais":"IPFV","veían":"IPFV",
}

def looks_like_verb_form_strict(w: str) -> bool:
    """Heuristically decide whether ``w`` looks like a Spanish verb form.

    The word is lower-cased and checked against infinitive endings, the
    gerund/participle patterns, accented present endings, the ending tables
    and a list of irregular preterite stems. Any single hit is enough.
    """
    word = (w or "").lower()
    compiled_patterns = (
        RE_GER, RE_PART,
        # Unaccented FUT/COND: regular conditional plus irregular stems
        RE_COND_NT_REG, RE_COND_NT_IRR, RE_FUT_NT_IRR,
    )
    ending_tables = (
        FUT_END+COND_END,
        PRET_AR+PRET_ERIR,
        IMPF_AR+IMPF_ERIR,
        SUBJ_PAST_AR+SUBJ_PAST_ERIR,
    )
    return (
        word.endswith(("ar","er","ir"))
        or any(pattern.search(word) for pattern in compiled_patterns)
        or re.search(r"(á|ás|áis|és|éis|ís)$", word) is not None
        or any(_strip_any(word, table)[0] is not None for table in ending_tables)
        # Irregular preterites and similar forms
        or re.search(r"(anduve|anduviste|anduvo|anduvimos|anduvieron|conduje|traduje|produje|reduje|introduje|supe|quise|pude|puse|hice|hizo|dije|dijo|traje|trajo|tuve|tuvo|vine|vino|cupe|cupo)$", word) is not None
    )

def _zco_guess(w:str)->str:
    if w.endswith("uzco"): return w[:-4] + "ucir"
    if w.endswith("ezco"): return w[:-4] + "ecer"
    if w.endswith("ozco"): return w[:-4] + "ocer"
    if w.endswith("azco"): return w[:-4] + "acer"
    return ""

def guess_infinitive_es(w: str) -> str:
    """Guess the infinitive of a Spanish verb form using ending heuristics.

    The word is lower-cased, looked up in IRREG_LEMMA, and then run through a
    fixed sequence of rules (-zco, -go, infinitive, gerund, participle,
    future/conditional, remaining endings). The first rule that fires wins.

    Returns the guessed infinitive, or "" when no rule applies. The result is
    a guess: callers in this file check it against the lexicon before use.

    Changes from the previous version:
      * regular "-ado" participles now map to "-ar" ("hablado" -> "hablar")
        instead of "-er";
      * the accented future/conditional rule only answers when the stripped
        stem is an irregular stem or ends in ar/er/ir. Before, any word ending
        in é/á/ás/ía... returned its bare stem ("hablé" -> "habl"), which also
        made the later "ás"/"éis"/"á" and preterite/imperfect rules unreachable
        for those words;
      * the irregular future stem table is defined once.
    """
    w = (w or "").lower()
    if w in IRREG_LEMMA:
        return IRREG_LEMMA[w]
    if w in ("vámonos", "vamonos"):
        return "ir"

    # Irregular stems shared by the future and the conditional.
    fut_stems = {"saldr":"salir","vendr":"venir","tendr":"tener","pondr":"poner","valdr":"valer","podr":"poder",
                 "habr":"haber","sabr":"saber","cabr":"caber","querr":"querer","dir":"decir","har":"hacer"}

    if w.endswith("zco"):
        z = _zco_guess(w)
        if z:
            return z
    if w.endswith("go"):
        base = w[:-2]
        map_go = {"ten":"tener","ven":"venir","pon":"poner","sal":"salir","tra":"traer","ca":"caer","ha":"hacer","oi":"oír","di":"decir","val":"valer","si":"seguir"}
        for k, v in map_go.items():
            if base.startswith(k):
                return v
    if w.endswith(("ar", "er", "ir")):
        return w

    m = RE_GER.search(w)
    if m:
        return w[:m.start()] + ("ar" if m.group(0) == "ando" else "er")

    m = RE_PART.search(w)
    if m:
        part_irreg = {
            "hecho":"hacer","dicho":"decir","visto":"ver","puesto":"poner","escrito":"escribir",
            "abierto":"abrir","cubierto":"cubrir","muerto":"morir","roto":"romper",
            "vuelto":"volver","resuelto":"resolver","frito":"freír","impreso":"imprimir",
            "satisfecho":"satisfacer","provisto":"proveer"
        }
        if w in part_irreg:
            return part_irreg[w]
        # "-ado" only belongs to -ar verbs; every other ending keeps the "-er" guess.
        return w[:m.start()] + ("ar" if m.group(0) == "ado" else "er")

    # FUT/COND with accent mark: the stem must be an infinitive or an
    # irregular future stem, otherwise this is some other tense and the
    # rules further down get their turn.
    base, end = _strip_any(w, FUT_END + COND_END)
    if base is not None:
        if base in fut_stems:
            return fut_stems[base]
        if base.endswith(("ar", "er", "ir")):
            return base

    # COND without accent mark (irregular first, then regular)
    m = RE_COND_NT_IRR.search(w)
    if m:
        return fut_stems.get(m.group(1), "")
    m = RE_COND_NT_REG.search(w)
    if m:
        # Drop the leading "ar"/"er"/"ir" of the match to isolate "ia"/"ias"/...
        suf = m.group(0).replace("ar", "", 1).replace("er", "", 1).replace("ir", "", 1)
        return w[:-len(suf)]  # removing "ia/ias/..." leaves the infinitive

    # FUT without accent mark (irregular stems only, to avoid ambiguity)
    m = RE_FUT_NT_IRR.search(w)
    if m:
        return fut_stems.get(m.group(1), "")

    # Remaining heuristics
    if w.endswith("áis"): return w[:-3] + "ar"
    if w.endswith("éis"): return w[:-3] + "er"
    if w.endswith("ís"):  return w[:-2] + "ir"
    if w.endswith("ás"):  return w[:-2] + "ar"
    if w.endswith("és"):  return w[:-2] + "er"
    if w.endswith("á"):   return w[:-1] + "ar"
    for group in (PRET_AR+PRET_ERIR, IMPF_AR+IMPF_ERIR, SUBJ_AR+SUBJ_ERIR, PRS_AR+PRS_ER+PRS_IR):
        base, end = _strip_any(w, group)
        if base is not None:
            return base + _guess_class_from_ending(end)
    base, end = _strip_any(w, SUBJ_PAST_AR)
    if base is not None:
        return base + "ar"
    base, end = _strip_any(w, SUBJ_PAST_ERIR)
    if base is not None:
        return base + "er"
    return ""

def es_morph_tag(w: str) -> str:
    """Return a TAM tag for the Spanish word ``w`` based on its surface form.

    The word is lower-cased, looked up in IRREG_MORPH_TAGS, and then tested
    against a fixed sequence of patterns; the first one that matches decides.
    Possible results: "IMP", "IPFV", "FUT", "COND", "INF", "PST", "PRS",
    "SBJV", a value from IRREG_MORPH_TAGS, or "UNK" when nothing matches.

    The checks are order-sensitive because the ending tables overlap.
    NOTE(review): the PRET check runs before the FUT_END check and "é" is in
    PRET_AR, so a word ending in "é" is tagged "PST" before FUT_END is
    consulted — confirm that is intended for accented futures.
    NOTE(review): every ending in COND_END also appears in IMPF_ERIR, which is
    tested earlier, so the COND_END check below looks unreachable.
    """
    w = (w or "").lower()
    if w in IRREG_MORPH_TAGS: return IRREG_MORPH_TAGS[w]

    # Imperatives with clitics / periphrases
    if re.search(r"^(llám|dím|házm|pónm|vén|dám|tén|tráe)(a|e)?(me|te|lo|la|nos|os|les|se|melo|telo|selo)$", w): return "IMP"
    if re.search(r"(adme|edme|idme|adlo|edle|idle|adnos|ednos)$", w): return "IMP"
    if re.search(r"(?:ad|ed|id|ád|éd|íd)(?:me|te|se|lo|la|nos|os|les|melo|telo|selo|noslo|oslo|sela|selas|selos)$", w): return "IMP"
    # Accented "-ndo" followed by a clitic is tagged like a gerund.
    if re.search(r"^.*[áéí]ndo(me|te|se|lo|la|nos|os|les|melo|telo|selo)$", w): return "IPFV"
    if re.search(r"(melo|telo|selo|noslo|oslo|sela|selas|selos)$", w):
        base = re.sub(r"(melo|telo|selo|noslo|oslo|sela|selas|selos)$", "", w)
        # Require more than two characters before the clitic cluster.
        if base and len(base) > 2: return "IMP"

    # FUT/COND without accent mark (takes priority over the rules below)
    if RE_FUT_NT_IRR.search(w):  return "FUT"
    if RE_COND_NT_IRR.search(w): return "COND"
    if RE_COND_NT_REG.search(w): return "COND"

    if w.endswith(("ar","er","ir")): return "INF"
    if RE_GER.search(w):  return "IPFV"
    if RE_PART.search(w): return "PST"
    if _strip_any(w, PRET_AR+PRET_ERIR)[0] is not None:  return "PST"
    if _strip_any(w, IMPF_AR+IMPF_ERIR)[0] is not None:  return "IPFV"
    if _strip_any(w, FUT_END)[0]  is not None:           return "FUT"
    if _strip_any(w, COND_END)[0] is not None:           return "COND"
    if re.search(r"(á|ás|áis|és|éis|ís)$", w):           return "PRS"
    if _strip_any(w, SUBJ_AR+SUBJ_ERIR)[0] is not None:  return "SBJV"
    if _strip_any(w, PRS_AR+PRS_ER+PRS_IR)[0] is not None:  return "PRS"
    if _strip_any(w, SUBJ_PAST_AR+SUBJ_PAST_ERIR)[0] is not None: return "SBJV"
    if re.search(r"(anduve|conduje|traduje|produje|reduje|introduje|supe|quise|pude|puse|hice|hizo|dije|dijo|traje|trajo|tuve|tuvo|vine|vino|cupe|cupo)$", w):
        return "PST"
    # At least one character followed by "ad"/"ed"/"id".
    if re.search(r"^.+[aei]d$", w): return "IMP"
    return "UNK"

# =========================
# MORFOLOGÍA – NEOÍBERO
# =========================
NI_TAM_SUFFIXES = {
    "-ke": "PRS","-ei": "PST","-ta": "IPFV","-na": "FUT",
    "-ne": "COND","-ni": "SBJV","-tu": "IMP","-ra":"FUT_SBJV"
}
def detect_ni_tam(word: str):
    """Split a Neo-Iberian word into ``(root, tam_tag, tam_suffix)``.

    A trailing person suffix is dropped first, but only when what remains ends
    in a known TAM suffix. Words without a TAM suffix come back unchanged as
    ``(word, "INF", "")``. Input is lower-cased and stripped.
    """
    text = (word or "").lower().strip()
    tam_marks = tuple(NI_TAM_SUFFIXES)
    for person_tail in ("-i", "-mu", "-su", "-gu", "-zu", "-te"):
        if not text.endswith(person_tail):
            continue
        trimmed = text[:-len(person_tail)]
        # Only cut the person suffix when a known TAM suffix is then exposed.
        if trimmed.endswith(tam_marks):
            text = trimmed
            break
    for mark, tag in NI_TAM_SUFFIXES.items():
        if text.endswith(mark):
            return text[:-len(mark)], tag, mark
    return text, "INF", ""

# =========================
# UTILIDADES
# =========================
def fold(s:str)->str:
    """Return ``s`` with combining marks removed (NFD-decompose, then drop category Mn)."""
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
    return ''.join(kept)
def has_diacritic(s:str)->bool:
    """Tell whether ``s`` contains an accented vowel or ü, in either case."""
    text = s or ""
    return re.search(r"[áéíóúüÁÉÍÓÚÜ]", text) is not None

def _canon_pos(p: str) -> str:
    p = (p or "").strip().upper()
    MAP = {"V":"V","VERB":"V","N":"N","NOUN":"N","ADJ":"ADJ","ADJECTIVE":"ADJ","ADV":"ADV","ADVERB":"ADV",
           "INTJ":"INTJ","INTERJ":"INTJ","INTERJECTION":"INTJ","PRON":"PRON","PRONOUN":"PRON",
           "PART":"PART","PARTICLE":"PART","POSTP":"POSTP","ADP":"POSTP","ADPOSITION":"POSTP",
           "NUM":"NUM","DET":"DET"}
    return MAP.get(p, "")
def _boolish(x):
    if x is None: return None
    s = str(x).strip().lower()
    if s in ("1","true","t","yes","y","si","sí"): return True
    if s in ("0","false","f","no","n"): return False
    return None
def _meta_set(form_es:str, pos:str=None, tam_ok=None):
    if not form_es: return
    d = LEX_META.setdefault(form_es, {})
    if pos and not d.get("pos"): d["pos"] = pos
    if tam_ok is not None and d.get("tam_ok") is None: d["tam_ok"] = bool(tam_ok)
def pos_of_es(token_low:str) -> str:
    """Return the stored POS for ``token_low``; otherwise "V" if it looks like a verb form, else ""."""
    stored = LEX_META.get(token_low, {}).get("pos")
    if stored:
        return stored
    if looks_like_verb_form_strict(token_low):
        return "V"
    return ""
def tam_allowed_for_es(token_low:str) -> bool:
    """Return the stored "tam_ok" flag for ``token_low``; without one, True only when its POS is "V"."""
    flag = LEX_META.get(token_low, {}).get("tam_ok")
    if flag is None:
        return pos_of_es(token_low) == "V"
    return bool(flag)

# =========================
# TTS (Meta MMS)
# =========================
# Load the "facebook/mms-tts-spa" processor and VITS model once, at import
# time, on CUDA when available and on CPU otherwise.
# Any failure is printed and not re-raised; whichever of `processor`/`model`
# was not assigned yet stays None, which synthesize_speech() checks first.
print("Cargando modelo de voz...")
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = model = None
try:
    processor = AutoProcessor.from_pretrained("facebook/mms-tts-spa")
    model = VitsModel.from_pretrained("facebook/mms-tts-spa").to(device)
    print("Modelo de voz cargado.")
except Exception as e:
    print(f"ERROR TTS: {e}")

PAUSE_LEVEL=3
def add_reading_pauses(text: str, level:int=3) -> str:
    """Duplicate punctuation so the TTS voice pauses longer.

    level <= 1 returns ``text`` untouched; level >= 2 doubles commas;
    level >= 3 also doubles periods and semicolons. For level >= 2 runs of
    whitespace are collapsed and the result is stripped.
    """
    if level <= 1: return text
    t = text
    if level >= 2: t = re.sub(r",\s*", ", , ", t)
    if level >= 3:
        t = re.sub(r"\.\s*", ". . ", t); t = re.sub(r";\s*", "; ; ", t)
    return re.sub(r"\s+"," ",t).strip()

def hispanize_for_tts(ni_text: str) -> str:
    """Rewrite Neo-Iberian text into a spelling the Spanish TTS voice can read.

    Lower-cases the text, maps 'ŕ' to 'rr' and 'ś' to 's', turns hyphens into
    spaces, removes ``[...]`` spans, collapses whitespace and finally applies
    add_reading_pauses() with PAUSE_LEVEL.

    The previous version also replaced 'eś', 'ŕa' and 'aŕe' after the
    single-character replacements; by then no 'ŕ' or 'ś' was left, so those
    calls could never match. They are removed; the output is unchanged.
    """
    text=(ni_text or "").lower()
    # One pass for the Iberian letters and the morpheme hyphen.
    text=text.replace('ŕ','rr').replace('ś','s').replace('-', ' ')
    text=re.sub(r'\[.*?\]','',text)
    text=re.sub(r'\s+',' ',text).strip()
    return add_reading_pauses(text, PAUSE_LEVEL)

def synthesize_speech(text):
    """Synthesize ``text`` with the loaded TTS model.

    Returns ``(sample_rate, waveform)`` with the waveform as a float32 NumPy
    array peak-normalized to 0.9, or None when the text is empty/blank, the
    model or processor failed to load, or synthesis raises.

    The sample rate is read from ``model.config.sampling_rate`` instead of
    being hard-coded, falling back to 16000 if the attribute is missing.
    """
    if not text or not text.strip() or model is None or processor is None:
        return None
    try:
        inputs = processor(text=hispanize_for_tts(text), return_tensors="pt").to(device)
        with torch.no_grad():
            waveform = model(**inputs).waveform
        speech_np = waveform.cpu().numpy().squeeze()
        peak = max(abs(speech_np.min()), abs(speech_np.max()))
        if peak > 0:
            speech_np = speech_np / peak * 0.9
        sample_rate = getattr(getattr(model, "config", None), "sampling_rate", None) or 16000
        return (int(sample_rate), speech_np.astype(np.float32))
    except Exception as e:
        # Best-effort: a TTS failure is reported and yields no audio.
        print(f"Error TTS: {e}"); return None

# =========================
# LÍNEA IBÉRICA (claves Georgeos)
# =========================
KEYS_MODE = "explicit"
V = "aeiou"
# Syllabic signs per stop consonant, indexed by the vowel's position in V.
SYL_FOR={"b":["‹BA›","‹BE›","‹BI›","‹BO›","‹BU›"],
         "d":["‹DA›","‹DE›","‹DI›","‹DO›","‹DU›"],
         "t":["‹TA›","‹TE›","‹TI›","‹TO›","‹TU›"],
         "g":["‹GA›","‹GE›","‹GI›","‹GO›","‹GU›"],
         "k":["‹KA›","‹KE›","‹KI›","‹KO›","‹KU›"]}
# Single-letter signs (vowels and continuants).
ALPHA_FOR={"a":"‹A›","e":"‹E›","i":"‹I›","o":"‹O›","u":"‹U›","s":"‹S›","ś":"‹Ś›","l":"‹L›","r":"‹R›","ŕ":"‹Ŕ›","n":"‹N›","m":"‹M›"}
# Signs appended to a syllabic sign for a syllable-final consonant.
CODA_FOR={"":"","n":"‹N›","s":"‹S›","ś":"‹Ś›","r":"‹R›","ŕ":"‹Ŕ›","l":"‹L›","m":"‹M›","k":"‹K›","t":"‹T›"}

def tokens_from_latin(ni:str)->str:
    """Transliterate Latin-script Neo-Iberian into a string of ``‹SIGN›`` tokens.

    The input is lower-cased. 'p' is treated as 'b', '-' becomes '—', a stop
    consonant plus vowel becomes one syllabic sign (optionally followed by a
    coda sign), and anything else is mapped through ALPHA_FOR or, failing
    that, emitted upper-cased.

    Fix: a consonant is now taken as the coda of the current syllable only
    when it is NOT followed by a vowel. Before, "kati" produced ‹KA›‹T›‹I›
    because the "t" was consumed as a coda; it now produces ‹KA›‹TI›.
    Continuants (n, s, r, l, m...) give the same output either way.
    """
    out=[]; i=0; ni=(ni or "").lower()
    n=len(ni)
    while i<n:
        c=ni[i]
        if c=="p": c="b"               # no independent /p/
        if c=="-": out.append("—"); i+=1; continue
        if c in V:
            out.append(ALPHA_FOR[c]); i+=1; continue
        if c in SYL_FOR and i+1<n and ni[i+1] in V:
            tok=SYL_FOR[c][V.index(ni[i+1])]
            coda=ni[i+2] if i+2<n else ""
            # A consonant followed by a vowel opens the next syllable.
            opens_next_syllable = i+3<n and ni[i+3] in V
            if coda and coda in CODA_FOR and not opens_next_syllable:
                tok+=CODA_FOR[coda]; i+=3
            else:
                i+=2
            out.append(tok); continue
        out.append(ALPHA_FOR.get(c, c.upper())); i+=1
    return "".join(out)

KEYS_OVERRIDE={"ka":"K","mi":"MI","te":"TE","ne":"N","o":"O","eś":"X"}
def georgeos_keys(token_str:str, ni_plain:str)->str:
    """Convert a ``‹SIGN›`` token string into its key sequence.

    If the lower-cased plain word is in KEYS_OVERRIDE, that fixed key string
    is returned. Otherwise each sign is mapped according to KEYS_MODE:
    "compact" shortens signs to their first letter (vowels stay as they are),
    any other mode keeps syllabic signs whole. 'Ś' always maps to "X" and 'Ŕ'
    to "r".
    """
    plain_low=(ni_plain or "").lower()
    if plain_low in KEYS_OVERRIDE:
        return KEYS_OVERRIDE[plain_low]
    special={"Ś":"X","Ŕ":"r"}
    keys=[]
    for sign in re.findall(r"‹(.*?)›", token_str):
        compact = KEYS_MODE == "compact"
        if len(sign)==2 and sign[0] in "BDTGK":
            keys.append(sign[0] if compact else sign)
        elif compact and sign in ("A","E","I","O","U"):
            keys.append(sign)
        elif sign in special:
            keys.append(special[sign])
        elif compact:
            keys.append(sign[0].upper())
        else:
            keys.append(sign.upper())
    return "".join(keys)

TRIDOT = "/"
# Tokens rendered as punctuation instead of as words.
# Fix: the curly quotes had been mangled into straight quotes, which Python
# parsed as duplicates of "," and "'", so curly quotes were not in the set.
# They are written as escapes to survive future re-encoding:
# \u201c “  \u201d ”  \u2018 ‘  \u2019 ’
VISIBLE_PUNCT = {",",".",";","; ",":","…","(",")","[","]","{","}","\"","'","«","»","—","–",
                 "\u201c","\u201d","\u2018","\u2019"}
HARD_BOUND    = {".",";","—","–",":","(",")","«","»"}

def render_ib_with_tridots(toks):
    """Join Iberian-line tokens into one display string.

    Consecutive word tokens are separated by `` TRIDOT `` (" / "); tokens in
    VISIBLE_PUNCT are emitted with a space on each side and reset the word
    run. The result is stripped.
    """
    res=[]; prev_word=False
    for tk in toks:
        if tk in VISIBLE_PUNCT:
            res.append(" "+tk+" "); prev_word=False
            continue
        if prev_word:
            res.append(" "+TRIDOT+" ")
        res.append(tk); prev_word=True
    return "".join(res).strip()

# =========================
# TRADUCTOR ES→NI
# =========================
TAM_SUFFIX={"PRS":"-ke","PST":"-ei","FUT":"-na","IPFV":"-ta",
            "COND":"-ne","SBJV":"-ni","IMP":"-tu","INF":"","FUT_SBJV":"-ra","UNK":"-ke"}
VERB_TAM = ("-ke","-na","-ei","-ta","-ni","-ne","-tu","-ra")

def strip_ni_tam(lemma: str):
    """Split a trailing Neo-Iberian TAM suffix off ``lemma``.

    Returns ``(root, suffix)``; the suffix is "" when none of VERB_TAM matches.
    Longer suffixes are tried first.
    """
    text = lemma or ""
    by_length = sorted(VERB_TAM, key=len, reverse=True)
    found = next((suffix for suffix in by_length if text.endswith(suffix)), "")
    if found:
        return text[:-len(found)], found
    return text, ""

# Spanish function words and interjections, built by whitespace-splitting the
# literal below. How the set is used is not visible in this section.
STOP=set("""
el la los las lo un una unos unas al del de en con sin por sobre entre hasta desde hacia según tras
pero aunque sino que como si porque cuando donde mientras
muy ya sí no también solo sólo aún aun más menos
mi mis tu tus su sus nuestro nuestra nuestros nuestras
esto eso aquello ese esa esos esas aquel aquella aquellos aquellas
quien quién quiénes cual cuál cuales cuáles cuyo cuya cuyos cuyas
eh ay oh uy ah aja jeje jaja aah ahh ohh uhh
""".split())

# --- Reglas "a" → ka/mi/te
def rule_a(prev_tok:str, token:str, next_tok:str)->str:
    """Choose the Neo-Iberian rendering of Spanish "a" from its neighbours.

    "mi" after one of the listed giving/telling verbs, "te" before one of the
    listed first names, "ka" otherwise. ``token`` itself is not inspected.
    """
    transfer_verbs = {"dar","decir","contar","enviar","ofrecer","mostrar","prestar","regalar","entregar"}
    first_names = {"ana","marta","juan","pedro","luis","maría","jose","carlos","laura"}
    if prev_tok in transfer_verbs:
        return "mi"
    return "te" if next_tok in first_names else "ka"

Q_ENCLITIC_INT = "-na"
Q_ENCLITIC_EXC = "-ba"
WH_WORDS = {
    "qué","quien","quién","quienes","quiénes","cual","cuál","cuales","cuáles",
    "donde","dónde","cuando","cuándo","como","cómo",
    "cuanto","cuánto","cuanta","cuánta","cuantos","cuántos","cuantas","cuántas"
}
def is_wh_token(t: str) -> bool:
    """Tell whether ``t`` is a Spanish WH word, with or without its accent mark.

    The lower-cased token is checked against WH_WORDS first and then, with
    diacritics folded away, against the unaccented spellings.
    """
    lowered = (t or "").lower()
    unaccented = {"que","quien","quienes","cual","cuales","donde","cuando","como","cuanto","cuanta","cuantos","cuantas"}
    return lowered in WH_WORDS or fold(lowered) in unaccented

def has_wh_outside_parens(toks) -> bool:
    """Tell whether any WH word occurs outside parentheses or quotation marks.

    Tracks nesting depth over ``(``/``)``, ``«``/``»`` and curly double and
    single quotes; a WH token counts only at depth 0.

    Fix: the curly quotes in the two literal sets had been mangled into
    straight quotes. The resulting triple quote made Python read both lines as
    a single condition, so depth was never increased and WH words inside
    brackets were counted. The quotes are now written as escapes.
    """
    openers = {"(", "«", "\u201c", "\u2018"}   # ( « “ ‘
    closers = {")", "»", "\u201d", "\u2019"}   # ) » ” ’
    depth = 0
    for tk in toks:
        if tk in openers:
            depth += 1
        elif tk in closers:
            depth = max(0, depth - 1)
        elif depth == 0 and is_wh_token(tk):
            return True
    return False

# Present and imperfect forms of "estar" / "haber".
ESTAR_SET={"estoy","estás","está","estamos","estáis","están","estaba","estabas","estábamos","estabais","estaban"}
HABER_SET={"he","has","ha","hemos","habéis","han","había","habías","habíamos","habíais","habían"}

def detect_tam_with_context(toks, i, sentence_start=False):
    """Return the TAM tag for ``toks[i]`` using the neighbouring tokens.

    Starts from es_morph_tag() of the lower-cased token and applies context
    rules in a fixed order; the first rule that fires returns. When no rule
    fires the morphological tag is returned, with "UNK" replaced by "PRS".

    Args:
        toks: sequence of token strings.
        i: index of the token to tag.
        sentence_start: whether the token opens a sentence; only consulted by
            the clitic-cluster imperative rule.
    """
    t=toks[i].lower()
    prev=toks[i-1].lower() if i>0 else ""
    prev2=toks[i-2].lower() if i>1 else ""
    nxt=toks[i+1].lower() if i+1<len(toks) else ""
    tag=es_morph_tag(t)

    # Imperatives with clitics at the start of a clause
    if re.search(r"(melo|telo|selo|noslo|oslo)$", t):
        if sentence_start or prev in {",", ".", "!", "¡", ";", ":"}: return "IMP"
    # Short irregular imperatives, only as first token or right after punctuation.
    if i == 0 or prev in {",", ".", "!", "¡", ";", ":"}:
        if t in {"ve","ven","haz","pon","sal","di","ten","sé","id","venid","tened"}: return "IMP"

    # After a subordinating word: keep SBJV, or assume it for an untagged
    # word ending in "e"/"a".
    if prev in {"que","si","cuando","aunque","mientras","hasta","para"}:
        if tag=="SBJV": return "SBJV"
        if tag=="UNK" and re.search(r"(e|a)$", t) and not t.endswith(("ar","er","ir")): return "SBJV"

    # The auxiliaries themselves are always tagged PRS here.
    if t in ESTAR_SET or t in HABER_SET: return "PRS"
    # NOTE(review): the next two rules test the token AFTER the current one
    # (`nxt`) while `prev` is the auxiliary, so they tag whatever sits between
    # the auxiliary and a gerund/participle. A gerund/participle directly
    # after the auxiliary is handled by the RE_GER/RE_PART checks on `t`
    # below. Confirm the `nxt` test is intended.
    if prev in ESTAR_SET and RE_GER.search(nxt): return "IPFV"
    if prev in HABER_SET and RE_PART.search(nxt): return "PST"
    # "ir a" + infinitive is treated as a future.
    if prev == "a" and prev2 in {"voy","vas","va","vamos","vais","van"} and t.endswith(("ar","er","ir")): return "FUT"
    if RE_GER.search(t):  return "IPFV"
    if RE_PART.search(t): return "PST"
    return tag if tag!="UNK" else "PRS"

def forced_lemma_with_context(low:str, prev:str, nxt:str)->str:
    """Return a context-forced lemma, or "" when no override applies.

    The only override: "visto" followed by "de" is read as "vestir".
    ``prev`` is accepted but not used.
    """
    return "vestir" if (low, nxt) == ("visto", "de") else ""

def has_tilde_equiv_lookup(low:str)->str:
    """Look up an accented, non-verb-looking word by its unaccented spelling.

    Returns the LEX_FORM entry for the folded word, else the FOLD_FORM entry,
    else "". Words without a diacritic, or that look like verb forms, always
    give "".
    """
    if not has_diacritic(low) or looks_like_verb_form_strict(low):
        return ""
    folded = fold(low)
    for table in (LEX_FORM, FOLD_FORM):
        if folded in table:
            return table[folded]
    return ""

def lookup_form_lemma(token:str, prev:str, nxt:str):
    """Find the Neo-Iberian entry for a Spanish token.

    Tries, in order: a context-forced lemma, the exact lower-cased form, the
    unaccented equivalent, and finally the guessed infinitive for words that
    look like verb forms.

    Returns ``(entry, True)`` on the first hit and ``("", False)`` otherwise.
    """
    if not token:
        return "", False
    lowered = token.lower()

    forced = forced_lemma_with_context(lowered, prev, nxt)
    if forced and forced in LEX_LEMMA:
        return LEX_LEMMA[forced], True

    if lowered in LEX_FORM:
        return LEX_FORM[lowered], True

    unaccented_hit = has_tilde_equiv_lookup(lowered)
    if unaccented_hit:
        return unaccented_hit, True

    if looks_like_verb_form_strict(lowered):
        infinitive = guess_infinitive_es(lowered)
        if infinitive and infinitive in LEX_LEMMA:
            return LEX_LEMMA[infinitive], True

    return "", False

def attach_enclitic(out_words, ib_keys, plain, attach_idx, encl):
    """Append enclitic ``encl`` to the word at ``attach_idx``, in place.

    Updates the three parallel lists: the output word, the plain form, and
    the key string (recomputed from the new plain form). Does nothing when
    the index is None or out of range, or when the word already ends with the
    enclitic.
    """
    if attach_idx is None:
        return
    if not (0 <= attach_idx < len(out_words)):
        return
    current = out_words[attach_idx] or ""
    if current.endswith(encl):
        return
    new_plain = (plain[attach_idx] or "") + encl
    out_words[attach_idx] = current + encl
    plain[attach_idx] = new_plain
    ib_keys[attach_idx] = georgeos_keys(tokens_from_latin(new_plain), new_plain)

def ensure_terminal_qmark(out_words, ib_keys, plain):
    """Make sure the output ends in "?" (or already ends in "?"/"!"), in place.

    Looks at the last entry of ``out_words`` that is neither "" nor None:
    a "." is replaced by "?", "?" or "!" is left alone, and anything else gets
    a "?" appended. With no such entry a "?" is appended too. The three
    parallel lists are kept in step.
    """
    def _append_qmark():
        out_words.append("?")
        ib_keys.append("")
        plain.append("?")

    last = len(out_words) - 1
    while last >= 0 and (out_words[last] == "" or out_words[last] is None):
        last -= 1

    if last < 0:
        _append_qmark()
        return
    final_token = out_words[last]
    if final_token == ".":
        out_words[last] = "?"
        ib_keys[last] = ""
        plain[last] = "?"
    elif final_token not in {"?", "!"}:
        _append_qmark()

def normalize_surface_by_pos(ni_surface:str, pos:str) -> str:
    """Drop a trailing TAM suffix from ``ni_surface`` unless ``pos`` is "V".

    Empty or None surfaces are returned as they are.
    """
    if not ni_surface or pos == "V":
        return ni_surface
    return strip_ni_tam(ni_surface)[0]

def translate_sentence(sent:str):
    """Translate one Spanish sentence (a single line) into Neo-Iberian.

    The input is split on whitespace and punctuation; each token is then
    resolved in this priority order:

    1. punctuation handling ("¿"/"¡" are dropped, "?"/"!" attach the
       Q_ENCLITIC_INT / Q_ENCLITIC_EXC enclitic to the sentence's verb);
    2. hard-coded function words ("no", "al", "del", "a", "un/una/uno");
    3. stop-word dropping (STOP words without a LEX_FORM entry);
    4. rich surface lookup in SURF_RICH keyed by (form, TAM tag);
    5. form/lemma lookup plus a TAM suffix for verb-like tokens;
    6. ``[SIN-LEX:token]`` placeholder for anything unresolved.

    Three parallel lists are built: ``out_words`` (NI surface),
    ``ib_keys`` (Iberian-script keys) and ``plain`` (latin form used to
    recompute keys when an enclitic is attached).

    Returns:
        tuple: (NI words joined by single spaces, list of the non-empty
        Iberian-script keys).
    """
    toks = re.sub(r"\s+"," ", (sent or "").strip())
    # Typographic quotes are spelled as \u escapes (resolved by ``re``) so the
    # character class really contains them; written literally as straight
    # quotes they terminate the string literal and never reach the pattern.
    toks = re.sub(r"([,.;:!?¡¿…()\[\]{}\"'«»—–\u201c\u201d\u2018\u2019])", r" \1 ", toks)
    toks = [t for t in toks.split() if t]
    curly_quotes = {"\u201c", "\u201d", "\u2018", "\u2019"}

    out_words=[]; ib_keys=[]; plain=[]
    neg_next=False; last_finite_idx=None; has_qmark=False
    saw_wh = has_wh_outside_parens(toks)
    sentence_start=True

    def is_punct(tok):
        # Typographic quotes are treated as punctuation even if the shared
        # VISIBLE_PUNCT collection does not list them.
        return tok in VISIBLE_PUNCT or tok in curly_quotes

    def emit(ni):
        """Append one NI word to the three parallel lists; return its index."""
        out_words.append(ni)
        ib_keys.append(georgeos_keys(tokens_from_latin(ni), ni))
        plain.append(ni)
        return len(out_words) - 1

    indefinite = {"un": "banu", "una": "bana", "uno": "ban"}

    for i,t in enumerate(toks):
        # Opening marks produce no output; they only restart the sentence.
        if t in {"¿","¡"}:
            sentence_start=True
            continue

        if t in {"?","!"}:
            if t=="?": has_qmark=True
            encl = Q_ENCLITIC_INT if t=="?" else Q_ENCLITIC_EXC
            attach_idx = last_finite_idx
            if attach_idx is None:
                # No finite verb seen in this sentence: fall back to its
                # last real word, without reaching into a previous sentence.
                for j in range(len(out_words)-1, -1, -1):
                    word = out_words[j]
                    if word in {"?","!"}:
                        break
                    if word and not is_punct(word):
                        attach_idx = j
                        break
            if attach_idx is not None:
                attach_enclitic(out_words, ib_keys, plain, attach_idx, encl)
            out_words.append(t); ib_keys.append(""); plain.append(t)
            # "?" / "!" end the sentence, so the verb anchor must not leak
            # into the next one (same reset that HARD_BOUND marks perform).
            last_finite_idx=None
            sentence_start=True
            continue

        if is_punct(t):
            out_words.append(t); ib_keys.append(t); plain.append(t)
            if t in HARD_BOUND:
                last_finite_idx=None
                sentence_start = (t in {".",":",";","—","–"})
            continue

        low=t.lower()
        prev = toks[i-1].lower() if i>0 else ""
        nxt  = toks[i+1].lower() if i+1<len(toks) else ""

        # NOTE(review): the sentence-initial imperative test compares the
        # original-case token, so a capitalised "Ven" does not match here;
        # left unchanged because forms such as "Sé"/"Ve" are ambiguous.
        if (sentence_start and t in {"ve","ven","haz","pon","sal","di","ten","sé","id","venid","tened"}) or \
           (re.search(r"(me|te|lo|la|nos|os|les|se)$", low) and looks_like_verb_form_strict(low)):
            tag_detected="IMP"
        else:
            tag_detected = detect_tam_with_context(toks, i, sentence_start)

        pos_hint = pos_of_es(low)
        is_verb_like = looks_like_verb_form_strict(low) or (pos_hint=="V")
        tam_ok = tam_allowed_for_es(low)

        # Negation is emitted later, right before the next verb-like token.
        if low=="no":
            neg_next=True
            continue

        # Contractions: al/del → (a/de + el)
        if low == "al":
            for ni in ("ka","do"):
                emit(ni)
            sentence_start=False
            continue
        if low == "del":
            for ni in ("ta","do"):
                emit(ni)
            sentence_start=False
            continue

        if low=="a":
            emit(rule_a(prev,low,nxt))
            continue
        if low in indefinite:
            emit(indefinite[low])
            continue

        if (low in STOP) and (low not in LEX_FORM):
            continue

        ni_direct = SURF_RICH.get((low, tag_detected))
        if neg_next and is_verb_like:
            emit("eś")
            neg_next=False
        if ni_direct:
            if any(ni_direct.endswith(s) for s in VERB_TAM):
                ni=ni_direct
            else:
                ni=normalize_surface_by_pos(ni_direct, "V" if tam_ok else (pos_hint or ""))
            idx = emit(ni)
            if tam_ok and any(ni.endswith(s) for s in VERB_TAM):
                last_finite_idx=idx
            sentence_start=False
            continue

        ni_lemma, ok = lookup_form_lemma(t, prev, nxt)
        if ok:
            # Forced forms already carry their final suffix.
            if low in FORCE_KEYS:
                last_finite_idx = emit(LEX_FORM.get(low, ni_lemma))
                sentence_start=False
                continue
            root, old_suf = strip_ni_tam(ni_lemma or "")
            if tag_detected=="IMP":
                last_finite_idx = emit(root+"-tu")
                sentence_start=False
                continue
            if old_suf=="-tu":
                last_finite_idx = emit(ni_lemma)
                sentence_start=False
                continue
            if tam_ok and is_verb_like:
                suf=TAM_SUFFIX.get(tag_detected,"-ke")
                base=root or (ni_lemma or "")
                last_finite_idx = emit(base+suf if suf else base)
            else:
                emit(normalize_surface_by_pos(ni_lemma if ni_lemma!="" else "Ø", pos_hint or ""))
            sentence_start=False
            continue

        placeholder=f"[SIN-LEX:{t}]"
        out_words.append(placeholder); ib_keys.append(placeholder); plain.append(placeholder)
        sentence_start=False

    # A wh-word without any "?" in the text still marks a question: attach
    # the interrogative enclitic and make sure the output ends with "?".
    appended_na=False
    if saw_wh and not has_qmark:
        encl=Q_ENCLITIC_INT
        attach_idx=last_finite_idx
        if attach_idx is None:
            for j in range(len(out_words)-1,-1,-1):
                if out_words[j] and out_words[j] not in VISIBLE_PUNCT and out_words[j] not in {"?","!"} and not out_words[j].startswith("["):
                    attach_idx=j; break
        if attach_idx is not None and not (out_words[attach_idx].endswith("-na") or out_words[attach_idx].endswith("-ba")):
            attach_enclitic(out_words, ib_keys, plain, attach_idx, encl); appended_na=True
    if appended_na and not has_qmark: ensure_terminal_qmark(out_words, ib_keys, plain)

    ib_clean=[k for k in ib_keys if k!=""]
    return " ".join(out_words), ib_clean

def translate(text:str):
    """Translate a multi-line Spanish text to Neo-Iberian, line by line.

    Blank lines are skipped. Each remaining line goes through
    ``translate_sentence``; its Iberian keys are rendered with
    ``render_ib_with_tridots``.

    Returns:
        tuple: (NI text, Iberian-line text), both newline-joined.
    """
    ni_out = []
    ib_out = []
    for raw in (text or "").split("\n"):
        if not raw.strip():
            continue
        ni_line, ib_tokens = translate_sentence(raw)
        ni_out.append(ni_line)
        ib_out.append(render_ib_with_tridots(ib_tokens))
    return "\n".join(ni_out), "\n".join(ib_out)

# =========================
# TRADUCTOR NI→ES (mejorado)
# =========================
# Most common nominal (non-TAM) suffixes, used for the soft fallback
NI_NOMINAL_SUFFIXES = ("-ar","-en","-ka","-la","-si","-ŕa")

def normalize_ni(text: str) -> str:
    """Prepare NI input for tokenisation.

    Slashes (the "tridots" separator) become spaces, ``[SIN-LEX:…]``
    placeholders are unwrapped to their inner text, and whitespace runs are
    collapsed to single spaces with the ends trimmed. ``None`` yields "".
    """
    without_slashes = (text or "").replace("/", " ")
    unwrapped = re.sub(r"\[SIN-LEX:([^\]]+)\]", r"\1", without_slashes)
    trimmed = unwrapped.strip()
    return re.sub(r"\s+", " ", trimmed)

def tokenize_ni(text: str):
    """Split NI text into word and punctuation tokens.

    Every punctuation mark in the character class below, including the
    typographic quotes U+201C/U+201D/U+2018/U+2019, becomes a token of its
    own. Hyphens are not split, so suffixed forms such as ``nitus-ke`` stay
    whole. ``None`` or empty input yields an empty list.
    """
    # The curly quotes are written as \u escapes, which ``re`` resolves
    # itself. Spelled with straight quote characters they end the string
    # literal early and never make it into the pattern.
    spaced = re.sub(
        r"([,.;:!?¡¿…()\[\]{}\"'«»—–\u201c\u201d\u2018\u2019])",
        r" \1 ",
        text or "",
    )
    return spaced.split()

# --- NUEVO: solo conjugar si el lema ES termina en -ar/-er/-ir
def _is_spanish_verb_lemma(lemma: str) -> bool:
    """Return True when ``lemma`` is a string ending in -ar, -er or -ir.

    This is a purely orthographic heuristic. The result is a real ``bool``,
    as the annotation promises, not the ``re.Match`` / ``None`` that a bare
    ``re.search`` returns.
    """
    return isinstance(lemma, str) and bool(re.search(r"(ar|er|ir)$", lemma))

def _detect_ni_person(ni_form: str):
    """Split a Neo-Iberian form into (person/number code, remaining root).

    The first suffix of NI_PERSON_MAP (in the map's own order) that the
    form ends with decides the person. Without a match the form is returned
    unchanged together with the default "3S".
    """
    hit = next(
        ((pn, suf) for suf, pn in NI_PERSON_MAP.items() if ni_form.endswith(suf)),
        None,
    )
    if hit is None:
        return "3S", ni_form  # default
    person_code, matched_suffix = hit
    return person_code, ni_form[:-len(matched_suffix)]

# Person/number codes, in the slot order used by every paradigm tuple below.
_ES_PERSON_SLOTS = ("1S", "2S", "3S", "1P", "2P", "3P")

# Irregular paradigms: lemma -> TAM tag -> six forms in _ES_PERSON_SLOTS order.
# ``None`` in a slot means "no irregular form recorded; use the regular rule".
# Coverage is deliberately partial (a safety net for the most frequent verbs).
_ES_IRREGULAR_FORMS = {
    "ser": {
        "PRS": ("soy", "eres", "es", "somos", "sois", "son"),
        "PST": ("fui", "fuiste", "fue", "fuimos", "fuisteis", "fueron"),
        "IPFV": ("era", "eras", "era", "éramos", "erais", "eran"),
        "SBJV": ("sea", "seas", "sea", "seamos", "seáis", "sean"),
    },
    "ir": {
        "PRS": ("voy", "vas", "va", "vamos", "vais", "van"),
        "PST": ("fui", "fuiste", "fue", "fuimos", "fuisteis", "fueron"),
        "IPFV": ("iba", "ibas", "iba", "íbamos", "ibais", "iban"),
        "SBJV": ("vaya", "vayas", "vaya", "vayamos", "vayáis", "vayan"),
    },
    "estar": {
        "PRS": ("estoy", "estás", "está", "estamos", "estáis", "están"),
        "SBJV": ("esté", "estés", "esté", "estemos", "estéis", "estén"),
    },
    "tener": {
        "PRS": ("tengo", "tienes", "tiene", "tenemos", "tenéis", "tienen"),
        "SBJV": ("tenga", "tengas", "tenga", "tengamos", "tengáis", "tengan"),
    },
    "venir": {
        "PRS": ("vengo", "vienes", "viene", "venimos", "venís", "vienen"),
        "SBJV": ("venga", "vengas", "venga", "vengamos", "vengáis", "vengan"),
    },
    "hacer": {
        "PRS": ("hago", "haces", "hace", "hacemos", "hacéis", "hacen"),
        "SBJV": ("haga", "hagas", "haga", "hagamos", "hagáis", "hagan"),
        # Full preterite: the regular rule would yield "hacimos", "hacieron".
        "PST": ("hice", "hiciste", "hizo", "hicimos", "hicisteis", "hicieron"),
    },
    "poner": {
        "PRS": ("pongo", "pones", "pone", "ponemos", "ponéis", "ponen"),
        "SBJV": ("ponga", "pongas", "ponga", "pongamos", "pongáis", "pongan"),
    },
    "dar": {
        "PRS": ("doy", "das", "da", "damos", "dais", "dan"),
        "SBJV": ("dé", "des", "dé", "demos", "deis", "den"),
    },
    "haber": {
        "PRS": ("he", "has", "ha", "hemos", "habéis", "han"),
        "SBJV": ("haya", "hayas", "haya", "hayamos", "hayáis", "hayan"),
    },
    "poder": {
        "PRS": ("puedo", "puedes", "puede", "podemos", "podéis", "pueden"),
        "SBJV": ("pueda", "puedas", "pueda", "podamos", "podáis", "puedan"),
    },
    "decir": {
        "PRS": ("digo", "dices", "dice", "decimos", "decís", "dicen"),
    },
    "saber": {
        # Only the singular is recorded; the plural is regular.
        "PRS": ("sé", "sabes", "sabe", None, None, None),
    },
    "ver": {
        "PRS": ("veo", "ves", "ve", "vemos", "veis", "ven"),
    },
}

# Irregular future/conditional stems (endings are attached to these).
_ES_IRREGULAR_FUT_STEMS = {
    "salir": "saldr", "venir": "vendr", "tener": "tendr", "poner": "pondr",
    "valer": "valdr", "poder": "podr", "haber": "habr", "saber": "sabr",
    "caber": "cabr", "querer": "querr", "decir": "dir", "hacer": "har",
}

# Future/conditional endings, attached to the infinitive or irregular stem.
_ES_FUT_COND_ENDINGS = {
    "FUT": ("é", "ás", "á", "emos", "éis", "án"),
    "COND": ("ía", "ías", "ía", "íamos", "íais", "ían"),
}

# Regular endings attached to the root (lemma minus -ar/-er/-ir).
# A class missing from a tag's table shares the "er" endings.
_ES_REGULAR_ENDINGS = {
    "PRS": {
        "ar": ("o", "as", "a", "amos", "áis", "an"),
        "er": ("o", "es", "e", "emos", "éis", "en"),
        "ir": ("o", "es", "e", "imos", "ís", "en"),
    },
    "PST": {
        "ar": ("é", "aste", "ó", "amos", "asteis", "aron"),
        "er": ("í", "iste", "ió", "imos", "isteis", "ieron"),
    },
    "SBJV": {
        "ar": ("e", "es", "e", "emos", "éis", "en"),
        "er": ("a", "as", "a", "amos", "áis", "an"),
    },
    "IPFV": {
        "ar": ("aba", "abas", "aba", "ábamos", "abais", "aban"),
        "er": ("ía", "ías", "ía", "íamos", "íais", "ían"),
    },
}


def _conj_es_from_lemma(lemma: str, tag: str, person: str = "3S"):
    """Conjugate a Spanish verb lemma for a TAM tag and person/number.

    Args:
        lemma: Spanish infinitive. Anything that is not a string ending in
            -ar/-er/-ir (case-insensitively) is returned unchanged.
        tag: TAM tag. "PRS", "PST", "IPFV", "SBJV", "FUT", "COND" and "IMP"
            are conjugated; any other tag (e.g. "INF", "UNK") yields the
            lower-cased lemma.
        person: one of "1S", "2S", "3S", "1P", "2P", "3P". An unrecognised
            code is treated as "3S" for every tense.

    Returns:
        The conjugated form. Irregular forms come from _ES_IRREGULAR_FORMS;
        everything else follows the regular endings, without stem changes
        or orthographic adjustments.
    """
    if not isinstance(lemma, str):
        return lemma
    # Lower-case before the verb test so that "CANTAR" is conjugated too.
    verb = lemma.lower()
    if not _is_spanish_verb_lemma(verb):
        return lemma

    slot = _ES_PERSON_SLOTS.index(person) if person in _ES_PERSON_SLOTS else 2

    paradigm = _ES_IRREGULAR_FORMS.get(verb, {}).get(tag)
    if paradigm is not None and paradigm[slot] is not None:
        return paradigm[slot]

    root = verb[:-2]
    verb_class = verb[-2:]  # ar, er, ir

    if tag in _ES_FUT_COND_ENDINGS:
        stem = _ES_IRREGULAR_FUT_STEMS.get(verb, verb)
        return stem + _ES_FUT_COND_ENDINGS[tag][slot]

    by_class = _ES_REGULAR_ENDINGS.get(tag)
    if by_class is not None:
        endings = by_class.get(verb_class) or by_class["er"]
        return root + endings[slot]

    if tag == "IMP" and person == "2S":
        return root + ("a" if verb_class == "ar" else "e")

    # Other imperative persons and non-finite/unknown tags fall back to the
    # infinitive.
    return verb

# Mantener compatibilidad con código antiguo
def _conj_es_3sg(lemma:str, tag:str) -> str:
    """Backward-compatible shim: third-person-singular conjugation.

    Delegates to ``_conj_es_from_lemma`` with the person fixed to "3S".
    """
    third_singular = "3S"
    return _conj_es_from_lemma(lemma, tag, third_singular)

def _strip_nominal_suffix(base: str):
    """Remove one nominal suffix from ``base``, trying longer suffixes first.

    Returns:
        tuple: (stem, removed_suffix); (base, "") when no entry of
        NI_NOMINAL_SUFFIXES matches.
    """
    longest_first = sorted(NI_NOMINAL_SUFFIXES, key=len, reverse=True)
    matched = next((suf for suf in longest_first if base.endswith(suf)), None)
    if matched is None:
        return base, ""
    return base[:-len(matched)], matched

def _cleanup_es_spaces(s: str) -> str:
    """Tidy spacing in the Spanish output produced from joined tokens.

    Removes the space before closing punctuation, after opening "¿"/"¡" and
    inside parentheses, collapses whitespace runs, and merges a duplicated
    preposition "a a" into a single "a".
    """
    s = re.sub(r"\s+([,.;:!?])", r"\1", s)
    s = re.sub(r"([¿¡])\s+", r"\1", s)
    s = re.sub(r"\(\s+", "(", s)
    s = re.sub(r"\s+\)", ")", s)
    s = re.sub(r"\s{2,}", " ", s).strip()
    # Only whole-word "a a" is merged. A plain substring replace also hits
    # word endings ("casa a la" -> "cas a la") and leaves a double space.
    s = re.sub(r"(?<!\w)a(?: a)+(?!\w)", "a", s)
    return s

# Typographic quotes are passed through as punctuation even when the shared
# VISIBLE_PUNCT collection does not list them.
_NI_TYPOGRAPHIC_QUOTES = frozenset("\u201c\u201d\u2018\u2019")


def _translate_ni_token(t: str) -> str:
    """Translate a single NI token to Spanish (see ``translate_ni_to_es``)."""
    # Punctuation is preserved as-is.
    if t in VISIBLE_PUNCT or t in {"?", "!", "¿", "¡"} or t in _NI_TYPOGRAPHIC_QUOTES:
        return t

    # Capitalised (but not all-caps) tokens are kept as proper nouns.
    # NOTE(review): this also keeps a capitalised sentence-initial word
    # untranslated — confirm that is acceptable.
    if t and t[0].isupper() and not t.isupper() and len(t) > 1:
        return t

    low = t.lower()

    # The -na / -ba enclitics (interrogative / exclamative) are dropped for
    # lookup only.
    lookup_form = low[:-3] if low.endswith(("-na", "-ba")) else low

    # 1) Direct form (surface or root).
    if lookup_form in NI_TO_ES_FORM:
        return NI_TO_ES_FORM[lookup_form]

    # 2) TAM by suffix, then person/number on the remaining root.
    root, tam_tag, _tam_suffix = detect_ni_tam(lookup_form)
    person, root_clean = _detect_ni_person(root)

    # 2a) Exact surface with its TAM tag.
    es_direct = NI_TO_ES_SURF.get((lookup_form, tam_tag))
    if es_direct:
        return es_direct

    # 2b) Finite TAM on a known lemma: conjugate with the detected person.
    if tam_tag not in {"INF", "UNK"} and root_clean in NI_TO_ES_LEMMA:
        return _conj_es_from_lemma(NI_TO_ES_LEMMA[root_clean], tam_tag, person)

    # 2c) Known root: prefer the stored form, then the lemma.
    if root_clean in NI_TO_ES_FORM:
        return NI_TO_ES_FORM[root_clean]
    if root_clean in NI_TO_ES_LEMMA:
        return _conj_es_from_lemma(NI_TO_ES_LEMMA[root_clean], tam_tag, person)

    # 3) Soft nominal fallback: strip one nominal suffix and retry.
    nominal_base = root_clean if root_clean else lookup_form
    stem, _removed = _strip_nominal_suffix(nominal_base)
    if stem != nominal_base:
        if stem in NI_TO_ES_FORM:
            return NI_TO_ES_FORM[stem]
        if stem in NI_TO_ES_LEMMA:
            # Nominal route: return the bare lemma, never a conjugated form.
            return NI_TO_ES_LEMMA[stem]
        # Last resort: show the stripped core.
        return stem

    # 4) Unknown: soft marker around the original token.
    return f"[?:{t}]"


def translate_ni_to_es(sent: str):
    """Translate one Neo-Iberian sentence to Spanish, token by token.

    The text is normalised and tokenised. Each token is resolved through
    direct form lookup, TAM/person analysis with conjugation, and a nominal
    suffix fallback; unknown tokens become ``[?:token]``. The result is
    joined with spaces and passed through ``_cleanup_es_spaces``.
    """
    pieces = [_translate_ni_token(tok) for tok in tokenize_ni(normalize_ni(sent))]
    return _cleanup_es_spaces(" ".join(pieces))

# =========================
# CARGA DE LÉXICO
# =========================
def _ni_number_table():
    """Build the digit-string -> NI numeral table for 1..100.

    Numerals are composed as ``<tens>-ke-<unit>``. Generating them avoids
    the copy-paste slip of a hand-written table in which 41-49, 61-69 and
    81-89 carried the numerals of 51-59, 71-79 and 91-99.
    """
    units = {
        1: "ban", 2: "bi", 3: "irur", 4: "laur", 5: "borste",
        6: "śei", 7: "sisbi", 8: "sorse", 9: "bedar",
    }
    tens = {
        10: "abaŕ", 20: "oŕkei", 30: "oŕkei-abaŕ", 40: "binoŕkei",
        50: "binoŕkei-abaŕ", 60: "iruŕokei", 70: "iruŕokei-abaŕ",
        80: "lauŕokei", 90: "lauŕokei-abaŕ",
    }
    table = {}
    for n in range(1, 100):
        unit = n % 10
        ten = n - unit
        if ten == 0:
            table[str(n)] = units[unit]
        elif unit == 0:
            table[str(n)] = tens[ten]
        else:
            table[str(n)] = f"{tens[ten]}-ke-{units[unit]}"
    table["100"] = "atun"
    return table


def _load_simple_pairs(rows, es_col, ni_col):
    """Load plain ES→NI pairs from CSV rows; return how many were read.

    Each non-empty Spanish form goes into LEX_FORM (first entry wins). When
    the form looks like a verb, its guessed infinitive is registered in
    LEX_LEMMA as well.
    """
    count = 0
    for r in rows:
        es = (r.get(es_col) or "").strip().lower()
        ni = (r.get(ni_col) or "").strip()
        if not es:
            continue
        LEX_FORM.setdefault(es, ni)
        count += 1
        _meta_set(es, pos="", tam_ok=None)
        if looks_like_verb_form_strict(es):
            lem = guess_infinitive_es(es)
            if lem:
                LEX_LEMMA.setdefault(lem, ni)
                _meta_set(lem, pos="V", tam_ok=True)
    return count


def load_lexicon():
    """Populate the ES→NI lexicon tables from the candidate CSV files.

    Every existing file in CSV_CANDIDATES is read. Three layouts are
    recognised by their header: the rich v4.4 layout (``source_es``,
    ``es_morph``, ``ni_surface``), and two plain layouts
    (``source_es``/``target_ni`` and ``es``/``ni_lemma``). Afterwards the
    folded index FOLD_FORM is rebuilt, the built-in minimum vocabulary and
    lemmas are added without overriding CSV data, and the forced verb forms
    overwrite whatever was loaded.

    Returns:
        bool: True when at least one CSV file was read successfully.
    """
    global FOLD_FORM, FORCE_KEYS

    loaded=False
    total_rich=total_simple=0
    for p in CSV_CANDIDATES:
        if not os.path.exists(p): continue
        try:
            # newline="" lets the csv module handle line endings itself.
            with open(p, encoding="utf-8", newline="") as f:
                rd=csv.DictReader(f); flds=set(rd.fieldnames or [])
                # v4.4: new layout with ni_surface
                if {"source_es","es_morph","ni_surface"}.issubset(flds):
                    for r in rd:
                        es=(r.get("source_es") or "").strip().lower()
                        tag=(r.get("es_morph") or "").strip().upper()
                        surf=(r.get("ni_surface") or "").strip()
                        if not surf:
                            # Rebuild the surface from root + suffix columns.
                            root=(r.get("ni_root") or "").strip(); suf=(r.get("ni_suffix") or "").strip()
                            if root or suf: surf=f"{root}{suf}"
                        # NOTE(review): SURF_RICH is last-row-wins while
                        # LEX_FORM/LEX_LEMMA are first-row-wins; confirm the
                        # different precedence is intended.
                        if es and tag and surf: SURF_RICH[(es,tag)] = surf; total_rich+=1

                        ni=(r.get("target_ni") or "").strip()
                        es_lem=(r.get("es_lemma") or "").strip().lower()

                        pos = _canon_pos(r.get("pos") or r.get("es_pos") or r.get("target_pos") or r.get("pos_es") or r.get("ni_pos") or "")
                        tam_ok = _boolish(r.get("tam_ok"))
                        # Without an explicit tam_ok, derive it from the POS.
                        tam_default = tam_ok if tam_ok is not None else (pos=="V" if pos else None)

                        if es: _meta_set(es, pos=pos, tam_ok=tam_default)
                        if es_lem:
                            _meta_set(es_lem, pos=("V" if es_lem.endswith(("ar","er","ir")) else (pos or "")),
                                      tam_ok=tam_default)

                        if es and ni!="": LEX_FORM.setdefault(es,ni)
                        if es_lem and ni!="": LEX_LEMMA.setdefault(es_lem,ni)
                    loaded=True
                elif {"source_es","target_ni"}.issubset(flds):
                    total_simple += _load_simple_pairs(rd, "source_es", "target_ni")
                    loaded=True
                elif {"es","ni_lemma"}.issubset(flds):
                    total_simple += _load_simple_pairs(rd, "es", "ni_lemma")
                    loaded=True
        except Exception as e:
            # Best effort: an unreadable file must not stop the others.
            print(f"[WARN] No se pudo leer {p}: {e}")
    if total_rich or total_simple:
        print(f"✓ ES→NI: {total_rich} superficies ricas, {total_simple} pares simples")

    # Folded index: only for forms of 5+ characters that are not verb-like
    # and whose folded key differs from the original.
    FOLD_FORM={}
    for k,v in LEX_FORM.items():
        fk=fold(k)
        if fk!=k and len(k)>=5 and not looks_like_verb_form_strict(k):
            FOLD_FORM.setdefault(fk,v)

    # Minimum coverage (never overrides CSV entries)
    KEEP_MIN={
        "y":"ne","o":"o","no":"eś","a":"ka","para":"kara","eso":"kok","tarta":"gatel",
        "el":"do", "la":"da", "los":"don", "las":"dan",
        "un":"banu","una":"bana","uno":"ban",
        "este":"aŕe","esta":"aŕa","estos":"aŕen","estas":"aŕan",

        # Basic numbers
        # NOTE(review): "nueve" maps to "lauŕbi" while digit 9 and the
        # compounds (19, 29, ...) use "bedar" — confirm which is intended.
        "dos":"bi","tres":"irur","cuatro":"laur","cinco":"borste","seis":"śei",
        "siete":"sisbi","ocho":"sorse","nueve":"lauŕbi","diez":"abaŕ","veinte":"oŕkei",
    }
    # Numbers 1-100 (digits)
    KEEP_MIN.update(_ni_number_table())
    KEEP_MIN.update({
        # Numbers spelled out
        "once":"abaŕ-ke-ban","doce":"abaŕ-ke-bi","trece":"abaŕ-ke-irur","catorce":"abaŕ-ke-laur","quince":"abaŕ-ke-borste",
        "dieciséis":"abaŕ-ke-śei","dieciseis":"abaŕ-ke-śei","diecisiete":"abaŕ-ke-sisbi","dieciocho":"abaŕ-ke-sorse","diecinueve":"abaŕ-ke-bedar",
        "veintiuno":"oŕkei-ke-ban","veintidós":"oŕkei-ke-bi","veintidos":"oŕkei-ke-bi","veintitrés":"oŕkei-ke-irur","veintitres":"oŕkei-ke-irur",
        "veinticuatro":"oŕkei-ke-laur","veinticinco":"oŕkei-ke-borste","veintiséis":"oŕkei-ke-śei","veintiseis":"oŕkei-ke-śei",
        "veintisiete":"oŕkei-ke-sisbi","veintiocho":"oŕkei-ke-sorse","veintinueve":"oŕkei-ke-bedar",
        "treinta":"oŕkei-abaŕ","cuarenta":"binoŕkei","cincuenta":"binoŕkei-abaŕ","sesenta":"iruŕokei",
        "setenta":"iruŕokei-abaŕ","ochenta":"lauŕokei","noventa":"lauŕokei-abaŕ","cien":"atun",

        # Pronouns and particles
        "yo":"ni","tú":"zu","él":"nar","ella":"nar",
        "nosotros":"gu","nosotras":"gu","vosotros":"zuek","vosotras":"zuek",
        "ellos":"narek","ellas":"narek",
        "que":"ze","si":"baldin","cuando":"noiz","donde":"non",
        "como":"nola","porque":"zeren","mientras":"bitarte",
        "versión":"bertsi","test":"froga","prueba":"froga",
        "ejemplo":"adibid","texto":"testu","palabra":"hitz"
    })

    pronouns = {"yo","tú","él","ella","nosotros","nosotras","vosotros","vosotras","ellos","ellas"}
    particles = {"que","si","cuando","donde","como","porque","mientras"}
    number_words = {"uno","dos","tres","cuatro","cinco","seis","siete","ocho","nueve","diez","once","doce","trece","catorce","quince","dieciséis","dieciseis","diecisiete","dieciocho","diecinueve","veinte","veintiuno","veintidós","veintidos","veintitrés","veintitres","veinticuatro","veinticinco","veintiséis","veintiseis","veintisiete","veintiocho","veintinueve","treinta","cuarenta","cincuenta","sesenta","setenta","ochenta","noventa","cien"}
    for k,v in KEEP_MIN.items():
        LEX_FORM.setdefault(k,v)
        if k in pronouns:
            _meta_set(k, pos="PRON", tam_ok=False)
        elif k in particles:
            _meta_set(k, pos="PART", tam_ok=False)
        elif k.isdigit() or k in number_words:
            _meta_set(k, pos="NUM", tam_ok=False)
        else:
            _meta_set(k, pos=_canon_pos("PART" if k in {"y","o","no","a","para"} else "DET"), tam_ok=False)

    BUILTIN_LEMMA={
        # Safety net only, in case these are missing from the CSV
        "llover":"euŕak","llamar":"deitu","venir":"nuker","ir":"nitus",
        "hacer":"giotael","tener":"giokk","poder":"binbel","poner":"pusen",
        "ser":"izan","estar":"egon"
    }
    for k,v in BUILTIN_LEMMA.items():
        LEX_LEMMA.setdefault(k,v); _meta_set(k, pos="V", tam_ok=True)

    # Forced forms overwrite any CSV entry (plain assignment, not setdefault).
    FORCE_FORMS = {
        "voy":"nitus-ke","vas":"nitus-ke","va":"nitus-ke","vamos":"nitus-ke","vais":"nitus-ke","van":"nitus-ke",
        "vengo":"nuker-ke","vienes":"nuker-ke","viene":"nuker-ke","venimos":"nuker-ke","venís":"nuker-ke","vienen":"nuker-ke",
        "ven":"nuker-tu","haz":"giotael-tu","pon":"pusen-tu","di":"siśnesir-tu","sal":"salku-tu","ten":"giokk-tu","sé":"suber-tu"
    }
    for form, ni in FORCE_FORMS.items():
        LEX_FORM[form] = ni; _meta_set(form, pos="V", tam_ok=True)
    FORCE_KEYS = set(FORCE_FORMS.keys())
    return loaded

def _store_ni_es_entry(source_ni, target_es, ni_tam, ni_root, es_lemma):
    """Record one NI→ES row in the module-level lookup tables.

    Entries keyed by the NI surface form overwrite whatever was stored before
    (later rows win). Entries keyed by the NI root use ``setdefault`` and so
    never replace an existing key.
    """
    if source_ni and target_es:
        NI_TO_ES_FORM[source_ni] = target_es
        if ni_tam:
            NI_TO_ES_SURF[(source_ni, ni_tam)] = target_es
    if ni_root and es_lemma:
        NI_TO_ES_LEMMA.setdefault(ni_root, es_lemma)
    if ni_root and target_es:
        NI_TO_ES_FORM.setdefault(ni_root, target_es)


def load_lexicon_ni_es():
    """Load the NI→ES lexicon from every existing file in ``CSV_NI_ES``.

    Each file is read by column name when a header with recognisable
    source/target columns is found, and by fixed column position otherwise.
    A built-in minimum vocabulary is then added for keys the CSVs did not
    provide.

    Fills ``NI_TO_ES_FORM``, ``NI_TO_ES_SURF`` and ``NI_TO_ES_LEMMA`` in place.
    NI surface forms keep their original casing; TAM tags are upper-cased and
    roots/lemmas lower-cased.

    Returns:
        True if at least one CSV file was loaded, False otherwise.
    """
    loaded = False
    total = 0
    for p in CSV_NI_ES:
        if not os.path.exists(p):
            debug_print(f"CSV NI→ES no encontrado: {p}")
            continue
        try:
            # utf-8-sig strips a leading BOM (and reads plain UTF-8 unchanged);
            # newline="" is what the csv module expects from its input file.
            with open(p, encoding="utf-8-sig", newline="") as f:
                sample = f.read(4096)
                f.seek(0)
                try:
                    has_header = csv.Sniffer().has_header(sample)
                except csv.Error:
                    # The sniffer could not analyse the sample; the positional
                    # reader below has its own header check.
                    has_header = False

                # 1) Read by column name when there is a header
                if has_header:
                    dr = csv.DictReader(f)
                    # Match on normalised names but keep the original spelling,
                    # because DictReader rows are keyed by the header exactly
                    # as written in the file.
                    columns = {}
                    for name in dr.fieldnames or []:
                        columns.setdefault((name or "").strip().lower(), name)
                    names = list(columns)

                    fn_source = next((c for c in names if "source" in c and ("ni" in c or "neo" in c)), None)
                    fn_target = next((c for c in names if "target" in c and ("es" in c or "spa" in c)), None)
                    fn_eslem  = next((c for c in names if "es_lem" in c or c == "es_lemma" or "lemma_es" in c), None)
                    # v4.4: the CSV uses 'ni_tam'
                    fn_morph  = next((c for c in names if c in {"ni_tam", "ni_morph", "ni_tag"} or "morph" in c), None)
                    fn_root   = next((c for c in names if "ni_root" in c or c == "root" or "ni_lemma" in c), None)

                    if fn_source and fn_target:
                        debug_print(f"Cargando {p} con cabecera: source={fn_source}, target={fn_target}")
                        key_source = columns[fn_source]
                        key_target = columns[fn_target]
                        key_eslem = columns[fn_eslem] if fn_eslem else None
                        key_morph = columns[fn_morph] if fn_morph else None
                        key_root = columns[fn_root] if fn_root else None

                        rows = 0
                        for r in dr:
                            _store_ni_es_entry(
                                (r.get(key_source) or "").strip(),  # case preserved
                                (r.get(key_target) or "").strip(),
                                (r.get(key_morph) or "").strip().upper() if key_morph else "",
                                (r.get(key_root) or "").strip().lower() if key_root else "",
                                (r.get(key_eslem) or "").strip().lower() if key_eslem else "",
                            )
                            rows += 1
                        total += rows
                        print(f"✓ Cargadas {rows} filas NI→ES (cabecera) desde {p}")
                        loaded = True
                        continue  # next file, if any

                # 2) Fallback: read by column position (v4.4 layout)
                # source_ni, target_es, ni_tam, ni_pn, es_morph, es_pn, ni_root, ni_suffix, es_lemma, pos_es, evidencia
                #     0          1        2      3        4        5       6         7         8         9       10
                f.seek(0)
                count = 0
                first_row = True
                for row in csv.reader(f):
                    if not row:
                        continue
                    if first_row:
                        first_row = False
                        # A first row that names its columns is a header, not data.
                        if any(
                            marker in (c or "").lower()
                            for c in row
                            for marker in ("source", "ni_", "target")
                        ):
                            continue

                    def _cell(idx, _row=row):
                        return _row[idx].strip() if len(_row) > idx else ""

                    _store_ni_es_entry(
                        _cell(0),           # case preserved
                        _cell(1),
                        _cell(2).upper(),
                        _cell(6).lower(),
                        _cell(8).lower(),
                    )
                    count += 1
                    total += 1

                if count > 0:
                    print(f"✓ Cargadas {count} filas NI→ES (posicional) desde {p}")
                    loaded = True
        except Exception as e:
            # Best effort: a broken file must not stop the app from starting.
            print(f"[WARN] Error leyendo {p}: {e}")
            import traceback
            traceback.print_exc()

    # Minimum vocabulary, used only for keys the CSVs did not define
    KEEP_MIN_NI = {
        # Particles
        "ne":"y","o":"o","eś":"no","ka":"a","mi":"a","te":"a",
        "kin":"con","tan":"en","ta":"de","kara":"para",

        # Pronouns
        "ni":"yo","zu":"tú","nar":"él","gu":"nosotros",
        "ban":"un","banu":"un","bana":"una",

        # Articles
        "do":"el","da":"la","don":"los","dan":"las",

        # Demonstratives
        "aŕe":"este","aŕa":"esta","aŕen":"estos","aŕan":"estas",

        # Base verbs
        "nuker":"venir","siśnesir":"decir","giotael":"hacer",
        "izan":"ser","egon":"estar","giokk":"tener",
        "pusen":"poner","binbel":"poder","nitus":"ir",
        "deitu":"llamar","euŕak":"llover",

        # Interjections
        "batsornel":"hola","sabernel":"adiós",

        # Common nouns
        "domśaldum":"pan","śesilmen":"café","kuknomtok":"restaurante",
        "sikliskoŕ":"casa","śaldalbam":"mercado","bekmil":"cine",
        "seŕtuŕgok":"año","kordo":"pueblo","tokbatkir":"ciudad",
        "eskom":"amigo","nintos":"madre","śimnas":"padre",
    }

    for k, v in KEEP_MIN_NI.items():
        NI_TO_ES_FORM.setdefault(k, v)

    if total:
        print(f"✓ NI→ES: {total} pares cargados (incluyendo {len(KEEP_MIN_NI)} mínimos)")
    else:
        print(f"⚠ NI→ES: No se cargaron pares desde CSV, usando {len(KEEP_MIN_NI)} mínimos")

    # Debug aid: show the first entries that ended up in the table
    if DEBUG_MODE and total > 0:
        from itertools import islice
        print("\n[DEBUG] Muestra de NI_TO_ES_FORM:")
        for k, v in islice(NI_TO_ES_FORM.items(), 30):
            print(f"  {k} → {v}")

    return loaded

# Fill both lexicon directions when the module is loaded, before the UI below is built.
print("Cargando léxico ES→NI...")
load_lexicon()
print("Cargando léxico NI→ES...")
load_lexicon_ni_es()

# =========================
# UI CLÁSICA (con dirección)
# =========================
# UI strings per interface language ("ES" / "EN"). Both entries carry the same
# keys; "acc_titles" holds the nine accordion headings of the documentation.
LABELS = {
    "ES": {
        "title": "Traductor Español ↔ Neoíbero v4.4",
        "subtitle": "Explora el renacimiento ibérico con tecnología moderna — ULTRA-DEFINITIVO",
        "in_label_es": "✏️ Entrada (Español)",
        "in_label_ni": "✏️ Entrada (Neoíbero)",
        "in_ph_es": "Escribe aquí. Ej.: Veo a Ana y doy pan a Marta.",
        "in_ph_ni": "Idatzi hemen. Adib.: nitus-ke ni etxe-ka.",
        "out_lat_esni": "📜 Salida: Neoíbero (latín)",
        "out_lat_nies": "📜 Salida: Español",
        "out_ib": "🗿 Línea ibérica",
        "out_audio": "🔊 Locución (Audio)",
        "btn": "🔄 Traducir",
        "combo": "🌍 Idioma (UI + explicación)",
        "dir": "🔁 Dirección",
        "dir_opts": ["ES → NI", "NI → ES"],
        "doc_header": "📚 Documentación y Referencia",
        "acc_titles": [
            "🎓 Marco académico y decisiones del neoíbero",
            "🏛️ Herencia posible del íbero histórico",
            "🎨 Diseño de la conlang (neoíbero)",
            "⚙️ Pipeline del traductor (paso a paso)",
            "🔤 Ortografía, línea ibérica y claves",
            "❓/❗ Modalidad presunto vascoide (-na / -ba)",
            "📖 Gramática de referencia (v1.2)",
            "📚 Bibliografía de base",
            "🧾 Siglas y glosario",
        ],
    },
    "EN": {
        "title": "Spanish ↔ Neo-Iberian Translator v4.4",
        "subtitle": "Explore the revival of Neo-Iberian with modern tech — ULTRA-DEFINITIVE",
        "in_label_es": "✏️ Input (Spanish)",
        "in_label_ni": "✏️ Input (Neo-Iberian)",
        "in_ph_es": "Type here. E.g., Veo a Ana y doy pan a Marta.",
        "in_ph_ni": "Type here. E.g., nitus-ke ni etxe-ka.",
        "out_lat_esni": "📜 Output: Neo-Iberian (Latin)",
        "out_lat_nies": "📜 Output: Spanish",
        "out_ib": "🗿 Iberian line",
        "out_audio": "🔊 Speech (Audio)",
        "btn": "🔄 Translate",
        "combo": "🌍 Language (UI + docs)",
        "dir": "🔁 Direction",
        "dir_opts": ["ES → NI", "NI → ES"],
        "doc_header": "📚 Documentation & Reference",
        "acc_titles": [
            "🎓 Background & design choices",
            "🏛️ Possible inheritance from ancient Iberian",
            "🎨 Conlang design (Neo-Iberian)",
            "⚙️ Translator pipeline (step by step)",
            "🔤 Orthography, Iberian line & keys",
            "❓/❗ 'Vascoid' mood (-na / -ba)",
            "📖 Reference grammar (v1.2)",
            "📚 Core references",
            "🧾 Acronyms & glossary",
        ],
    },
}

# Full documentation carried over from appOld.py (Spanish texts).
# Each DOC_ES_<n> is the Markdown body of one documentation accordion.
DOC_ES_0 = """**Escritura y datos.**
El *neoíbero* se diseña como una **lengua conjetural** que toma como base el corpus ibérico (ss. V–I a.C.) conocido, más una morfología y un léxico especulativos construidos con plausibilidad histórica y tipológica.
"""
DOC_ES_1 = """**Herencia antigua posible.**
- Raíces documentadas en inscripciones ibéricas reales: *ban*, *bi*, *irur*, *laur*, *borste*, *śei*, *sisbi*, *sorse* (numerales); *belai* (cuervo), *ebee* (perdiz), etc.
- Fonotáctica **CV(C)**; sin **/p/** fonémico; *r/ŕ* desaconsejado en inicio de palabra.
- Postposiciones/sufijos nominales: **-k** (pl), **-te** (agente), **-ar/-en** (genitivo/origen), **-ka** (dat./loc./dist.), **-i** (ac. con PN).
- Partículas: **ne** 'y', **o** 'o', **eś** 'no'.
- Numerales: *ban, bi, irur, laur, borste, śei, sisbi, sorse, lauŕbi, abaŕ (10), oŕkei (20).*
"""
DOC_ES_2 = """**Diseño de la conlang:**
- **TAM (v3.2-LTS):** PRS **-ke**, PST **-bo**, FUT **-ta**, IPFV **-ri**, IMP **-tu**, **SBJV -ni**, **COND -ne**.
- Derivación: verbos (-ke/-ta/-bo/-ri/-ni/-ne), adjetivos (-si), sustantivos (-ar/-en/-tu/-la/-ŕa/-si).
- Orden preferido **SOV**.
"""
DOC_ES_3 = """**Pipeline (resumen):**
1) Tokenizar; partir **al→ka do**, **del→ta do**.
2) `a` → `ka`/`mi`/`te`.
3) CSV rico da **superficie** NI; si no, CSV simple → **lema** NI.
4) **Puerta POS/TAM**: solo verbos obtienen TAM; otros se normalizan a lema/raíz.
5) Negación **eś** antes del primer verbo finito.
6) ?/! → enclíticos **-na/-ba** en el último verbo finito (o último constituyente).
7) WH desnudo añade **-na** e inserta `?`.
8) Línea ibérica: solo puntuación visible; separador de palabras = **"/"** (tridots).
"""
DOC_ES_4 = """**Ortografía y claves:**
- Modo de claves **explicit** (BA/BE/BI/BO/BU).
- Separador de palabras = "/".
- Atajos: `ka`→**K**, `mi`→**MI**, `te`→**TE**, `ne`→**N**, `o`→**O**, `eś`→**X**.
"""
DOC_ES_5 = """**Modalidad (-na/-ba):**
- **-na** interrogativa; **-ba** exclamativa, se une al último verbo finito (o último constituyente).
"""
DOC_ES_6 = """**Gramática mínima (NI):**
- Verbo: raíz + **TAM**; negación preverbal **eś**.
- Casos productivos: -k (pl), -te (agente), -ka (dat/loc), -ar/-en (genitivo/origen).
"""
DOC_ES_7 = """**Referencias principales:** Untermann; de Hoz; Ferrer i Jané; Correa; gramáticas/corpora bascoide seleccionados."""
DOC_ES_8 = """**Acrónimos (v3.2-LTS):**
- **TAM** (PRS, PST, FUT, IPFV, SBJV, COND, IMP, FUT_SBJV); **PN**; **POS**; **LEMMA/SURFACE**; **RT**; **LTS**; **SOV**; **CV(C)**; **CSV**; **Enclítico**.
"""

# English documentation texts. Each DOC_EN_<n> is the Markdown body of one
# documentation accordion.
DOC_EN_0 = """**Writing & data.**
*Neo-Iberian* is designed as a **conlang** that takes the known Iberian corpus (5th–1st c. BCE) as a base, plus a speculative morphology and lexicon built with historical and typological plausibility.
"""
DOC_EN_1 = """**Possible ancient heritage.**
- Roots documented in real Iberian inscriptions: *ban*, *bi*, *irur*, *laur*, *borste*, *śei*, *sisbi*, *sorse* (numerals); *belai* (raven), *ebee* (partridge), etc.
- **CV(C)** phonotactics; no phonemic **/p/**; *r/ŕ* disallowed word-initially.
- Postpositions/nominal suffixes: **-k** (pl), **-te** (agent), **-ar/-en** (genitive/origin), **-ka** (dat./loc./dist.), **-i** (acc. with PN).
- Particles: **ne** 'and', **o** 'or', **eś** 'not'.
- Numerals: *ban, bi, irur, laur, borste, śei, sisbi, sorse, lauŕbi, abaŕ (10), oŕkei (20).*
"""
DOC_EN_2 = """**Conlang design:**
- **TAM (v3.2-LTS):** PRS **-ke**, PST **-bo**, FUT **-ta**, IPFV **-ri**, IMP **-tu**, **SBJV -ni**, **COND -ne**.
- Derivation: verbs (-ke/-ta/-bo/-ri/-ni/-ne), adjectives (-si), nouns (-ar/-en/-tu/-la/-ŕa/-si).
- Preferred order **SOV**.
"""
DOC_EN_3 = """**Pipeline (summary):**
1) Tokenize; split **al→ka do**, **del→ta do**.
2) `a` → `ka`/`mi`/`te`.
3) Rich CSV gives NI **surface**; else simple CSV → NI **lemma**.
4) **POS/TAM gating**: only verbs get TAM; others normalize to lemma/root.
5) Negation **eś** before the first finite verb.
6) ?/! → enclitics **-na/-ba** on the last finite verb (or last constituent).
7) Bare WH adds **-na** and inserts `?`.
8) Iberian line: visible punctuation only; word separator is **"/"** (tridots).
"""
DOC_EN_4 = """**Orthography & keys:**
- Keys mode **explicit** (BA/BE/BI/BO/BU).
- Word separator = "/".
- Shortcuts: `ka`→**K**, `mi`→**MI**, `te`→**TE**, `ne`→**N**, `o`→**O**, `eś`→**X**.
"""
DOC_EN_5 = """**Modality (-na/-ba):**
- **-na** interrogative; **-ba** exclamative, attached to the last finite verb (or last constituent).
"""
DOC_EN_6 = """**Minimal grammar (NI):**
- Verb: root + **TAM**; preverbal negation **eś**.
- Productive cases: -k (pl), -te (agent), -ka (dat/loc), -ar/-en (genitive/origin).
"""
DOC_EN_7 = """**Core references:** Untermann; de Hoz; Ferrer i Jané; Correa; selected Bascoid grammars/corpora."""
DOC_EN_8 = """**Acronyms (v3.2-LTS):**
- **TAM** (PRS, PST, FUT, IPFV, SBJV, COND, IMP, FUT_SBJV); **PN**; **POS**; **LEMMA/SURFACE**; **RT**; **LTS**; **SOV**; **CV(C)**; **CSV**; **Enclitic**.
"""

# Documentation bodies per UI language; entry i is shown under the accordion
# whose heading is LABELS[<lang>]["acc_titles"][i].
DOC = {
    "ES": [
        DOC_ES_0, DOC_ES_1, DOC_ES_2,
        DOC_ES_3, DOC_ES_4, DOC_ES_5,
        DOC_ES_6, DOC_ES_7, DOC_ES_8,
    ],
    "EN": [
        DOC_EN_0, DOC_EN_1, DOC_EN_2,
        DOC_EN_3, DOC_EN_4, DOC_EN_5,
        DOC_EN_6, DOC_EN_7, DOC_EN_8,
    ],
}

# CSS of the original design (appOld.py)
def build_css():
    """Return the stylesheet used by the Gradio app.

    When ``Iberia-Georgeos.ttf`` exists in the working directory and is not
    empty, it is embedded in the ``@font-face`` rule as a base64 data URL;
    otherwise the rule falls back to ``local('sans-serif')``.
    """
    font_path = "Iberia-Georgeos.ttf"
    font_src = "local('sans-serif')"
    if os.path.exists(font_path):
        with open(font_path, "rb") as font_file:
            encoded = base64.b64encode(font_file.read()).decode("ascii")
        # An empty file encodes to "", which keeps the fallback source.
        if encoded:
            font_src = f"url(data:font/ttf;base64,{encoded}) format('truetype')"
    return f"""
@font-face {{
  font-family: 'IberiaGeorgeos';
  src: {font_src};
  font-weight: normal; font-style: normal;
}}
:root {{
  --iberian-clay:#8B4513; --iberian-ochre:#CC7722; --iberian-stone:#5C5C5C;
  --iberian-sand:#D2B48C; --iberian-rust:#A0522D; --iberian-bronze:#CD7F32;
}}
.gradio-container {{ background:linear-gradient(135deg,#f4e8d8 0%,#e8d5c4 50%,#d4c4b0 100%)!important;
  font-family:'Georgia','Times New Roman',serif!important; }}
.gradio-container h1,.gradio-container h2,.gradio-container h3 {{
  color:var(--iberian-clay)!important; text-shadow:2px 2px 4px rgba(139,69,19,.15)!important;
  border-bottom:3px solid var(--iberian-bronze)!important; padding-bottom:.5rem!important; letter-spacing:.5px!important;
}}
.gradio-container .gr-group {{ background:linear-gradient(to bottom,#f9f6f0,#ede6dc)!important;
  border:2px solid var(--iberian-sand)!important; border-radius:8px!important; box-shadow:0 4px 12px rgba(139,69,19,.2), inset 0 1px 0 rgba(255,255,255,.5)!important;
  padding:1.5rem!important; margin-bottom:1.5rem!important; }}
.gradio-container .gr-accordion {{ background:linear-gradient(145deg,#ebe3d5,#d9cec0)!important;
  border:2px solid var(--iberian-rust)!important; border-radius:6px!important; margin-bottom:.8rem!important; box-shadow:2px 2px 6px rgba(0,0,0,.15)!important; }}
.gradio-container .gr-accordion .label-wrap {{ background:linear-gradient(to right,var(--iberian-ochre),var(--iberian-rust))!important;
  color:#fff!important; font-weight:600!important; padding:.8rem 1rem!important; border-radius:4px!important; text-shadow:1px 1px 2px rgba(0,0,0,.3)!important; }}
.gradio-container .gr-textbox textarea,.gradio-container .gr-textbox input {{ background:linear-gradient(to bottom,#faf8f3,#f5f0e8)!important;
  border:2px solid var(--iberian-sand)!important; border-radius:6px!important; color:var(--iberian-stone)!important;
  font-family:'Georgia',serif!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1)!important; }}
.gradio-container .gr-textbox textarea:focus,.gradio-container .gr-textbox input:focus {{
  border-color:var(--iberian-bronze)!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1), 0 0 8px rgba(205,127,50,.3)!important; }}
.gradio-container .gr-button.gr-button-primary {{ background:linear-gradient(145deg,var(--iberian-bronze),var(--iberian-rust))!important;
  border:2px solid var(--iberian-clay)!important; color:#fff!important; font-weight:bold!important; text-shadow:1px 1px 2px rgba(0,0,0,.4)!important;
  box-shadow:0 4px 8px rgba(139,69,19,.3), inset 0 1px 0 rgba(255,255,255,.2)!important; border-radius:8px!important; padding:.8rem 1.5rem!important; transition:all .3s ease!important; }}
.gradio-container .gr-button.gr-button-primary:hover {{ background:linear-gradient(145deg,var(--iberian-rust),var(--iberian-bronze))!important;
  transform:translateY(-2px)!important; box-shadow:0 6px 12px rgba(139,69,19,.4)!important; }}
.ib-line {{ font-family:'IberiaGeorgeos',monospace,sans-serif!important; font-size:1.9rem!important; line-height:2.4rem!important; white-space:pre-wrap!important;
  background:linear-gradient(135deg,#e8dcc8 0%,#d4c4a8 50%,#c4b098 100%)!important; padding:24px!important; border-radius:10px!important;
  border:3px solid var(--iberian-rust)!important; border-left:6px solid var(--iberian-bronze)!important;
  box-shadow:0 4px 15px rgba(139,69,19,.25), inset 0 2px 4px rgba(0,0,0,.1)!important; color:var(--iberian-clay)!important; position:relative!important; }}
.ib-line::before {{ content:''!important; position:absolute!important; inset:0!important;
  background-image:repeating-linear-gradient(0deg,transparent,transparent 2px, rgba(139,69,19,.03) 2px, rgba(139,69,19,.03) 4px)!important;
  pointer-events:none!important; border-radius:10px!important; }}
@media (max-width:768px) {{
  .ib-line {{ font-size:1.5rem!important; line-height:2rem!important; padding:16px!important; }}
  .gradio-container .gr-group {{ padding:1rem!important; }}
  .gradio-container h1 {{ font-size:1.8rem!important; }}
}}
@media (max-width:480px) {{
  .ib-line {{ font-size:1.3rem!important; line-height:1.8rem!important; padding:12px!important; }}
  .gradio-container h1 {{ font-size:1.5rem!important; }}
}}
"""


# Built once when the module is loaded; passed to gr.Blocks(css=...).
CSS = build_css()

# =========================
# INTERFAZ GRADIO
# =========================
# Gradio UI: header, language/direction selectors, documentation accordions,
# the translation panel, and the callbacks that wire them together.
with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")) as demo:
    # Markup shown whenever there is no Iberian line to display.
    _EMPTY_IB_HTML = "<div class='ib-line'></div>"

    with gr.Group():
        title = gr.Markdown(f"# {LABELS['ES']['title']}")
        subtitle = gr.Markdown(f"*{LABELS['ES']['subtitle']}*")

    with gr.Row():
        combo = gr.Dropdown(choices=["ES","EN"], value="ES", label=LABELS["ES"]["combo"])
        direction = gr.Radio(choices=LABELS["ES"]["dir_opts"], value="ES → NI", label=LABELS["ES"]["dir"])

    with gr.Group():
        doc_header = gr.Markdown(f"## {LABELS['ES']['doc_header']}")
        acc_titles = LABELS["ES"]["acc_titles"]
        with gr.Accordion(acc_titles[0], open=False) as acc1:  md1 = gr.Markdown(DOC["ES"][0])
        with gr.Accordion(acc_titles[1], open=False) as acc2:  md2 = gr.Markdown(DOC["ES"][1])
        with gr.Accordion(acc_titles[2], open=False) as acc3:  md3 = gr.Markdown(DOC["ES"][2])
        with gr.Accordion(acc_titles[3], open=False) as acc4:  md4 = gr.Markdown(DOC["ES"][3])
        with gr.Accordion(acc_titles[4], open=False) as acc5:  md5 = gr.Markdown(DOC["ES"][4])
        with gr.Accordion(acc_titles[5], open=False) as acc6:  md6 = gr.Markdown(DOC["ES"][5])
        with gr.Accordion(acc_titles[6], open=False) as acc7:  md7 = gr.Markdown(DOC["ES"][6])
        with gr.Accordion(acc_titles[7], open=False) as acc8:  md8 = gr.Markdown(DOC["ES"][7])
        with gr.Accordion(acc_titles[8], open=False) as acc9:  md9 = gr.Markdown(DOC["ES"][8])

    with gr.Group():
        es_in = gr.Textbox(label=LABELS["ES"]["in_label_es"], placeholder=LABELS["ES"]["in_ph_es"], lines=5)
        btn_tr = gr.Button(LABELS["ES"]["btn"], variant="primary")
        with gr.Row():
            with gr.Column(scale=2):
                ni_out = gr.Textbox(label=LABELS["ES"]["out_lat_esni"], lines=5, interactive=False)
                loc_btn = gr.Button("🔊 Locutar", variant="secondary", visible=False)
                audio_out = gr.Audio(label=LABELS["ES"]["out_audio"], type="numpy")
            with gr.Column(scale=1):
                ib_out = gr.HTML(label=LABELS["ES"]["out_ib"])

    def _io_labels(L, dir_label):
        """Return (input label, input placeholder, output label) for a direction."""
        if dir_label.startswith("ES"):
            return L["in_label_es"], L["in_ph_es"], L["out_lat_esni"]
        return L["in_label_ni"], L["in_ph_ni"], L["out_lat_nies"]

    def do_translate(text, dir_label, sel_lang="ES"):
        """Translate ``text`` in the chosen direction.

        Returns updates for (output textbox, Iberian line, speech button,
        audio player). The output label follows the UI language in
        ``sel_lang`` so that translating does not revert it to Spanish.
        """
        L = LABELS.get(sel_lang, LABELS["ES"])
        if not text or not text.strip():
            return (gr.update(value=""),
                    gr.update(value=_EMPTY_IB_HTML),
                    gr.update(visible=False),
                    gr.update(value=None))
        if dir_label.startswith("ES"):
            latin, ib = translate(text)
            # The Iberian line is inserted into raw HTML, so it must be escaped.
            ib_html = "<div class='ib-line'>" + escape(ib) + "</div>"
            return (gr.update(label=L["out_lat_esni"], value=latin),
                    gr.update(value=ib_html),
                    gr.update(visible=True),
                    gr.update(value=None))
        es_text = translate_ni_to_es(text)
        return (gr.update(label=L["out_lat_nies"], value=es_text),
                gr.update(value=_EMPTY_IB_HTML),
                gr.update(visible=False),
                gr.update(value=None))

    btn_tr.click(do_translate, [es_in, direction, combo], [ni_out, ib_out, loc_btn, audio_out])

    def run_locution(latin_text, dir_label):
        """Synthesize speech for the ES → NI output; return None otherwise."""
        if not dir_label.startswith("ES"):
            return None
        # The button can be visible over an empty output (after a direction
        # switch), so there may be nothing to speak.
        if not latin_text or not latin_text.strip():
            return None
        return synthesize_speech(latin_text)

    loc_btn.click(run_locution, [ni_out, direction], audio_out)

    def switch_lang(sel_lang, dir_label):
        """Relabel the whole UI for ``sel_lang``, keeping the current direction.

        The returned updates follow the order of the ``combo.change`` outputs.
        """
        L = LABELS[sel_lang]
        # Input/output labels depend on the direction
        in_label, in_ph, out_lab = _io_labels(L, dir_label)
        updates = [
            gr.update(value=f"# {L['title']}"),
            gr.update(value=f"*{L['subtitle']}*"),
            gr.update(label=L["combo"], value=sel_lang),
            gr.update(label=L["dir"], choices=L["dir_opts"], value=dir_label),
            gr.update(value=f"## {L['doc_header']}"),
        ]
        # One (accordion heading, markdown body) pair per documentation section
        for acc_title, doc_body in zip(L["acc_titles"], DOC[sel_lang]):
            updates.append(gr.update(label=acc_title))
            updates.append(gr.update(value=doc_body))
        updates += [
            gr.update(label=in_label, placeholder=in_ph),
            gr.update(label=out_lab),
            gr.update(label=L["out_ib"]),
            gr.update(label=L["out_audio"]),
            gr.update(value=L["btn"]),
        ]
        return tuple(updates)

    combo.change(
        switch_lang,
        [combo, direction],
        [title, subtitle, combo, direction, doc_header,
         acc1, md1, acc2, md2, acc3, md3, acc4, md4, acc5, md5, acc6, md6, acc7, md7, acc8, md8, acc9, md9,
         es_in, ni_out, ib_out, audio_out, btn_tr]
    )

    def switch_direction(dir_label, sel_lang):
        """Relabel input/output for the new direction and clear previous results."""
        in_label, in_ph, out_lab = _io_labels(LABELS[sel_lang], dir_label)
        # Speech is offered only for ES → NI
        loc_vis = dir_label.startswith("ES")
        return (gr.update(label=in_label, placeholder=in_ph),
                gr.update(label=out_lab, value=""),
                gr.update(value=_EMPTY_IB_HTML),
                gr.update(visible=loc_vis),
                gr.update(value=None))

    direction.change(
        switch_direction,
        [direction, combo],
        [es_in, ni_out, ib_out, loc_btn, audio_out]
    )

if __name__ == "__main__":
    demo.queue().launch()