# app.py — Traductor Español ↔ Neoíbero
# UI clásica (v2.2) + motor bidireccional (v3), con Línea Ibérica y Locución
# 2025-10 – unificación y limpieza (parches: enclíticos, números, cobertura mínima)
# 2025-10 (rev): Superficie estricta + NI→ES desde CSV dedicado si existe (fallback inversor en memoria)
# 2025-11 (rev2): prioridad absoluta de CSV, contador de evidencias, doc vascoide ampliada, fallbacks solo si faltan en CSV

import gradio as gr
import os, csv, re, base64, unicodedata
import torch
from transformers import AutoProcessor, VitsModel
import numpy as np
from html import escape  # ← para escapar la línea ibérica en HTML

# Local cache directories: keep whatever value is already present in the
# environment, otherwise fall back to a location under /tmp.
# NOTE(review): these assignments run after `transformers` has already been
# imported at the top of the file; confirm the library still honors them at
# this point (it may read the cache variables at import time).
os.environ['TRANSFORMERS_CACHE'] = os.environ.get('TRANSFORMERS_CACHE', '/tmp/cache')
os.environ['HF_HOME'] = os.environ.get('HF_HOME', '/tmp/hf')

DEBUG_MODE = False


def debug_print(msg):
    """Print ``msg`` with a ``[DEBUG]`` prefix, but only while DEBUG_MODE is truthy."""
    if not DEBUG_MODE:
        return
    print(f"[DEBUG] {msg}")

# =========================
# LÉXICO Y ESTRUCTURAS
# =========================
# << RUTAS EN RAÍZ DEL REPO >>
# CSV file names, given relative to the repository root.
CSV_ES_NI = "HF_Pairs_ES_NI_RICH.csv"              # ES→NI (rich pairs)
CSV_NI_ES = "HF_Pairs_NI_ES_Translator.csv"        # NI→ES (deterministic when the file exists)

# Strict surface mode: do not alter surfaces that come from the CSV
STRICT_SURFACE = True

# ES→NI lookup tables (all start empty here)
SURF_RICH = {}   # (es_lower, tag) -> ni_surface
LEX_FORM  = {}   # es_form -> ni_lemma/surface
LEX_LEMMA = {}   # es_lemma -> ni_lemma
FOLD_FORM = {}   # es_form_no_diacritics -> ni_lemma
LEX_META  = {}   # es_form/lemma -> {"pos":..., "tam_ok":...}
FORCE_KEYS = set()

# NI→ES (loaded from its CSV when present; otherwise built in memory as the inverse)
NI_TO_ES_SURF = {}   # (ni_surface_lower_or_fold, tag) -> es_surface (exactly as observed)
NI_TO_ES_FORM = {}   # ni_form/root (lower/fold) -> es_surface or es_lemma
NI_TO_ES_LEMMA= {}   # ni_root (lower/fold) -> es_lemma

# Informational evidence counter (logging only)
EVIDENCE_COUNTS = {}  # e.g.: {"conjetural": 2039780, "vascoide": 9, ...}

# =========================
# MORFOLOGÍA – ESPAÑOL
# =========================
# Suffix patterns and ending tables used by the heuristic Spanish verb
# analysis below. All matching is purely on word endings.
RE_GER   = re.compile(r"(ando|iendo|yendo)$", re.I)   # gerund endings
RE_PART  = re.compile(r"(ado|ido|to|so|cho)$", re.I)  # participle endings (regular and irregular)

# Endings attached to the infinitive (future / conditional).
FUT_END   = ("é","ás","á","emos","éis","án")
COND_END  = ("ía","ías","ía","íamos","íais","ían")
# Preterite endings, split by conjugation class (-ar vs -er/-ir).
PRET_AR   = ("é","aste","ó","amos","asteis","aron")
PRET_ERIR = ("í","iste","ió","imos","isteis","ieron")
# Imperfect endings. Every COND_END entry also appears in IMPF_ERIR.
IMPF_AR   = ("aba","abas","ábamos","abais","aban")
IMPF_ERIR = ("ía","ías","íamos","íais","ían")
# Present subjunctive endings.
SUBJ_AR   = ("e","es","e","emos","éis","en")
SUBJ_ERIR = ("a","as","a","amos","áis","an")
# Imperfect subjunctive endings (both the -ra and the -se series).
SUBJ_PAST_AR   = ("ara","aras","ara","áramos","arais","aran","ase","ases","ase","ásemos","aseis","asen")
SUBJ_PAST_ERIR = ("iera","ieras","iera","iéramos","ierais","ieran","iese","ieses","iese","iésemos","ieseis","iesen")
# Present indicative endings per conjugation class.
PRS_AR    = ("o","as","a","amos","áis","an")
PRS_ER    = ("o","es","e","emos","éis","en")
PRS_IR    = ("o","es","e","imos","ís","en")

def _strip_any(w, ends):
    for s in sorted(ends, key=len, reverse=True):
        if w.endswith(s):
            return w[:-len(s)], s
    return None, None

def _guess_class_from_ending(ending):
    """Return "ar" when ``ending`` appears in any -ar ending table, otherwise "er"."""
    ar_tables = (PRET_AR, IMPF_AR, SUBJ_AR, PRS_AR)
    return "ar" if any(ending in table for table in ar_tables) else "er"

# Irregularidades para **adivinar lema** y **tag** (TAM de rescate)
# Irregular Spanish forms mapped to their infinitive (used to guess the lemma).
#
# A dict literal keeps only the LAST value for a repeated key, so each form is
# listed exactly once here:
#   * "di"  -> "decir" (the preterite-of-"dar" reading is not representable);
#   * "ven" -> "ver"   (the imperative-of-"venir" reading is not representable).
# Fix: the "venir" subjunctive row used to repeat "vayáis":"ir" instead of
# listing "vengáis":"venir", which left "vengáis" without a lemma.
IRREG_LEMMA = {
    "fui":"ir","fuiste":"ir","fue":"ir","fuimos":"ir","fuisteis":"ir","fueron":"ir",
    "voy":"ir","vas":"ir","va":"ir","vamos":"ir","vais":"ir","van":"ir",
    "soy":"ser","eres":"ser","es":"ser","somos":"ser","sois":"ser","son":"ser",
    "era":"ser","eras":"ser","éramos":"ser","erais":"ser","eran":"ser",
    "he":"haber","has":"haber","ha":"haber","hemos":"haber","habéis":"haber","han":"haber",
    "hube":"haber","hubo":"haber","hubimos":"haber","hubiste":"haber","hubisteis":"haber","hubieron":"haber",
    "estoy":"estar","estás":"estar","está":"estar","estamos":"estar","estáis":"estar","están":"estar",
    "estuve":"estar","estuviste":"estar","estuvo":"estar","estuvimos":"estar","estuvisteis":"estar","estuvieron":"estar",
    "estaba":"estar","estabas":"estar","estábamos":"estar","estabais":"estar","estaban":"estar",
    "tuve":"tener","tuviste":"tener","tuvo":"tener","tuvimos":"tener","tuvisteis":"tener","tuvieron":"tener",
    "vine":"venir","viniste":"venir","vino":"venir","vinimos":"venir","vinisteis":"venir","vinieron":"venir",
    "hice":"hacer","hiciste":"hacer","hizo":"hacer","hicimos":"hacer","hicisteis":"hacer","hicieron":"hacer",
    "puse":"poner","pusiste":"poner","puso":"poner","pusimos":"poner","pusisteis":"poner","pusieron":"poner",
    "pude":"poder","pudiste":"poder","pudo":"poder","pudimos":"poder","pudisteis":"poder","pudieron":"poder",
    "quise":"querer","quisiste":"querer","quiso":"querer","quisimos":"querer","quisisteis":"querer","quisieron":"querer",
    "supe":"saber","supiste":"saber","supo":"saber","supimos":"saber","supisteis":"saber","supieron":"saber",
    "traje":"traer","trajiste":"traer","trajo":"traer","trajimos":"traer","trajisteis":"traer","trajeron":"traer",
    "dije":"decir","dijiste":"decir","dijo":"decir","dijimos":"decir","dijisteis":"decir","dijeron":"decir",
    "conduje":"conducir","condujiste":"conducir","condujo":"conducir","condujimos":"conducir","condujisteis":"conducir","condujeron":"conducir",
    "anduve":"andar","anduviste":"andar","anduvo":"andar","anduvimos":"andar","anduvisteis":"andar","anduvieron":"andar",
    "cupe":"caber","cupiste":"caber","cupo":"caber","cupimos":"caber","cupisteis":"caber","cupieron":"caber",
    # Preterite of "dar"; the ambiguous "di" is listed with the imperatives below.
    "diste":"dar","dio":"dar","dimos":"dar","disteis":"dar","dieron":"dar",
    "vi":"ver","viste":"ver","vio":"ver","vimos":"ver","visteis":"ver","vieron":"ver",
    "tengo":"tener","vengo":"venir","pongo":"poner","salgo":"salir","traigo":"traer","caigo":"caer","hago":"hacer","oigo":"oír","digo":"decir","valgo":"valer","sigo":"seguir",
    "tienes":"tener","tiene":"tener","tienen":"tener",
    "vienes":"venir","viene":"venir","vienen":"venir",
    "pienso":"pensar","piensas":"pensar","piensa":"pensar","piensan":"pensar",
    "quiero":"querer","quieres":"querer","quiere":"querer","quieren":"querer",
    "prefiero":"preferir","prefieres":"preferir","prefiere":"preferir","prefieren":"preferir",
    "vaya":"ir","vayas":"ir","vayamos":"ir","vayáis":"ir","vayan":"ir",
    "sea":"ser","seas":"ser","seamos":"ser","seáis":"ser","sean":"ser",
    "haya":"haber","hayas":"haber","hayamos":"haber","hayáis":"haber","hayan":"haber",
    "dé":"dar","des":"dar","demos":"dar","deis":"dar","den":"dar",
    "esté":"estar","estés":"estar","estemos":"estar","estéis":"estar","estén":"estar",
    "tenga":"tener","tengas":"tener","tengamos":"tener","tengáis":"tener","tengan":"tener",
    "venga":"venir","vengas":"venir","vengamos":"venir","vengáis":"venir","vengan":"venir",
    "ve":"ir","id":"ir","sé":"ser","sed":"ser","haz":"hacer","haced":"hacer","pon":"poner","poned":"poner",
    "venid":"venir","ten":"tener","tened":"tener","sal":"salir","salid":"salir","di":"decir","decid":"decir",
    "doy":"dar","das":"dar","da":"dar","damos":"dar","dais":"dar","dan":"dar",
    "veo":"ver","ves":"ver","vemos":"ver","veis":"ver","ven":"ver",
    "oí":"oír","oíste":"oír","oyó":"oír","oímos":"oír","oísteis":"oír","oyeron":"oír",
    "iba":"ir","ibas":"ir","íbamos":"ir","ibais":"ir","iban":"ir",
    "veía":"ver","veías":"ver","veíamos":"ver","veíais":"ver","veían":"ver",
    "vinieras":"venir","lloviera":"llover",
    # Future subjunctive (archaic)
    "viniere":"venir","vinieres":"venir","vinieren":"venir",
    "hiciere":"hacer","hicieres":"hacer","hicieren":"hacer",
    "hubiere":"haber","hubieres":"haber","hubieren":"haber",
    # "fuere" is ambiguous (ser/ir) → deliberately omitted
}

# Irregular / cliticized Spanish forms mapped directly to a TAM tag.
# es_morph_tag() consults this table before any ending-based heuristic.
# Fix: "vengáis" was tagged "SBKV" (typo), a tag no suffix table knows, so it
# silently fell back to the default suffix instead of the subjunctive one.
IRREG_MORPH_TAGS = {
    # Subjunctives / imperatives / etc. (rescue tags)
    "vaya":"SBJV","vayas":"SBJV","vayamos":"SBJV","vayáis":"SBJV","vayan":"SBJV",
    "sea":"SBJV","seas":"SBJV","seamos":"SBJV","seáis":"SBJV","sean":"SBJV",
    "haya":"SBJV","hayas":"SBJV","hayamos":"SBJV","hayáis":"SBJV","hayan":"SBJV",
    "dé":"SBJV","des":"SBJV","demos":"SBJV","deis":"SBJV","den":"SBJV",
    "esté":"SBJV","estés":"SBJV","estemos":"SBJV","estéis":"SBJV","estén":"SBJV",
    "tenga":"SBJV","tengas":"SBJV","tengamos":"SBJV","tengáis":"SBJV","tengan":"SBJV",
    "venga":"SBJV","vengas":"SBJV","vengamos":"SBJV","vengáis":"SBJV","vengan":"SBJV",
    "haga":"SBJV","hagas":"SBJV","hagamos":"SBJV","hagáis":"SBJV","hagan":"SBJV",
    "pueda":"SBJV","puedas":"SBJV","podamos":"SBJV","podáis":"SBJV","puedan":"SBJV",
    "id":"IMP","sed":"IMP","haz":"IMP","haced":"IMP","pon":"IMP","poned":"IMP","ven":"IMP","venid":"IMP",
    "ten":"IMP","tened":"IMP","sal":"IMP","salid":"IMP","decid":"IMP",
    "llámame":"IMP","llámalo":"IMP","llámala":"IMP","llámanos":"IMP","llámalos":"IMP","llámalas":"IMP",
    "dime":"IMP","dímelo":"IMP","dinos":"IMP","dínoslo":"IMP",
    "hazme":"IMP","hazlo":"IMP","hazla":"IMP","haznos":"IMP",
    "ponme":"IMP","ponlo":"IMP","ponla":"IMP","ponnos":"IMP",
    "dame":"IMP","dámelo":"IMP","danos":"IMP","dánoslo":"IMP",
    "tráeme":"IMP","tráelo":"IMP","tráela":"IMP","tráenos":"IMP",
    "díselo":"IMP","pónselo":"IMP","házselo":"IMP",
    "viniere":"FUT_SBJV","vinieres":"FUT_SBJV","vinieren":"FUT_SBJV",
    "hiciere":"FUT_SBJV","hicieres":"FUT_SBJV","hicieren":"FUT_SBJV",
    "fuere":"FUT_SBJV","fueres":"FUT_SBJV","fueren":"FUT_SBJV",
    "hubiere":"FUT_SBJV","hubieres":"FUT_SBJV","hubieren":"FUT_SBJV",
    "creísteis":"PST","dijisteis":"PST","hicisteis":"PST","pusisteis":"PST",
    "supisteis":"PST","quisisteis":"PST","trajisteis":"PST","vi":"PST","dio":"PST","fue":"PST","fui":"PST",
    "iba":"IPFV","ibas":"IPFV","íbamos":"IPFV","ibais":"IPFV","iban":"IPFV",
    "veía":"IPFV","veías":"IPFV","veíamos":"IPFV","veíais":"IPFV","veían":"IPFV",
}

def looks_like_verb_form_strict(w: str) -> bool:
    """Return True when ``w`` ends like a Spanish verb form.

    The test is purely suffix-based: infinitive endings, gerund/participle
    patterns, accented present endings, the future/conditional, preterite,
    imperfect and past-subjunctive ending tables, and finally a list of
    irregular preterite stems.
    """
    word = (w or "").lower()
    if word.endswith(("ar", "er", "ir")):
        return True
    if RE_GER.search(word) or RE_PART.search(word):
        return True
    if re.search(r"(á|ás|áis|és|éis|ís)$", word):
        return True

    ending_tables = (
        FUT_END + COND_END,
        PRET_AR + PRET_ERIR,
        IMPF_AR + IMPF_ERIR,
        SUBJ_PAST_AR + SUBJ_PAST_ERIR,
    )
    if any(_strip_any(word, table)[0] is not None for table in ending_tables):
        return True

    irregular_preterite = re.search(r"(anduve|anduviste|anduvo|anduvimos|anduvieron|conduje|traduje|produje|reduje|introduje|supe|quise|pude|puse|hice|hizo|dije|dijo|traje|trajo|tuve|tuvo|vine|vino|cupe|cupo)$", word)
    return bool(irregular_preterite)

def _zco_guess(w:str)->str:
    if w.endswith("uzco"): return w[:-4] + "ucir"
    if w.endswith("ezco"): return w[:-4] + "ecer"
    if w.endswith("ozco"): return w[:-4] + "ocer"
    if w.endswith("azco"): return w[:-4] + "acer"
    return ""

def guess_infinitive_es(w: str) -> str:
    """Heuristically guess the Spanish infinitive behind an inflected form.

    The checks run in this order: the irregular table, "vámonos", -zco and
    -go first-person forms, plain infinitives, gerunds, participles,
    future/conditional forms, accented present endings, and finally the
    generic ending tables. Returns "" when nothing matches.

    Fixes relative to the previous version:
      * regular participles in -ado now yield an -ar infinitive
        ("hablado" -> "hablar"), mirroring the gerund branch; they used to get
        "-er" like every other participle;
      * the future/conditional branch only accepts a stem that is a known
        irregular future stem or itself looks like an infinitive. Previously
        any word ending in é/á/ás/éis/ía... returned its bare stem
        ("comía" -> "com", "solía" -> "sol"), which also made the accented
        present-ending checks below unreachable for those endings.
    """
    w = (w or "").lower()
    if w in IRREG_LEMMA:
        return IRREG_LEMMA[w]
    if w in ("vámonos", "vamonos"):
        return "ir"
    if w.endswith("zco"):
        z = _zco_guess(w)
        if z:
            return z
    if w.endswith("go"):
        base = w[:-2]
        map_go = {"ten":"tener","ven":"venir","pon":"poner","sal":"salir","tra":"traer","ca":"caer","ha":"hacer","oi":"oír","di":"decir","val":"valer","si":"seguir"}
        for k, v in map_go.items():
            if base.startswith(k):
                return v
    if w.endswith(("ar", "er", "ir")):
        return w

    # Gerund: -ando belongs to -ar verbs; -iendo/-yendo default to -er.
    m = RE_GER.search(w)
    if m:
        return w[:m.start()] + ("ar" if m.group(0) == "ando" else "er")

    # Participle: irregular table first, then -ado -> -ar, anything else -> -er.
    m = RE_PART.search(w)
    if m:
        part_irreg = {
            "hecho":"hacer","dicho":"decir","visto":"ver","puesto":"poner","escrito":"escribir",
            "abierto":"abrir","cubierto":"cubrir","muerto":"morir","roto":"romper",
            "vuelto":"volver","resuelto":"resolver","frito":"freír","impreso":"imprimir",
            "satisfecho":"satisfacer","provisto":"proveer"
        }
        if w in part_irreg:
            return part_irreg[w]
        return w[:m.start()] + ("ar" if m.group(0) == "ado" else "er")

    # Future / conditional: the ending attaches to the infinitive (or to an
    # irregular future stem), so only accept stems of that shape.
    base, _ = _strip_any(w, FUT_END + COND_END)
    if base is not None:
        irreg = {"saldr":"salir","vendr":"venir","tendr":"tener","pondr":"poner","valdr":"valer","podr":"poder",
                 "habr":"haber","sabr":"saber","cabr":"caber","querr":"querer","dir":"decir","har":"hacer"}
        if base in irreg:
            return irreg[base]
        if base.endswith(("ar", "er", "ir")):
            return base
        # Otherwise the ending is not a future/conditional one: keep looking.

    # Accented present endings.
    if w.endswith("áis"): return w[:-3] + "ar"
    if w.endswith("éis"): return w[:-3] + "er"
    if w.endswith("ís"):  return w[:-2] + "ir"
    if w.endswith("ás"):  return w[:-2] + "ar"
    if w.endswith("és"):  return w[:-2] + "er"
    if w.endswith("á"):   return w[:-1] + "ar"

    # Generic ending tables; the conjugation class is guessed from the ending.
    for group in (PRET_AR+PRET_ERIR, IMPF_AR+IMPF_ERIR, SUBJ_AR+SUBJ_ERIR, PRS_AR+PRS_ER+PRS_IR):
        base, end = _strip_any(w, group)
        if base is not None:
            return base + _guess_class_from_ending(end)
    base, _ = _strip_any(w, SUBJ_PAST_AR)
    if base is not None:
        return base + "ar"
    base, _ = _strip_any(w, SUBJ_PAST_ERIR)
    if base is not None:
        return base + "er"
    return ""

def es_morph_tag(w: str) -> str:
    """Return a TAM tag for a Spanish word form, using suffix heuristics only.

    The first matching rule wins, so the order of the checks is significant.
    Possible results are the tags stored in IRREG_MORPH_TAGS plus "IMP",
    "IPFV", "INF", "PST", "FUT", "COND", "PRS", "SBJV" and "UNK".

    NOTE(review): the preterite table is tested before FUT_END, so any form
    ending in "é" returns "PST" before the future check is reached. Every
    COND_END entry is also in IMPF_ERIR, which is tested earlier, so the
    "COND" return looks unreachable. Confirm whether that is intended.
    """
    w = (w or "").lower()
    # Irregular forms have a fixed tag.
    if w in IRREG_MORPH_TAGS: return IRREG_MORPH_TAGS[w]
    # Imperatives carrying enclitic pronouns (accented stems, then -d plurals).
    if re.search(r"^(llám|dím|házm|pónm|vén|dám|tén|tráe)(a|e)?(me|te|lo|la|nos|os|les|se|melo|telo|selo)$", w): return "IMP"
    if re.search(r"(adme|edme|idme|adlo|edle|idle|adnos|ednos)$", w): return "IMP"
    if re.search(r"(?:ad|ed|id|ád|éd|íd)(?:me|te|se|lo|la|nos|os|les|melo|telo|selo|noslo|oslo|sela|selas|selos)$", w): return "IMP"
    # Accented gerund + enclitic.
    if re.search(r"^.*[áéí]ndo(me|te|se|lo|la|nos|os|les|melo|telo|selo)$", w): return "IPFV"
    # Any other word ending in a double clitic counts as an imperative,
    # provided more than two characters remain once the cluster is removed.
    if re.search(r"(melo|telo|selo|noslo|oslo|sela|selas|selos)$", w):
        base = re.sub(r"(melo|telo|selo|noslo|oslo|sela|selas|selos)$", "", w)
        if base and len(base) > 2: return "IMP"
    # Non-finite forms.
    if w.endswith(("ar","er","ir")): return "INF"
    if RE_GER.search(w):  return "IPFV"
    if RE_PART.search(w): return "PST"
    # Finite forms, decided by the ending tables.
    if _strip_any(w, PRET_AR+PRET_ERIR)[0] is not None:  return "PST"
    if _strip_any(w, IMPF_AR+IMPF_ERIR)[0] is not None:  return "IPFV"
    if _strip_any(w, FUT_END)[0]  is not None:           return "FUT"
    if _strip_any(w, COND_END)[0] is not None:           return "COND"
    if re.search(r"(á|ás|áis|és|éis|ís)$", w):           return "PRS"
    if _strip_any(w, SUBJ_AR+SUBJ_ERIR)[0] is not None:  return "SBJV"
    if _strip_any(w, PRS_AR+PRS_ER+PRS_IR)[0] is not None:  return "PRS"
    if _strip_any(w, SUBJ_PAST_AR+SUBJ_PAST_ERIR)[0] is not None: return "SBJV"
    # Irregular preterite stems.
    if re.search(r"(anduve|conduje|traduje|produje|reduje|introduje|supe|quise|pude|puse|hice|hizo|dije|dijo|traje|trajo|tuve|tuvo|vine|vino|cupe|cupo)$", w):
        return "PST"
    # Bare plural imperative (-ad / -ed / -id).
    if re.search(r"^.+[aei]d$", w): return "IMP"
    return "UNK"

# =========================
# MORFOLOGÍA – NEOÍBERO
# =========================
NI_TAM_SUFFIXES = {"-ke":"PRS","-bo":"PST","-ta":"FUT","-ri":"IPFV","-ni":"COND","-tu":"IMP","-ra":"FUT_SBJV"}


def detect_ni_tam(word: str):
    """Split a Neo-Iberian word into ``(root, tag, suffix)``.

    The word is lower-cased and stripped first. When it ends with one of the
    NI_TAM_SUFFIXES keys, the suffix is removed and its tag returned;
    otherwise the result is ``(word, "INF", "")``.
    """
    normalized = (word or "").lower().strip()
    hit = next(
        ((suffix, tag) for suffix, tag in NI_TAM_SUFFIXES.items() if normalized.endswith(suffix)),
        None,
    )
    if hit is None:
        return normalized, "INF", ""
    suffix, tag = hit
    return normalized[:-len(suffix)], tag, suffix

# =========================
# UTILIDADES
# =========================
def fold(s:str)->str:
    """Return ``s`` with combining marks removed (NFD-decompose, drop category "Mn")."""
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
    return ''.join(kept)
def has_diacritic(s:str)->bool:
    """Return True when ``s`` contains an accented Spanish vowel or ü (either case)."""
    return re.search(r"[áéíóúüÁÉÍÓÚÜ]", s or "") is not None

def _canon_pos(p: str) -> str:
    p = (p or "").strip().upper()
    MAP = {"V":"V","VERB":"V","N":"N","NOUN":"N","ADJ":"ADJ","ADJECTIVE":"ADJ","ADV":"ADV","ADVERB":"ADV",
           "INTJ":"INTJ","INTERJ":"INTJ","INTERJECTION":"INTJ","PRON":"PRON","PRONOUN":"PRON",
           "PART":"PART","PARTICLE":"PART","POSTP":"POSTP","ADP":"POSTP","ADPOSITION":"POSTP"}
    return MAP.get(p, "")
def _boolish(x):
    if x is None: return None
    s = str(x).strip().lower()
    if s in ("1","true","t","yes","y","si","sí"): return True
    if s in ("0","false","f","no","n"): return False
    return None
def _meta_set(form_es:str, pos:str=None, tam_ok=None):
    if not form_es: return
    d = LEX_META.setdefault(form_es, {})
    if pos and not d.get("pos"): d["pos"] = pos
    if tam_ok is not None and d.get("tam_ok") is None: d["tam_ok"] = bool(tam_ok)
def pos_of_es(token_low:str) -> str:
    """Return the POS stored in LEX_META for the token; otherwise "V" if it looks like a verb form, else ""."""
    recorded = LEX_META.get(token_low, {}).get("pos")
    if recorded:
        return recorded
    if looks_like_verb_form_strict(token_low):
        return "V"
    return ""
def tam_allowed_for_es(token_low:str) -> bool:
    """Tell whether a TAM suffix may be attached to the translation of this token.

    An explicit "tam_ok" flag in LEX_META decides; without one, TAM is allowed
    exactly when pos_of_es() reports a verb.
    """
    flag = LEX_META.get(token_low, {}).get("tam_ok")
    if flag is None:
        return pos_of_es(token_low) == "V"
    return bool(flag)

# =========================
# TTS (Meta MMS)
# =========================
# Load the speech model once at import time. On any failure the error is
# printed and `processor` / `model` stay None, which synthesize_speech()
# treats as "TTS unavailable".
print("Cargando modelo de voz...")
device = "cuda" if torch.cuda.is_available() else "cpu"  # use the GPU when one is available
processor = model = None
try:
    processor = AutoProcessor.from_pretrained("facebook/mms-tts-spa")
    model = VitsModel.from_pretrained("facebook/mms-tts-spa").to(device)
    print("Modelo de voz cargado.")
except Exception as e:
    # Best effort: the rest of the module keeps working without speech.
    print(f"ERROR TTS: {e}")

PAUSE_LEVEL=3


def add_reading_pauses(text: str, level:int=3) -> str:
    """Duplicate punctuation marks in ``text`` so they read as longer pauses.

    level <= 1 returns the text untouched; level >= 2 doubles commas;
    level >= 3 also doubles periods and semicolons. Whitespace runs are then
    collapsed to single spaces and the result is stripped.
    """
    if level <= 1:
        return text
    paused = text
    if level >= 2:
        paused = re.sub(r",\s*", ", , ", paused)
    if level >= 3:
        for pattern, doubled in ((r"\.\s*", ". . "), (r";\s*", "; ; ")):
            paused = re.sub(pattern, doubled, paused)
    collapsed = re.sub(r'\s+',' ',paused)
    return collapsed.strip()

def hispanize_for_tts(ni_text: str) -> str:
    """Rewrite Neo-Iberian text into a spelling the Spanish voice can read.

    Lower-cases the text, maps ŕ→rr and ś→s, turns hyphens into spaces, drops
    bracketed spans, collapses whitespace and finally adds reading pauses at
    PAUSE_LEVEL.
    """
    lowered = (ni_text or "").lower()
    # One pass for the character substitutions; none of the replacement
    # strings contains a character that is itself substituted.
    respelled = lowered.translate(str.maketrans({'ŕ': 'rr', 'ś': 's', '-': ' '}))
    without_brackets = re.sub(r'\[.*?\]','',respelled)
    tidy = re.sub(r'\s+',' ',without_brackets).strip()
    return add_reading_pauses(tidy, PAUSE_LEVEL)

def synthesize_speech(text):
    """Synthesize ``text`` with the loaded TTS model.

    Returns ``(16000, samples)`` where ``samples`` is a float32 NumPy array
    scaled so its peak magnitude is 0.9, or None when the text is blank, the
    model/processor is not loaded, or synthesis raises.
    NOTE(review): 16000 is presumably the sample rate in Hz — confirm against
    the model configuration.
    """
    if not text or not text.strip():
        return None
    if model is None or processor is None:
        return None
    try:
        spoken = hispanize_for_tts(text)
        inputs = processor(text=spoken, return_tensors="pt").to(device)
        with torch.no_grad():
            waveform = model(**inputs).waveform
        samples = waveform.cpu().numpy().squeeze()
        # Peak-normalize to 0.9; an all-zero signal is left as is.
        peak = max(abs(samples.min()), abs(samples.max()))
        if peak > 0:
            samples = samples / peak * 0.9
        return (16000, samples.astype(np.float32))
    except Exception as e:
        print(f"Error TTS: {e}")
        return None

# =========================
# LÍNEA IBÉRICA (claves Georgeos)
# =========================
KEYS_MODE = "explicit"
V = "aeiou"
# Syllabic signs for the stops, indexed by the position of the vowel in V.
SYL_FOR={"b":["‹BA›","‹BE›","‹BI›","‹BO›","‹BU›"],
         "d":["‹DA›","‹DE›","‹DI›","‹DO›","‹DU›"],
         "t":["‹TA›","‹TE›","‹TI›","‹TO›","‹TU›"],
         "g":["‹GA›","‹GE›","‹GI›","‹GO›","‹GU›"],
         "k":["‹KA›","‹KE›","‹KI›","‹KO›","‹KU›"]}
# Alphabetic signs (vowels and continuants).
ALPHA_FOR={"a":"‹A›","e":"‹E›","i":"‹I›","o":"‹O›","u":"‹U›","s":"‹S›","ś":"‹Ś›","l":"‹L›","r":"‹R›","ŕ":"‹Ŕ›","n":"‹N›","m":"‹M›"}
# Signs used for a consonant that closes a stop+vowel syllable.
CODA_FOR={"":"","n":"‹N›","s":"‹S›","ś":"‹Ś›","r":"‹R›","ŕ":"‹Ŕ›","l":"‹L›","m":"‹M›","k":"‹K›","t":"‹T›"}

def tokens_from_latin(ni:str)->str:
    """Convert Latin-script Neo-Iberian text into a string of ‹SIGN› tokens.

    "p" is treated as "b" and "-" becomes "—". A stop followed by a vowel
    yields one syllabic sign, optionally followed by a coda sign. Any other
    character is looked up in ALPHA_FOR, falling back to its upper-case form
    without brackets.

    Fix: a consonant after a stop+vowel syllable is only taken as a coda when
    it is NOT itself followed by a vowel. Previously "katu" was split as
    ‹KA›‹T›‹U› because the "t" was consumed as a coda, so the following
    syllable lost its onset; it now yields ‹KA›‹TU›. For the continuants
    (n, s, ś, r, ŕ, l, m) the output is unchanged, because their coda and
    alphabetic signs are identical.
    """
    out=[]; i=0; ni=(ni or "").lower()
    n = len(ni)
    while i < n:
        c = ni[i]
        if c == "p":
            c = "b"
        if c == "-":
            out.append("—"); i += 1; continue
        if c in V:
            out.append(ALPHA_FOR[c]); i += 1; continue
        if c in SYL_FOR and i+1 < n and ni[i+1] in V:
            tok = SYL_FOR[c][V.index(ni[i+1])]
            coda = ni[i+2] if i+2 < n else ""
            # A consonant followed by a vowel opens the next syllable instead.
            starts_next_syllable = i+3 < n and ni[i+3] in V
            if coda and coda in CODA_FOR and not starts_next_syllable:
                tok += CODA_FOR[coda]; i += 3
            else:
                i += 2
            out.append(tok); continue
        out.append(ALPHA_FOR.get(c, c.upper())); i += 1
    return "".join(out)

KEYS_OVERRIDE={"ka":"K","mi":"MI","te":"TE","ne":"N","o":"O","eś":"X"}


def georgeos_keys(token_str:str, ni_plain:str)->str:
    """Turn a ‹SIGN› token string into the key sequence for the Iberian line.

    Words listed in KEYS_OVERRIDE (matched on the lower-cased plain form) get
    their fixed key string. Otherwise each ‹…› sign is converted according to
    KEYS_MODE: "compact" keeps one character per sign, any other mode keeps
    two-letter syllabic signs whole. In both modes Ś becomes "X" and Ŕ
    becomes "r".
    """
    plain_low = (ni_plain or "").lower()
    if plain_low in KEYS_OVERRIDE:
        return KEYS_OVERRIDE[plain_low]

    def key_for(sign):
        syllabic = len(sign) == 2 and sign[0] in "BDTGK"
        if KEYS_MODE == "compact":
            if syllabic:
                return sign[0]
            if sign in ("A","E","I","O","U"):
                return sign
            if sign == "Ś":
                return "X"
            if sign == "Ŕ":
                return "r"
            return sign[0].upper()
        if syllabic:
            return sign
        if sign == "Ś":
            return "X"
        if sign == "Ŕ":
            return "r"
        return sign.upper()

    signs = re.findall(r"‹(.*?)›", token_str)
    return "".join(key_for(sign) for sign in signs)

TRIDOT = "/"
VISIBLE_PUNCT = {",",".",";","; ",":","…","(",")","[","]","{","}","\"","'","«","»","—","–","“","”","‘","’"}
HARD_BOUND    = {".",";","—","–",":","(",")","«","»"}


def render_ib_with_tridots(toks):
    """Join Iberian-line tokens, putting TRIDOT between consecutive words.

    Tokens in VISIBLE_PUNCT are emitted with a space on each side and reset
    the "previous token was a word" state, so no separator is placed next to
    punctuation. The joined string is stripped before returning.
    """
    pieces = []
    after_word = False
    for tok in toks:
        if tok in VISIBLE_PUNCT:
            pieces.append(f" {tok} ")
            after_word = False
            continue
        if after_word:
            pieces.append(f" {TRIDOT} ")
        pieces.append(tok)
        after_word = True
    return "".join(pieces).strip()

# =========================
# TRADUCTOR ES→NI
# =========================
TAM_SUFFIX={"PRS":"-ke","PST":"-bo","FUT":"-ta","IPFV":"-ri","COND":"-ni","SBJV":"-ni","IMP":"-tu","INF":"","FUT_SBJV":"-ra","UNK":"-ke"}
VERB_TAM = ("-ke","-ta","-bo","-ri","-ni","-tu","-ra")


def strip_ni_tam(lemma: str):
    """Split a trailing TAM suffix off a Neo-Iberian lemma.

    Returns ``(root, suffix)``; the suffix is "" (and the root is the whole
    lemma) when the lemma ends with none of the VERB_TAM suffixes.
    """
    text = lemma or ""
    by_length = sorted(VERB_TAM, key=len, reverse=True)
    found = next((suffix for suffix in by_length if text.endswith(suffix)), "")
    if found:
        return text[:-len(found)], found
    return text, ""

# Spanish function words and interjections. translate_sentence() skips a token
# found here unless the token also has an entry of its own in LEX_FORM.
STOP=set("""
el la los las lo un una unos unas al del de en con sin por sobre entre hasta desde hacia según tras
pero aunque sino que como si porque cuando donde mientras
muy ya sí no también solo sólo aún aun más menos
mi mis tu tus su sus nuestro nuestra nuestros nuestras
esto eso aquello ese esa esos esas aquel aquella aquellos aquellas
quien quién quiénes cual cuál cuales cuáles cuyo cuya cuyos cuyas
eh ay oh uy ah aja jeje jaja aah ahh ohh uhh
""".split())

# --- Reglas "a" → ka/mi/te
def rule_a(prev_tok:str, token:str, next_tok:str)->str:
    """Choose the Neo-Iberian particle for Spanish "a".

    "mi" after one of the listed giving/telling infinitives, "te" before one
    of the listed given names, "ka" otherwise. ``token`` itself is not used.
    """
    transfer_verbs = frozenset(("dar","decir","contar","enviar","ofrecer","mostrar","prestar","regalar","entregar"))
    given_names = frozenset(("ana","marta","juan","pedro","luis","maría","jose","carlos","laura"))
    if prev_tok in transfer_verbs:
        particle = "mi"
    elif next_tok in given_names:
        particle = "te"
    else:
        particle = "ka"
    return particle

Q_ENCLITIC_INT = "-na"
Q_ENCLITIC_EXC = "-ba"
WH_WORDS = {
    "qué","quien","quién","quienes","quiénes","cual","cuál","cuales","cuáles",
    "donde","dónde","cuando","cuándo","como","cómo",
    "cuanto","cuánto","cuanta","cuánta","cuantos","cuántos","cuantas","cuántas"
}


def is_wh_token(t: str) -> bool:
    """Return True when ``t`` is an interrogative word.

    The lower-cased token is first looked up in WH_WORDS; failing that, its
    diacritic-folded form is compared against the unaccented spellings.
    """
    lowered = (t or "").lower()
    return lowered in WH_WORDS or fold(lowered) in {"que","quien","quienes","cual","cuales","donde","cuando","como","cuanto","cuanta","cuantos","cuantas"}

def has_wh_outside_parens(toks) -> bool:
    """Return True when an interrogative word occurs outside parentheses/quotes.

    Nesting depth is tracked over the opening marks ( « “ ‘ and the closing
    marks ) » ” ’, never dropping below zero; only tokens seen at depth 0 are
    tested with is_wh_token().
    """
    openers = {"(", "«", "“", "‘"}
    closers = {")", "»", "”", "’"}
    nesting = 0
    for tok in toks:
        if tok in openers:
            nesting += 1
            continue
        if tok in closers:
            nesting = max(0, nesting-1)
            continue
        if nesting == 0 and is_wh_token(tok):
            return True
    return False

# Forms of "estar" / "haber" that are recognized as auxiliaries.
ESTAR_SET={"estoy","estás","está","estamos","estáis","están","estaba","estabas","estábamos","estabais","estaban"}
HABER_SET={"he","has","ha","hemos","habéis","han","había","habías","habíamos","habíais","habían"}

def detect_tam_with_context(toks, i, sentence_start=False):
    """Return the TAM tag for ``toks[i]``, refining es_morph_tag() with context.

    The neighbouring tokens (two before, one after) are consulted for
    sentence-initial imperatives, subjunctive triggers, auxiliaries and the
    "ir a + infinitive" future. The first matching rule wins; when nothing
    applies, the morphological tag is returned, with "UNK" replaced by "PRS".

    ``sentence_start`` is only read by the clitic-cluster imperative check.
    """
    t=toks[i].lower()
    prev=toks[i-1].lower() if i>0 else ""
    prev2=toks[i-2].lower() if i>1 else ""
    nxt=toks[i+1].lower() if i+1<len(toks) else ""
    tag=es_morph_tag(t)

    # Imperatives with clitics at the start of a clause
    if re.search(r"(melo|telo|selo|noslo|oslo)$", t):
        if sentence_start or prev in {",", ".", "!", "¡", ";", ":"}: return "IMP"
    # Short irregular imperatives, only at the start of the line or after punctuation.
    if i == 0 or prev in {",", ".", "!", "¡", ";", ":"}:
        if t in {"ve","ven","haz","pon","sal","di","ten","sé","id","venid","tened"}: return "IMP"

    # After a subordinating word, prefer the subjunctive reading.
    if prev in {"que","si","cuando","aunque","mientras","hasta","para"}:
        if tag=="SBJV": return "SBJV"
        if tag=="UNK" and re.search(r"(e|a)$", t) and not t.endswith(("ar","er","ir")): return "SBJV"

    # The auxiliaries themselves are always tagged PRS.
    if t in ESTAR_SET or t in HABER_SET: return "PRS"
    # NOTE(review): these two checks look at the token AFTER the current one
    # (`nxt`), so they tag the word standing between the auxiliary and the
    # gerund/participle. A gerund/participle right after the auxiliary is
    # handled by the RE_GER / RE_PART checks below. Confirm `nxt` (rather
    # than `t`) is intended.
    if prev in ESTAR_SET and RE_GER.search(nxt): return "IPFV"
    if prev in HABER_SET and RE_PART.search(nxt): return "PST"
    # "voy a cantar": periphrastic future.
    if prev == "a" and prev2 in {"voy","vas","va","vamos","vais","van"} and t.endswith(("ar","er","ir")): return "FUT"
    if RE_GER.search(t):  return "IPFV"
    if RE_PART.search(t): return "PST"
    return tag if tag!="UNK" else "PRS"

def forced_lemma_with_context(low:str, prev:str, nxt:str)->str:
    """Return a lemma forced by context, or "" when no rule applies.

    The only rule: "visto" followed by "de" is read as a form of "vestir".
    ``prev`` is accepted but not used.
    """
    return "vestir" if (low, nxt) == ("visto", "de") else ""

def has_tilde_equiv_lookup(low:str)->str:
    """Look up an accented, non-verb-looking word by its unaccented spelling.

    Returns the LEX_FORM entry for the folded form, else the FOLD_FORM entry,
    else "". Words without diacritics, or that look like verb forms, always
    give "".
    """
    if not has_diacritic(low) or looks_like_verb_form_strict(low):
        return ""
    folded = fold(low)
    for table in (LEX_FORM, FOLD_FORM):
        if folded in table:
            return table[folded]
    return ""

# ==== quitar enclíticos españoles para el "guesser" ====
ENCL_RE = re.compile(r"(?:(?:me|te|se|nos|os|le|les)(?:lo|la|los|las)?|(?:lo|la|los|las))$", re.I)


def strip_es_enclitics(w:str)->str:
    """Remove hyphens and a trailing Spanish enclitic pronoun cluster from ``w``.

    Falsy input is returned unchanged. The match is purely orthographic, so
    any word that happens to end in a pronoun-like sequence is shortened too.
    """
    if not w:
        return w
    dehyphenated = w.replace("-", "")
    return ENCL_RE.sub("", dehyphenated)

def lookup_form_lemma(token:str, prev:str, nxt:str):
    """Find the Neo-Iberian entry for a Spanish token.

    Returns ``(ni_entry, True)`` on success and ``("", False)`` otherwise.
    Sources are tried in order: a context-forced lemma, the exact form in
    LEX_FORM, the unaccented equivalent, the guessed infinitive of the token
    with enclitics stripped, and the guessed infinitive of the token itself
    when it looks like a verb form.
    """
    if not token:
        return "", False
    low = token.lower()

    forced = forced_lemma_with_context(low, prev, nxt)
    if forced and forced in LEX_LEMMA:
        return LEX_LEMMA[forced], True
    if low in LEX_FORM:
        return LEX_FORM[low], True
    unaccented_hit = has_tilde_equiv_lookup(low)
    if unaccented_hit:
        return unaccented_hit, True

    # Forms worth passing to the infinitive guesser, in priority order.
    candidates = []
    without_clitics = strip_es_enclitics(low)
    if without_clitics != low:
        candidates.append(without_clitics)
    if looks_like_verb_form_strict(low):
        candidates.append(low)

    for candidate in candidates:
        infinitive = guess_infinitive_es(candidate)
        if infinitive and infinitive in LEX_LEMMA:
            return LEX_LEMMA[infinitive], True
    return "", False

def attach_enclitic(out_words, ib_keys, plain, attach_idx, encl):
    """Append the enclitic ``encl`` to the word at ``attach_idx``, in place.

    The three parallel lists are updated together: the surface word, its plain
    form, and the Iberian keys (recomputed from the new plain form). Nothing
    happens when the index is None or out of range, or when the word already
    ends with the enclitic.
    """
    if attach_idx is None or not (0 <= attach_idx < len(out_words)):
        return
    current = out_words[attach_idx] or ""
    if current.endswith(encl):
        return
    out_words[attach_idx] = current + encl
    new_plain = (plain[attach_idx] or "") + encl
    plain[attach_idx] = new_plain
    ib_keys[attach_idx] = georgeos_keys(tokens_from_latin(new_plain), new_plain)

def ensure_terminal_qmark(out_words, ib_keys, plain):
    """Make the output end with "?", mutating the three parallel lists in place.

    The last non-empty word decides: a "." is replaced by "?", an existing
    "?" or "!" is left alone, and anything else gets a "?" appended. With no
    non-empty word at all, a "?" is appended.
    """
    def append_qmark():
        out_words.append("?")
        ib_keys.append("")
        plain.append("?")

    # Index of the last entry that is neither "" nor None (-1 if none).
    last = next(
        (idx for idx in range(len(out_words) - 1, -1, -1) if out_words[idx] not in ("", None)),
        -1,
    )
    if last < 0:
        append_qmark()
        return

    tail = out_words[last]
    if tail == ".":
        out_words[last] = "?"
        ib_keys[last] = ""
        plain[last] = "?"
    elif tail not in {"?", "!"}:
        append_qmark()
def normalize_surface_by_pos(ni_surface:str, pos:str) -> str:
    """Return ``ni_surface`` unchanged; ``pos`` is accepted but ignored."""
    # Strict surface mode: hand back exactly what came from the CSV.
    return ni_surface

def translate_sentence(sent:str):
    """Translate one line of Spanish into Neo-Iberian.

    Returns ``(ni_text, ib_keys)``: the space-joined Neo-Iberian words and the
    list of non-empty Iberian-line keys, one per emitted word or punctuation
    mark.

    Three parallel lists are built token by token: ``out_words`` (surface),
    ``ib_keys`` (Iberian keys) and ``plain`` (form used to recompute keys).
    Question/exclamation marks attach an enclitic (Q_ENCLITIC_INT /
    Q_ENCLITIC_EXC) to the last finite verb, or failing that to the last word.
    Tokens with no lexical match are emitted as ``[SIN-LEX:token]``.
    """
    # Tokenize: collapse whitespace, then isolate every punctuation mark.
    toks = re.sub(r"\s+"," ", (sent or "").strip())
    toks = re.sub(r"([,.;:!?¡¿…()\[\]{}\"'«»—–“”‘’])", r" \1 ", toks)
    toks = [t for t in toks.split() if t]

    out_words=[]; ib_keys=[]; plain=[]
    # neg_next: a "no" was seen and is waiting for the next verb-like word.
    # last_finite_idx: index in out_words of the latest word treated as a finite verb.
    neg_next=False; last_finite_idx=None; has_qmark=False
    saw_wh = has_wh_outside_parens(toks)
    sentence_start=True

    for i,t in enumerate(toks):
        # Opening marks produce no output; they only flag a clause start.
        if t in {"¿","¡"}:
            sentence_start=True; continue
        # Closing ? / !: attach the matching enclitic, then emit the mark.
        if t in {"?","!"}:
            if t=="?": has_qmark=True
            encl = Q_ENCLITIC_INT if t=="?" else Q_ENCLITIC_EXC
            attach_idx = last_finite_idx
            if attach_idx is None:
                # No finite verb: fall back to the last non-punctuation word.
                for j in range(len(out_words)-1, -1, -1):
                    if out_words[j] and out_words[j] not in VISIBLE_PUNCT:
                        attach_idx = j; break
            if attach_idx is not None: attach_enclitic(out_words, ib_keys, plain, attach_idx, encl)
            out_words.append(t); ib_keys.append(""); plain.append(t)
            sentence_start=True; continue

        # Other punctuation is copied through; hard boundaries reset the verb anchor.
        if t in VISIBLE_PUNCT:
            out_words.append(t); ib_keys.append(t); plain.append(t)
            if t in HARD_BOUND:
                last_finite_idx=None
                sentence_start = (t in {".",":",";","—","–"})
            continue

        low=t.lower()
        prev = toks[i-1].lower() if i>0 else ""
        nxt  = toks[i+1].lower() if i+1<len(toks) else ""

        # TAM tag: clause-initial short imperatives and verb-like words ending
        # in a clitic-like sequence are forced to IMP; everything else goes
        # through the contextual detector.
        # NOTE(review): the first test compares `t` (original case), not `low`,
        # and the second also fires on any verb-like form that merely ends in
        # "te", "se", "la", ... — confirm both are intended.
        if (sentence_start and t in {"ve","ven","haz","pon","sal","di","ten","sé","id","venid","tened"}) or \
           (re.search(r"(me|te|lo|la|nos|os|les|se)$", low) and looks_like_verb_form_strict(low)):
            tag_detected="IMP"
        else:
            tag_detected = detect_tam_with_context(toks, i, sentence_start)

        pos_hint = pos_of_es(low)
        is_verb_like = looks_like_verb_form_strict(low) or (pos_hint=="V")
        tam_ok = tam_allowed_for_es(low)

        # Function words with fixed treatment.
        if low=="no": neg_next=True; continue
        if low in {"el","la","los","las","al","del"}: continue
        if low=="a":
            ni=rule_a(prev,low,nxt)
            out_words.append(ni); ib_keys.append(georgeos_keys(tokens_from_latin(ni),ni)); plain.append(ni)
            continue
        if low in {"un","una"}:
            ni="ban"
            out_words.append(ni); ib_keys.append(georgeos_keys(tokens_from_latin(ni),ni)); plain.append(ni)
            continue
        # Remaining stop words are dropped unless the lexicon has them.
        if (low in STOP) and (low not in LEX_FORM): continue

        # 1) Exact (form, tag) surface from the rich table.
        ni_direct = SURF_RICH.get((low, tag_detected))
        # A pending negation is emitted as "eś" before the next verb-like word.
        if neg_next and is_verb_like:
            out_words.append("eś"); ib_keys.append(georgeos_keys(tokens_from_latin("eś"),"eś")); plain.append("eś")
            neg_next=False
        if ni_direct:
            if any(ni_direct.endswith(s) for s in VERB_TAM):
                ni=ni_direct
            else:
                ni=normalize_surface_by_pos(ni_direct, "V" if tam_ok else (pos_hint or ""))
            out_words.append(ni); ib_keys.append(georgeos_keys(tokens_from_latin(ni),ni)); plain.append(ni)
            if tam_ok and any(ni.endswith(s) for s in VERB_TAM): last_finite_idx=len(out_words)-1
            sentence_start=False; continue

        # 2) Form/lemma lookup, then rebuild the TAM suffix as needed.
        ni_lemma, ok = lookup_form_lemma(t, prev, nxt)
        if ok:
            # Forced keys keep their LEX_FORM surface untouched.
            if low in FORCE_KEYS:
                ni = LEX_FORM.get(low, ni_lemma)
                out_words.append(ni); ib_keys.append(georgeos_keys(tokens_from_latin(ni),ni)); plain.append(ni)
                last_finite_idx=len(out_words)-1; sentence_start=False; continue
            root, old_suf = strip_ni_tam(ni_lemma or "")
            # Imperatives always take "-tu" on the bare root.
            if tag_detected=="IMP":
                ni=root+"-tu"
                out_words.append(ni); ib_keys.append(georgeos_keys(tokens_from_latin(ni),ni)); plain.append(ni)
                last_finite_idx=len(out_words)-1; sentence_start=False; continue
            # An entry that already carries "-tu" is kept as is.
            if old_suf=="-tu":
                ni=ni_lemma
                out_words.append(ni); ib_keys.append(georgeos_keys(tokens_from_latin(ni),ni)); plain.append(ni)
                last_finite_idx=len(out_words)-1; sentence_start=False; continue
            if tam_ok and is_verb_like:
                # Replace whatever suffix the entry had with the detected one.
                suf=TAM_SUFFIX.get(tag_detected,"-ke")
                base=root or (ni_lemma or "")
                ni= base+suf if suf else base
                out_words.append(ni); ib_keys.append(georgeos_keys(tokens_from_latin(ni),ni)); plain.append(ni)
                last_finite_idx=len(out_words)-1
            else:
                # Non-verbs: emit the entry itself ("Ø" when it is empty).
                ni=normalize_surface_by_pos(ni_lemma if ni_lemma!="" else "Ø", pos_hint or "")
                out_words.append(ni); ib_keys.append(georgeos_keys(tokens_from_latin(ni),ni)); plain.append(ni)
            sentence_start=False; continue

        # Numbers/times: plain numerals are copied through without keys.
        if re.fullmatch(r"\d+([.,]\d+)?", low):
            out_words.append(t); ib_keys.append(""); plain.append(t)
            sentence_start=False; continue

        # 3) No match at all: emit a visible placeholder.
        placeholder=f"[SIN-LEX:{t}]"
        out_words.append(placeholder); ib_keys.append(placeholder); plain.append(placeholder)
        sentence_start=False

    # An interrogative word without any "?" still marks a question: attach the
    # interrogative enclitic and make sure the line ends with "?".
    appended_na=False
    if saw_wh and not has_qmark:
        encl=Q_ENCLITIC_INT
        attach_idx=last_finite_idx
        if attach_idx is None:
            # Prefer the last real word that is not a bracketed placeholder...
            for j in range(len(out_words)-1,-1,-1):
                if out_words[j] and out_words[j] not in VISIBLE_PUNCT and out_words[j] not in {"?","!"} and not out_words[j].startswith("["):
                    attach_idx=j; break
        if attach_idx is None:
            # ...and accept a placeholder only when nothing else exists.
            for j in range(len(out_words)-1,-1,-1):
                if out_words[j] and out_words[j] not in VISIBLE_PUNCT and out_words[j] not in {"?","!"}:
                    attach_idx=j; break
        if attach_idx is not None and not (out_words[attach_idx].endswith("-na") or out_words[attach_idx].endswith("-ba")):
            attach_enclitic(out_words, ib_keys, plain, attach_idx, encl); appended_na=True
    if appended_na and not has_qmark: ensure_terminal_qmark(out_words, ib_keys, plain)

    # Entries with an empty key (e.g. ? / ! and numerals) are left out of the Iberian line.
    ib_clean=[k for k in ib_keys if k!=""]
    return " ".join(out_words), ib_clean

def translate(text:str):
    """Translate multi-line Spanish text into Neo-Iberian, one line at a time.

    Whitespace-only lines are dropped. Every remaining line is passed to
    ``translate_sentence``; the Iberian key tokens it returns are rendered
    with ``render_ib_with_tridots``.

    Returns:
        A ``(latin, iberian)`` pair of strings, each holding one output line
        per non-blank input line, joined with newlines.
    """
    latin_rows = []
    iberian_rows = []
    for raw_line in (text or "").split("\n"):
        if not raw_line.strip():
            continue
        latin, ib_tokens = translate_sentence(raw_line)
        latin_rows.append(latin)
        iberian_rows.append(render_ib_with_tridots(ib_tokens))
    return "\n".join(latin_rows), "\n".join(iberian_rows)

# =========================
# TRADUCTOR NI→ES (determinista)
# =========================
def normalize_ni(text: str) -> str:
    """Clean raw Neo-Iberian input before tokenization.

    Slashes become spaces, ``[SIN-LEX:word]`` placeholders are unwrapped to
    the bare ``word``, and whitespace is trimmed and collapsed to single
    spaces. ``None`` is treated as the empty string.
    """
    without_slashes = (text or "").replace("/", " ")
    unwrapped = re.sub(r"\[SIN-LEX:([^\]]+)\]", r"\1", without_slashes)
    trimmed = unwrapped.strip()
    return re.sub(r"\s+", " ", trimmed)

def tokenize_ni(text: str):
    """Split Neo-Iberian text into word and punctuation tokens.

    Every punctuation mark in the character class below is padded with
    spaces so it becomes its own token; hyphens are not in the class, so
    hyphenated suffixes stay attached to their word.
    """
    padded = re.sub(r"([,.;:!?¡¿…()\[\]{}\"'«»—–“”‘’])", r" \1 ", text)
    # str.split() with no argument never yields empty strings.
    return padded.split()

def _ni_fold(s: str) -> str:
    return (s or "").replace("ś","s").replace("ŕ","r")

def translate_ni_to_es(sent: str):
    """Translate one Neo-Iberian sentence into Spanish by table lookup.

    Each token is resolved in this priority order:

    1. clock time ``H:MM`` (re-joined, since the tokenizer splits on ``:``);
    2. punctuation, copied through unchanged;
    3. exact ``(surface, tag)`` entry in ``NI_TO_ES_SURF``;
    4. plain form entry in ``NI_TO_ES_FORM``;
    5. root entry in ``NI_TO_ES_LEMMA``;
    6. bare number, copied through unchanged.

    A trailing ``-na`` / ``-ba`` enclitic is stripped before lookup, and each
    table is tried with the literal key first and the ``_ni_fold`` key second.
    Tokens that resolve nowhere are emitted as ``[?:token]`` so lexicon gaps
    stay visible.

    Args:
        sent: Neo-Iberian text (treated as a single sentence).

    Returns:
        The Spanish rendering, with spacing around punctuation tidied.
    """
    toks = tokenize_ni(normalize_ni(sent))
    out = []
    i = 0
    while i < len(toks):
        t = toks[i]

        # Clock time: "H:MM" arrives as three tokens; glue them back together.
        if (
            i + 2 < len(toks)
            and re.fullmatch(r"\d{1,2}", toks[i])
            and toks[i + 1] == ":"
            and re.fullmatch(r"\d{2}", toks[i + 2])
        ):
            out.append(f"{toks[i]}:{toks[i + 2]}")
            i += 3
            continue

        if t in VISIBLE_PUNCT or t in {"?", "!", "¿", "¡"}:
            out.append(t)
            i += 1
            continue

        low = t.lower()
        # Drop the modal enclitic (-na / -ba) so the bare form is looked up.
        lookup = low[:-3] if low.endswith(("-na", "-ba")) else low
        root, tag, _ = detect_ni_tam(lookup)

        es_direct = NI_TO_ES_SURF.get((lookup, tag)) or NI_TO_ES_SURF.get((_ni_fold(lookup), tag))
        if es_direct:
            out.append(es_direct)
            i += 1
            continue

        form = NI_TO_ES_FORM.get(lookup) or NI_TO_ES_FORM.get(_ni_fold(lookup))
        if form:
            out.append(form)
            i += 1
            continue

        if root:
            lem = NI_TO_ES_LEMMA.get(root) or NI_TO_ES_LEMMA.get(_ni_fold(root))
            if lem:
                out.append(lem)
                i += 1
                continue

        if re.fullmatch(r"\d+([.,]\d+)?", low):
            out.append(t)
            i += 1
            continue

        out.append(f"[?:{t}]")
        i += 1

    s = " ".join(out)
    s = re.sub(r"\s+([,.;:!?])", r"\1", s)
    s = re.sub(r"\(\s+", "(", s)
    s = re.sub(r"\s+\)", ")", s)
    s = re.sub(r"\s{2,}", " ", s).strip()
    # Collapse a doubled preposition "a a" into a single "a". It has to be
    # matched as whole words: a plain substring replace of "a a " also hits
    # any word ending in "a" that is followed by "a" ("casa a Ana" would
    # become "cas a Ana") and leaves stray spaces behind.
    s = re.sub(r"(?<!\w)a a(?!\w)", "a", s)
    return s

# =========================
# CARGA DE LÉXICO ES→NI
# =========================
def _ingest_rich_es_ni_row(r):
    """Store one row of the rich ES→NI CSV; return True if a (form, tag) surface was stored."""
    es = (r.get("source_es") or "").strip().lower()
    tag = (r.get("es_morph") or "").strip().upper()
    surf = (r.get("ni_surface") or "").strip()

    if not surf:
        # No explicit surface: rebuild it from root + suffix when either is given.
        root = (r.get("ni_root") or "").strip()
        suf = (r.get("ni_suffix") or "").strip()
        if root or suf:
            surf = f"{root}{suf}"

    stored_surface = bool(es and tag and surf)
    if stored_surface:
        # Last row wins for a given (form, tag) pair.
        SURF_RICH[(es, tag)] = surf

    ni = (r.get("target_ni") or "").strip()
    es_lem = (r.get("es_lemma") or "").strip().lower()

    pos = _canon_pos(r.get("pos") or r.get("pos_es") or r.get("pos_ni") or "")
    tam_ok = _boolish(r.get("tam_ok"))
    if tam_ok is None:
        # No explicit flag: verbs accept TAM; with no POS at all it stays unknown.
        tam_ok = (pos == "V") if pos else None

    if es:
        _meta_set(es, pos=pos, tam_ok=tam_ok)

    if es_lem:
        # A lemma ending in -ar/-er/-ir is recorded as a verb regardless of the POS column.
        lemma_pos = "V" if es_lem.endswith(("ar", "er", "ir")) else (pos or "")
        _meta_set(es_lem, pos=lemma_pos, tam_ok=tam_ok)

    # First row wins for plain form/lemma mappings.
    if es and ni != "":
        LEX_FORM.setdefault(es, ni)
    if es_lem and ni != "":
        LEX_LEMMA.setdefault(es_lem, ni)

    ev = (r.get("evidencia_es") or "").strip().lower()
    if ev:
        EVIDENCE_COUNTS[ev] = EVIDENCE_COUNTS.get(ev, 0) + 1

    return stored_surface

def _ingest_simple_es_ni_row(r):
    """Store one row of the simple (source_es, target_ni) CSV; return True if the row was used."""
    es = (r.get("source_es") or "").strip().lower()
    ni = (r.get("target_ni") or "").strip()
    if not es:
        return False
    LEX_FORM.setdefault(es, ni)
    _meta_set(es, pos="", tam_ok=None)
    if looks_like_verb_form_strict(es):
        lem = guess_infinitive_es(es)
        if lem:
            LEX_LEMMA.setdefault(lem, ni)
            _meta_set(lem, pos="V", tam_ok=True)
    return True

def load_lexicon_es_ni():
    """Load the ES→NI lexicon from ``CSV_ES_NI``; CSV entries always take priority.

    Fills SURF_RICH, LEX_FORM, LEX_LEMMA, the per-word metadata (through
    ``_meta_set``), FOLD_FORM, FORCE_KEYS and EVIDENCE_COUNTS (when the CSV
    has an ``evidencia_es`` column).

    Two column layouts are accepted: the rich one (``source_es`` +
    ``es_morph``) and the simple one (``source_es`` + ``target_ni``).

    After reading, the minimal built-in fallbacks are installed for every key
    the CSV did not provide. This also happens when the CSV is missing or
    unreadable, so the engine always has its core connectors and verbs.

    Returns:
        True if the CSV was read completely with a recognised column layout,
        False otherwise (missing file, unknown columns, or read error).
    """
    global FOLD_FORM, FORCE_KEYS

    loaded = False
    total_rich = total_simple = 0
    EVIDENCE_COUNTS.clear()

    p = CSV_ES_NI
    if not os.path.exists(p):
        print(f"[WARN] No se encontró {p} (ES→NI).")
    else:
        try:
            # utf-8-sig tolerates a leading BOM (otherwise the first header
            # becomes "\ufeffsource_es" and the layout is not recognised);
            # newline="" is what the csv module requires for file objects.
            with open(p, encoding="utf-8-sig", newline="") as f:
                rd = csv.DictReader(f)
                flds = set(rd.fieldnames or [])

                # Rich CSV (preferred)
                if {"source_es", "es_morph"}.issubset(flds):
                    for r in rd:
                        if _ingest_rich_es_ni_row(r):
                            total_rich += 1
                    loaded = True

                # Simple CSV (supported)
                elif {"source_es", "target_ni"}.issubset(flds):
                    for r in rd:
                        if _ingest_simple_es_ni_row(r):
                            total_simple += 1
                    loaded = True
                else:
                    print(f"[WARN] Campos no reconocidos en {p}: {sorted(flds)}")

        except Exception as e:
            # Best effort: a broken CSV must not prevent the app from starting.
            print(f"[WARN] No se pudo leer {p}: {e}")

    # FOLD_FORM: diacritic-free variants (lookup only; never overrides).
    FOLD_FORM = {}
    for k, v in LEX_FORM.items():
        fk = fold(k)
        if fk != k and len(k) >= 5 and not looks_like_verb_form_strict(k):
            FOLD_FORM.setdefault(fk, v)

    # ---- Minimal fallbacks (ONLY for keys missing from the CSV) ----
    # Numerals, Vascoid words and the like are expected to come from the CSV.
    MIN_FALLBACK_FORM = {
        # Coordinators and negation
        "y": "ne", "o": "o", "no": "eś",
        # DOM 'a' (without an explicit mapping it is resolved by rule_a)
        "a": "ka",
        # Indefinite articles (if missing)
        "un": "ban", "una": "ban",
    }
    for k, v in MIN_FALLBACK_FORM.items():
        if k not in LEX_FORM:
            LEX_FORM[k] = v

    # Core lemmas (if missing): only auxiliary/core verbs of the engine.
    MIN_FALLBACK_LEMMA = {
        "ir": "nitus", "ser": "izan", "estar": "egon", "haber": "ukan",
        "venir": "nuker", "hacer": "giotael", "ver": "giŕok", "decir": "siśnesiŕ",
        "poder": "giokk", "tener": "giokk", "poner": "pusen", "salir": "salku", "dar": "buś",
        "llamar": "lankur", "llover": "xemmo",
    }
    for k, v in MIN_FALLBACK_LEMMA.items():
        if k not in LEX_LEMMA:
            LEX_LEMMA[k] = v
            _meta_set(k, pos="V", tam_ok=True)

    # Minimal forced forms (very frequent imperatives) — only if missing.
    # NOTE(review): "di" maps to "siśnesir-tu" (plain r) while the lemma
    # "decir" above is "siśnesiŕ" (ŕ) — confirm which spelling is intended.
    FORCE_FORMS_MIN = {
        "ven": "nuker-tu", "haz": "giotael-tu", "pon": "pusen-tu", "di": "siśnesir-tu",
        "llámame": "lankur-tu"
    }
    for form, ni in FORCE_FORMS_MIN.items():
        if form not in LEX_FORM:
            LEX_FORM[form] = ni
            _meta_set(form, pos="V", tam_ok=True)

    FORCE_KEYS = set(FORCE_FORMS_MIN.keys())

    # ---- Summary log ----
    if total_rich or total_simple or loaded:
        print(f"✓ ES→NI: {total_rich} superficies ricas, {total_simple} pares simples")
        if EVIDENCE_COUNTS:
            print("\nEvidencias (RICH):")
            for k, v in sorted(EVIDENCE_COUNTS.items(), key=lambda kv: (-kv[1], kv[0])):
                print(f"  {k}: {v}")

    return loaded

# =========================
# CARGA DE LÉXICO NI→ES
# =========================
def load_lexicon_ni_es_from_csv():
    """Load the NI→ES tables from the dedicated CSV (``CSV_NI_ES``).

    The Neo-Iberian side is taken verbatim from the ``source_ni`` column (not
    from a rebuilt ``ni_surface``). Rows lacking ``source_ni`` or
    ``target_es`` are skipped. Every accepted row is stored under both its
    lowercase surface and its ``_ni_fold`` variant; later rows overwrite
    earlier ones.

    Returns:
        True if at least one entry was loaded; False if the file is missing,
        could not be read, or contained no usable rows.
    """
    if not os.path.exists(CSV_NI_ES):
        return False

    c_surf = c_form = c_lemma = 0
    try:
        # utf-8-sig tolerates a leading BOM (otherwise the first column name
        # becomes "\ufeffsource_ni" and every row is silently skipped);
        # newline="" is what the csv module requires for file objects.
        with open(CSV_NI_ES, encoding="utf-8-sig", newline="") as f:
            for r in csv.DictReader(f):
                ni_surface = (r.get("source_ni") or "").strip()
                es_surface = (r.get("target_es") or "").strip()
                if not ni_surface or not es_surface:
                    continue

                ni_lower = ni_surface.lower()
                ni_folded = _ni_fold(ni_lower)

                tag = (r.get("es_morph") or "").strip().upper()
                if not tag or tag == "INF":
                    # NOTE(review): when tag == "INF" the detected tag is
                    # discarded by `tag or tag0`; confirm whether the surface
                    # tag was meant to take precedence over "INF".
                    _, tag0, _ = detect_ni_tam(ni_lower)
                    tag = tag or tag0

                if tag:
                    NI_TO_ES_SURF[(ni_lower, tag)] = es_surface
                    NI_TO_ES_SURF[(ni_folded, tag)] = es_surface
                    c_surf += 1

                NI_TO_ES_FORM[ni_lower] = es_surface
                NI_TO_ES_FORM[ni_folded] = es_surface
                c_form += 1

                es_lemma = (r.get("es_lemma") or "").strip().lower()
                if es_lemma:
                    root, _, _ = detect_ni_tam(ni_lower)
                    if root:
                        NI_TO_ES_LEMMA[root] = es_lemma
                        NI_TO_ES_LEMMA[_ni_fold(root)] = es_lemma
                        c_lemma += 1
    except Exception as e:
        # Best effort: the caller falls back to inverting ES→NI.
        print(f"[WARN] No se pudo leer {CSV_NI_ES}: {e}")
        return False

    if c_surf + c_form + c_lemma > 0:
        print(f"✓ NI→ES (CSV): {c_surf} superficies+tag, {c_form} formas, {c_lemma} lemas")
        return True
    return False

def build_inverse_from_esni():
    """Build the NI→ES tables by inverting the already-loaded ES→NI tables.

    Used as a fallback when no dedicated NI→ES CSV is available. The three
    NI→ES tables are cleared first, then filled from SURF_RICH, LEX_FORM and
    LEX_LEMMA in that order. Within each table the first writer wins, and the
    ``_ni_fold`` alias of a surface is only added when that key is still
    free, so an alias never replaces an entry registered earlier.

    Returns:
        Always True.
    """
    NI_TO_ES_SURF.clear()
    NI_TO_ES_FORM.clear()
    NI_TO_ES_LEMMA.clear()
    c_surf = c_form = c_lemma = 0

    def _add_surface(ni_low, tag, es_value):
        """Register (surface, tag) -> es_value plus its folded alias; return True if newly added."""
        key = (ni_low, tag)
        if key in NI_TO_ES_SURF:
            return False
        NI_TO_ES_SURF[key] = es_value
        # The alias must not overwrite another word's exact surface.
        NI_TO_ES_SURF.setdefault((_ni_fold(ni_low), tag), es_value)
        return True

    for (es_low, tag), ni_surface in SURF_RICH.items():
        ni_low = (ni_surface or "").strip().lower()
        if not ni_low or not es_low or not tag:
            continue
        if _add_surface(ni_low, tag, es_low):
            c_surf += 1
        NI_TO_ES_FORM.setdefault(ni_low, es_low)
        NI_TO_ES_FORM.setdefault(_ni_fold(ni_low), es_low)

    for es_form, ni_form in LEX_FORM.items():
        ni_low = (ni_form or "").strip().lower()
        if not ni_low:
            continue
        NI_TO_ES_FORM.setdefault(ni_low, es_form)
        NI_TO_ES_FORM.setdefault(_ni_fold(ni_low), es_form)
        c_form += 1

        # Forms that carry a recognisable non-infinitive tag also get a
        # (surface, tag) entry, and are counted in the summary.
        _, tag, _ = detect_ni_tam(ni_low)
        if tag and tag != "INF" and _add_surface(ni_low, tag, es_form):
            c_surf += 1

    for es_lemma, ni_lemma in LEX_LEMMA.items():
        ni_low = (ni_lemma or "").strip().lower()
        if not ni_low:
            continue
        NI_TO_ES_LEMMA.setdefault(ni_low, es_lemma)
        NI_TO_ES_FORM.setdefault(ni_low, es_lemma)
        NI_TO_ES_LEMMA.setdefault(_ni_fold(ni_low), es_lemma)
        NI_TO_ES_FORM.setdefault(_ni_fold(ni_low), es_lemma)
        c_lemma += 1

    print(f"✓ NI→ES (inversor): {c_surf} superficies+tag, {c_form} formas, {c_lemma} lemas")
    return True

def load_lexicon_ni_es():
    """Populate the NI→ES tables: dedicated CSV first, otherwise invert ES→NI.

    Afterwards a handful of core connectors and pronouns are added with
    ``setdefault`` (literal and ``_ni_fold`` keys), so they never override an
    entry that was already loaded. Always returns True.
    """
    if not load_lexicon_ni_es_from_csv():
        build_inverse_from_esni()

    # Minimal coverage (does not override existing entries) — key connectors only.
    KEEP_MIN_NI = {"ne":"y","o":"o","eś":"no","ka":"a","mi":"a","te":"a","ban":"un","ni":"yo","zu":"tú","nar":"él"}
    for ni_word, es_word in KEEP_MIN_NI.items():
        lowered = ni_word.lower()
        for variant in (lowered, _ni_fold(lowered)):
            NI_TO_ES_FORM.setdefault(variant, es_word)
    return True

# Both lexicons are loaded at import time, ES→NI first because the NI→ES
# loader may need to invert it.
print("Cargando léxico ES→NI...")
load_lexicon_es_ni()
print("Cargando léxico NI→ES...")
load_lexicon_ni_es()

# =========================
# UI CLÁSICA (con dirección)
# =========================
# UI strings keyed by interface language ("ES" / "EN"). Both entries carry the
# same keys; "acc_titles" holds the ten accordion titles of the documentation
# section, in the same order as the DOC lists.
LABELS={
    "ES":{
        "title":"Traductor Español ↔ Neoíbero",
        "subtitle":"Explora el renacimiento ibérico con tecnología moderna",
        "in_label_es":"✏️ Entrada (Español)",
        "in_label_ni":"✏️ Entrada (Neoíbero)",
        "in_ph_es":"Escribe aquí. Ej.: Veo a Ana y doy pan a Marta.",
        # NOTE(review): unlike the other ES strings this placeholder is not
        # Spanish ("Idatzi hemen" looks Basque) — confirm it is intentional.
        "in_ph_ni":"Idatzi hemen. Adib.: nuker-ke ni etxe-ka.",
        "out_lat_esni":"📜 Salida: Neoíbero (latín)",
        "out_lat_nies":"📜 Salida: Español",
        "out_ib":"🗿 Línea ibérica",
        "out_audio":"🔊 Locución (Audio)",
        "btn":"🔄 Traducir",
        "combo":"🌍 Idioma (UI + explicación)",
        "dir":"🔁 Dirección",
        # The handlers test the direction with startswith("ES").
        "dir_opts":["ES → NI","NI → ES"],
        "doc_header":"📚 Documentación y Referencia",
        "acc_titles":[
            "🎓 Marco académico y decisiones del neoíbero",
            "🏛️ Herencia posible del íbero histórico",
            "🎨 Diseño de la conlang (neoíbero)",
            "⚙️ Pipeline del traductor (paso a paso)",
            "🔤 Ortografía, línea ibérica y claves",
            "❓/❗ Modalidad presunto vascoide (-na / -ba)",
            "📖 Gramática de referencia (v1.2)",
            "📚 Bibliografía de base",
            "🧾 Siglas y glosario",
            "🪶 Léxico vascoide, evidencias y prioridad del CSV"
        ]
    },
    "EN":{
        "title":"Spanish ↔ Neo-Iberian Translator",
        "subtitle":"Explore the revival of Neo-Iberian with modern tech",
        "in_label_es":"✏️ Input (Spanish)",
        "in_label_ni":"✏️ Input (Neo-Iberian)",
        "in_ph_es":"Type here. E.g., Veo a Ana y doy pan a Marta.",
        "in_ph_ni":"Type here. E.g., nuker-ke ni etxe-ka.",
        "out_lat_esni":"📜 Output: Neo-Iberian (Latin)",
        "out_lat_nies":"📜 Output: Spanish",
        "out_ib":"🗿 Iberian line",
        "out_audio":"🔊 Speech (Audio)",
        "btn":"🔄 Translate",
        "combo":"🌍 Language (UI + docs)",
        "dir":"🔁 Direction",
        # Same option strings as in "ES", so the selected value stays valid
        # when the UI language changes.
        "dir_opts":["ES → NI","NI → ES"],
        "doc_header":"📚 Documentation & Reference",
        "acc_titles":[
            "🎓 Background & design choices",
            "🏛️ Possible inheritance from ancient Iberian",
            "🎨 Conlang design (Neo-Iberian)",
            "⚙️ Translator pipeline (step-by-step)",
            "🔤 Orthography, Iberian line & keys",
            "❓/❗ ‘Vascoid’ mood (-na / -ba)",
            "📖 Reference grammar (v1.2)",
            "📚 Core references",
            "🧾 Acronyms & glossary",
            "🪶 Vascoid lexicon, evidences & CSV priority"
        ]
    }
}

# Documentation panel 1 (Spanish, Markdown): overview of goal, scope, data
# files and engine.
DOC_ES_0 = """**Escritura y datos (visión general).**
**Objetivo.** Traducir ES↔NI con salida latina y línea ibérica "visual".
**Alcance.** Lengua construida (neoíbero) inspirada en rasgos ibéricos/vascoides; no es reconstrucción histórica.
**Datos.**
- CSV "ricos" (`HF_Pairs_ES_NI_RICH.csv`): superficies condicionadas por morfología (`source_es`, `es_morph`, `ni_surface`, `ni_root`, `ni_suffix`, `es_lemma`, `pos`, `tam_ok`, `ni_tam`…).
- NI→ES se **construye automáticamente** como inverso del ES→NI si no hay CSV dedicado (`HF_Pairs_NI_ES_Translator.csv`).
**Motor.**
- Analizador ES: adivina **TAM** (tiempo/aspecto/modo) + excepciones irregulares; sirve de *rescate* si el CSV no marca el TAM.
- Generador NI: compone raíz + sufijo TAM y añade clíticos modales (-na / -ba).
- Reverso NI→ES: *lookup* determinista (superficie+TAM → ES; sin conjugar).
**Licencia/datos.** Ficheros CSV locales; puedes ampliarlos sin tocar el código.
**Versión.** v2.4 (doc ampliada + prioridad CSV + evidencias)."""

# Documentation panel 2 (Spanish, Markdown): features presented as plausibly
# inherited from Iberian. The two trailing spaces on the numerals line are a
# Markdown hard line break.
DOC_ES_1 = """**Herencia plausible del íbero (resumen no paleográfico).**
- **Fonotaxis** preferente **CV(C)**; *p* marginal → **b** en línea ibérica.
- **Vibrantes**: /r/ simple, **ŕ** (fuerte) no inicial en grafía latina NI.
- **Casos/postposiciones** productivos en NI: `-k` (plural), `-te` (agente/instrumental), `-ar/-en` (genitivo/origen), `-ka` (dativo/locativo/distal), `-i` (acusativo PN).
- **Partículas**: **ne** 'y', **o** 'o', **eś** 'no'.
- **Numerales** (base 10/20 visual): *ban, bi, irur, laur, borste, sei, sisbi, sorse, lauŕbi, abar, orkei…*  
**Nota.** Diseño coherente interno > exactitud histórica literal."""

# Documentation panel 3 (Spanish, Markdown): conlang design — TAM suffixes,
# derivation, Latin orthography and word order.
DOC_ES_2 = """**Diseño del neoíbero (fonología + morfología).**
**TAM verbal** (sufijos): **PRS** `-ke`, **PST** `-bo`, **FUT** `-ta`, **IPFV** `-ri`, **IMP** `-tu`, **COND/SBJV** `-ni`, **FUT_SBJV** `-ra`.
**Derivación**: `-ar`, `-en`, `-la`, `-ŕa`, `-tu` (agente), `-si` (adjetival).
**Ortografía latina NI.** /p/→**b**; **ś/ŕ**; guiones `-` visibles para TAM/clíticos.
**Orden** preferente **SOV**."""

# Documentation panel 4 (Spanish, Markdown): the ES→NI pipeline as a numbered
# list of steps.
DOC_ES_3 = """**Pipeline del traductor (ES→NI).**
1) Tokeniza y separa signos (incluye comillas curvas).
2) Elimina artículos/contracciones frecuentes.
3) DOM `a` → **ka/mi/te** (transferencia y PN comunes).
4) Gating POS/TAM: solo verbos reciben TAM.
5) Detección TAM (perífrasis+irregulares) como **rescate** si el CSV no trae `es_morph`.
6) Negación: **eś** antes del último finito.
7) ¿?/¡! → **-na/-ba**; WH sin ¿? → inyecta **-na** + `?`.
8) Línea ibérica: tokens BA/BE/… y **tridots** `/`.
9) Números: pasan tal cual (o según CSV si existen)."""

# Documentation panel 5 (Spanish, Markdown): orthography, Iberian line and
# key shortcuts.
DOC_ES_4 = """**Ortografía, línea ibérica y claves.**
Modo `explicit` (BA/BE/BI/BO/BU + A/E/I/O/U).
Atajos: `ka`→K, `mi`→MI, `te`→TE, `ne`→N, `o`→O, `eś`→X.
Separador de palabra = **tridots `/`**. `-` para TAM/clíticos.
Puntuación visible: , . ; : … ( ) [ ] « » — – “ ” ‘ ’.
`p`→**b**; codas N/S/Ś/R/Ŕ/L/M/K/T."""

# Documentation panel 6 (Spanish, Markdown): the -na (question) and -ba
# (exclamation) enclitics.
DOC_ES_5 = """**Modalidad -na (¿?) / -ba (¡!).**
- `?` → **-na** al último finito (o último constituyente).
- `!` → **-ba** idem.
- WH fuera de paréntesis sin `?` → inyecta **-na** + `?`.
- Evita duplicados de -na/-ba. `¿ ¡` se ignoran en la lógica."""

# Documentation panel 7 (Spanish, Markdown): minimal reference grammar.
DOC_ES_6 = """**Gramática mínima (v1.2).**
Verbo = raíz + TAM; negación **eś**.
Casos: `-k, -te, -ka, -ar/-en, -i`.
Pronombres: `ni, zu, nar, gu, zuek, narek`.
Orden SOV; coordinaciones **ne/o**.
Irregularidades: listados y atajos contextuales (`FORCE_KEYS`)."""

# Documentation panel 8 (Spanish, Markdown): bibliography / sources.
DOC_ES_7 = """**Bibliografía / fuentes (selección).**
Untermann; de Hoz; Ferrer i Jané; Correa; gramáticas del español para heurísticas.
App: decisiones de diseño (no reconstrucción)."""

# Documentation panel 9 (Spanish, Markdown): glossary, dataset columns and
# troubleshooting. The two trailing spaces on the NI→ES line are a Markdown
# hard line break.
DOC_ES_8 = """**Glosario y datasets.**
TAM, DOM, superficie, lema, enclítico, tridots, clave.
**CSV ricos (ES→NI)**: `source_es`, `es_morph`, `ni_surface` o (`ni_root`+`ni_suffix`), `es_lemma`, `pos`, `tam_ok`, `ni_tam`, `evidencia_es` (opcional).
**NI→ES**: `HF_Pairs_NI_ES_Translator.csv` **o** inversor automático desde ES→NI.  
**Troubleshooting**:
- Regex enclíticos: corregido.
- Audio: MMS CPU si no hay CUDA; locución oculta en NI→ES.
- `[SIN-LEX:…]` / `[?:…]` exponen huecos para completar el CSV.
"""

# Documentation panel 10 (Spanish, Markdown): Vascoid lexicon, evidence tags
# and CSV priority.
DOC_ES_9 = """**Léxico vascoide, evidencias y prioridad del CSV.**
- **Vascoide**: voces marcadas como `evidencia_es = "vascoide"` en el CSV **no se tocan**; se usan tal cual (superficie estricta).
- **Conjetural / familia / irregular / blindaje**: se registran y **solo** informan; no alteran la traducción si la superficie viene dada.
- **Prioridad CSV**: siempre gana `ni_surface` (o `ni_root`+`ni_suffix`) frente a reglas internas. Las reglas solo actúan como **rescate** si falta entrada o `es_morph`.
- **Limpieza**: listas internas (numerales, vascoides, atajos) se reducen al mínimo **solo si faltan** en CSV.
- **Simetría**: NI→ES toma el CSV dedicado; si falta, invierte ES→NI conservando TAM."""

# Documentation bodies per UI language, ten entries each, index-aligned with
# LABELS[lang]["acc_titles"]. The "EN" entries are one-line summaries, not
# full translations of the Spanish panels.
DOC = {
    "ES": [DOC_ES_0, DOC_ES_1, DOC_ES_2, DOC_ES_3, DOC_ES_4, DOC_ES_5, DOC_ES_6, DOC_ES_7, DOC_ES_8, DOC_ES_9],
    "EN": [
        "Script & data (overview).",
        "Possible inheritance (non-palaeographic).",
        "Neo-Iberian design (phonology & morphology).",
        "Translator pipeline (ES→NI).",
        "Orthography, Iberian line & keys.",
        "‘Vascoid’ mood (-na / -ba).",
        "Minimal grammar (v1.2).",
        "Selected references.",
        "Glossary & datasets.",
        "Vascoid lexicon, evidences & CSV priority."
    ]
}

def build_css():
    """Return the app stylesheet, embedding the Iberian font when available.

    If ``Iberia-Georgeos.ttf`` exists in the working directory it is inlined
    into the ``@font-face`` rule as a base64 ``data:`` URL. When the file is
    absent (or empty) the rule falls back to ``local('sans-serif')``.
    """
    font_path = "Iberia-Georgeos.ttf"
    encoded_font = None
    if os.path.exists(font_path):
        with open(font_path, "rb") as font_file:
            raw_font = font_file.read()
        encoded_font = base64.b64encode(raw_font).decode("ascii")

    if encoded_font:
        font_src = f"url(data:font/ttf;base64,{encoded_font}) format('truetype')"
    else:
        font_src = "local('sans-serif')"

    return f"""
@font-face {{
  font-family: 'IberiaGeorgeos';
  src: {font_src};
  font-weight: normal; font-style: normal;
}}
:root {{
  --iberian-clay:#8B4513; --iberian-ochre:#CC7722; --iberian-stone:#5C5C5C;
  --iberian-sand:#D2B48C; --iberian-rust:#A0522D; --iberian-bronze:#CD7F32;
}}
.gradio-container {{ background:linear-gradient(135deg,#f4e8d8 0%,#e8d5c4 50%,#d4c4b0 100%)!important;
  font-family:'Georgia','Times New Roman',serif!important; }}
.gradio-container h1,.gradio-container h2,.gradio-container h3 {{
  color:var(--iberian-clay)!important; text-shadow:2px 2px 4px rgba(139,69,19,.15)!important;
  border-bottom:3px solid var(--iberian-bronze)!important; padding-bottom:.5rem!important; letter-spacing:.5px!important;
}}
.gradio-container .gr-group {{ background:linear-gradient(to bottom,#f9f6f0,#ede6dc)!important;
  border:2px solid var(--iberian-sand)!important; border-radius:8px!important; box-shadow:0 4px 12px rgba(139,69,19,.2), inset 0 1px 0 rgba(255,255,255,.5)!important;
  padding:1.5rem!important; margin-bottom:1.5rem!important; }}
.gradio-container .gr-accordion {{ background:linear-gradient(145deg,#ebe3d5,#d9cec0)!important;
  border:2px solid var(--iberian-rust)!important; border-radius:6px!important; margin-bottom:.8rem!important; box-shadow:2px 2px 6px rgba(0,0,0,.15)!important; }}
.gradio-container .gr-accordion .label-wrap {{ background:linear-gradient(to right,var(--iberian-ochre),var(--iberian-rust))!important;
  color:#fff!important; font-weight:600!important; padding:.8rem 1rem!important; border-radius:4px!important; text-shadow:1px 1px 2px rgba(0,0,0,.3)!important; }}
.gradio-container .gr-textbox textarea,.gradio-container .gr-textbox input {{ background:linear-gradient(to bottom,#faf8f3,#f5f0e8)!important;
  border:2px solid var(--iberian-sand)!important; border-radius:6px!important; color:var(--iberian-stone)!important;
  font-family:'Georgia',serif!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1)!important; }}
.gradio-container .gr-textbox textarea:focus,.gradio-container .gr-textbox input:focus {{
  border-color:var(--iberian-bronze)!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1), 0 0 8px rgba(205,127,50,.3)!important; }}
.gradio-container .gr-button.gr-button-primary {{ background:linear-gradient(145deg,var(--iberian-bronze),var(--iberian-rust))!important;
  border:2px solid var(--iberian-clay)!important; color:#fff!important; font-weight:bold!important; text-shadow:1px 1px 2px rgba(0,0,0,.4)!important;
  box-shadow:0 4px 8px rgba(139,69,19,.3), inset 0 1px 0 rgba(255,255,255,.2)!important; border-radius:8px!important; padding:.8rem 1.5rem!important; transition:all .3s ease!important; }}
.gradio-container .gr-button.gr-button-primary:hover {{ background:linear-gradient(145deg,var(--iberian-rust),var(--iberian-bronze))!important;
  transform:translateY(-2px)!important; box-shadow:0 6px 12px rgba(139,69,19,.4)!important; }}
.ib-line {{ font-family:'IberiaGeorgeos',monospace,sans-serif!important; font-size:1.9rem!important; line-height:2.4rem!important; white-space:pre-wrap!important;
  background:linear-gradient(135deg,#e8dcc8 0%,#d4c4a8 50%,#c4b098 100%)!important; padding:24px!important; border-radius:10px!important;
  border:3px solid var(--iberian-rust)!important; border-left:6px solid var(--iberian-bronze)!important;
  box-shadow:0 4px 15px rgba(139,69,19,.25), inset 0 2px 4px rgba(0,0,0,.1)!important; color:var(--iberian-clay)!important; position:relative!important; }}
.ib-line::before {{ content:''!important; position:absolute!important; inset:0!important;
  background-image:repeating-linear-gradient(0deg,transparent,transparent 2px, rgba(139,69,19,.03) 2px, rgba(139,69,19,.03) 4px)!important;
  pointer-events:none!important; border-radius:10px!important; }}
@media (max-width:768px) {{
  .ib-line {{ font-size:1.5rem!important; line-height:2rem!important; padding:16px!important; }}
  .gradio-container .gr-group {{ padding:1rem!important; }}
  .gradio-container h1 {{ font-size:1.8rem!important; }}
}}
@media (max-width:480px) {{
  .ib-line {{ font-size:1.3rem!important; line-height:1.8rem!important; padding:12px!important; }}
  .gradio-container h1 {{ font-size:1.5rem!important; }}
}}
"""
# Stylesheet is built once at import time and handed to gr.Blocks below.
CSS = build_css()

with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")) as demo:
    # ---- Header ----
    with gr.Group():
        title = gr.Markdown(f"# {LABELS['ES']['title']}")
        subtitle = gr.Markdown(f"*{LABELS['ES']['subtitle']}*")

    # ---- UI language + translation direction ----
    with gr.Row():
        combo = gr.Dropdown(choices=["ES","EN"], value="ES", label=LABELS["ES"]["combo"])
        direction = gr.Radio(choices=LABELS["ES"]["dir_opts"], value="ES → NI", label=LABELS["ES"]["dir"])

    # ---- Documentation accordions (one per DOC entry) ----
    with gr.Group():
        doc_header = gr.Markdown(f"## {LABELS['ES']['doc_header']}")
        acc_titles = LABELS["ES"]["acc_titles"]
        with gr.Accordion(acc_titles[0], open=False) as acc1:
            md1 = gr.Markdown(DOC["ES"][0])
        with gr.Accordion(acc_titles[1], open=False) as acc2:
            md2 = gr.Markdown(DOC["ES"][1])
        with gr.Accordion(acc_titles[2], open=False) as acc3:
            md3 = gr.Markdown(DOC["ES"][2])
        with gr.Accordion(acc_titles[3], open=False) as acc4:
            md4 = gr.Markdown(DOC["ES"][3])
        with gr.Accordion(acc_titles[4], open=False) as acc5:
            md5 = gr.Markdown(DOC["ES"][4])
        with gr.Accordion(acc_titles[5], open=False) as acc6:
            md6 = gr.Markdown(DOC["ES"][5])
        with gr.Accordion(acc_titles[6], open=False) as acc7:
            md7 = gr.Markdown(DOC["ES"][6])
        with gr.Accordion(acc_titles[7], open=False) as acc8:
            md8 = gr.Markdown(DOC["ES"][7])
        with gr.Accordion(acc_titles[8], open=False) as acc9:
            md9 = gr.Markdown(DOC["ES"][8])
        with gr.Accordion(acc_titles[9], open=False) as acc10:
            md10 = gr.Markdown(DOC["ES"][9])

    # ---- Input / outputs ----
    with gr.Group():
        es_in = gr.Textbox(label=LABELS["ES"]["in_label_es"], placeholder=LABELS["ES"]["in_ph_es"], lines=5)
        btn_tr = gr.Button(LABELS["ES"]["btn"], variant="primary")
        with gr.Row():
            with gr.Column(scale=2):
                ni_out = gr.Textbox(label=LABELS["ES"]["out_lat_esni"], lines=5, interactive=False)
                loc_btn = gr.Button("🔊 Locutar", variant="secondary", visible=False)
                audio_out = gr.Audio(label=LABELS["ES"]["out_audio"], type="numpy")
            with gr.Column(scale=1):
                ib_out = gr.HTML(label=LABELS["ES"]["out_ib"])

    def do_translate(text, dir_label, sel_lang="ES"):
        """Translate *text* in the selected direction.

        Returns updates for (text output, Iberian line, speech button, audio).
        The output label follows the selected UI language (*sel_lang*), so an
        English UI no longer falls back to Spanish labels after translating.
        """
        L = LABELS.get(sel_lang) or LABELS["ES"]
        empty_ib = "<div class='ib-line'></div>"
        if not text or not text.strip():
            return (gr.update(value=""),
                    gr.update(value=empty_ib),
                    gr.update(visible=False),
                    gr.update(value=None))
        if dir_label.startswith("ES"):
            latin, ib = translate(text)
            # The Iberian line is injected as HTML, so it must be escaped.
            ib_html = "<div class='ib-line'>" + escape(ib) + "</div>"
            return (gr.update(label=L["out_lat_esni"], value=latin),
                    gr.update(value=ib_html),
                    gr.update(visible=True),
                    gr.update(value=None))
        # NI → ES: no Iberian line and no speech.
        es_text = translate_ni_to_es(text)
        return (gr.update(label=L["out_lat_nies"], value=es_text),
                gr.update(value=empty_ib),
                gr.update(visible=False),
                gr.update(value=None))

    btn_tr.click(do_translate, [es_in, direction, combo], [ni_out, ib_out, loc_btn, audio_out])

    def run_locution(latin_text, dir_label):
        """Synthesize speech for the Neo-Iberian output (ES → NI direction only)."""
        if dir_label.startswith("ES"):
            return synthesize_speech(latin_text)
        return None

    loc_btn.click(run_locution, [ni_out, direction], audio_out)

    def switch_lang(sel_lang, dir_label):
        """Relabel the whole UI for *sel_lang*, keeping the current direction."""
        L = LABELS[sel_lang]
        es_to_ni = dir_label.startswith("ES")
        in_label = L["in_label_es"] if es_to_ni else L["in_label_ni"]
        in_ph = L["in_ph_es"] if es_to_ni else L["in_ph_ni"]
        out_lab = L["out_lat_esni"] if es_to_ni else L["out_lat_nies"]

        # One (accordion label, markdown body) pair per documentation panel,
        # in the same order as the outputs wired in combo.change below.
        doc_updates = []
        for acc_title, doc_md in zip(L["acc_titles"], DOC[sel_lang]):
            doc_updates.append(gr.update(label=acc_title))
            doc_updates.append(gr.update(value=doc_md))

        return (
            gr.update(value=f"# {L['title']}"),
            gr.update(value=f"*{L['subtitle']}*"),
            gr.update(label=L["combo"], value=sel_lang),
            gr.update(label=L["dir"], choices=L["dir_opts"], value=dir_label),
            gr.update(value=f"## {L['doc_header']}"),
            *doc_updates,
            gr.update(label=in_label, placeholder=in_ph),
            gr.update(label=out_lab),
            gr.update(label=L["out_ib"]),
            gr.update(label=L["out_audio"]),
            gr.update(value=L["btn"])
        )

    combo.change(
        switch_lang,
        [combo, direction],
        [title, subtitle, combo, direction, doc_header,
         acc1, md1, acc2, md2, acc3, md3, acc4, md4, acc5, md5,
         acc6, md6, acc7, md7, acc8, md8, acc9, md9, acc10, md10,
         es_in, ni_out, ib_out, audio_out, btn_tr]
    )

    def switch_direction(dir_label, sel_lang):
        """Relabel input/output for the new direction and clear previous results."""
        L = LABELS[sel_lang]
        es_to_ni = dir_label.startswith("ES")
        in_label = L["in_label_es"] if es_to_ni else L["in_label_ni"]
        in_ph = L["in_ph_es"] if es_to_ni else L["in_ph_ni"]
        out_lab = L["out_lat_esni"] if es_to_ni else L["out_lat_nies"]
        return (gr.update(label=in_label, placeholder=in_ph),
                gr.update(label=out_lab, value=""),
                gr.update(value="<div class='ib-line'></div>"),
                gr.update(visible=es_to_ni),  # speech button only for ES → NI
                gr.update(value=None))

    direction.change(
        switch_direction,
        [direction, combo],
        [es_in, ni_out, ib_out, loc_btn, audio_out]
    )

# Script entry point: calls queue() on the Blocks app and then launch().
# Nothing is launched when the module is merely imported.
if __name__ == "__main__":
    demo.queue().launch()