Spanish_NeoIberianNewDesign2

Sleeping

App Files Files Community

Spanish_NeoIberianNewDesign2 / app.py

LoloSemper

Upload 4 files

e6f7230 verified about 1 month ago

Raw

History Blame

143 kB

	# app.py — Traductor Español ↔ Neoíbero (BI-ONLY 1:1 estricto, determinista)
	# UI completa + CSS "íbero" + TTS + Línea ibérica (codificación appOld)
	# Requiere un ÚNICO CSV con superficies exactas (UTF-8) y columnas:
	# - source_es (o es/es_surface)
	# - target_ni (o ni/ni_surface)
	# - pair_id (opcional)
	#
	# El motor mantiene 1:1 exacto por superficie, pero aplica heurísticas ligeras ES→NI para desambiguar homógrafos nombre/verbo e infinitivos aislados.
	# Puntuación y números pasan tal cual. Desconocidos -> [SIN-LEX:...] / [?:...]
	# Determinismo NI→ES: entradas NI duplicadas (ambigüas) quedan bloqueadas y se rinden como [AMB-NI:...]
	#
	# v94 (2026-05-09): Revertido el sistema simple v93 a favor del sistema enclítico
	# convencional. Se asume que el lex contiene las formas con clíticos pegados al
	# verbo (parche 059 con convención corta: "buŕdam-ka-nei", "onkot-ka-iki", etc.).
	# El motor respeta las palabras enteras del lex y solo descompone si NO están.

	import gradio as gr
	import os, csv, re, base64, unicodedata, gzip
	import torch
	from transformers import AutoProcessor, VitsModel
	import numpy as np
	from html import escape

	# ====== cache ======
	# Point the Hugging Face caches at /tmp unless the environment already
	# names a location for them.
	os.environ.setdefault('TRANSFORMERS_CACHE', '/tmp/cache')
	os.environ.setdefault('HF_HOME', '/tmp/hf')

	DEBUG_MODE = False


	def debug_print(msg):
	    """Print *msg* prefixed with ``[DEBUG]`` when ``DEBUG_MODE`` is true."""
	    if not DEBUG_MODE:
	        return
	    print(f"[DEBUG] {msg}")

	# ====== util ======
	def _open_maybe_gzip(path):
	    """Open *path* for reading as UTF-8 text.

	    Paths whose string form ends in ``.gz`` are opened through ``gzip`` in
	    text mode; everything else uses the built-in ``open``. ``newline=""``
	    is passed in both cases.
	    """
	    # The CSV must be UTF-8 (avoids mojibake).
	    compressed = str(path).endswith(".gz")
	    opener, mode = (gzip.open, "rt") if compressed else (open, "r")
	    return opener(path, mode, encoding="utf-8", newline="")

	def norm(x):
	    """Return ``str(x)`` without surrounding whitespace; ``""`` for None."""
	    if x is None:
	        return ""
	    return str(x).strip()


	def lower(x):
	    """Return ``norm(x)`` lowercased."""
	    return norm(x).lower()


	def fold(s: str) -> str:
	    """Return *s* NFD-decomposed with every combining mark (Mn) removed."""
	    decomposed = unicodedata.normalize('NFD', s or "")
	    kept = filter(lambda ch: unicodedata.category(ch) != "Mn", decomposed)
	    return ''.join(kept)

	# ====== rutas ======
	def _cand(*names):
	    """Return the first existing path among *names*.

	    Each name is tried as given and then under the ``salida`` directory,
	    in argument order. When nothing exists the first name is returned so
	    that callers still have something to show in messages.
	    """
	    lookups = (
	        path
	        for name in names
	        for path in (name, os.path.join("salida", name))
	    )
	    return next((path for path in lookups if os.path.exists(path)), names[0])

	# Prefer the "master/surface-ready" lexicons; then backward-compatible names.
	# The candidate order is the lookup priority used by _cand().
	_LEXICON_CANDIDATES = (
	    # LEXICON_v152, then v86..v70 and v68..v63 (v69 is not in the list).
	    *(f"LEXICON_v{ver}_IBERIAN.csv.gz"
	      for ver in (152, *range(86, 69, -1), *range(68, 62, -1))),
	    "LEXICON_v60_FINAL.csv.gz",
	    "LEXICON_v59_PATCHED.csv.gz",
	    "LEXICON_UNICO_1a1_v43_all_verbs.csv.gz",
	    "LEXICON_UNICO_1a1_v42_verbs_fix.csv.gz",
	    "LEXICON_UNICO_1a1_v41_family_fix.csv.gz",
	    "LEXICON_UNICO_1a1_v40_accent_fix.csv.gz",
	    "MASTER_SURFACE_READY.csv.gz",
	    "MASTER_REEXTENDED.csv.gz",
	    "BI_SURFACE_READY.csv.gz",
	    "HF_Pairs_BI_REEXTENDED.csv.gz",
	    "HF_Pairs_BI_EXPANDED1_EXTENDED_FILLED.csv.gz",
	    "HF_Pairs_BI_EXPANDED1.csv.gz",
	)
	CSV_BI = _cand(*_LEXICON_CANDIDATES)

	# ====== strict BI structures ======
	# Key = exact surface form, lowercased.
	# Value = (original surface on the opposite side, pair_id).
	ES2NI: dict = {}        # es_surface_lower -> (ni_surface, pair_id)
	NI2ES: dict = {}        # ni_surface_lower -> (es_surface, pair_id)
	ES2NI_VERB: dict = {}   # verbal alternative when ES2NI holds a noun
	ES2NI_POS: dict = {}    # es_surface_lower -> POS of the main ES2NI entry
	ES2NI_MORPH: dict = {}  # es_surface_lower -> morphology (PRS, PST, IMP, FUT, ...)

	# N-grams / phrases:
	ESPHRASE2NI: dict = {}
	NIPHRASE2ES: dict = {}
	MAX_NGRAM = 3

	# fold -> canonical maps (per the original note, they are filled in
	# load_bi_strict_and_diagnose).
	ES_FOLD: dict = {}      # fold("carne") -> "carne" (also fold("carné") -> "carne")
	NI_FOLD: dict = {}

	# ====== punctuation / minimal tokenisation ======
	# v99: internal marker that stands for a user line break. It is registered
	# as visible punctuation but is neither in SENT_END nor in CLAUSE_BREAKS;
	# per the original note, the translator turns it back into "\n" at the end.
	NEWLINE_TOK = "⏎"
	VISIBLE_PUNCT = set(",.;:!?¡¿…()[]{}\"'«»—–“”‘’") | {NEWLINE_TOK}

	_num_re = re.compile(r"^\d+([.,:]\d+)?$")


	def is_number(tok: str) -> bool:
	    """Return True for digits optionally followed by ``.``/``,``/``:`` and digits."""
	    return _num_re.fullmatch(tok or "") is not None

	# --- digits -> Neo-Iberian numerals (vigesimal base) ---
	_NI_UNITS = {
	    0: '', 1: 'ban', 2: 'bi', 3: 'irur', 4: 'laur', 5: 'borste',
	    6: 'sei', 7: 'sisbi', 8: 'sorse', 9: 'bedar', 10: 'abar',
	}
	_NI_TWENTIES = {1: 'orkei', 2: 'binorkei', 3: 'irurokei', 4: 'laurokei'}


	def digit_to_ni(tok: str) -> str:
	    """Convert an integer given as a digit string to a Neo-Iberian numeral.

	    Values from 1 to 999 are spelled out; anything that ``int()`` rejects,
	    values <= 0 and values above 999 are returned unchanged.
	    """
	    try:
	        value = int(tok)
	    except (ValueError, TypeError):
	        return tok
	    if value <= 0 or value > 999:
	        return tok  # out of the supported range: pass through
	    if value <= 10:
	        return _NI_UNITS[value]
	    if value < 20:
	        return "abar-ke-" + _NI_UNITS[value - 10]
	    if value < 100:
	        score, rest = divmod(value, 20)
	        head = _NI_TWENTIES.get(score, tok)
	        if rest == 0:
	            return head
	        if rest < 10:
	            return f"{head}-ke-{_NI_UNITS[rest]}"
	        if rest == 10:
	            return f"{head}-abar"
	        return f"{head}-abar-ke-{_NI_UNITS[rest - 10]}"
	    hundreds, rest = divmod(value, 100)
	    head = "atun" if hundreds == 1 else f"{_NI_UNITS[hundreds]}-atun"
	    if rest == 0:
	        return head
	    # The remainder (1..99) is spelled with the same rules.
	    return f"{head}-ke-{digit_to_ni(str(rest))}"

	# --- clause separators + atomic placeholders ---
	CLAUSE_BREAKS = set(",;—–:")
	PLACEHOLDER_RE = re.compile(r"^\[[^\]]+\]$")


	def is_placeholder(tok: str) -> bool:
	    """Return True when *tok* is a single non-empty ``[...]`` placeholder."""
	    return PLACEHOLDER_RE.match(tok or "") is not None

	# Placeholder key produced by simple_tokenize, optionally followed by the
	# modal suffix "-na" or "-ba".
	_BRK_TOKEN_RE = re.compile(r"__BRK(\d+)__(?:-(na|ba))?")


	def _restore_brk(tok, protected):
	    """Replace a ``__BRKn__`` key with the text it protects.

	    Args:
	        tok: token to inspect; anything that is not exactly a key
	            (optionally followed by ``-na``/``-ba``) is returned as is.
	        protected: list of protected spans, indexed by the key number.

	    Returns:
	        ``protected[n]`` plus the modal suffix when one was attached, or
	        *tok* itself when the index is out of range.
	    """
	    m = _BRK_TOKEN_RE.fullmatch(tok or "")
	    if not m:
	        return tok
	    idx = int(m.group(1))
	    suffix = m.group(2)
	    base = protected[idx] if 0 <= idx < len(protected) else tok
	    return base + (f"-{suffix}" if suffix else "")

	# Key that temporarily replaces a protected span while punctuation is split.
	_BRK_KEY_RE = re.compile(r"__BRK(\d+)__")


	def simple_tokenize(text: str):
	    """Minimal tokenisation that never splits ``[...]`` or ``[...]-na/-ba``.

	    Bracketed spans and decimal numbers / clock times (``12,50``, ``3.14``,
	    ``18:30``) are swapped for ``__BRKn__`` keys, punctuation is padded with
	    spaces, the text is split on whitespace, and every key is swapped back.

	    Keys are restored wherever they occur inside a token, so a protected
	    span glued to other characters (``12,50€``, ``[x]-na``) comes back
	    intact instead of leaking the internal key.

	    Returns:
	        list of token strings; ``[]`` for empty or missing input.
	    """
	    if not text:
	        return []
	    protected = []

	    def _protect(match):
	        protected.append(match.group(0))
	        return f"__BRK{len(protected) - 1}__"

	    def _unprotect(match):
	        idx = int(match.group(1))
	        # A key-looking string typed by the user may have no protected span.
	        return protected[idx] if idx < len(protected) else match.group(0)

	    t = re.sub(r"\[[^\]]*\]", _protect, text.strip())
	    # Protect decimals and times so the punctuation padding below cannot
	    # break them apart.
	    t = re.sub(r"\d+[.,:]\d+", _protect, t)
	    t = re.sub(r"([,.;:!?¡¿…()\[\]{}\"'«»—–“”‘’])", r" \1 ", t)
	    # str.split() already collapses whitespace runs.
	    return [_BRK_KEY_RE.sub(_unprotect, tok) for tok in t.split()]

	# Spanish enclitic pronouns, longest first so that "lo" cannot match
	# before "los".
	_ENCLITICS = (
	    'los', 'las', 'les', 'nos',
	    'me', 'te', 'lo', 'la', 'le', 'se', 'os',
	)
	# Translation table: lowercase acute-accented vowel -> plain vowel.
	_ACCENTED_VOWELS = {ord(src): ord(dst) for src, dst in zip('áéíóú', 'aeiou')}


	def _strip_accents(s):
	    """Return *s* with lowercase á/é/í/ó/ú replaced by a/e/i/o/u."""
	    return s.translate(_ACCENTED_VOWELS)

	def expand_enclitics(toks):
	    """Split verb+enclitic forms that the lexicon does not list whole.

	    A token is left untouched when it is not purely alphabetic, or when
	    its lowercase form (with or without acute accents) is a key of ES2NI.
	    Otherwise the token is split into a verbal stem plus one or two
	    enclitic pronouns when such a stem can be found in the lexicon. The
	    pieces are ordinary tokens without special marks.

	    Fallback examples from the original notes (they depend on the lexicon):
	        'ayudarme'   -> ['ayudar', 'me']
	        'tocarlo'    -> ['tocar', 'lo']
	        'ayudándome' -> ['ayudando', 'me']
	        'dárselo'    -> ['darse', 'lo']

	    Returns *toks* itself when it is empty or when ES2NI / ES2NI_POS are
	    empty; otherwise a new list.
	    """
	    if not toks or not ES2NI or not ES2NI_POS:
	        return toks

	    def _is_lex_verb(word):
	        return word in ES2NI and ES2NI_POS.get(word, "") == "V"

	    def _stem_is_verb_or_has_infinitive(stem):
	        """Is this stem a known verb, or does it have an infinitive in the lexicon?"""
	        if _is_lex_verb(stem):
	            return True
	        if any(_is_lex_verb(stem + ending) for ending in ("r", "er", "ir")):
	            return True
	        # Undo the ue->o / ie->e diphthong before looking for the infinitive.
	        for diph, plain in (("ue", "o"), ("ie", "e")):
	            at = stem.rfind(diph)
	            if at < 0:
	                continue
	            root = stem[:at] + plain + stem[at + 2:]
	            if root and root[-1] in "ae":
	                root = root[:-1]
	            if any(_is_lex_verb(root + ending) for ending in ("ar", "er", "ir")):
	                return True
	        return False

	    def _verbal_form(stem):
	        """Return *stem* (as is, else without accents) if verbal, else None."""
	        if _stem_is_verb_or_has_infinitive(stem):
	            return stem
	        bare = _strip_accents(stem)
	        if bare != stem and _stem_is_verb_or_has_infinitive(bare):
	            return bare
	        return None

	    def _find_split(word):
	        """Return (stem, first_clitic, second_clitic_or_None) or None."""
	        for outer in _ENCLITICS:
	            if not word.endswith(outer):
	                continue
	            stem = word[:-len(outer)]
	            if len(stem) < 2:
	                continue
	            if len(stem) == 2 and stem not in ES2NI:
	                continue
	            # v124b: a stem that is a known non-verb (N, ADJ, DET, PRON...)
	            # must not be split: "estela" is not "este" + "la".
	            if stem in ES2NI:
	                stem_pos = ES2NI_POS.get(stem, "")
	                if stem_pos and stem_pos != "V":
	                    continue
	            found = _verbal_form(stem)
	            if found is not None:
	                return (found, outer, None)
	            # Double clitic: the stem itself ends in a clitic.
	            # v124: inner stems shorter than 3 characters are rejected to
	            # avoid "estela" -> ('es', 'te', 'la').
	            for inner in _ENCLITICS:
	                if not stem.endswith(inner):
	                    continue
	                inner_stem = stem[:-len(inner)]
	                if len(inner_stem) < 3:
	                    continue
	                found = _verbal_form(inner_stem)
	                if found is not None:
	                    return (found, inner, outer)
	        return None

	    out = []
	    for tok in toks:
	        word = tok.lower()
	        # v94: a whole word found in the lexicon is respected as is.
	        if not word.isalpha() or word in ES2NI:
	            out.append(tok)
	            continue
	        bare_word = _strip_accents(word)
	        if bare_word != word and bare_word in ES2NI:
	            out.append(tok)
	            continue

	        pieces = _find_split(word)
	        if pieces is None:
	            out.append(tok)
	            continue
	        stem, first, second = pieces
	        if tok[0].isupper():
	            stem = stem[0].upper() + stem[1:]
	        # v94: plain tokens; the engine translates them separately.
	        out.extend((stem, first))
	        if second:
	            out.append(second)
	    return out

	def detokenize(tokens):
	    """Join *tokens* with spaces and tighten the spacing around punctuation.

	    The rules run in order: no space before ``, . ; : ! ?``; no space after
	    ``¿ ¡``; no space just inside parentheses; v110: no space after the
	    openers ``« “ ‘ [`` nor before the closers ``» ” ’ ]``; finally runs of
	    whitespace collapse to one space and the result is stripped.
	    """
	    rules = (
	        (r"\s+([,.;:!?])", r"\1"),
	        (r"([¿¡])\s+", r"\1"),
	        (r"\(\s+", "("),
	        (r"\s+\)", ")"),
	        (r"([«“‘\[])\s+", r"\1"),
	        (r"\s+([»”’\]])", r"\1"),
	        (r"\s{2,}", " "),
	    )
	    text = " ".join(tokens)
	    for pattern, replacement in rules:
	        text = re.sub(pattern, replacement, text)
	    return text.strip()

	# Unambiguous verb+enclitic pairs. After NI->ES, "ayudarme" arrives as
	# "ayudar me" (two words) because the ES->NI side split the token. The
	# patterns below are used to glue unequivocal forms back together.
	# Alternatives are separated with a plain "|": an escaped "\|" would match
	# a literal pipe character and none of these patterns could ever match.
	_SAFE_CLITICS_RE = r"(me|te|nos|os|se)"
	_AMBIG_CLITICS_RE = r"(lo|la|le|los|las|les)"
	# Lookahead context in which an ambiguous clitic (lo/la/le...) is taken
	# to belong to the preceding verb.
	_SAFE_AFTER_AMBIG = (
	    r"(?:\s*[.,;:!?)\]»\"”—–]"
	    r"|\s+(?:y|o|pero|sino|aunque|mientras|porque|si|cuando|donde|que|"
	    r"también|tampoco|ya|no|todavía|después|antes|ahora|luego|aquí|allí|"
	    r"ahí|así|sólo|solo|nunca|jamás|siempre|"
	    r"bien|mal|mucho|poco|muy|más|menos|tan|todo|todos|nada|algo|"
	    r"hoy|ayer|mañana|pronto|tarde|"
	    r"fuerte|fuertemente|suavemente|fijamente|atentamente|"
	    # v124: definite articles ("acariciándole el pelo")
	    r"el|la|los|las|"
	    # v124: common prepositions ("perderlo de vista")
	    r"de|en|a|por|para|con|sin|sobre|bajo|tras|entre|hasta|hacia|desde|"
	    r"un|una|algún|alguna)\b"
	    # v128: any adverb in -mente ("abrazando los efusivamente" ->
	    # "abrazándolos efusivamente").
	    r"|\s+[a-záéíóúñü]+mente\b"
	    r"|\s+otra\s+vez\b"
	    r"|\s+otras\s+veces\b"
	    # v124: placeholder [SIN-LEX:...] / [AMB-NI:...] / [?:...]
	    r"|\s+\["
	    # "cada / todos los / todas las" + optional quantity + time noun
	    r"|\s+(?:cada|todos\s+los|todas\s+las)\s+"
	    r"(?:\d+\s+|(?:dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|"
	    r"doce|trece|catorce|quince|dieciséis|diecisiete|dieciocho|diecinueve|"
	    r"veinte|treinta|cuarenta|cincuenta|sesenta|setenta|ochenta|noventa|"
	    r"cien|mil|pocos|pocas|muchos|muchas|tantos|tantas|varios|varias|"
	    r"cuantos|cuantas|algunos|algunas)\s+)?"
	    r"(?:día|días|mañana|mañanas|tarde|tardes|noche|noches|mes|meses|"
	    r"año|años|semana|semanas|hora|horas|momento|momentos|vez|veces)\b"
	    r"|\s$|\s\n)"
	)
	_FUSE_INF_SAFE_RE = re.compile(
	    r"\b([a-záéíóúñü]+(?:ar|er|ir))\s+" + _SAFE_CLITICS_RE + r"\b",
	    re.IGNORECASE)
	_FUSE_INF_AMBIG_RE = re.compile(
	    r"\b([a-záéíóúñü]+(?:ar|er|ir))\s+" + _AMBIG_CLITICS_RE
	    + r"(?=" + _SAFE_AFTER_AMBIG + r")",
	    re.IGNORECASE)
	_FUSE_GER_SAFE_RE = re.compile(
	    r"\b([a-záéíóúñü]+(?:ando|iendo|yendo))\s+" + _SAFE_CLITICS_RE + r"\b",
	    re.IGNORECASE)
	_FUSE_GER_AMBIG_RE = re.compile(
	    r"\b([a-záéíóúñü]+(?:ando|iendo|yendo))\s+" + _AMBIG_CLITICS_RE
	    + r"(?=" + _SAFE_AFTER_AMBIG + r")",
	    re.IGNORECASE)
	_FUSE_VERB_SAFE_RE = re.compile(
	    r"\b([a-záéíóúñü]+)\s+" + _SAFE_CLITICS_RE + r"\b",
	    re.IGNORECASE)
	_FUSE_VERB_AMBIG_RE = re.compile(
	    r"\b([a-záéíóúñü]+)\s+" + _AMBIG_CLITICS_RE
	    + r"(?=" + _SAFE_AFTER_AMBIG + r")",
	    re.IGNORECASE)
	# Gerund ending -> same ending carrying the written accent it needs once
	# a clitic is attached.
	_GER_ACCENT_MAP = (("ando", "ándo"), ("iendo", "iéndo"), ("yendo", "yéndo"))
	# v114: PRS removed. Present-tense verbs do not take enclitics in Spanish
	# ("es lo más bonito", never "eslo"). Only IMP ("guárdalo"), INF
	# ("contarlo") and GER ("contándolo") do.
	_MORPH_ADMITS_ENCLITIC = {"IMP", "INF", "GER"}

	# The clitic pronouns themselves are never hosts.
	_CLITIC_FORMS = frozenset(
	    {"me", "te", "se", "nos", "os", "lo", "la", "le", "los", "las", "les"})
	# Function words and fixed forms that must never be fused with a clitic.
	_NEVER_CLITIC_HOSTS = frozenset({
	    "como", "mientras", "para", "sobre", "luego", "casi", "según",
	    "salvo", "bajo", "sin", "pasada", "vista", "puesto", "dada",
	    "siendo", "habiendo"})
	# v134: frequent nouns/adjectives with a marginal homograph verb (nadar,
	# penar, rosar...). Without this list "nada me sorprende" would be fused
	# as "nádame". Only words where a wrong fusion is the bigger risk.
	_NOUN_HOMOGRAPH_VETO = frozenset({
	    "agua", "cinta", "clara", "copa", "fina", "goma", "justa",
	    "maja", "manga", "mata", "misa", "mosca", "nada", "novela",
	    "novia", "obra", "pena", "perla", "punta", "raya", "recta",
	    "rima", "rosa", "sopa", "tabla", "taza", "tela", "tinta",
	    "trenza", "tribu", "tumba", "vaca", "venta", "visa"})
	# v134b: subjunctive forms used as courtesy imperatives (usted/ustedes):
	# "diga me" -> "dígame". Forms with a frequent noun/adjective homograph
	# (cante, saque, toque, viva, meta, baile, salga, lea, crea) are left out.
	_COURTESY_IMPERATIVES = frozenset({
	    "abra", "abran", "aprenda", "aprendan", "arregle", "arreglen",
	    "ayude", "ayuden", "baje", "bajen", "beba", "beban", "cierre", "cierren",
	    "coma", "coman", "continúe", "continúen", "cuente", "cuenten",
	    "decida", "decidan", "deje", "dejen", "diga", "digan",
	    "disculpe", "disculpen", "empiece", "empiecen", "encuentre", "encuentren",
	    "envíe", "envíen", "escriba", "escriban", "escuche", "escuchen",
	    "espere", "esperen", "firme", "firmen", "hable", "hablen",
	    "haga", "hagan", "intente", "intenten", "limpie", "limpien",
	    "llame", "llamen", "llegue", "lleguen", "llene", "llenen",
	    "mire", "miren", "muestre", "muestren", "oiga", "oigan",
	    "pase", "pasen", "perdone", "perdonen", "permita", "permitan",
	    "piense", "piensen", "pinte", "pinten", "ponga", "pongan",
	    "pregunte", "pregunten", "prepare", "preparen", "prometa", "prometan",
	    "pruebe", "prueben", "quede", "queden", "reciba", "reciban",
	    "recuerde", "recuerden", "responda", "respondan", "sea", "sean",
	    "sepa", "sepan", "siga", "sigan", "sirva", "sirvan",
	    "suba", "suban", "tenga", "tengan", "termine", "terminen",
	    "tome", "tomen", "traiga", "traigan", "use", "usen",
	    "vaya", "vayan", "vea", "vean", "venga", "vengan",
	    "vuelva", "vuelvan"})


	def _verb_admits_enclitic(verb):
	    """Decide whether *verb* may be fused with a following clitic pronoun.

	    Checks run in this order (case-insensitive):
	      1. clitics, vetoed function words and vetoed nouns -> False;
	      2. courtesy-imperative whitelist -> True;
	      3. lexicon verb whose morphology is IMP/INF/GER -> True;
	      4. v126/v126b: word of 3+ letters ending in a/e/i/í whose regular
	         infinitive (word + r/er/ir) is a lexicon verb -> True. This
	         covers both PRS/IMP homographs ("come") and nouns with a verbal
	         homograph ("porta" / "portar");
	      5. v142: the same for diphthongising stems of 4+ letters
	         (cuenta/contar, vuelve/volver, siente/sentir) -> True;
	      6. anything else -> False.

	    The previous duplicated copy of check 4, the final "pure N/ADJ" filter
	    and the "not in lexicon" fallback could never change the result and
	    were removed.
	    """
	    v = (verb or "").lower()
	    if v in _CLITIC_FORMS or v in _NEVER_CLITIC_HOSTS or v in _NOUN_HOMOGRAPH_VETO:
	        return False
	    if v in _COURTESY_IMPERATIVES:
	        return True

	    def _is_lex_verb(word):
	        return word in ES2NI and ES2NI_POS.get(word, "") == "V"

	    # Main case: a verb whose morphology admits enclitics.
	    if _is_lex_verb(v) and ES2NI_MORPH.get(v, "") in _MORPH_ADMITS_ENCLITIC:
	        return True

	    if len(v) >= 3 and v[-1] in 'aeií':
	        if any(_is_lex_verb(v + suf) for suf in ('r', 'er', 'ir')):
	            return True
	        if len(v) >= 4:
	            for diph, base in (("ue", "o"), ("ie", "e")):
	                idx = v.rfind(diph)
	                if idx < 0:
	                    continue
	                root = v[:idx] + base + v[idx + 2:]
	                # Drop the theme vowel before adding the infinitive ending.
	                if root[-1] in 'ae':
	                    root = root[:-1]
	                if any(_is_lex_verb(root + suf) for suf in ('ar', 'er', 'ir')):
	                    return True
	    return False

	def _fuse_imp_with_accent(verb, clit):
	    """Glue *clit* onto *verb*, adding the written accent the fusion needs.

	    The stressed vowel is taken to be the second-to-last vowel letter of
	    *verb* (or its only vowel). If at least two vowel letters follow it
	    in the fused word, that vowel receives an acute accent
	    ("guarda" + "lo" -> "guárdalo"). Case is preserved.

	    A host that already carries a written accent is fused unchanged: a
	    Spanish word has at most one tilde, so "dígame" + "lo" gives
	    "dígamelo" and never "dígámelo".

	    NOTE(review): vowel letters are counted one by one, so hosts whose
	    stress falls on a diphthong ("oiga", "limpie") may get the accent on
	    the wrong letter; confirm against the lexicon before relying on it.
	    """
	    vowels = "aeiouáéíóú"
	    fused = verb + clit
	    if any(ch in "áéíóúÁÉÍÓÚ" for ch in verb):
	        return fused
	    fused_vowels = [i for i, ch in enumerate(fused) if ch.lower() in vowels]
	    if len(fused_vowels) < 3:
	        return fused
	    verb_vowels = [i for i in fused_vowels if i < len(verb)]
	    if not verb_vowels:
	        return fused
	    tonic = verb_vowels[0] if len(verb_vowels) == 1 else verb_vowels[-2]
	    # The accent is only written when two or more vowels follow the stress.
	    if sum(1 for i in fused_vowels if i > tonic) < 2:
	        return fused
	    accent_map = {'a': 'á', 'e': 'é', 'i': 'í', 'o': 'ó', 'u': 'ú'}
	    marked = accent_map.get(fused[tonic].lower())
	    if marked is None:
	        return fused
	    if fused[tonic].isupper():
	        marked = marked.upper()
	    return fused[:tonic] + marked + fused[tonic + 1:]

	def _is_real_infinitive(word):
	    """Return True when *word* (lowercased) is an ES2NI key tagged POS "V".

	    Despite the name, only the part of speech is checked, not the ending.
	    """
	    key = (word or "").lower()
	    return key in ES2NI and ES2NI_POS.get(key, "") == "V"

	def fuse_enclitics_es(es_text):
	    """Re-attach clitic pronouns that appear as separate words in Spanish text.

	    Passes, in order: infinitive + clitic, gerund + clitic (the gerund
	    gets its written accent), other verb forms accepted by
	    ``_verb_admits_enclitic`` + clitic, and finally a second (ambiguous)
	    clitic after a form that already ends in me/te/se/nos/os.

	    Returns *es_text* unchanged when it is empty.
	    """
	    if not es_text:
	        return es_text

	    def _is_relative_lo_que(clit, m):
	        """True when lo/la/los/las is directly followed by "que".

	        That sequence is a relative ("lo que quieras"), not a direct
	        object clitic, so it must not be fused:
	            "Pregúntame lo que quieras" -> no fusion
	            "decirle que venga"         -> fusion ("le" is not covered)
	            "llévalo de aquí"           -> fusion (no "que" after it)
	        """
	        if (clit or "").lower() not in ("lo", "la", "los", "las"):
	            return False
	        return bool(re.match(r"\s+que\b", m.string[m.end():], re.IGNORECASE))

	    def _ger_accent(verb, clit):
	        for plain, accented in _GER_ACCENT_MAP:
	            if verb.lower().endswith(plain):
	                return verb[:-len(plain)] + accented + clit
	        return verb + " " + clit

	    def _inf_safe(m):
	        verb, clit = m.group(1), m.group(2)
	        return verb + clit if _is_real_infinitive(verb) else m.group(0)

	    def _inf_ambig(m):
	        verb, clit = m.group(1), m.group(2)
	        if not _is_real_infinitive(verb) or _is_relative_lo_que(clit, m):
	            return m.group(0)
	        return verb + clit

	    def _ger_safe(m):
	        verb, clit = m.group(1), m.group(2)
	        # _is_real_infinitive only checks "lexicon entry with POS V".
	        if not _is_real_infinitive(verb):
	            return m.group(0)
	        return _ger_accent(verb, clit)

	    def _ger_ambig(m):
	        verb, clit = m.group(1), m.group(2)
	        if not _is_real_infinitive(verb) or _is_relative_lo_que(clit, m):
	            return m.group(0)
	        return _ger_accent(verb, clit)

	    es_text = _FUSE_INF_SAFE_RE.sub(_inf_safe, es_text)
	    es_text = _FUSE_INF_AMBIG_RE.sub(_inf_ambig, es_text)
	    es_text = _FUSE_GER_SAFE_RE.sub(_ger_safe, es_text)
	    es_text = _FUSE_GER_AMBIG_RE.sub(_ger_ambig, es_text)

	    def _fusable_host(verb):
	        """Infinitives and gerunds were handled above; the rest must qualify."""
	        v = verb.lower()
	        if v.endswith(("ar", "er", "ir", "ando", "iendo", "yendo")):
	            return False
	        return _verb_admits_enclitic(v)

	    def _verb_safe(m):
	        verb, clit = m.group(1), m.group(2)
	        if not _fusable_host(verb):
	            return m.group(0)
	        return _fuse_imp_with_accent(verb, clit)

	    def _verb_ambig(m):
	        verb, clit = m.group(1), m.group(2)
	        if not _fusable_host(verb) or _is_relative_lo_que(clit, m):
	            return m.group(0)
	        return _fuse_imp_with_accent(verb, clit)

	    es_text = _FUSE_VERB_SAFE_RE.sub(_verb_safe, es_text)
	    es_text = _FUSE_VERB_AMBIG_RE.sub(_verb_ambig, es_text)

	    # Safe context after the ambiguous clitic of a DOUBLE fusion: shorter
	    # than the single-fusion context and WITHOUT "que", because
	    # "verb+pronoun + lo/la/los/las + que" usually starts a relative
	    # ("lo que pienso"), not a direct object of the verb. Alternatives are
	    # separated with a plain "|" so that they actually alternate.
	    _SAFE_AFTER_AMBIG_DOUBLE = (
	        r"(?:\s*[.,;:!?)\]»\"”—–]"  # punctuation
	        r"|\s+(?:y|o|pero|sino|aunque|mientras|porque|si|cuando|donde|"
	        r"también|tampoco|ya|no|todavía|después|antes|ahora|luego|aquí|allí|"
	        r"ahí|así|sólo|solo|nunca|jamás|siempre|"
	        r"bien|mal|mucho|poco|muy|más|menos|tan|todo|todos|nada|algo|"
	        r"hoy|ayer|mañana|pronto|tarde|"
	        r"fuerte|fuertemente|suavemente|fijamente|atentamente|"
	        r"un|una|algún|alguna)\b"
	        r"|\s+[a-záéíóúñü]+mente\b"
	        r"|\s+otra\s+vez\b"
	        r"|\s+otras\s+veces\b"
	        r"|\s+(?:cada|todos\s+los|todas\s+las)\s+"
	        r"(?:día|días|mañana|mañanas|tarde|tardes|noche|noches|mes|meses|"
	        r"año|años|semana|semanas|hora|horas|momento|momentos|vez|veces)\b"
	        r"|\s$|\s\n)"
	    )
	    _DOUBLE_CLIT_RE = re.compile(
	        r"\b([a-záéíóúñü]+(?:me|te|se|nos|os))\s+" + _AMBIG_CLITICS_RE +
	        r"(?=" + _SAFE_AFTER_AMBIG_DOUBLE + r")",
	        re.IGNORECASE)

	    def _double_clit(m):
	        word, clit2 = m.group(1), m.group(2)
	        host = None
	        for ending in ("me", "te", "se", "nos", "os"):
	            if not word.lower().endswith(ending):
	                continue
	            cand = word[:-len(ending)]
	            cand_noacc = _strip_accents(cand)
	            if cand.lower() in ES2NI or cand_noacc.lower() in ES2NI:
	                host = cand_noacc
	                break
	        if not host or not _verb_admits_enclitic(host):
	            return m.group(0)
	        if _is_relative_lo_que(clit2, m):
	            return m.group(0)
	        return _fuse_imp_with_accent(word, clit2)

	    return _DOUBLE_CLIT_RE.sub(_double_clit, es_text)

	# ====== Basque-style modality (-na / -ba) ======
	MODAL_SUFFIX_ENABLE = True
	MODAL_ONLY_ON_FINITE = True
	MODAL_STRIP_QE_IN_NI = True

	SENT_END = set(".!?…")
	OPEN_FOR = dict(zip("?!", "¿¡"))
	# v103: — and – added so that ¿/¡ are inserted after the dialogue dash.
	WRAP_PREFIX = set("«“‘([{\"'—–")
	PERS_ENDINGS = tuple("-n -śe -ek -śek -k".split())
	TAM_FINITE = tuple("-ke -bo -ta -ni -ir".split())

	def looks_like_finite_ni(tok: str) -> bool:
	    """Return True when *tok* ends like a finite Neo-Iberian verb form.

	    A trailing modal suffix (``-na`` / ``-ba``) is ignored. The rest must
	    end in one of TAM_FINITE, optionally followed by one of PERS_ENDINGS.
	    Empty tokens and tokens starting with ``[`` are never finite.
	    """
	    t = (tok or "").lower()
	    if not t or t.startswith("["):
	        return False
	    # Plain "|" alternation: "\|" would only match a literal pipe and the
	    # modal suffix would never be removed.
	    base = re.sub(r"-(na|ba)$", "", t)
	    endings = tuple(
	        tam + person
	        for tam in TAM_FINITE
	        for person in ("", *PERS_ENDINGS)
	    )
	    return base.endswith(endings)

	def last_content_index(tokens, start, end_exclusive):
	    """Return the index of the last non-punctuation token in a slice.

	    Scans ``tokens[start:end_exclusive]`` from the right and skips every
	    token found in VISIBLE_PUNCT. Returns -1 when nothing else is left.
	    """
	    for idx in range(end_exclusive - 1, start - 1, -1):
	        if tokens[idx] not in VISIBLE_PUNCT:
	            return idx
	    return -1

	def strip_qe_punct(tokens):
	    """Return *tokens* without the closing marks ``?`` and ``!``.

	    v131: only the closing signs are dropped, because the modal suffix
	    ``-na`` / ``-ba`` added by ``add_modal_suffixes_es2ni`` carries their
	    information. The opening ``¿`` / ``¡`` are kept exactly where the user
	    wrote them.
	    """
	    closers = ("?", "!")
	    return list(filter(lambda tok: tok not in closers, tokens))

	def _sits_between_numbers(tokens, i, sep):
	    """True when tokens[i] is *sep* and both of its neighbours are numbers."""
	    return (0 < i < len(tokens) - 1 and tokens[i] == sep and
	            is_number(tokens[i - 1]) and is_number(tokens[i + 1]))


	def _is_numeric_comma(tokens, i):
	    """True for a comma placed between two numeric tokens."""
	    return _sits_between_numbers(tokens, i, ",")


	def _is_time_colon(tokens, i):
	    """True for a colon placed between two numeric tokens."""
	    return _sits_between_numbers(tokens, i, ":")


	def _is_true_clause_break(tokens, i):
	    """True when tokens[i] is a clause separator, not part of a number or time."""
	    return (tokens[i] in CLAUSE_BREAKS
	            and not _is_numeric_comma(tokens, i)
	            and not _is_time_colon(tokens, i))

	def add_modal_suffixes_es2ni(tokens):
	    """Encode ``?`` / ``!`` as ``-na`` / ``-ba`` on the last content token.

	    For every closing ``?`` or ``!`` the last non-punctuation token since
	    the start of the current sentence receives ``-na`` (question) or
	    ``-ba`` (exclamation). When MODAL_STRIP_QE_IN_NI is set the closing
	    marks are then removed. Returns *tokens* untouched when
	    MODAL_SUFFIX_ENABLE is off.

	    v125: the suffix is ALWAYS appended, even when the word already ends
	    in a lexical ``-na`` / ``-ba`` (e.g. "mirgaŕ-na"); otherwise the
	    question mark would be lost. The inverse step removes only the last
	    suffix: "mirgaŕ-na-na" -> "mirgaŕ-na" + "?".
	    """
	    if not MODAL_SUFFIX_ENABLE:
	        return tokens
	    marked = tokens[:]
	    sentence_start = 0
	    for pos in range(len(marked)):
	        tok = marked[pos]
	        if tok == "?" or tok == "!":
	            host = last_content_index(marked, sentence_start, pos)
	            if host != -1:
	                marked[host] += "-na" if tok == "?" else "-ba"
	            sentence_start = pos + 1
	        elif tok in SENT_END:
	            sentence_start = pos + 1
	    return strip_qe_punct(marked) if MODAL_STRIP_QE_IN_NI else marked

	def strip_modal_suffixes_ni(tokens):
	    """Turn modal suffixes ``-na`` / ``-ba`` back into ``?`` / ``!``.

	    Tokens are buffered sentence by sentence. A token ending in ``-na`` or
	    ``-ba`` that is not itself an NI2ES key loses that suffix and closes
	    the sentence with ``?`` or ``!``. Opening ``¿`` / ``¡`` are passed
	    through where they stand and set the mode of the sentence they open.
	    Sentences without any mark are closed with ``.``.

	    Args:
	        tokens: sequence of Neo-Iberian tokens (list or tuple).

	    Returns:
	        a new list of tokens, or *tokens* itself when MODAL_SUFFIX_ENABLE
	        is off.
	    """
	    if not MODAL_SUFFIX_ENABLE:
	        return tokens

	    out = []
	    buf = []
	    pending_end = None
	    mode = None
	    qe_marks = ("¿", "?", "¡", "!")

	    def _emit(end_override=None, also_append=None):
	        """Flush the buffered sentence to ``out`` and reset the state."""
	        nonlocal mode, pending_end
	        local = [t for t in buf if t not in qe_marks]
	        if local:
	            end_tok = end_override or (
	                "?" if mode == "?" else "!" if mode == "!" else pending_end or ".")
	            out.extend(local)
	            # No closer when the last token already is one (avoids
	            # "Una vez le pregunté:.").
	            already_closed = local[-1] in {".", "!", "?", "…", ":", ";"}
	            # v131: a buffer holding only transparent tokens (line-break
	            # marker, dialogue dash, quotes) is not a sentence; closing it
	            # would print a stray "." before the next "¿" / "¡".
	            has_alpha = any(any(c.isalpha() for c in t) for t in local)
	            if not already_closed and has_alpha:
	                # v115: always close, also after a placeholder, so that
	                # consecutive sentences do not run together. A doubled
	                # final "." is removed at the end of this function.
	                out.append(end_tok)
	        buf.clear()
	        mode = None
	        pending_end = None
	        if also_append:
	            out.append(also_append)

	    # v96: add a sentinel "." only when the input does not already end in
	    # a sentence-final mark. list() keeps tuples working.
	    toks = list(tokens)
	    if not (toks and toks[-1] in SENT_END):
	        toks.append(".")

	    for t in toks:
	        if t in ("¿", "¡"):
	            # v131: what precedes the opener is a preamble (dialogue dash,
	            # vocative, conjunction...), so it is flushed WITHOUT a closer.
	            out.extend(x for x in buf if x not in qe_marks)
	            buf.clear()
	            out.append(t)
	            mode = "?" if t == "¿" else "!"
	            pending_end = None
	            continue
	        if t in ("?", "!") or t in SENT_END:
	            pending_end = t
	            _emit()
	            continue
	        if t in CLAUSE_BREAKS and mode in ("?", "!"):
	            buf.append(t)
	            continue

	        # Plain "|" alternation: "\|" would only match a literal pipe.
	        m = re.search(r"-(na|ba)$", (t or "").lower())
	        if m:
	            if (t or "").lower() in NI2ES:
	                # The suffix is lexical (the whole token is a word).
	                buf.append(t)
	                continue
	            t = t[:-len(m.group(0))]
	            if t:
	                buf.append(t)
	            mode = "?" if m.group(1) == "na" else "!"
	            _emit()
	            continue

	        if t:
	            buf.append(t)

	    if len(out) >= 2 and out[-1] == "." and out[-2] == ".":
	        out.pop()
	    return out



	# Diacritic accents inside questions.
	# Words accented wherever they appear inside a question:
	_DIACR_ALWAYS = dict(
	    cuando="cuándo", donde="dónde", como="cómo",
	    cuanto="cuánto", cuanta="cuánta",
	    cuantos="cuántos", cuantas="cuántas",
	    cuan="cuán", cual="cuál", cuales="cuáles",
	    adonde="adónde",
	)
	# Words accented only at the head of the question or of one of its clauses:
	_DIACR_HEAD_ONLY = dict(que="qué", quien="quién", quienes="quiénes")

	def apply_interrogative_tildes(tokens):
	    """Restore diacritic accents on question words inside ``¿ ... ?`` spans.

	    For every ``¿`` with a matching ``?`` (nested ``¿`` are balanced, and
	    any other SENT_END token before the match cancels the span):
	      * words in _DIACR_ALWAYS are accented anywhere in the span;
	      * words in _DIACR_HEAD_ONLY only as the first word of the span or
	        of a clause after ``,`` ``;`` ``:``;
	      * v115/v117: cuando/donde/como/adonde that are NOT the first word
	        stay unaccented when one of the next three tokens is a lexicon
	        verb tagged SBJ, IPFV or PST (subordinate clause, e.g.
	        "¿Lo recordarán cuando les contemos?").
	    The capital initial of the replaced word is kept.

	    Returns a new list; *tokens* is not modified.
	    """
	    out = list(tokens)
	    total = len(out)

	    def _starts_alpha(tok):
	        return bool(tok) and tok[0].isalpha()

	    def _closing_index(opener):
	        """Index of the ``?`` matching the ``¿`` at *opener*, or -1."""
	        depth = 0
	        for pos in range(opener + 1, total):
	            cur = out[pos]
	            if cur == "¿":
	                depth += 1
	            elif cur == "?":
	                if depth == 0:
	                    return pos
	                depth -= 1
	            elif cur in SENT_END:
	                return -1
	        return -1

	    def _subordinate_verb_follows(k, end):
	        """Is a SBJ/IPFV/PST lexicon verb among the next three tokens?"""
	        for step in range(1, 4):
	            if k + step >= end:
	                break
	            nxt = out[k + step]
	            if not _starts_alpha(nxt):
	                continue
	            nxt_l = nxt.lower()
	            if nxt_l not in ES2NI:
	                continue
	            morph = ES2NI_MORPH.get(nxt_l, '')
	            pos = ES2NI_POS.get(nxt_l, '')
	            if pos == 'V' and morph in ('SBJ', 'IPFV', 'PST'):
	                return True
	        return False

	    i = 0
	    while i < total:
	        if out[i] != "¿":
	            i += 1
	            continue
	        end = _closing_index(i)
	        if end < 0:
	            i += 1
	            continue

	        head_pending = True
	        for k in range(i + 1, end):
	            tok = out[k]
	            if not _starts_alpha(tok):
	                # A clause separator opens a new head position.
	                if tok in (",", ";", ":"):
	                    head_pending = True
	                continue
	            tok_l = tok.lower()
	            if tok_l in _DIACR_ALWAYS:
	                replacement = _DIACR_ALWAYS[tok_l]
	            elif head_pending and tok_l in _DIACR_HEAD_ONLY:
	                replacement = _DIACR_HEAD_ONLY[tok_l]
	            else:
	                replacement = None

	            if replacement and tok_l in ('cuando', 'donde', 'como', 'adonde'):
	                preceded = any(_starts_alpha(out[p]) for p in range(i + 1, k))
	                if preceded and _subordinate_verb_follows(k, end):
	                    replacement = None

	            if replacement is not None:
	                if tok[0].isupper():
	                    replacement = replacement[0].upper() + replacement[1:]
	                out[k] = replacement
	            head_pending = False
	        i = end + 1
	    return out

	def add_inverted_openers(tokens):
	out = tokens[:]
	START_BREAKS = SENT_END \| CLAUSE_BREAKS

	# v103: separar palabras-q ACENTUADAS (inequívocas: solo interrogativas)
	# de las SIN TILDE (ambiguas: pueden ser conjunciones/relativos).
	# En preguntas/exclamaciones se prioriza la primera acentuada del tramo.
	# Si no hay acentuadas, se usa la sin-tilde más cercana al cierre (?/!).
	EXCL_ACCENTED = {'qué','cuán','cuánto','cuánta','cuántos','cuántas','cómo'}
	EXCL_PLAIN = {'que','cuan','cuanto','cuanta','cuantos','cuantas','como'}
	INTERR_ACCENTED = {'qué','quién','quiénes','cuándo','dónde','cómo','cuál',
	'cuáles','cuán','cuánto','cuánta','cuántos','cuántas'}
	# v124: 'por' quitado para evitar que 'Por cierto, ¿sabes...?' se trate como
	# interrogativa pura (lo que movía '¿' al inicio absoluto antes de 'Por').
	INTERR_PLAIN = {'que','quien','quienes','cuando','donde','como','cual',
	'cuales','cuan','cuanto','cuanta','cuantos','cuantas'}

	def _is_true_start_break(idx):
	if out[idx] in SENT_END: return True
	if out[idx] == NEWLINE_TOK: return True # v103: salto de línea es break
	if out[idx] in CLAUSE_BREAKS: return _is_true_clause_break(out, idx)
	return False

	# v103: para encontrar el scope de una pregunta/exclamación, paramos
	# también en `:` (salvo que sea una hora `\d:\d`). Esto evita que un
	# encabezado como "Dime una cosa: ¿sigues escribiendo?" se trate como
	# una sola pregunta y el `¿` acabe antes de "Dime".
	def _is_scope_break(idx):
	if out[idx] in SENT_END: return True
	if out[idx] == NEWLINE_TOK: return True
	if out[idx] == ':' and not _is_time_colon(out, idx): return True
	if out[idx] == ';': return True
	return False

	i = 0
	while i < len(out):
	if out[i] in ("?", "!"):
	closer = out[i]; opener = OPEN_FOR[closer]
	accented_set = EXCL_ACCENTED if closer == "!" else INTERR_ACCENTED
	plain_set = EXCL_PLAIN if closer == "!" else INTERR_PLAIN

	# Retroceder hasta scope-break (SENT_END/NEWLINE_TOK/`:`/`;`).
	j = i - 1
	while j >= 0:
	if _is_scope_break(j):
	break
	j -= 1
	scope_start = j + 1

	# Buscar palabra-q en [scope_start, i):
	# 1) PRIMERA acentuada (apertura natural de la pregunta)
	# 2) si no hay acentuada, ÚLTIMA sin tilde (más cercana al cierre)
	qword_pos = -1
	for k in range(scope_start, i):
	if out[k].lower() in accented_set:
	qword_pos = k
	break
	if qword_pos == -1:
	for k in range(i - 1, scope_start - 1, -1):
	if out[k].lower() in plain_set:
	qword_pos = k
	break

	# v129: si no hay palabra-q en el scope inmediato y el break fue
	# un NEWLINE_TOK (salto de línea, no break sintáctico real),
	# extender el scope retrocediendo a través de NEWLINE_TOKs hasta
	# encontrar un break "real" (SENT_END / : / ;). Esto recoge
	# preguntas partidas por \n: "¿O la consideras\ndemasiado X?"
	# debe poner ¿ antes de 'O', no antes de 'demasiado'.
	# v131: ROLLBACK de v129+v130. La extensión de scope a través de
	# NEWLINE_TOK introducida en v129 (y refinada en v130) creaba más
	# bugs de los que arreglaba: posicionaba `¿` huérfanos antes de
	# saltos de línea, dentro de comillas de citas, etc. La filosofía
	# correcta es PRESERVAR los `¿/¡` del texto original (cambios en
	# strip_qe_punct y strip_modal_suffixes_ni). Esta función queda
	# como red de seguridad solo para textos descuidados que no traen
	# signos de apertura.

	if qword_pos != -1:
	# Posicionar ¿/¡ justo después del último break (CLAUSE/SENT)
	# antes de la palabra-q.
	cl_start = scope_start
	for k in range(qword_pos - 1, scope_start - 1, -1):
	if out[k] in CLAUSE_BREAKS:
	if _is_true_clause_break(out, k):
	cl_start = k + 1
	break
	if out[k] in SENT_END or out[k] == NEWLINE_TOK:
	cl_start = k + 1
	break
	start = cl_start
	else:
	# Pregunta/exclamación sí/no: ¿/¡ va al inicio absoluto
	start = scope_start

	# Saltar WRAP_PREFIX (comillas, paréntesis, —, …) iniciales
	k = start
	while k < i and out[k] in WRAP_PREFIX:
	k += 1
	# v131: si entre el último break sintáctico real (. ; : ! ? …) y `i`
	# ya existe un opener (el `¿/¡` del texto original que ahora se
	# preserva), NO insertar otro. Buscamos hacia atrás SIN parar en
	# NEWLINE_TOK ni CLAUSE_BREAKS, porque el usuario puede haber
	# escrito `¿X⏎Y?` o `¿X, Y?` con el `¿` lejos del `?`.
	anti_dup_start = 0
	for jj in range(i - 1, -1, -1):
	if out[jj] in SENT_END or out[jj] in (';', ':'):
	anti_dup_start = jj + 1
	break
	has_opener_already = any(out[kk] == opener for kk in range(anti_dup_start, i))
	if not has_opener_already:
	out.insert(k, opener); i += 1
	i += 1
	return out

	# ====== EXPANSIONES (deterministas, sólo ES→NI) ======
	EXPANSION_ENABLE = True
	FLAG_COLNAMES = ("flags","FLAGS","expand","EXPAND","tags","TAGS","morph","MORPH")
	FLAG_PLURAL = ("S",)
	FLAG_3PL = ("3","V3")

	VOWELS = "aeiouáéíóúüAEIOUÁÉÍÓÚÜ"

	def _has_flag(cell:str, wanted:tuple)->bool:
	c = (cell or "")
	return any(w in c for w in wanted)

	def _pluralize_es_form(s: str) -> str:
	if not s: return s
	sl = s.lower()
	if sl.endswith("z"):
	return s[:-1] + ("ces" if s[-1].islower() else "CES")
	if s[-1] not in VOWELS:
	return s + ("es" if s[-1].islower() else "ES")
	return s + ("s" if s[-1].islower() else "S")

	def _present_3pl_from_3sg(s: str) -> str:
	if not s: return s
	return s + ("n" if s[-1].islower() else "N")

	# ====== TTS (appOld) ======
	print("Cargando modelo de voz (opcional)…")
	device = "cuda" if torch.cuda.is_available() else "cpu"
	processor = model = None
	try:
	processor = AutoProcessor.from_pretrained("facebook/mms-tts-spa")
	model = VitsModel.from_pretrained("facebook/mms-tts-spa").to(device)
	print("Modelo de voz cargado.")
	except Exception as e:
	print(f"AVISO TTS: {e}")

	def add_reading_pauses(text: str, level:int=3) -> str:
	if level <= 1: return text
	t = re.sub(r",\s*", ", , ", text)
	t = re.sub(r"\.\s*", ". . ", text)
	return re.sub(r'\s+',' ',t).strip()

	def hispanize_for_tts(ni_text: str) -> str:
	text=unicodedata.normalize('NFC', (ni_text or "").lower())
	text=text.replace('ŕ','rr').replace('ś','s').replace('eś','es').replace('-', ' ')
	text=re.sub(r'\[.*?\]','',text); text=re.sub(r'\s+',' ',text).strip()
	return add_reading_pauses(text, 3)

	def synthesize_speech(text):
	if not text or not text.strip() or model is None or processor is None: return None
	try:
	inputs = processor(text=hispanize_for_tts(text), return_tensors="pt").to(device)
	with torch.no_grad(): output = model(**inputs).waveform
	speech_np = output.cpu().numpy().squeeze()
	mx = max(abs(speech_np.min()), abs(speech_np.max()))
	if mx>0: speech_np = speech_np/mx*0.9
	return (16000, speech_np.astype(np.float32))
	except Exception as e:
	print(f"Error TTS: {e}"); return None

	# ====== Línea ibérica (appOld) ======
	V = "aeiou"
	SYL_FOR = {
	"b":["‹BA›","‹BE›","‹BI›","‹BO›","‹BU›"],
	"d":["‹DA›","‹DE›","‹DI›","‹DO›","‹DU›"],
	"t":["‹TA›","‹TE›","‹TI›","‹TO›","‹TU›"],
	"g":["‹GA›","‹GE›","‹GI›","‹GO›","‹GU›"],
	"k":["‹KA›","‹KE›","‹KI›","‹KO›","‹KU›"]
	}
	ALPHA_FOR={"a":"‹A›","e":"‹E›","i":"‹I›","o":"‹O›","u":"‹U›","s":"‹S›","ś":"‹Ś›",
	"l":"‹L›","r":"‹R›","ŕ":"‹Ŕ›","n":"‹N›","m":"‹M›"}
	CODA_FOR={"":"","n":"‹N›","s":"‹S›","ś":"‹Ś›","r":"‹R›","ŕ":"‹Ŕ›","l":"‹L›","m":"‹M›","k":"‹K›","t":"‹T›"}

	def tokens_from_latin(ni:str)->str:
	out=[]; i=0; ni=unicodedata.normalize('NFC', (ni or "").lower())
	while i<len(ni):
	c=ni[i]
	if c=="p": c="b"
	if c=="-": out.append("—"); i+=1; continue
	if c in V:
	out.append(ALPHA_FOR.get(c, c.upper())); i+=1; continue
	if c in SYL_FOR and i+1<len(ni) and ni[i+1] in V:
	idx=V.index(ni[i+1]); tok=SYL_FOR[c][idx]
	coda=ni[i+2] if i+2<len(ni) else ""
	if coda in CODA_FOR and coda!="": tok+=CODA_FOR[coda]; i+=3
	else: i+=2
	out.append(tok); continue
	out.append(ALPHA_FOR.get(c, c.upper())); i+=1
	return "".join(out)

	KEYS_MODE = "full"
	KEYS_OVERRIDE = {}

	def georgeos_keys(token_str:str, ni_plain:str)->str:
	low=unicodedata.normalize('NFC', (ni_plain or "").lower())
	if low in KEYS_OVERRIDE: return KEYS_OVERRIDE[low]
	m=re.findall(r"‹(.*?)›", token_str)
	out=[]
	for t in m:
	if KEYS_MODE == "compact":
	if len(t)==2 and t[0] in "BDTGK": out.append(t[0])
	elif t in ("A","E","I","O","U"): out.append(t)
	elif t=="Ś": out.append("X")
	elif t=="Ŕ": out.append("r")
	else: out.append(t[0].upper())
	else:
	if len(t)==2 and t[0] in "BDTGK": out.append(t)
	elif t=="Ś": out.append("X")
	elif t=="Ŕ": out.append("r")
	else: out.append(t)
	return "".join(out)

	TRIDOT = "\|"
	def render_ib_with_tridots(ib_toks):
	res=[]; prev_word=False
	for tk in ib_toks:
	is_punct = tk in VISIBLE_PUNCT
	if is_punct:
	res.append(" "+tk+" "); prev_word=False
	else:
	if prev_word: res.append(" "+TRIDOT+" ")
	res.append(tk); prev_word=True
	return "".join(res).strip()

# ====== BI loader + diagnostics ======

# When True, an NI surface claimed by more than one ES surface is removed from
# NI2ES while the base CSV is loaded (see load_bi_strict_and_diagnose).
STRICT_BI_ENFORCE = True
# Lowercase NI surface -> set of ES surfaces that collided on it.
AMBIG_NI = {}
# HTML summary of the last CSV load; replaced by load_bi_strict_and_diagnose.
BI_DIAG_HTML = "<em>Sin CSV cargado.</em>"

	def load_bi_strict_and_diagnose():
	global BI_DIAG_HTML
	ES2NI.clear(); NI2ES.clear(); ESPHRASE2NI.clear(); NIPHRASE2ES.clear()
	AMBIG_NI.clear(); ES2NI_VERB.clear()
	ES2NI_POS.clear()
	ES2NI_MORPH.clear()
	NI2ES_LEMMA = {}

	if not os.path.exists(CSV_BI):
	msg=f"[ERROR] No se encontró el CSV bilingüe: {CSV_BI}"
	print(msg); BI_DIAG_HTML=f"<b>Error:</b> {escape(msg)}"
	return False

	rows=0; dup_es=0; dup_ni=0; empty_pid=0
	mismatch_backmap = 0
	mismatch_samples = []
	pid_seen=set()

	print(f"Detectado CSV bilingüe: {CSV_BI}")
	try:
	with _open_maybe_gzip(CSV_BI) as f:
	rd = csv.DictReader(f)
	flds=set(rd.fieldnames or [])
	ES_COL = "source_es" if "source_es" in flds else "es_surface" if "es_surface" in flds else "es"
	NI_COL = "target_ni" if "target_ni" in flds else "ni_surface" if "ni_surface" in flds else "ni"
	IDCOL = "pair_id" if "pair_id" in flds else "id" if "id" in flds else None
	FLAGCOL = None
	for cand in FLAG_COLNAMES:
	if cand in flds:
	FLAGCOL = cand; break
	POS_COL = "pos_es" if "pos_es" in flds else "pos" if "pos" in flds else None
	LEMMA_COL = "es_lemma" if "es_lemma" in flds else "lemma" if "lemma" in flds else None
	MORPH_COL = "es_morph" if "es_morph" in flds else "morph" if "morph" in flds else None

	base_rows = []
	for r in rd:
	es_orig = (r.get(ES_COL) or "").strip()
	ni_orig = (r.get(NI_COL) or "").strip()
	if not (es_orig and ni_orig): continue
	pid = (r.get(IDCOL) or "").strip() if IDCOL else ""
	if not pid: empty_pid += 1
	else: pid_seen.add(pid)
	flags = (r.get(FLAGCOL) or "") if FLAGCOL else ""

	es = lower(es_orig)
	ni = lower(ni_orig)

	if " " in es:
	if es not in ESPHRASE2NI:
	ESPHRASE2NI[es] = (ni_orig, pid)
	if " " in ni:
	if ni not in NIPHRASE2ES:
	NIPHRASE2ES[ni] = (es_orig, pid)

	pos = (r.get(POS_COL) or "").strip() if POS_COL else ""
	morph = (r.get(MORPH_COL) or "").strip() if MORPH_COL else ""
	_MORPH_PRIO = {"PRS":10,"PST":9,"IPFV":8,"FUT":7,"COND":6,
	"INF":5,"GER":4,"PART":3,"SBJ":2,"SBJ_IPFV":1,"IMP":0}
	_POS_PRIO = {"ADJ":3, "N":2, "V":1}
	if es in ES2NI:
	dup_es += 1
	old_pos = ES2NI_POS.get(es, "")
	old_morph = ES2NI_MORPH.get(es, "")
	replace = False
	new_p = _POS_PRIO.get(pos, 0)
	old_p = _POS_PRIO.get(old_pos, 0)
	if new_p > old_p:
	if old_pos == "V":
	ES2NI_VERB[es] = ES2NI[es]
	replace = True
	elif pos == "V" and old_pos == "V":
	new_m = _MORPH_PRIO.get(morph, -1)
	old_m = _MORPH_PRIO.get(old_morph, -1)
	if new_m > old_m:
	ES2NI_VERB[es] = ES2NI[es]
	replace = True
	elif pos == "V" and old_pos in ("N", "ADJ"):
	ES2NI_VERB[es] = (ni_orig, pid)
	if replace:
	ES2NI[es] = (ni_orig, pid)
	ES2NI_POS[es] = pos
	ES2NI_MORPH[es] = morph
	else:
	ES2NI[es] = (ni_orig, pid)
	ES2NI_POS[es] = pos
	ES2NI_MORPH[es] = morph

	lemma = (r.get(LEMMA_COL) or "").strip().lower() if LEMMA_COL else ""
	if ni in NI2ES:
	dup_ni += 1
	old_lemma = NI2ES_LEMMA.get(ni, "")
	if lemma and old_lemma and lemma == old_lemma:
	NI2ES[ni] = (es_orig, pid)
	else:
	s = AMBIG_NI.get(ni, set())
	s.add(NI2ES[ni][0]); s.add(es_orig)
	AMBIG_NI[ni] = s
	if STRICT_BI_ENFORCE:
	NI2ES.pop(ni, None)
	else:
	if STRICT_BI_ENFORCE and ni in AMBIG_NI:
	pass
	else:
	NI2ES[ni] = (es_orig, pid)
	NI2ES_LEMMA[ni] = lemma

	base_rows.append((es_orig, ni_orig, pid, flags))
	rows += 1

	if EXPANSION_ENABLE:
	for es_orig, ni_orig, pid, flags in base_rows:
	if not flags: continue
	if _has_flag(flags, FLAG_PLURAL):
	pl = _pluralize_es_form(es_orig)
	pl_key = lower(pl)
	if pl_key not in ES2NI:
	ES2NI[pl_key] = (ni_orig, pid)
	if _has_flag(flags, FLAG_3PL):
	p3 = _present_3pl_from_3sg(es_orig)
	p3_key = lower(p3)
	if p3_key not in ES2NI:
	ES2NI[p3_key] = (ni_orig, pid)

	for es_low, (ni_surf, _) in ES2NI.items():
	ni_low = lower(ni_surf)
	back = NI2ES.get(ni_low)
	if back and lower(back[0]) != es_low:
	mismatch_backmap += 1
	if len(mismatch_samples) < 10:
	mismatch_samples.append((es_low, ni_low, lower(back[0])))

	except Exception as e:
	msg=f"[ERROR] Al leer {CSV_BI}: {e}"
	print(msg); BI_DIAG_HTML=f"<b>Error:</b> {escape(msg)}"
	return False

	ES_FOLD.clear(); NI_FOLD.clear()
	for es_key in ES2NI:
	fk = fold(es_key)
	if fk != es_key and fk not in ES_FOLD:
	ES_FOLD[fk] = es_key
	for ni_key in NI2ES:
	fk = fold(ni_key)
	if fk != ni_key and fk not in NI_FOLD:
	NI_FOLD[fk] = ni_key
	debug_print(f"Fold maps: ES_FOLD={len(ES_FOLD)}, NI_FOLD={len(NI_FOLD)}")

	es_unique = len(ES2NI)
	ni_unique = len(NI2ES)
	pid_unique = len(pid_seen)

	print(f"✓ BI-ONLY ESTRICTO cargado: {rows:,} filas.")
	if dup_es: print(f"[AVISO] {dup_es:,} duplicados ES (se usó la primera).")
	if dup_ni: print(f"[AVISO] {dup_ni:,} duplicados NI (bloqueados en modo estricto).")
	if empty_pid: print(f"[AVISO] {empty_pid:,} filas sin pair_id.")
	if mismatch_backmap:
	print(f"[ALERTA] {mismatch_backmap:,} asimetrías ES↔NI (misma NI apunta a otro ES).")

	sam_html = ""
	if mismatch_samples:
	sam_rows = "".join(
	f"<li><code>{escape(es)}</code> → <code>{escape(ni)}</code> → <code>{escape(es2)}</code></li>"
	for es,ni,es2 in mismatch_samples
	)
	sam_html = f"<details><summary>Muestras</summary><ul>{sam_rows}</ul></details>"

	ambN = sum(len(v) > 1 for v in AMBIG_NI.values())
	ambList = ", ".join(f"{k}→{sorted(list(v))[:3]}" for k,v in list(AMBIG_NI.items())[:5])

	BI_DIAG_HTML = f"""
	<div style="font-family:Georgia,serif">
	<b>Diagnóstico del CSV BI</b><br>
	Archivo: <b>{escape(CSV_BI)}</b><br>
	Filas base (CSV): <b>{rows:,}</b><br>
	ES únicas (tras expansiones): <b>{es_unique:,}</b>  \|  NI únicas: <b>{ni_unique:,}</b>  \|  pair_id únicos: <b>{pid_unique:,}</b><br>
	Duplicados ES: <b>{dup_es:,}</b>  \|  Duplicados NI: <b>{dup_ni:,}</b> (bloqueados en estricto)  \|  Sin pair_id: <b>{empty_pid:,}</b><br>
	Asimetrías ES↔NI: <b>{mismatch_backmap:,}</b>
	{sam_html}
	<hr style="border:0;border-top:1px solid #caa">
	<small>NI ambiguas bloqueadas: <b>{ambN:,}</b>{(' · ej.: ' + escape(ambList)) if ambN else ''}</small><br>
	<small>Regla: el motor usa <b>sólo</b> tablas 1:1; NI duplicadas se bloquean y se muestran como <code>[AMB-NI:...]</code>.</small>
	</div>
	"""
	return rows > 0

# Populate the lexicon tables at import time. The boolean result of the
# loader is not checked here.
print("Cargando léxico/pares (BI-estricto)…")
load_bi_strict_and_diagnose()

# =====================================================================
# NON-DESTRUCTIVE patch system
# =====================================================================
import glob, re as _re_patches

# Patch file names: three digits, "_", a name, ".csv" with an optional ".gz".
LEX_PATCH_PATTERN = _re_patches.compile(r"^\d{3}_.+\.csv(\.gz)?$")
# One tuple per processed operation or lint finding.
LEX_PATCH_LOG = []
# Directory receiving the entries removed by delete/replace/retire.
LEX_DEPRECATED_DIR = "deprecated"
# When True, a patch with lint errors aborts patch application with RuntimeError.
STRICT_PATCHES = False

# (es, ni) lowercase pairs whose orphaning by a delete is accepted.
_KNOWN_LEGACY_ORPHANS = set()
_LEGACY_ORPHANS_FILE = os.path.join(LEX_DEPRECATED_DIR, "_known_legacy_orphans.csv")

	def _load_known_legacy_orphans():
	if not os.path.exists(_LEGACY_ORPHANS_FILE):
	return
	try:
	with open(_LEGACY_ORPHANS_FILE, "r", encoding="utf-8", newline="") as f:
	for row in csv.DictReader(f):
	es = (row.get("source_es") or "").strip().lower()
	ni = (row.get("target_ni") or "").strip().lower()
	if es and ni:
	_KNOWN_LEGACY_ORPHANS.add((es, ni))
	debug_print(f"[PATCH] Legacy orphans aceptados: {len(_KNOWN_LEGACY_ORPHANS)}")
	except Exception as e:
	debug_print(f"[PATCH] No se pudo leer {_LEGACY_ORPHANS_FILE}: {e}")

	def _append_to_graveyard(patch_name, row_data):
	try:
	os.makedirs(LEX_DEPRECATED_DIR, exist_ok=True)
	path = os.path.join(LEX_DEPRECATED_DIR, patch_name)
	new_file = not os.path.exists(path)
	with open(path, "a", encoding="utf-8", newline="") as f:
	w = csv.DictWriter(f, fieldnames=[
	"source_es","target_ni","pos_es","es_morph","pair_id","reason"
	])
	if new_file:
	w.writeheader()
	w.writerow({k: row_data.get(k, "") for k in
	["source_es","target_ni","pos_es","es_morph","pair_id","reason"]})
	except Exception as e:
	debug_print(f"[PATCH] No se pudo escribir cementerio: {e}")

	def _patch_pre_lint(rows, patch_name, future_targets=None, future_es=None):
	errors = []
	warns = []
	future_targets = future_targets or set()
	future_es = future_es or set()

	es_added = {}
	ni_targets = {}
	for r in rows:
	op = (r.get("op") or "").strip().lower()
	es = (r.get("source_es") or "").strip().lower()
	ni = (r.get("target_ni") or "").strip().lower()
	if op in ("add", "alias", "replace") and es and ni:
	es_added[es] = ni
	ni_targets.setdefault(ni, set()).add(es)

	for r in rows:
	op = (r.get("op") or "").strip().lower()
	es = (r.get("source_es") or "").strip().lower()
	ni = (r.get("target_ni") or "").strip().lower()
	reason = (r.get("reason") or "").strip()

	if op == "delete":
	if es in ES2NI:
	old_ni = ES2NI[es][0].lower() if isinstance(ES2NI[es], tuple) else ES2NI[es]
	ni_rescued = (old_ni in ni_targets) or (es in es_added)
	if not ni_rescued:
	pair = (es, old_ni)
	if pair in _KNOWN_LEGACY_ORPHANS:
	warns.append(("delete-orphan-known", es, old_ni,
	"huérfano legacy aceptado"))
	elif old_ni in future_targets or es in future_es:
	warns.append(("delete-orphan-deferred-rescue", es, old_ni,
	"rescatado en parche posterior"))
	else:
	errors.append(("delete-orphan-ni", es, old_ni,
	f"NI {old_ni!r} quedaría huérfano sin reemplazo"))
	elif op == "retire":
	if not reason:
	errors.append(("retire-no-reason", es, ni,
	"retire requiere campo reason"))
	elif op == "replace":
	if not (es and ni):
	errors.append(("replace-incomplete", es, ni,
	"replace requiere ES y NI"))

	return errors, warns

	def _cleanup_ambig_after_remove(removed_es_l, old_ni_surf):
	"""v102: tras eliminar una entrada (delete/retire/replace), si el NI
	estaba bloqueado en AMBIG_NI por una colisión con el ES borrado, recalcular.
	Si tras la baja queda un solo candidato, sacarlo de AMBIG_NI y restaurarlo
	en NI2ES para que la inversa vuelva a funcionar.
	"""
	if not old_ni_surf:
	return
	ni_l = old_ni_surf.lower()
	if ni_l not in AMBIG_NI:
	return
	AMBIG_NI[ni_l] = {x for x in AMBIG_NI[ni_l] if x.lower() != removed_es_l}
	if len(AMBIG_NI[ni_l]) <= 1:
	survivors = AMBIG_NI.pop(ni_l, set())
	if survivors and ni_l not in NI2ES:
	survivor_es = next(iter(survivors))
	if survivor_es.lower() in ES2NI:
	NI2ES[ni_l] = (survivor_es, "")

	def apply_lex_patches():
	_load_known_legacy_orphans()

	try:
	all_files = os.listdir(".")
	except Exception as e:
	debug_print(f"[PATCH] No se pudo listar el directorio: {e}")
	return

	patch_files = sorted([f for f in all_files if LEX_PATCH_PATTERN.match(f)])
	# Si existen X.csv y X.csv.gz, priorizar la versión comprimida e ignorar
	# la plana. Esto permite migración segura sin duplicar la aplicación de ops.
	_gz_bases = {f[:-3] for f in patch_files if f.endswith(".csv.gz")} # quita ".gz"
	_shadowed = [f for f in patch_files if f.endswith(".csv") and f in _gz_bases]
	if _shadowed:
	patch_files = [f for f in patch_files if f not in _shadowed]
	debug_print(f"[PATCH] Ignorando {len(_shadowed)} .csv sombreados por su .csv.gz")
	if not patch_files:
	debug_print("[PATCH] No se encontraron archivos NNN_*.csv")
	return

	print(f"[PATCH] Aplicando {len(patch_files)} archivo(s) de parche...")
	totals = {"add":0,"override":0,"alias":0,"delete":0,"replace":0,"retire":0,"skipped":0}

	parsed_patches = []
	for pf in patch_files:
	try:
	if pf.endswith(".gz"):
	_f = gzip.open(pf, "rt", encoding="utf-8", newline="")
	else:
	_f = open(pf, "r", encoding="utf-8", newline="")
	with _f as f:
	parsed_patches.append((pf, list(csv.DictReader(f))))
	except Exception:
	parsed_patches.append((pf, None))

	for idx, (patch_path, rows) in enumerate(parsed_patches):
	patch_name = patch_path
	if rows is None:
	print(f"[PATCH] Error leyendo {patch_name}")
	continue
	future_ni = set()
	future_es = set()
	for fp, frows in parsed_patches[idx+1:]:
	if frows is None: continue
	for r in frows:
	op = (r.get("op") or "").strip().lower()
	es = (r.get("source_es") or "").strip().lower()
	ni = (r.get("target_ni") or "").strip().lower()
	if op in ("add", "alias", "replace") and es and ni:
	future_ni.add(ni)
	future_es.add(es)

	ops = {"add":0,"override":0,"alias":0,"delete":0,"replace":0,"retire":0,"skipped":0}

	errors, warns = _patch_pre_lint(rows, patch_name,
	future_targets=future_ni,
	future_es=future_es)
	if errors:
	print(f"[PATCH] {patch_name}: {len(errors)} error(es) destructivo(s) detectado(s):")
	for kind, es, ni, msg in errors[:5]:
	print(f" ✗ [{kind}] {es!r}: {msg}")
	if len(errors) > 5:
	print(f" ... y {len(errors)-5} más")
	if STRICT_PATCHES:
	raise RuntimeError(
	f"Parche {patch_name} no pasa el linter (STRICT_PATCHES=True). "
	"Documenta en deprecated/_known_legacy_orphans.csv o usa op=replace.")
	for kind, es, ni, msg in errors:
	LEX_PATCH_LOG.append((patch_name, kind, es, ni, "lint-error", msg))
	if warns:
	for kind, es, ni, msg in warns:
	LEX_PATCH_LOG.append((patch_name, kind, es, ni, "lint-warn", msg))

	for row in rows:
	op = (row.get("op") or "").strip().lower()
	es = (row.get("source_es") or "").strip()
	ni = (row.get("target_ni") or "").strip()
	pos = (row.get("pos_es") or "").strip()
	morph = (row.get("es_morph") or "").strip()
	pid = (row.get("pair_id") or "").strip() or f"patch::{patch_name}"
	reason = (row.get("reason") or "").strip()

	es_l = es.lower()
	ni_l = ni.lower()

	if op == "add":
	if es_l in ES2NI:
	# v108: si el nuevo POS=V y el existente es ADJ/N de mayor
	# prioridad, añadir como lectura verbal alternativa
	# (poblar ES2NI_VERB sin tocar ES2NI principal). Esto
	# replica el comportamiento de carga del CSV base para
	# palabras ambiguas como "despierto" (ADJ + V/PRS-1S).
	old_pos = ES2NI_POS.get(es_l, "")
	if pos == "V" and old_pos in ("ADJ", "N") and es_l not in ES2NI_VERB:
	ES2NI_VERB[es_l] = (ni, pid)
	# v109: si NI2ES apunta a la forma SIN diptongar
	# (variante ortográfica menos correcta), preferir
	# la nueva forma diptongada como inversa canónica.
	# Ej: NI2ES['titśuśdum-ke']='solta' → cambiar a 'suelta'.
	# Heurística: probar des-diptongar (ie→e, ue→o) la
	# nueva ES; si coincide con la actual inversa, es
	# la variante sin diptongar → reemplazar.
	if ni_l in NI2ES:
	old_inv = NI2ES[ni_l][0].lower()
	if old_inv != es_l:
	replaced = False
	for diph, base in (('ie','e'), ('ue','o')):
	if diph in es_l:
	for idx in range(len(es_l) - 1):
	if es_l[idx:idx+2] == diph:
	cand = es_l[:idx] + base + es_l[idx+2:]
	if cand == old_inv:
	NI2ES[ni_l] = (es, pid)
	replaced = True
	break
	if replaced: break
	ops["add"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok",
	f"lectura verbal alternativa (principal {old_pos} preservado)"))
	continue
	ops["skipped"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip", "ES ya existe"))
	continue
	# Permitir sinonimia: varias ES pueden mapear a la misma NI.
	# Solo registramos ES→NI; mantenemos NI→ES de la primera entrada
	# registrada (canónica), evitando crear ambigüedad inversa.
	ni_already = ni_l in NI2ES
	ES2NI[es_l] = (ni, pid)
	if not ni_already:
	NI2ES[ni_l] = (es, pid)
	if pos: ES2NI_POS[es_l] = pos
	if morph: ES2NI_MORPH[es_l] = morph
	if " " in es_l and es_l not in ESPHRASE2NI:
	ESPHRASE2NI[es_l] = (ni, pid)
	if " " in ni_l and ni_l not in NIPHRASE2ES and not ni_already:
	NIPHRASE2ES[ni_l] = (es, pid)
	ops["add"] += 1
	note = "ok (sinónimo de NI existente)" if ni_already else "ok"
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", note))

	elif op == "override":
	prev = ES2NI.get(es_l)
	ES2NI[es_l] = (ni, pid)
	NI2ES[ni_l] = (es, pid)
	if pos: ES2NI_POS[es_l] = pos
	if morph: ES2NI_MORPH[es_l] = morph
	if " " in es_l:
	ESPHRASE2NI[es_l] = (ni, pid)
	if " " in ni_l:
	NIPHRASE2ES[ni_l] = (es, pid)
	ops["override"] += 1
	prev_str = f"era {prev[0]}" if prev else "no existía"
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", prev_str))

	elif op == "alias":
	if es_l in ES2NI:
	ops["skipped"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip", "ES ya existe"))
	continue
	ES2NI[es_l] = (ni, pid)
	if pos: ES2NI_POS[es_l] = pos
	if morph: ES2NI_MORPH[es_l] = morph
	if " " in es_l and es_l not in ESPHRASE2NI:
	ESPHRASE2NI[es_l] = (ni, pid)
	ops["alias"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", "alias ortográfico"))

	elif op == "delete":
	if es_l not in ES2NI:
	ops["skipped"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip", "no existía"))
	continue
	old_ni_surf, old_pid = ES2NI[es_l]
	old_pos = ES2NI_POS.get(es_l, "")
	old_morph = ES2NI_MORPH.get(es_l, "")
	_append_to_graveyard(patch_name, {
	"source_es": es, "target_ni": old_ni_surf,
	"pos_es": old_pos, "es_morph": old_morph,
	"pair_id": old_pid,
	"reason": reason or "delete sin reason (legacy)",
	})
	del ES2NI[es_l]
	ES2NI_POS.pop(es_l, None)
	ES2NI_MORPH.pop(es_l, None)
	if old_ni_surf.lower() in NI2ES and \
	NI2ES[old_ni_surf.lower()][0].lower() == es_l:
	del NI2ES[old_ni_surf.lower()]
	_cleanup_ambig_after_remove(es_l, old_ni_surf)
	ops["delete"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", "movido a deprecated/"))

	elif op == "replace":
	if not (es and ni):
	ops["skipped"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip",
	"replace requiere ES y NI"))
	continue
	if ni_l in NI2ES and NI2ES[ni_l][0].lower() != es_l:
	ops["skipped"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip",
	f"NI nuevo ya pertenece a {NI2ES[ni_l][0]!r}"))
	continue
	if es_l in ES2NI:
	old_ni_surf, old_pid = ES2NI[es_l]
	old_pos = ES2NI_POS.get(es_l, "")
	old_morph = ES2NI_MORPH.get(es_l, "")
	_append_to_graveyard(patch_name, {
	"source_es": es, "target_ni": old_ni_surf,
	"pos_es": old_pos, "es_morph": old_morph,
	"pair_id": old_pid,
	"reason": reason or f"replaced by {ni}",
	})
	if old_ni_surf.lower() in NI2ES and \
	NI2ES[old_ni_surf.lower()][0].lower() == es_l:
	del NI2ES[old_ni_surf.lower()]
	ES2NI[es_l] = (ni, pid)
	NI2ES[ni_l] = (es, pid)
	if pos: ES2NI_POS[es_l] = pos
	if morph: ES2NI_MORPH[es_l] = morph
	ops["replace"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", reason or ""))

	elif op == "retire":
	if not reason:
	ops["skipped"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip",
	"retire requiere reason"))
	continue
	if es_l not in ES2NI:
	ops["skipped"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "skip",
	"no existía"))
	continue
	old_ni_surf, old_pid = ES2NI[es_l]
	_append_to_graveyard(patch_name, {
	"source_es": es, "target_ni": old_ni_surf,
	"pos_es": ES2NI_POS.get(es_l,""),
	"es_morph": ES2NI_MORPH.get(es_l,""),
	"pair_id": old_pid, "reason": reason,
	})
	del ES2NI[es_l]
	ES2NI_POS.pop(es_l, None)
	ES2NI_MORPH.pop(es_l, None)
	if old_ni_surf.lower() in NI2ES and \
	NI2ES[old_ni_surf.lower()][0].lower() == es_l:
	del NI2ES[old_ni_surf.lower()]
	_cleanup_ambig_after_remove(es_l, old_ni_surf)
	ops["retire"] += 1
	LEX_PATCH_LOG.append((patch_name, op, es, ni, "ok", reason))

	else:
	ops["skipped"] += 1
	LEX_PATCH_LOG.append((patch_name, "?", es, ni, "skip",
	f"op desconocida: {op!r}"))

	summary = ", ".join(f"{k}={v}" for k,v in ops.items() if v)
	print(f"[PATCH] {patch_name}: {summary or 'sin cambios'}")
	for k in totals:
	totals[k] += ops[k]

	print(f"[PATCH] Total: " + ", ".join(f"{k}={v}" for k,v in totals.items() if v))

	if any(totals[k] for k in ("add","override","replace","delete","retire")):
	ES_FOLD.clear(); NI_FOLD.clear()
	for es_key in ES2NI:
	fk = fold(es_key)
	if fk != es_key and fk not in ES_FOLD:
	ES_FOLD[fk] = es_key
	for ni_key in NI2ES:
	fk = fold(ni_key)
	if fk != ni_key and fk not in NI_FOLD:
	NI_FOLD[fk] = ni_key

	def _restore_orphan_ni_after_patches():
	"""v112: tras aplicar todos los parches, restaurar entradas NI2ES huérfanas.
	Si una entrada ES→NI existe pero el NI no está en NI2ES NI en AMBIG_NI
	(es decir, NI completamente huérfano sin colisión), restaurarla en NI2ES.
	Esto cubre el caso típico: DELETE de fem espuria (invierna) deja NI2ES[anśutdiś]
	vacío pero invierno→anśutdiś sigue siendo válido.
	NO actúa cuando NI está en AMBIG_NI (sin criterio universal para elegir winner).
	"""
	restored = 0
	for es_k, val_k in ES2NI.items():
	ni_k, pid_k = val_k
	ni_l = ni_k.lower()
	if ni_l not in NI2ES and ni_l not in AMBIG_NI:
	NI2ES[ni_l] = (es_k, pid_k)
	restored += 1
	if restored:
	debug_print(f"[PATCH] v112: restauradas {restored} entradas NI huérfanas tras deletes")

# Apply the patch files at import time, then recreate reverse entries for NI
# surfaces that were left without one.
apply_lex_patches()
_restore_orphan_ni_after_patches()

	def _recompute_ambig_ni_after_patches():
	    """v133: after all patches, re-synchronize AMBIG_NI and NI2ES with ES2NI.

	    Two complementary corrections (plus the v140 orphan pass):

	    1) Phantom AMBIG entries (v132): NI forms flagged as ambiguous during the
	       initial load whose contributors were later moved to another NI by an
	       override without clearing the flag. Example: śunleŕśet-ir was
	       AMBIG{abierto, abrutado}; patch 127 moved 'abrutado' to sirudata-ir,
	       so only 'abierto' still points to śunleŕśet-ir, yet AMBIG kept
	       blocking the reverse lookup.

	    2) Stale NI2ES entries (v133): NI2ES[ni] names an ES whose current ES2NI
	       entry no longer points back to that NI. Real example:
	       NI2ES['bemuŕ-k']='gruesas' after patch 103, but patch 128 moved
	       gruesa→ti-bemuŕ-k; the only ES still pointing to bemuŕ-k is
	       'gruesos', so that is what NI2ES['bemuŕ-k'] should hold.

	    Conservative policy: an entry is only rewritten when EXACTLY one ES
	    currently points to the NI. Entries with zero current candidates are
	    kept (they may be older NI spellings still present in saved texts) and
	    entries with several candidates are left untouched (no new AMBIG
	    blocks are introduced here).

	    Cost: O(|ES2NI|) + O(|NI2ES|), a single pass at start-up (the original
	    author measured about 6 s on 1.5M ES2NI + 2M NI2ES entries; doing the
	    cleanup inside every override pushed start-up beyond 30 minutes).

	    Mutates the module-level AMBIG_NI and NI2ES in place; returns None.
	    """
	    # Reverse index of ES2NI: lowercase NI -> list of (es, pair_id) that
	    # currently map to it. Shared by all three steps below.
	    es_by_ni = {}
	    for es_l, entry in ES2NI.items():
	        es_by_ni.setdefault(entry[0].lower(), []).append((es_l, entry[1]))

	    # Step 1: drop phantom AMBIG_NI marks; if exactly one ES survives and
	    # NI2ES has no entry for that NI, restore it.
	    cleaned_amb = 0
	    restored_amb = 0
	    for ni_l in tuple(AMBIG_NI.keys()):
	        candidates = es_by_ni.get(ni_l, [])
	        if len(candidates) > 1:
	            continue  # still genuinely ambiguous
	        AMBIG_NI.pop(ni_l, None)
	        cleaned_amb += 1
	        if candidates and ni_l not in NI2ES:
	            NI2ES[ni_l] = candidates[0]
	            restored_amb += 1

	    # Step 2 (v133): repair stale NI2ES entries that have a single survivor.
	    # Only values of existing keys are reassigned, so the dict does not
	    # change size while it is being iterated.
	    fixed_obsolete = 0
	    for ni_l in NI2ES:
	        cur_es = NI2ES[ni_l][0].lower()
	        points_back = cur_es in ES2NI and ES2NI[cur_es][0].lower() == ni_l
	        if points_back:
	            continue
	        candidates = es_by_ni.get(ni_l, [])
	        # 0 candidates -> orphan, kept as is; >1 -> multi-candidate, kept as is.
	        if len(candidates) == 1:
	            NI2ES[ni_l] = candidates[0]
	            fixed_obsolete += 1

	    # Step 3 (v140): restore NI2ES for NI forms that have no reverse entry
	    # and are not flagged ambiguous, when exactly one ES still maps to them
	    # (e.g. a patch deleted "ilici" but "elche" still points to NI ilici).
	    restored_orphan = 0
	    for ni_l, candidates in es_by_ni.items():
	        if len(candidates) != 1 or ni_l in NI2ES or ni_l in AMBIG_NI:
	            continue
	        NI2ES[ni_l] = candidates[0]
	        restored_orphan += 1

	    if any((cleaned_amb, fixed_obsolete, restored_orphan)):
	        debug_print(f"[PATCH] v140: AMBIG limpiados={cleaned_amb} (restaurados={restored_amb}), NI2ES obsoletos corregidos={fixed_obsolete}, huérfanos restaurados={restored_orphan}")

	_recompute_ambig_ni_after_patches()

	def _register_ipfv_3s_reverse():
	    """Add reverse (NI2ES) entries for the forms obtained by dropping "-n".

	    For every NI2ES key ending in one of the listed suffixes (all of which
	    end in "-n"), the key minus its last two characters is registered with
	    the same value, unless that shortened key is already present in NI2ES.
	    Presumably the listed suffixes are 1S endings and the shortened form is
	    the 3S one (per the names used in the log message) - confirm against the
	    lexicon conventions. Mutates NI2ES in place; returns None.
	    """
	    endings = ('-ska-n', '-tei-n', '-na-n', '-nabo-n')
	    skipped = 0
	    pending = []
	    # Work on a snapshot so the insertions below cannot disturb the scan.
	    for ni_key, val in list(NI2ES.items()):
	        if not ni_key.endswith(endings):
	            continue
	        ni_3s = ni_key[:-2]  # strip the trailing "-n"
	        if ni_3s in NI2ES:
	            skipped += 1
	        else:
	            pending.append((ni_3s, val))

	    added = 0
	    for ni_3s, val in pending:
	        NI2ES[ni_3s] = val
	        added += 1
	    debug_print(f"3S reverse map (IPFV/COND/SBJ/SBJ_IPFV): {added} formas registradas, {skipped} ya existían")

	_register_ipfv_3s_reverse()

	# ====== VERSION MARKER ======
	# v103 (2026-05-10):
	# - Fix `:.` espurio: si el último token de buf en strip_modal_suffixes_ni
	# ya es signo cerrante (.,!?…:;), no añadir un punto extra al final.
	# - Fix punto espurio tras placeholder: si último de buf es [SIN-LEX:...]
	# o similar, tampoco añadir end_tok. Evita "Aitor." cuando input no tenía.
	# - Fix preservar formato hora `\d:\d`: en ES→NI, los dígitos en patrón
	# "número : número" no se convierten a numerales NI, así "6:30" sale
	# como "6 : 30" en NI y vuelve como "6:30" en el roundtrip.
	# - Fix fusión clítico ambiguo + número intermedio: el regex _SAFE_AFTER_AMBIG
	# ahora acepta cuantificador opcional entre "cada/todos los" y la palabra
	# temporal: "cada quince días", "todos los 15 años", etc.
	# - Fix ¡/¿ saltando \n\n: add_inverted_openers ahora trata NEWLINE_TOK como
	# break, así la apertura ¡/¿ se inserta DESPUÉS del salto de línea, no
	# ANTES.
	# - Fix ¡/¿ en preguntas sí/no: la heurística retrocede SIEMPRE hasta
	# SENT_END/NEWLINE_TOK (inicio absoluto), y luego si hay palabra-q en
	# el tramo, posiciona el ¿/¡ justo antes; si no hay palabra-q (preg.
	# sí/no), va al inicio absoluto. Evita "Has llegado bien, ¿Hijo?".
	# - Fix tilde olvidada (v101) anti-falso-positivo: la regla "el/tu + V → tónico"
	# ahora desactiva si la forma plural de la palabra siguiente existe como
	# sustantivo (POS=N), lo que delata ambigüedad sustantivo/verbo.
	# Cubre: "el destino" (destinos=N), "el dedo" (dedos=N), "el marco", etc.
	#
	# v102 (2026-05-10): Helper _cleanup_ambig_after_remove. Tras delete/retire,
	# si el NI estaba bloqueado en AMBIG_NI por colisión con el ES borrado,
	# recalcula: si queda un único candidato superviviente, lo restaura en
	# NI2ES para que la inversa vuelva a funcionar. Antes, una colisión detectada
	# en la carga inicial quedaba bloqueada aunque luego se borrase la
	# entrada conflictiva.
	#
	# v101 (2026-05-09): Ampliada detección de tilde olvidada con "tu/el + V".
	#
	# v104 (2026-05-11): op "add" en patches admite SINÓNIMOS NI. Antes, si una
	# entrada nueva proponía una NI ya ocupada, se descartaba por "ambigüedad".
	# Ahora se permite la sinonimia (varias ES → 1 NI): se registra el mapeo
	# ES2NI siempre; NI2ES conserva la primera entrada registrada (canónica),
	# que será la que devuelva la inversa NI→ES. Esto desbloquea casos como
	# librito/librillo (ambos diminutivos legítimos de "libro" → lundokbek-bdo)
	# o florcita/florecita (variantes ortográficas del mismo diminutivo).
	# v111 (2026-05-13): Regla 4b en _choose_es_to_ni - "det + N + homógrafo → V"
	# para palabras con AMBAS entradas N y V en el lex (vía pos_es).
	# Ej: "El gato araña el sofá" → araña = V; "La oveja bala fuerte" → bala = V.
	# Parche 105 añade las entradas V para araña, bala, palma, turba, fresa.
	#
	# v112 (2026-05-13): Pasada lineal final _restore_orphan_ni_after_patches que
	# restaura NI2ES huérfanos tras DELETE. Si una entrada ES→NI existe pero su NI
	# no está en NI2ES ni en AMBIG_NI, restaurarla en NI2ES. NO actúa cuando hay
	# AMBIG (sin criterio universal para elegir winner). Esto resuelve el caso
	# donde, al borrar una fem espuria (invierna), el masc correspondiente
	# (invierno) quedaba sin entrada inversa.
	#
	# v113 (2026-05-13): Fix bug en apply_apocope_es. Las funciones
	# _is_masc_sg_noun_candidate y _is_singular_noun_candidate ahora verifican que
	# next_word sea realmente N o ADJ en el lex, no cualquier palabra que cumpla
	# los filtros morfológicos. Esto evita que "banco grande, leía" se convierta
	# en "banco gran, leía" (porque "leía" es V, no sustantivo).
	# v114 (2026-05-14): Fix bug 'eslo'. Quitado PRS de _MORPH_ADMITS_ENCLITIC para
	# que verbos en presente NO admitan enclíticos. "es lo" no se fusiona a "eslo".
	# v115 (2026-05-14): Fix bug 'punto faltante tras placeholder'. La lógica que
	# evitaba "Aitor." al final del texto causaba que oraciones consecutivas con
	# nombre propio al final de la primera se pegaran ("decía Pablo era..."). Ahora
	# se añade siempre el punto; el centinela final se limpia por la línea siguiente.
	# v116 (2026-05-14): (a) Añadido em/en dash a _SAFE_AFTER_AMBIG para que
	# "intentar lo — respondió" → "intentarlo — respondió". (b) Ampliada regla
	# tildes interrog: cuando+verbo SBJ/IPFV/PST es temporal subordinado, sin tilde.
	# v117 (2026-05-14): (a) Apocope blacklist: añadidas palabras gramaticales
	# que NUNCA permiten apocope aunque tengan POS=N en lex (porque, que, aunque,
	# sino, etc.). Arregla "Grande porque" → "Gran porque". (b) Tildes interrog:
	# solo cancelar tilde de cuando/donde/como si NO es la primera palabra
	# alfabética de la interrog. "¿Cuándo fue eso?" mantiene tilde (es primera).
	# v118 (2026-05-14): Fix apocope con puntuación pegada. Si el token a apocopar
	# tiene una coma, punto u otro signo adherido (ej. "grande,"), NO se apocopa.
	# Significa que el adjetivo está aislado por pausa, no atributivo de la
	# siguiente palabra. "es grande, espaciosa" mantiene "grande,".
	# v119 (2026-05-14): Añadidos un/una/unos/unas/algún/alguna/algunos/algunas
	# al _SAFE_AFTER_AMBIG. Permite fusionar "dando le un beso" → "dándole un beso".
	# v120 (2026-05-14): Removidos plurales unos/unas/algunos/algunas del SAFE_AFTER.
	# La fusión DOUBLE_CLIT disparaba mal en "recordarnos los unos a los otros"
	# (expresión idiomática con 'unos' = pronombre) generando "recordárnoslos unos".
	# Solo singulares un/una/algún/alguna son seguros como determinantes.
	# v121 (2026-05-14): Capitalización tras cierre de cita. Caracteres »/"/"/'
	# son ahora transparentes cuando start=True (igual que las aperturas), de modo
	# que tras "...todos. »" la siguiente palabra ("Mi padre") se capitaliza
	# correctamente en lugar de quedar "mi padre".
	# v122 (2026-05-14): Fix bug "punto extra al final". El regex 4 de
	# postprocess_spanish añadía espacio entre dos signos de puntuación pegados
	# ("X.Y" → "X. Y"), incluso cuando Y era también puntuación. Esto causaba
	# que ".⏎." (donde ⏎ es NEWLINE_TOK) se convirtiera en ". ⏎." y luego en
	# ".\n." (un punto, salto, otro punto). Solución doble: (a) lookahead
	# excluye .,;:!? del salto de espacio; (b) limpieza final que elimina punto
	# duplicado al final del texto.
	# v124 (2026-05-14): tres fixes prioritarios sobre v123:
	# 1) expand_enclitics: el fallback de doble-clítico ahora exige stem2 >= 3 chars
	# → arregla bug CRÍTICO "estela" → "es"+"te"+"la" (descomponía sustantivos
	# comunes como verbo+enclíticos por raíz de 2 chars).
	# 2) _SAFE_AFTER_AMBIG ampliado con artículos (el/la/los/las), preposiciones
	# (de/en/a/por/...) y placeholders [SIN-LEX:...] → fusiona enclíticos en:
	# "abrazándolos efusivamente", "acariciándole el pelo",
	# "reprochándoles la ausencia", "perderlo de vista".
	# 3) 'por' quitado de INTERR_PLAIN → arregla "Por cierto, ¿sabes...?" donde
	# el motor movía '¿' al inicio absoluto antes de 'Por'.
	# v125 (2026-05-14): fix interrog perdido (siempre añade modal -na/-ba)
	# v126 (2026-05-14): triada de fixes residuales:
	# #2 _verb_admits_enclitic admite verbos V con morph PRS si su infinitivo
	# existe (rescata "porta te" → "pórtate", PRS 3S homógrafa con IMP 2S).
	# #5 post-procesado regex que añade tilde diacrítica a formas
	# gerundio+clítico pegadas sin tilde ("abriendose" → "abriéndose").
	# #6 reordenar signo de pregunta/exclamación + comilla cerrante
	# ("? »" → "»?", "! »" → "»!").
	# v126b (2026-05-14): rescate homógrafos N/V en _verb_admits_enclitic.
	# El v126 (fix #2 aplicado mal en la sesión anterior por sobreescritura del
	# archivo) solo activaba el rescate IMP si la palabra estaba como V en el lex.
	# "porta" está catalogada como N (sustantivo "puerta") y aún así puede ser
	# IMP 2S de "portar". El v126b añade un check independiente del POS principal:
	# si v termina en a/e/i y su infinitivo regular existe como V, admitir el
	# enclítico. El contexto sintáctico ("v + me/te/se/...") garantiza que es
	# verbal: los sustantivos no toman enclíticos.
	# v127 (2026-05-15): elimina espacio antes de comilla cerrante »
	# tras todos los fixes anteriores. Caso: "bien. »" → "bien.»",
	# "hielo. »" → "hielo.»". No interfiere con el fix #6 (?»/!») porque
	# se aplica después. Postprocesado regex sencillo.
	# v128 (2026-05-15): _SAFE_AFTER_AMBIG y _SAFE_AFTER_AMBIG_DOUBLE incluyen
	# adverbios genéricos en -mente como contexto seguro tras clítico AMBIG.
	# Caso: "abrazando los efusivamente" → "abrazándolos efusivamente".
	# El patrón anterior solo cubría 4 adverbios específicos (fuertemente,
	# suavemente, fijamente, atentamente); este captura cualquiera con [a-z]+mente.
	# v133 (2026-05-16): ampliación del fix de motor de v132. Añade una segunda
	# pasada que detecta y corrige entradas NI2ES OBSOLETAS: NI2ES[ni]=es donde
	# ES2NI[es] ya no apunta a este ni. Esto pasa porque el motor en `override`
	# sobrescribe NI2ES sin comprobar si la entrada vieja allí presente está
	# obsoleta o si el NI está en AMBIG.
	#
	# Caso real desencadenante: NI2ES['bemuŕ-k']='gruesas' tras el parche 103,
	# pero el parche 128 movió gruesa→ti-bemuŕ-k. El único ES que apunta a
	# bemuŕ-k es ahora 'gruesos'. v132 no lo corregía porque solo restauraba
	# NI2ES si estaba vacío. v133 corrige también cuando hay superviviente
	# único.
	#
	# Opción conservadora: solo corregir cuando hay EXACTAMENTE 1 superviviente.
	# No eliminar entradas huérfanas (0 candidatos), por si son versiones viejas
	# del NI usadas en textos guardados. No promover multi-candidato a AMBIG.
	#
	# Coste medido: ~6 s al arranque (3 ejecuciones: 5.5, 5.5, 7.4 s). Una
	# sola pasada al final, no afecta el bucle interno de override. Corrige
	# ~328K entradas NI2ES (verificado en lex actual con 129 parches).
	# Identifier of this engine build; printed once below.
	VERSION_MARKER = "v142_2026_05_19_des_diptongacion_enclitica"
	try:
	    # Start-up banner: loaded version and the lexicon in use (CSV_BI is
	    # defined elsewhere in the file). flush=True pushes it out immediately.
	    print(f"[Neoíbero translator] versión cargada: {VERSION_MARKER}", flush=True)
	    print(f"[Neoíbero translator] léxico activo: {CSV_BI}", flush=True)
	except Exception:
	    # Best effort only: a failure while printing the banner is ignored.
	    pass

	# ====== Utilidad n-grama (longest-match, BI-only) ======
	def _longest_match(tokens, i, phrase_map):
	    """Return the longest phrase of phrase_map that starts at tokens[i].

	    Candidate phrases are the space-joined, lowercased tokens of every
	    window of 1..MAX_NGRAM tokens beginning at position i. For each window
	    the exact key is tried first; if it misses, the accent-folded key is
	    tried (only when it differs from the exact one).

	    Returns (span, surface): the number of tokens covered and element 0 of
	    the matching phrase_map value, or (0, None) when nothing matches or
	    phrase_map is empty.
	    """
	    if not phrase_map:
	        return (0, None)
	    # Widest window that still fits in the token list.
	    widest = min(MAX_NGRAM, len(tokens) - i)
	    # Try the longest window first: the first hit is the longest match.
	    for span in range(widest, 0, -1):
	        window = tokens[i:i+span]
	        exact = " ".join(lower(t) for t in window)
	        if exact in phrase_map:
	            return (span, phrase_map[exact][0])
	        folded = " ".join(fold(lower(t)) for t in window)
	        if folded != exact and folded in phrase_map:
	            return (span, phrase_map[folded][0])
	    return (0, None)

	# ====== Post-proceso ES (espacios + mayúsculas de oración) ======
	def sentence_case_spanish(s: str) -> str:
	    """Upper-case the first letter of every sentence in s.

	    A sentence start is armed at the beginning of the text, after any of
	    . ? ! … and after ¿ or ¡. While armed, whitespace, NEWLINE_TOK, opening
	    wrappers (including em/en dashes, v95) and closing quotes (v121) are
	    copied through without disarming; the first alphabetic character is
	    upper-cased; any other character disarms. A colon followed by a line
	    break re-arms (v103: letters and vertical lists). Text inside square
	    brackets is copied untouched and never changes the armed state.
	    """
	    # Characters that may sit between a sentence boundary and its first letter.
	    transparent = "¿¡\"'«(“‘[—–" + '»"”\''
	    pieces = []
	    armed = True
	    inside_brackets = False
	    prev_visible = None  # last character that is neither whitespace nor NEWLINE_TOK

	    for ch in s:
	        if ch == '[':
	            inside_brackets = True

	        if inside_brackets:
	            # Bracketed text is copied verbatim.
	            pieces.append(ch)
	            if ch == ']':
	                inside_brackets = False
	        else:
	            # v103: ":" followed by a line break starts a new sentence.
	            if prev_visible == ':' and ch in ('\n', NEWLINE_TOK):
	                armed = True

	            if not armed:
	                pieces.append(ch)
	                if ch in ".?!…" or ch in "¿¡":
	                    armed = True
	            elif ch.isspace() or ch == NEWLINE_TOK or ch in transparent:
	                # v100/v121: stay armed across blanks, wrappers and closing quotes.
	                pieces.append(ch)
	            elif ch.isalpha():
	                pieces.append(ch.upper())
	                armed = False
	            else:
	                pieces.append(ch)
	                armed = ch in "¿¡"

	        if not (ch.isspace() or ch == NEWLINE_TOK):
	            prev_visible = ch

	    return "".join(pieces)

	def postprocess_spanish(s: str) -> str:
	    """Normalize spacing around punctuation, then apply sentence casing.

	    Steps, in order:
	      1. rejoin clock times split by the tokenizer ("6 : 30" -> "6:30");
	      2. rejoin decimals/thousands ("3 . 14" -> "3.14", "1 , 5" -> "1,5");
	      3. remove whitespace before , . ; : ! ?;
	      4. insert one space after ? . ! ; when text follows directly;
	      5. remove whitespace after ¿ and ¡;
	      6. collapse whitespace runs to one space and strip the ends;
	      7. capitalize sentence starts via sentence_case_spanish.

	    Returns the normalized string.
	    """
	    s = re.sub(r"(\d)\s:\s(\d)", r"\1:\2", s)
	    s = re.sub(r"(\d)\s([.,])\s(\d)", r"\1\2\3", s)
	    s = re.sub(r"\s+([,.;:!?])", r"\1", s)
	    # v122: no space is inserted when the next character is whitespace, the
	    # end of the text, more punctuation or NEWLINE_TOK (otherwise a full stop
	    # followed by the line-break marker and another full stop would be split
	    # into two visible stops).
	    # Fix: the alternatives must be separated by a bare "|"; with an escaped
	    # pipe the lookahead only matched a literal "|" and never excluded
	    # anything.
	    # Fix: a digit that follows "<digit>." is also excluded, so the decimals
	    # rejoined by step 2 are not split again ("3.14" stays "3.14").
	    newline_tok = re.escape(NEWLINE_TOK)
	    s = re.sub(
	        rf"([?.!;])(?!\s|$|[.,;:!?]|{newline_tok}|(?<=\d\.)\d)([^\s])",
	        r"\1 \2",
	        s,
	    )
	    s = re.sub(r"([¿¡])\s+", r"\1", s)
	    s = re.sub(r"\s{2,}", " ", s).strip()
	    return sentence_case_spanish(s)

	# ====== Traducción BI estricta ======
	def translate_es_to_ni_bi(text:str):
	    """Translate Spanish text into Neo-Iberian, token by token (ES -> NI).

	    Returns a 2-tuple (ni_text, ib_html): the detokenized Neo-Iberian text
	    and an HTML <div class='ib-line'> holding the escaped output of
	    render_ib_with_tridots for the parallel token list.

	    Per token: punctuation and placeholders are copied through; a
	    multi-token phrase found in ESPHRASE2NI wins over single-word lookup;
	    a capitalized word absent from the lexicon is emitted verbatim; other
	    words are resolved by _choose_es_to_ni; numbers go through digit_to_ni
	    unless they sit in a "number : number" pattern; anything else becomes
	    [SIN-LEX:token].
	    """
	    # v99: the text is processed as a single block. The user's line breaks are
	    # replaced by a transparent marker (NEWLINE_TOK) that travels through the
	    # pipeline; at the end it is converted back into a real line break.
	    text = (text or "").replace("\r\n", "\n").replace("\r", "\n")
	    text = text.replace("\n", f" {NEWLINE_TOK} ")

	    toks = simple_tokenize(text)
	    toks = expand_enclitics(toks)

	    # Previous-word triggers that select the nominal reading of the current
	    # word; as a NEXT word they select the verbal reading (see _choose_es_to_ni).
	    _NOUN_CTX = frozenset({'el','la','los','las','un','una','unos','unas','al','del',
	        'de','en','con','por','para','a','sin','sobre','entre',
	        'hacia','hasta','desde','contra','según','ante','bajo','tras',
	        'mi','tu','su','mis','tus','sus','nuestro','nuestra',
	        'nuestros','nuestras','vuestro','vuestra','vuestros','vuestras',
	        'este','esta','estos','estas','ese','esa','esos','esas',
	        'aquel','aquella','aquellos','aquellas','cada','otro','otra',
	        'mucho','mucha','muchos','muchas','poco','poca','pocos','pocas',
	        'todo','toda','todos','todas','algún','alguna','ningún','ninguna',
	        'buen','mal','gran','primer','tercer','qué','cuánto','cuánta'})
	    # Previous-word triggers that select the verbal reading.
	    _VERB_CTX = frozenset({'yo','tú','él','ella','nosotros','nosotras','vosotros','vosotras',
	        'ellos','ellas','usted','ustedes',
	        'se','me','te','nos','os','le','les','lo',
	        'no','ya','también','tampoco','nunca','siempre','aún','todavía',
	        'que','quien','quienes','donde','cuando','como','si'})
	    _INFINITIVE_ENDINGS = ('ar','er','ir')
	    # Previous words after which an infinitive / verbal reading is tried.
	    _INFINITIVE_CTX = frozenset({'de','sin','para','por','al','antes','tras','hasta'})
	    # Forms that take their verbal entry whenever one exists.
	    _VERB_ALWAYS = frozenset({'son','es','ha','he','era','fue','fui','van',
	        'dan','das','den','des','hay','doy','soy','voy',
	        'iba','di','haya'})

	    # Vocabulary used to recognize the temporal use of "hace".
	    _TIME_WORDS = frozenset({'año','años','día','días','mes','meses',
	        'semana','semanas','hora','horas',
	        'minuto','minutos','segundo','segundos',
	        'tiempo','rato','siglo','siglos',
	        'década','décadas','momento','momentos',
	        'instante','instantes','jornada','jornadas',
	        'noche','noches','tarde','tardes','mañana','mañanas',
	        'milenio','milenios'})
	    _TIME_QUANTIFIERS = frozenset({'mucho','muchos','mucha','muchas',
	        'poco','pocos','poca','pocas',
	        'tanto','tantos','tanta','tantas',
	        'algunos','algunas','varios','varias',
	        'demasiado','demasiados','demasiada','demasiadas',
	        'un','una','unos','unas',
	        'dos','tres','cuatro','cinco','seis','siete',
	        'ocho','nueve','diez','once','doce','trece',
	        'catorce','quince','veinte','treinta','cuarenta',
	        'cincuenta','cien','mil','medio','media'})

	    # Subject cues used by _has_explicit_3s_subject.
	    _FIRST_PERSON_SUBJECTS = frozenset({'yo'})
	    _THIRD_SG_PRON_SUBJECTS = frozenset({'él','ella','ello','usted','esto','eso','aquello'})
	    _SINGULAR_NP_DETS = frozenset({'el','la','un','una','este','esta','ese','esa','aquel','aquella',
	        'mi','tu','su','nuestro','nuestra','vuestro','vuestra'})

	    _PREP_CTX = frozenset({'a','ante','con','contra','de','desde','en','entre',
	        'hacia','hasta','para','por','según','sin','sobre','tras'})
	    # Unaccented form -> accented counterpart tried by _accented_lookup.
	    _TILDE_MAP = {'mi': 'mí', 'el': 'él', 'si': 'sí', 'tu': 'tú'}
	    _PHRASE_BREAK = frozenset({'y','o','e','u','ni','que','pero','sino','como',
	        'porque','cuando','donde','aunque','pues'})

	    def _accented_lookup(key):
	        # NI surface of the accented counterpart of key, or None when key has
	        # no counterpart in _TILDE_MAP or the counterpart is not in ES2NI.
	        if key in _TILDE_MAP and _TILDE_MAP[key] in ES2NI:
	            return ES2NI[_TILDE_MAP[key]][0]
	        return None

	    def _resolve_forms(raw_key:str):
	        # Returns (lexicon key, nominal NI, verbal NI). The exact lowercase key
	        # is tried first, then the accent-folded key through ES_FOLD. When
	        # neither is found both NI values are None.
	        key = lower(raw_key)
	        if key in ES2NI:
	            return key, ES2NI[key][0], ES2NI_VERB.get(key, (None, None))[0]
	        fkey = fold(key)
	        if fkey in ES_FOLD:
	            actual = ES_FOLD[fkey]
	            return actual, ES2NI[actual][0], ES2NI_VERB.get(actual, (None, None))[0]
	        return key, None, None

	    def _choose_es_to_ni(current_tok:str, prev_key:str, next_key:str, next_next_key:str, sent_start:bool, prev_prev_key:str=""):
	        # Pick the NI surface for current_tok using its neighbours. Rules are
	        # tried in order and the first that fires returns. Returns None when
	        # the word has neither a nominal nor a verbal entry.
	        actual_key, ni_nom, ni_verb = _resolve_forms(current_tok)
	        if ni_nom is None and ni_verb is None:
	            return None

	        key = lower(actual_key)

	        # Temporal "hace" ("hace dos años"): translated with the entry of
	        # 'atrás' when that entry exists.
	        if key == 'hace':
	            is_time_context = False
	            if next_key in _TIME_WORDS:
	                is_time_context = True
	            elif (next_key in _TIME_QUANTIFIERS or is_number(next_key)) and next_next_key in _TIME_WORDS:
	                is_time_context = True
	            if is_time_context:
	                if 'atrás' in ES2NI:
	                    return ES2NI['atrás'][0]

	        # Words ending in -ar/-er/-ir: take the main lexicon entry directly.
	        if key.endswith(_INFINITIVE_ENDINGS):
	            if key in ES2NI:
	                return ES2NI[key][0]
	            fkey = fold(key)
	            if fkey in ES_FOLD:
	                return ES2NI[ES_FOLD[fkey]][0]

	        if key in _VERB_ALWAYS and ni_verb:
	            return ni_verb

	        if key in _TILDE_MAP:
	            accented_ni = _accented_lookup(key)
	            if accented_ni:
	                # Case 1 (pre-existing): preposition + tilde pair + (end /
	                # conjunction / punctuation). Covers "para mí", "con él",
	                # "para sí (mismo)", etc.
	                if prev_key in _PREP_CTX:
	                    if not next_key or next_key in VISIBLE_PUNCT or next_key in _PHRASE_BREAK:
	                        return accented_ni
	                # Case 2 (v101): "tu"/"el" + verb -> stressed subject pronoun.
	                # Covers the frequent omission of the accent on the subject
	                # pronoun:
	                #   "tu vienes mañana" -> "tú vienes mañana"
	                #   "el dijo que sí"   -> "él dijo que sí"
	                # Not applied to "mi" (cannot be a subject) nor to "si" (a
	                # conditional conjunction before a verb, not an affirmation).
	                #
	                # v103 false-positive guard: if the plural of the next word
	                # exists as a noun (POS=N) the word is noun/verb ambiguous and
	                # the stressed pronoun must NOT be forced.
	                # Covers: "el destino" (destinos=N), "el dedo" (dedos=N),
	                # "el marco" (marcos=N), "el voto" (votos=N), etc.
	                if key in ('tu', 'el') and next_key:
	                    next_pos = ES2NI_POS.get(next_key, '')
	                    if next_pos == 'V':
	                        next_pl = _pluralize_es_form(next_key)
	                        if ES2NI_POS.get(next_pl, '') != 'N':
	                            return accented_ni
	                # Case 3 (v101): sentence-initial "si" + verb + exclamation
	                # mark = emphatic affirmation ("¡Sí, vengo!"). Deliberately not
	                # handled: the typical "si vienes, dímelo" must remain a
	                # conditional conjunction. (Already covered by case 1 when a
	                # preposition precedes; otherwise it is left to the user.)

	        if prev_key in _NOUN_CTX and ni_nom:
	            return ni_nom

	        if prev_key in _VERB_CTX and ni_verb:
	            return ni_verb

	        # v111: det + N (noun) + homograph -> V (subject + verb pattern).
	        # Only fires for words that have BOTH a nominal and a verbal entry.
	        if ni_verb and ni_nom and prev_prev_key in _NOUN_CTX:
	            if ES2NI_POS.get(prev_key, "") == "N":
	                return ni_verb

	        if prev_key in _INFINITIVE_CTX:
	            inf_key = lower(current_tok)
	            if inf_key.endswith(_INFINITIVE_ENDINGS):
	                if inf_key in ES2NI:
	                    return ES2NI[inf_key][0]
	                ff = fold(inf_key)
	                if ff in ES_FOLD:
	                    return ES2NI[ES_FOLD[ff]][0]
	            if ni_verb:
	                return ni_verb

	        if sent_start:
	            if key.endswith(_INFINITIVE_ENDINGS):
	                if key in ES2NI:
	                    return ES2NI[key][0]
	                ff = fold(key)
	                if ff in ES_FOLD:
	                    return ES2NI[ES_FOLD[ff]][0]
	            if ni_verb and not ni_nom:
	                return ni_verb

	        # A following determiner/preposition favours the verbal reading,
	        # unless the word's main entry is tagged ADJ.
	        if ni_verb and next_key in _NOUN_CTX:
	            pos_nom = ES2NI_POS.get(key, "")
	            if pos_nom != "ADJ":
	                return ni_verb

	        # Default: nominal entry first, verbal entry second.
	        if ni_nom is not None:
	            return ni_nom
	        if ni_verb is not None:
	            return ni_verb
	        return None

	    def _has_explicit_3s_subject(left_context):
	        # v98: for forms ambiguous between 1S and 3S (subjunctives, imperfect,
	        # conditional...), assume 3S by default when there is no explicit "yo".
	        # 1S must be marked with its pronoun ("yo sea breve"); without it,
	        # "Sea breve" / "que pague" are read as 3S.
	        ctx = [lower(x) for x in (left_context or []) if x]
	        tail = ctx[-5:] if ctx else []

	        # Firm 1S trigger: explicit "yo" within the last five context tokens.
	        if any(tok in _FIRST_PERSON_SUBJECTS for tok in tail):
	            return False

	        # Firm 3S triggers: explicit pronoun or a determined noun phrase.
	        if tail and tail[-1] in _THIRD_SG_PRON_SUBJECTS:
	            return True

	        for j in range(len(tail)-2, -1, -1):
	            if tail[j] in _SINGULAR_NP_DETS:
	                if j > 0 and tail[j-1] in _PREP_CTX:
	                    return False # prepositional (locative) phrase, not a subject
	                if j < len(tail)-1:
	                    return True

	        # No explicit "yo" -> 3S by default.
	        return True

	    def _adjust_ipfv_ambiguous_person(ni, left_context):
	        # When ni ends in one of the ambiguous suffixes and the left context
	        # points to a 3S subject, drop the last two characters (the "-n").
	        # Non-string or empty values are returned unchanged.
	        if not (ni and isinstance(ni, str)):
	            return ni
	        ambiguous_suffixes = ("-ska-n", "-tei-n", "-na-n", "-nabo-n")
	        if any(ni.endswith(suf) for suf in ambiguous_suffixes):
	            if _has_explicit_3s_subject(left_context):
	                return ni[:-2]
	        return ni

	    # out: NI surface tokens; ib_toks: parallel tokens for the Iberian-script
	    # line; prev_key: previous word (lowercase, "" after punctuation or a
	    # placeholder); left_context: tokens seen since the last sentence/clause break.
	    out=[]; ib_toks=[]
	    i=0; prev_key=""
	    sent_start = True
	    left_context=[]
	    while i < len(toks):
	        t = toks[i]
	        if t in VISIBLE_PUNCT:
	            out.append(t); ib_toks.append(t); prev_key=""; i+=1
	            if t in SENT_END:
	                sent_start = True
	                left_context=[]
	            elif t == NEWLINE_TOK:
	                # v103: a line break restarts the sentence (prevents "Te"
	                # after "Marco:\n" from being treated as a proper noun).
	                sent_start = True
	                left_context=[]
	            elif t in CLAUSE_BREAKS:
	                left_context=[]
	            continue
	        if is_placeholder(t):
	            out.append(t); ib_toks.append(t); prev_key=""; i+=1
	            sent_start = False
	            left_context.append(t)
	            continue

	        # v94 - No special marks: every token goes through the normal flow.

	        # Multi-token phrases take priority over single-word lookup.
	        span, ni_surface = _longest_match(toks, i, ESPHRASE2NI)
	        if span > 1:
	            ni_surface = _adjust_ipfv_ambiguous_person(ni_surface, left_context)
	            out.append(ni_surface)
	            ib_toks.append(georgeos_keys(tokens_from_latin(ni_surface), ni_surface))
	            prev_key = lower(toks[i+span-1]) if i+span-1 < len(toks) else ""
	            for k_idx in range(i, i+span):
	                left_context.append(toks[k_idx])
	            i += span
	            sent_start = False
	            continue

	        # Look ahead for the next two words, skipping punctuation but never
	        # crossing a sentence end.
	        next_key = ""
	        next_next_key = ""
	        j = i + 1
	        while j < len(toks):
	            if toks[j] in VISIBLE_PUNCT:
	                if toks[j] in SENT_END:
	                    break
	                j += 1
	                continue
	            next_key = lower(toks[j])
	            break
	        if next_key:
	            k = j + 1
	            while k < len(toks):
	                if toks[k] in VISIBLE_PUNCT:
	                    if toks[k] in SENT_END:
	                        break
	                    k += 1
	                    continue
	                next_next_key = lower(toks[k])
	                break

	        key = lower(t)
	        # v104: detect an UNATTESTED PROPER NOUN / PLACE NAME.
	        # If the capitalized word is NOT in the lexicon, emit it AS IS (no
	        # brackets): it may be a proper noun (Ana, Marco) or an unattested
	        # place name (Madrid, Valencia). The lexicon only holds attested
	        # place names, so a word that is in the lexicon gets translated.
	        key_in_lex = key in ES2NI or fold(key) in ES_FOLD
	        is_proper_noun = (
	            len(t) >= 2
	            and t[0].isupper()
	            and not t.isupper()
	            and t.isalpha()
	            and not key_in_lex
	        )
	        if is_proper_noun:
	            out.append(t); ib_toks.append(t)
	            prev_key = key
	            left_context.append(t)
	            i += 1
	            sent_start = False
	            continue

	        # left_context does not yet contain t, so [-2] is the word before prev.
	        prev_prev_key = lower(left_context[-2]) if len(left_context) >= 2 else ""
	        ni = _choose_es_to_ni(t, prev_key, next_key, next_next_key, sent_start, prev_prev_key)
	        ni = _adjust_ipfv_ambiguous_person(ni, left_context)

	        if ni is not None:
	            out.append(ni)
	            ib_toks.append(georgeos_keys(tokens_from_latin(ni), ni))
	        elif is_number(key):
	            # v103: if the number is part of a time pattern (\d:\d), keep the
	            # digits as they are so the format survives the round trip.
	            # Detection: the number is adjacent to ":" between two numbers.
	            is_hour_context = False
	            if i+2 < len(toks) and toks[i+1] == ':' and is_number(toks[i+2]):
	                is_hour_context = True
	            elif i >= 2 and toks[i-1] == ':' and is_number(toks[i-2]):
	                is_hour_context = True
	            if is_hour_context:
	                out.append(key); ib_toks.append(key)
	            else:
	                ni_num = digit_to_ni(key)
	                out.append(ni_num); ib_toks.append(georgeos_keys(tokens_from_latin(ni_num), ni_num))
	        else:
	            # Unknown word: visible placeholder.
	            ph = f"[SIN-LEX:{t}]"
	            out.append(ph); ib_toks.append(ph)

	        prev_key = key
	        left_context.append(t)
	        i += 1
	        sent_start = False

	    if MODAL_SUFFIX_ENABLE:
	        out = add_modal_suffixes_es2ni(out)
	        # Rebuild the Iberian-script tokens from the updated output; punctuation
	        # and bracketed placeholders are kept verbatim.
	        ib_toks = []
	        for tt in out:
	            if tt in VISIBLE_PUNCT or tt.startswith("["):
	                ib_toks.append(tt)
	            else:
	                ib_toks.append(georgeos_keys(tokens_from_latin(tt), tt))

	    ni_text = detokenize(out)
	    # v99: turn the transparent marker back into real line breaks.
	    ni_text = re.sub(rf"\s{re.escape(NEWLINE_TOK)}\s", "\n", ni_text)
	    ib_html = "<div class='ib-line'>" + escape(render_ib_with_tridots(ib_toks)) + "</div>"
	    ib_html = ib_html.replace(NEWLINE_TOK, "\n")
	    return ni_text, ib_html


	# ====== Apócope automática (v90+v91) ======
	# Apocope table: (full form, shortened form, gender tag). Tag 'M' requires the
	# following word to pass _is_masc_sg_noun_candidate; tag 'X' requires it to
	# pass _is_singular_noun_candidate.
	_APOCOPE_RULES = [
	    ('ninguno', 'ningún', 'M'),
	    ('Ninguno', 'Ningún', 'M'),
	    ('alguno', 'algún', 'M'),
	    ('Alguno', 'Algún', 'M'),
	    ('bueno', 'buen', 'M'),
	    ('Bueno', 'Buen', 'M'),
	    ('malo', 'mal', 'M'),
	    ('Malo', 'Mal', 'M'),
	    ('primero', 'primer', 'M'),
	    ('Primero', 'Primer', 'M'),
	    ('tercero', 'tercer', 'M'),
	    ('Tercero', 'Tercer', 'M'),
	    ('grande', 'gran', 'X'),
	    ('Grande', 'Gran', 'X'),
	]

	# v117: words that are NEVER accepted as the following noun/adjective for
	# apocope, even if the lexicon happens to tag them as N: conjunctions,
	# prepositions, adverbs, determiners, pronouns, common verb forms, etc.
	_APOCOPE_BLACKLIST = {
	    "porque","que","aunque","sino","si","como","cuando","donde","mientras","pero",
	    "y","o","u","ni","ya","no","sí","muy","más","mas","menos","tan","tanto","tanta",
	    "todo","toda","todos","todas","nada","algo","alguno","alguna","algunos","algunas",
	    "este","esta","estos","estas","ese","esa","esos","esas","aquel","aquella",
	    "mi","tu","su","mis","tus","sus","nuestro","nuestra","nuestros","nuestras",
	    "vuestro","vuestra","vuestros","vuestras","de","del","en","a","al","por","para",
	    "con","sin","sobre","bajo","tras","entre","hacia","hasta","desde","durante",
	    "según","contra","mediante",
	    "lo","la","le","los","las","les","me","te","se","nos","os",
	    "fue","es","son","era","eran","fueron","será","serán","ha","han","había",
	    "habían","habrá","habrán",
	}

	def _is_masc_sg_noun_candidate(word):
	    """Return True if word may be a masculine singular noun/adjective.

	    Same filter as _is_singular_noun_candidate, additionally rejecting words
	    that end in "a" or in one of the listed suffixes (dad, tad, ción, sión,
	    tud, umbre, eza).
	    """
	    if not word:
	        return False
	    wl = word.lower()
	    # Morphological filter first; everything else is shared with the
	    # gender-neutral check.
	    if wl.endswith('a'):
	        return False
	    if wl.endswith(('dad', 'tad', 'ción', 'sión', 'tud', 'umbre', 'eza')):
	        return False
	    return _is_singular_noun_candidate(word)

	def _is_singular_noun_candidate(word):
	    """Return True if word may be a singular noun/adjective.

	    Rejects: empty words, words not starting with a lowercase letter,
	    blacklisted grammatical words (v117), words longer than three letters
	    ending in "s", and words whose ES2NI_POS tag is set to something other
	    than "N" or "ADJ". Words without a POS tag are accepted.
	    """
	    if not word or not word[0].isalpha():
	        return False
	    if not word[0].islower():
	        return False
	    wl = word.lower()
	    if wl in _APOCOPE_BLACKLIST:
	        return False
	    if len(wl) > 3 and wl.endswith('s'):
	        return False
	    pos = ES2NI_POS.get(wl, "")
	    if pos and pos not in ("N", "ADJ"):
	        return False
	    return True

	def _apocope_neighbour_index(tokens, start, step):
	    """Index of the first non-blank token from start moving by step, or None."""
	    j = start
	    while 0 <= j < len(tokens):
	        if tokens[j].strip():
	            return j
	        j += step
	    return None

	def apply_apocope_es(text):
	    """Shorten apocopating adjectives/determiners placed before a noun.

	    Example: "grande coche" -> "gran coche". A word listed in _APOCOPE_RULES
	    is shortened only when all of these hold:
	      * it carries no punctuation glued to it (v118: "grande," stays, the
	        adjective is set off by a pause);
	      * the previous word is not más/mas/menos/tan/muy;
	      * the next non-blank token starts with a letter and passes the
	        candidate check selected by the rule's gender tag.
	    Whitespace is preserved exactly. Falsy input is returned unchanged.
	    """
	    if not text:
	        return text
	    # Fix: the alternation must be a bare "|". With the pipe escaped the
	    # pattern only matched text containing a literal "|", so ordinary input
	    # produced no tokens and the function returned an empty string.
	    tokens = re.findall(r"\S+|\s+", text)
	    trailing_punct = '.,;:!?"\''

	    # First matching rule wins, as when scanning the table in order.
	    rules = {}
	    for plena, apocopada, genero in _APOCOPE_RULES:
	        rules.setdefault(plena, (apocopada, genero))

	    # The last two tokens cannot be followed by "<blank> <word>".
	    for i in range(len(tokens) - 2):
	        # Exact lookup: a token with glued punctuation is not a key, which
	        # implements the v118 rule.
	        rule = rules.get(tokens[i])
	        if rule is None:
	            continue

	        prev_idx = _apocope_neighbour_index(tokens, i - 1, -1)
	        if prev_idx is not None and tokens[prev_idx][0].isalpha():
	            prev_word = tokens[prev_idx].lower().rstrip(trailing_punct)
	            # Comparative/intensified adjective ("más grande"): never shortened.
	            if prev_word in ('más', 'mas', 'menos', 'tan', 'muy'):
	                continue

	        next_idx = _apocope_neighbour_index(tokens, i + 1, 1)
	        if next_idx is None or not tokens[next_idx][0].isalpha():
	            continue
	        next_word_clean = tokens[next_idx].rstrip(trailing_punct)
	        if not next_word_clean:
	            continue

	        apocopada, genero = rule
	        if genero == 'M':
	            accepted = _is_masc_sg_noun_candidate(next_word_clean)
	        elif genero == 'X':
	            accepted = _is_singular_noun_candidate(next_word_clean)
	        else:
	            accepted = False
	        if accepted:
	            tokens[i] = apocopada

	    return ''.join(tokens)


	def translate_ni_to_es_bi(text:str):
	# v99: el texto se procesa como un único bloque. Los saltos de línea del
	# usuario se reemplazan por un marcador transparente (NEWLINE_TOK) que
	# cruza el pipeline sin disparar ningún cierre de oración, modalidad ni
	# capitalización; al final se reconvierten en \n.
	text = (text or "").replace("\r\n", "\n").replace("\r", "\n")
	text = text.replace("\n", f" {NEWLINE_TOK} ")

	toks = simple_tokenize(text)

	if MODAL_SUFFIX_ENABLE:
	toks = strip_modal_suffixes_ni(toks)

	def _is_doge_hace_context(idx, tokens):
	    """Return True if tokens[idx] is 'doge' followed by a time expression.

	    The one or two tokens after idx are looked up in NI2ES; the result is
	    True as soon as one of them translates to a Spanish time word (año,
	    día, hora, ...). Returns False when idx is out of range, the token is
	    not 'doge', or nothing follows it.
	    """
	    total = len(tokens)
	    if idx >= total or lower(tokens[idx]) != 'doge' or idx + 1 >= total:
	        return False
	    es_time_words = {'año','años','día','días','mes','meses','semana','semanas',
	        'hora','horas','minuto','minutos','segundo','segundos',
	        'momento','momentos','instante','instantes',
	        'rato','ratos','tiempo','siglo','siglos',
	        'década','décadas','milenio','milenios',
	        'jornada','jornadas','noche','noches',
	        'tarde','tardes','mañana','mañanas'}

	    for probe in range(idx + 1, idx + 3):
	        if probe >= total:
	            break
	        # Spanish gloss of the NI token ("" when unknown or empty).
	        gloss = (NI2ES.get(lower(tokens[probe]), (None,))[0] or "").lower()
	        if gloss in es_time_words:
	            return True
	    return False

	def _is_at_sentence_start(idx, tokens):
	    """Return True if position idx opens a sentence.

	    That is the case for idx == 0, or when the preceding token belongs to
	    SENT_END or is one of ".", "!" or "?".
	    """
	    if idx == 0:
	        return True
	    before = tokens[idx-1]
	    return before in SENT_END or before in (".", "!", "?")

	out=[]
	i=0
	while i < len(toks):
	t = toks[i]
	if t in VISIBLE_PUNCT:
	out.append(t); i+=1; continue
	if is_placeholder(t):
	# v103: si es placeholder de nombre propio (forma [Nombre] sin
	# ":" después de "["), devolver el nombre sin corchetes.
	# Los placeholders [SIN-LEX:...], [?:...], [AMB-NI:...] se
	# mantienen literales para que el usuario los vea.
	inner = t[1:-1]
	if ':' not in inner:
	out.append(inner)
	else:
	out.append(t)
	i += 1
	continue
	span, es_surface = _longest_match(toks, i, NIPHRASE2ES)
	if span > 1:
	out.append(es_surface); i += span; continue

	key = lower(t)
	fkey = fold(key)

	if key == 'doge' and _is_doge_hace_context(i, toks):
	if _is_at_sentence_start(i, toks):
	out.append('Hace')
	else:
	out.append('hace')
	i += 1
	continue

	if key == 'galbi-ke' and i+1 < len(toks):
	nxt = lower(toks[i+1])
	is_part = nxt.endswith('-ir') or '-ir-' in nxt
	if is_part:
	out.append('ha')
	i += 1
	continue

	if key in NI2ES:
	es = NI2ES[key][0] or ""
	out.append(es if es else t) # v141: sin corchete para no-atestiguados
	elif fkey in NI_FOLD:
	es = NI2ES[NI_FOLD[fkey]][0] or ""
	out.append(es if es else t) # v141: sin corchete para no-atestiguados
	elif key in AMBIG_NI or fkey in AMBIG_NI and STRICT_BI_ENFORCE:
	out.append(f"[AMB-NI:{t}]")
	elif is_number(key):
	out.append(t)
	else:
	out.append(t) # v141: sin corchete; coherente con regla v104
	i += 1

	if MODAL_SUFFIX_ENABLE:
	out = add_inverted_openers(out)
	out = apply_interrogative_tildes(out)

	es_text = detokenize(out)
	es_text = postprocess_spanish(es_text)
	es_text = apply_apocope_es(es_text)
	es_text = fuse_enclitics_es(es_text)
	# v126 FIX #5: añadir tilde a gerundios con clítico fusionado. Cuando la
	# forma se ensambla pegada (sin espacio) como "abriendose paso", el
	# gerundio queda sin tilde diacrítica. La fusión correcta es "abriéndose".
	# Detecta cualquier <stem>+ando\|iendo\|yendo seguido inmediatamente de
	# me/te/se/nos/os/le/les/lo/la/los/las y pone la tilde sobre la vocal
	# del sufijo gerundivo.
	_GER_TILDE = {"ando":"ándo", "iendo":"iéndo", "yendo":"yéndo"}
	def _add_ger_tilde(m):
	stem = m.group(1)
	ger = m.group(2)
	clitic = m.group(3)
	return stem + _GER_TILDE[ger] + clitic
	es_text = re.sub(
	r"\b([a-záéíóúñü]*?)(ando\|iendo\|yendo)(me\|te\|se\|nos\|os\|le\|les\|lo\|la\|los\|las)\b",
	_add_ger_tilde, es_text, flags=re.IGNORECASE
	)
	# v126 FIX #6: reordenar signo de pregunta/exclamación + comilla cerrante.
	# El motor coloca `?` justo tras la palabra modal (`mirgaŕ-na»` → `tarde ?
	# »`), pero la convención castellana es que el signo de cierre vaya FUERA
	# de las comillas cuando el material citado es el contenido completo de la
	# pregunta. Detectar "? »" / "! »" y reordenar a "»?" / "»!".
	es_text = re.sub(r"([?!])\s*»", r"»\1", es_text)
	# v127 FIX #A: eliminar cualquier espacio espurio antes de comilla cerrante.
	# El detokenizer trata `»` como token separado y deja un espacio antes
	# ("bien. »", "hielo. »"). En español la comilla cerrante NUNCA va precedida
	# de espacio: «texto.» no «texto. ». Aplicar tras el reordenamiento del fix
	# #6 para no interferir con `?»`/`!»` (esos casos no tienen espacio).
	es_text = re.sub(r"\s+»", "»", es_text)
	# v99: reconvertir el marcador transparente en saltos de línea reales
	es_text = re.sub(rf"\s{re.escape(NEWLINE_TOK)}\s", "\n", es_text)
	# v122: limpiar punto duplicado al final del texto. El centinela `.` final
	# se añade siempre y luego se elimina si los dos últimos tokens son `.`.
	# Pero si quedan separados por whitespace o newline tras el procesado, esa
	# limpieza falla. Este regex final caza el caso.
	es_text = re.sub(r"\.\s\.\s$", ".", es_text)
	return es_text

	# ====== Diagnóstico ======
	def _diagnose_scan(toks, phrase_map, fwd, fwd_fold, rev, ambiguous=()):
	    """Classify every lexical token of toks for one translation direction.

	    Returns (total, covered, unknown, asym, amb): the number of tokens
	    examined, how many were covered by the lexicon, and the sets of unknown
	    tokens, round-trip asymmetries and ambiguous tokens.
	    """
	    unknown = set()
	    asym = set()
	    amb = set()
	    total = 0
	    covered = 0
	    i = 0
	    while i < len(toks):
	        t = toks[i]
	        if t in VISIBLE_PUNCT or is_number(t):
	            i += 1
	            continue
	        total += 1
	        span, _ = _longest_match(toks, i, phrase_map)
	        if span > 1:
	            # A multi-token phrase counts as one covered unit.
	            covered += 1
	            i += span
	            continue
	        i += 1
	        k = lower(t)
	        fk = fold(k)
	        if k in ambiguous or fk in ambiguous:
	            amb.add(t)
	            continue
	        if k not in fwd and fk not in fwd_fold:
	            unknown.add(t)
	            continue
	        if k not in fwd:
	            k = fwd_fold.get(fk, k)
	        covered += 1
	        target = fwd[k][0]
	        back = rev.get(lower(target))
	        # Asymmetry: the reverse lexicon maps the target to another source.
	        if back and lower(back[0]) != k:
	            asym.add(f"{t} → {target} → {back[0]}")
	    return total, covered, unknown, asym, amb


	def diagnose_text(text, dir_label):
	    """Build an HTML coverage report for text in the given direction.

	    Args:
	        text: Text to inspect; empty or blank input yields a hint message.
	        dir_label: Direction label; one starting with "ES" selects ES→NI,
	            anything else NI→ES.

	    Returns:
	        An HTML string with token coverage, ambiguous NI entries (NI→ES
	        only), tokens missing from the lexicon, and round-trip asymmetries.
	    """
	    if not text or not text.strip():
	        return "<em>Introduce texto para diagnosticar.</em>"

	    toks = simple_tokenize(text)
	    if dir_label.startswith("ES"):
	        toks = expand_enclitics(toks)
	        head = "ES→NI"
	        total_tokens, covered, unknown, asym, amb = _diagnose_scan(
	            toks, ESPHRASE2NI, ES2NI, ES_FOLD, NI2ES)
	    else:
	        head = "NI→ES"
	        total_tokens, covered, unknown, asym, amb = _diagnose_scan(
	            toks, NIPHRASE2ES, NI2ES, NI_FOLD, ES2NI, AMBIG_NI)

	    cov_pct = (covered/total_tokens*100) if total_tokens else 100.0
	    # Plain "|" separator: "\|" is an invalid escape in a non-raw string
	    # and printed a stray backslash in the report.
	    cov_html = f"<div><b>Tokens (sin puntuación/numéricos):</b> {total_tokens}  |  <b>Cubiertos:</b> {covered} ({cov_pct:.1f}%)</div>"

	    empty_item = "<li><i>—</i></li>"
	    unk_html = "".join(f"<li><code>{escape(u)}</code></li>" for u in sorted(unknown, key=lower)) or empty_item
	    amb_html = "".join(f"<li><code>{escape(a)}</code></li>" for a in sorted(amb, key=lower)) or empty_item
	    asy_html = "".join(f"<li><code>{escape(a)}</code></li>" for a in sorted(asym)) or empty_item

	    return f"<b>Diagnóstico {head}</b>{cov_html}<b>Ambiguas (NI duplicada):</b><ul>{amb_html}</ul><b>Faltantes:</b><ul>{unk_html}</ul><b>Asimetrías:</b><ul>{asy_html}</ul>"

	# ====== UI (CSS / acordeones / fuentes) ======
	# UI strings per interface language ("ES" / "EN"). Both entries expose the
	# same keys in the same order; "acc_titles" holds the accordion headings.
	LABELS = {
	    "ES": dict(
	        title="Traductor Español ↔ Neoíbero",
	        subtitle="CSV estricto (BI-only 1:1; desambiguación ligera ES→NI; .gz) — determinista",
	        in_label_es="✏️ Entrada (Español)",
	        in_label_ni="✏️ Entrada (Neoíbero)",
	        in_ph_es="Escribe aquí. Ej.: Veo a Ana y doy pan a Marta.",
	        in_ph_ni="Idatzi hemen. Adib.: nuker-ke ni etxe-ka.",
	        out_lat_esni="📜 Salida: Neoíbero (latín)",
	        out_lat_nies="📜 Salida: Español",
	        out_ib="🗿 Línea ibérica",
	        out_audio="🔊 Locución (Audio)",
	        btn="🔄 Traducir",
	        combo="🌍 Idioma (UI + explicación)",
	        dir="🔁 Dirección",
	        dir_opts=["ES → NI", "NI → ES"],
	        doc_header="📚 Documentación y Referencia",
	        acc_titles=[
	            "🌍 ¿Qué es el neoíbero?",
	            "🔤 Fonología y escritura",
	            "📐 Sistema nominal: género, número y caso",
	            "🔄 Sistema verbal: TAM, persona y clíticos",
	            "🌿 Derivación y familias de palabras",
	            "🔢 Sistema numérico vigesimal",
	            "📝 Sintaxis básica y partículas",
	            "❓ Modalidad vascoide (-na / -ba)",
	            "⚙️ Pipeline del traductor (1:1 estricto)",
	            "📚 Bibliografía y créditos",
	            "🧾 Glosario técnico",
	        ],
	    ),
	    "EN": dict(
	        title="Spanish ↔ Neo-Iberian Translator",
	        subtitle="Strict BI-only (1:1 surfaces; light ES→NI disambiguation; .gz) — deterministic",
	        in_label_es="✏️ Input (Spanish)",
	        in_label_ni="✏️ Input (Neo-Iberian)",
	        in_ph_es="Type here. E.g., Veo a Ana y doy pan a Marta.",
	        in_ph_ni="Type here. E.g., nuker-ke ni etxe-ka.",
	        out_lat_esni="📜 Output: Neo-Iberian (Latin)",
	        out_lat_nies="📜 Output: Spanish",
	        out_ib="🗿 Iberian line",
	        out_audio="🔊 Speech (Audio)",
	        btn="🔄 Translate",
	        combo="🌍 Language (UI + docs)",
	        dir="🔁 Direction",
	        dir_opts=["ES → NI", "NI → ES"],
	        doc_header="📚 Documentation & Reference",
	        acc_titles=[
	            "🌍 What is Neo-Iberian?",
	            "🔤 Phonology and writing",
	            "📐 Nominal system: gender, number & case",
	            "🔄 Verbal system: TAM, person & clitics",
	            "🌿 Derivation and word families",
	            "🔢 Vigesimal number system",
	            "📝 Basic syntax and particles",
	            "❓ Vascoid modality (-na / -ba)",
	            "⚙️ Translator pipeline (strict 1:1)",
	            "📚 Bibliography and credits",
	            "🧾 Technical glossary",
	        ],
	    ),
	}

	# ====== CSS + fuente ======
	def build_css():
	    """Return the stylesheet for the app as one string.

	    If "Iberia-Georgeos.ttf" exists in the working directory and is not
	    empty, it is embedded as a base64 data URL in the @font-face rule;
	    otherwise the rule falls back to local('sans-serif').
	    """
	    font_path = "Iberia-Georgeos.ttf"
	    font_src = "local('sans-serif')"
	    if os.path.exists(font_path):
	        with open(font_path, "rb") as font_file:
	            encoded = base64.b64encode(font_file.read()).decode("ascii")
	        if encoded:
	            font_src = "url(data:font/ttf;base64," + encoded + ") format('truetype')"
	    # Plain CSS template; the single placeholder is filled in at the end.
	    return """
	@font-face {
	font-family: 'IberiaGeorgeos';
	src: @@FONT_SRC@@;
	font-weight: normal; font-style: normal;
	}
	:root {
	--iberian-clay:#8B4513; --iberian-ochre:#CC7722; --iberian-stone:#5C5C5C;
	--iberian-sand:#D2B48C; --iberian-rust:#A0522D; --iberian-bronze:#CD7F32;
	}
	.gradio-container { background:transparent!important;
	font-family:'Georgia','Times New Roman',serif!important; }
	html, body { background: transparent !important; }
	.gradio-container h1, .gradio-container h2, .gradio-container h3 {
	color:var(--iberian-clay)!important; text-shadow:2px 2px 4px rgba(139,69,19,.15)!important;
	border-bottom:3px solid var(--iberian-bronze)!important; padding-bottom:.5rem!important; letter-spacing:.5px!important;
	}
	.gradio-container .gr-group { background:linear-gradient(to bottom,#f9f6f0,#ede6dc)!important;
	border:2px solid var(--iberian-sand)!important; border-radius:8px!important; box-shadow:0 4px 12px rgba(139,69,19,.2), inset 0 1px 0 rgba(255,255,255,.5)!important;
	padding:1.5rem!important; margin-bottom:0.2rem!important; }
	.gradio-container .gr-accordion { background:linear-gradient(145deg,#ebe3d5,#d9cec0)!important;
	border:2px solid var(--iberian-rust)!important; border-radius:6px!important; margin-bottom:.8rem!important; box-shadow:2px 2px 6px rgba(0,0,0,.15)!important; }
	.gradio-container .gr-accordion .label-wrap { background:linear-gradient(to right,var(--iberian-ochre),var(--iberian-rust))!important;
	color:#fff!important; font-weight:600!important; padding:.8rem 1rem!important; border-radius:4px!important; text-shadow:1px 1px 2px rgba(0,0,0,.3)!important; }
	.gradio-container .gr-textbox textarea, .gradio-container .gr-textbox input { background:linear-gradient(to bottom,#faf8f3,#f5f0e8)!important;
	border:2px solid var(--iberian-sand)!important; border-radius:6px!important; color:#000!important;
	font-family:'Georgia',serif!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1)!important; }
	.gradio-container .gr-textbox textarea:focus, .gradio-container .gr-textbox input:focus {
	border-color:var(--iberian-bronze)!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1), 0 0 8px rgba(205,127,50,.3)!important; }
	.gradio-container .gr-button.gr-button-primary { background:linear-gradient(145deg,var(--iberian-bronze),var(--iberian-rust))!important;
	border:2px solid var(--iberian-clay)!important; color:#fff!important; font-weight:bold!important; text-shadow:1px 2px 2px rgba(0,0,0,.4)!important;
	box-shadow:0 4px 8px rgba(139,69,19,.3), inset 0 1px 0 rgba(255,255,255,.2)!important; border-radius:8px!important; padding:.8rem 1.5rem!important; transition:all .3s ease!important; }
	.gradio-container .gr-button.gr-button-primary:hover { background:linear-gradient(145deg,var(--iberian-rust),var(--iberian-bronze))!important;
	transform:translateY(-2px)!important; box-shadow:0 6px 12px rgba(139,69,19,.4)!important; }
	.gradio-container, gradio-app {
	--button-primary-background-fill: linear-gradient(145deg,#CD7F32,#A0522D) !important;
	--button-primary-background-fill-hover: linear-gradient(145deg,#A0522D,#CD7F32) !important;
	--button-primary-text-color: #fff !important;
	--button-primary-border-color: #8B4513 !important;
	--checkbox-label-background-fill-selected: linear-gradient(145deg,#CD7F32,#A0522D) !important;
	--checkbox-label-text-color-selected: #fff !important;
	--checkbox-label-border-color-selected: #8B4513 !important;
	--block-label-text-color: #4a2e15 !important;
	--block-title-text-color: #4a2e15 !important;
	--block-info-text-color: #4a2e15 !important;
	--button-secondary-text-color: #4a2e15 !important;
	--checkbox-label-text-color: #4a2e15 !important;
	--input-placeholder-color: #8a6f4a !important;
	--input-text-color: #000 !important;
	--input-text-size: inherit !important;
	}
	.gradio-container label.selected,
	.gradio-container [data-testid$="-radio-label"].selected {
	background: linear-gradient(145deg,#CD7F32,#A0522D) !important;
	color: #fff !important;
	border-color: #8B4513 !important;
	}
	.gradio-container label.selected *,
	.gradio-container [data-testid$="-radio-label"].selected * {
	color: #fff !important;
	}
	.gradio-container .gr-button.gr-button-secondary,
	.gradio-container button.secondary {
	color: #4a2e15 !important;
	}
	.gradio-container span[data-testid="block-info"],
	.gradio-container .block-title,
	.gradio-container .block-label,
	.gradio-container label > span:not(.selected),
	.gradio-container .gr-form > label,
	.gradio-container .gr-block label {
	color: #4a2e15 !important;
	}
	.ib-line { font-family:'IberiaGeorgeos',monospace,sans-serif!important; font-size:1.9rem!important; line-height:2.4rem!important; white-space:pre-wrap!important;
	background:linear-gradient(135deg,#e8dcc8 0%,#d4c4a8 50%,#c4b098 100%)!important; padding:24px!important; border-radius:10px!important;
	border:3px solid var(--iberian-rust)!important; border-left:6px solid var(--iberian-bronze)!important;
	box-shadow:0 4px 15px rgba(139,69,19,.25), inset 0 2px 4px rgba(0,0,0,.1)!important; color:var(--iberian-clay)!important; position:relative!important; }
	.ib-line::before { content:''!important; position:absolute!important; inset:0!important;
	background-image:repeating-linear-gradient(0deg,transparent,transparent 2px, rgba(139,69,19,.03) 2px, rgba(139,69,19,.03) 4px)!important;
	pointer-events:none!important; border-radius:10px!important; }
	@media (max-width:768px) {
	.ib-line { font-size:1.5rem!important; line-height:2rem!important; padding:16px!important; }
	.gradio-container .gr-group { padding:1rem!important; }
	.gradio-container h1 { font-size:1.8rem!important; }
	}
	@media (max-width:480px) {
	.ib-line { font-size:1.3rem!important; line-height:1.8rem!important; padding:12px!important; }
	.gradio-container h1 { font-size:1.5rem!important; }
	}
	.gradio-container button[role="tab"] {
	background:linear-gradient(145deg,#ebe3d5,#d9cec0)!important;
	border:2px solid var(--iberian-sand)!important;
	border-bottom:none!important;
	color:var(--iberian-clay)!important;
	font-weight:600!important;
	font-family:'Georgia','Times New Roman',serif!important;
	font-size:1.05rem!important;
	padding:0.8rem 2rem!important;
	margin:0 0.3rem 0 0!important;
	border-radius:8px 8px 0 0!important;
	transition:all .25s ease!important;
	box-shadow:2px 2px 6px rgba(0,0,0,.12)!important;
	text-shadow:1px 1px 2px rgba(139,69,19,.08)!important;
	}
	.gradio-container button[role="tab"]:hover {
	background:linear-gradient(145deg,var(--iberian-ochre),#CC7722)!important;
	color:#ffffff!important;
	transform:translateY(-3px)!important;
	box-shadow:0 5px 10px rgba(139,69,19,.25)!important;
	text-shadow:1px 1px 3px rgba(0,0,0,.3)!important;
	}
	.gradio-container button[role="tab"][aria-selected="true"] {
	background:linear-gradient(145deg,var(--iberian-bronze),var(--iberian-rust))!important;
	border:3px solid var(--iberian-clay)!important;
	border-bottom:none!important;
	color:#ffffff!important;
	font-weight:700!important;
	box-shadow:0 6px 12px rgba(139,69,19,.35), inset 0 1px 0 rgba(255,255,255,.25)!important;
	text-shadow:1px 2px 3px rgba(0,0,0,.45)!important;
	transform:translateY(0px)!important;
	}
	.gradio-container div[role="tablist"] {
	background:linear-gradient(145deg,#e8dcc8,#d9c4b0)!important;
	border-bottom:4px solid var(--iberian-bronze)!important;
	padding:0.5rem 1rem 0 1rem!important;
	border-radius:10px 10px 0 0!important;
	box-shadow:0 2px 8px rgba(139,69,19,.15)!important;
	}
	""".replace("@@FONT_SRC@@", font_src)

	# Stylesheet computed once at import time and handed to gr.Blocks.
	CSS = build_css()

	# ====== leer TU mapa HTML y embeber en iframe (sin tocar su contenido) ======
	def _load_map_html() -> str:
	for cand in ("mapa_iberos_neoibero.html", "salida/mapa_iberos_neoibero.html"):
	if os.path.exists(cand):
	with open(cand, "r", encoding="utf-8") as f:
	return f.read()
	return """<!doctype html><meta charset=utf-8>
	<title>Mapa</title>
	<style>html,body,#m{height:100%;margin:0}#m{height:100vh}</style>
	<link rel=stylesheet href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css">
	<div id=m></div>
	<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
	<script>var map=L.map('m').setView([40,-2],6);
	L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png',{maxZoom:18,attribution:'© OpenStreetMap'}).addTo(map);
	L.circle([39,-0.3],{radius:70000}).addTo(map);</script>"""

	MAP_SRC = _load_map_html()
	MAP_DATA_URL = "data:text/html;base64," + base64.b64encode(MAP_SRC.encode("utf-8")).decode("ascii")

	# ====== Blocks UI ======
	# Build the Gradio interface. The Blocks app is bound to `demo`, styled with
	# the module-level CSS string and the Soft theme in "stone" hues.
	# NOTE(review): indentation is not visible in this view, so which container
	# (Group / Row / Column) owns each component below must be confirmed in the
	# file itself.
	with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="stone", secondary_hue="stone", neutral_hue="stone")) as demo:
	with gr.Group():
	with gr.Row():
	# UI language selector ("ES"/"EN") and translation direction; both start
	# with the Spanish labels from LABELS["ES"].
	combo = gr.Dropdown(choices=["ES","EN"], value="ES", label=LABELS["ES"]["combo"])
	direction = gr.Radio(choices=LABELS["ES"]["dir_opts"], value="ES → NI", label=LABELS["ES"]["dir"])

	with gr.Group():
	# Single input box, reused for both directions (label/placeholder are
	# swapped by the handlers wired further down).
	es_in = gr.Textbox(label=LABELS["ES"]["in_label_es"], placeholder=LABELS["ES"]["in_ph_es"], lines=5, elem_id="ni_es_input")
	with gr.Row():
	# Main actions: translate, diagnose lexicon coverage, clear the input.
	btn_tr = gr.Button(LABELS["ES"]["btn"], variant="primary")
	btn_diag = gr.Button("🔎 Diagnosticar BI con este texto", variant="secondary")
	btn_clear_in = gr.Button("🗑️ Borrar entrada", variant="secondary")
	with gr.Row():
	with gr.Column(scale=2):
	# Read-only translation output with a built-in copy button.
	ni_out = gr.Textbox(label=LABELS["ES"]["out_lat_esni"], lines=5, interactive=False, elem_id="ni_es_output", show_copy_button=True)
	with gr.Row():
	# Small helper buttons acting on the output box.
	btn_copy_out = gr.Button("📋 Copiar salida", variant="secondary", size="sm")
	btn_cut_out = gr.Button("✂️ Cortar salida", variant="secondary", size="sm")
	btn_clear_out = gr.Button("🗑️ Borrar salida", variant="secondary", size="sm")
	# Speech button and the audio player that receives its result.
	loc_btn = gr.Button("🔊 Locutar", variant="secondary", visible=True)
	audio_out = gr.Audio(label=LABELS["ES"]["out_audio"], type="numpy")
	with gr.Column(scale=1):
	# HTML pane for the Iberian-script line.
	ib_out = gr.HTML(label=LABELS["ES"]["out_ib"])
	# HTML pane for the diagnosis report; starts empty.
	diag_out = gr.HTML(value="")

	def do_translate(text, dir_label, sel_lang="ES"):
	    """Translate the input and return updates for the five output widgets.

	    Args:
	        text: Raw input text.
	        dir_label: Direction label; one starting with "ES" means ES→NI.
	        sel_lang: UI language key into LABELS. Defaults to "ES" and falls
	            back to it for unknown keys, so two-argument callers still work.

	    Returns:
	        A 5-tuple of gr.update objects for, in order: the output textbox,
	        the Iberian line, the speech button, the audio player and the
	        diagnosis pane.
	    """
	    empty_ib = "<div class='ib-line'></div>"
	    if not text or not text.strip():
	        return (gr.update(value=""),
	                gr.update(value=empty_ib),
	                gr.update(visible=False),
	                gr.update(value=None),
	                gr.update(value=""))
	    # Use the labels of the language currently selected in the UI; always
	    # reading LABELS["ES"] reset the output label to Spanish on every
	    # translation even when the interface was switched to English.
	    labels = LABELS.get(sel_lang, LABELS["ES"])
	    if dir_label.startswith("ES"):
	        latin, ib = translate_es_to_ni_bi(text)
	        return (gr.update(label=labels["out_lat_esni"], value=latin),
	                gr.update(value=ib),
	                gr.update(visible=True),
	                gr.update(value=None),
	                gr.update(value=""))
	    es_text = translate_ni_to_es_bi(text)
	    return (gr.update(label=labels["out_lat_nies"], value=es_text),
	            gr.update(value=empty_ib),
	            gr.update(visible=False),
	            gr.update(value=None),
	            gr.update(value=""))

	# `combo` is passed as a third input so the handler knows the UI language.
	btn_tr.click(do_translate, [es_in, direction, combo], [ni_out, ib_out, loc_btn, audio_out, diag_out])

	def run_locution(latin_text, dir_label):
	    """Synthesize speech for the output text in ES→NI mode; else None."""
	    if not dir_label.startswith("ES"):
	        return None
	    return synthesize_speech(latin_text)
	loc_btn.click(fn=run_locution, inputs=[ni_out, direction], outputs=audio_out)

	def do_diagnose(text, dir_label):
	    """Run the diagnosis and wrap its HTML report in a component update."""
	    report_html = diagnose_text(text, dir_label)
	    return gr.update(value=report_html)
	btn_diag.click(fn=do_diagnose, inputs=[es_in, direction], outputs=[diag_out])

	def switch_lang(sel_lang, dir_label):
	    """Relabel the widgets for the chosen UI language.

	    Returns seven gr.update objects, in the order of the outputs wired
	    below: language selector, direction radio, input box, output box,
	    Iberian line, audio player and translate button.
	    """
	    strings = LABELS[sel_lang]
	    # Input/output labels depend on the active translation direction.
	    if dir_label.startswith("ES"):
	        in_label, in_ph, out_lab = strings["in_label_es"], strings["in_ph_es"], strings["out_lat_esni"]
	    else:
	        in_label, in_ph, out_lab = strings["in_label_ni"], strings["in_ph_ni"], strings["out_lat_nies"]
	    updates = [
	        gr.update(label=strings["combo"], value=sel_lang),
	        gr.update(label=strings["dir"], choices=strings["dir_opts"], value=dir_label),
	        gr.update(label=in_label, placeholder=in_ph),
	        gr.update(label=out_lab),
	        gr.update(label=strings["out_ib"]),
	        gr.update(label=strings["out_audio"]),
	        gr.update(value=strings["btn"]),
	    ]
	    return tuple(updates)
	combo.change(
	    fn=switch_lang,
	    inputs=[combo, direction],
	    outputs=[combo, direction, es_in, ni_out, ib_out, audio_out, btn_tr],
	)

	def switch_direction(dir_label, sel_lang):
	    """Adapt the widgets to a new translation direction.

	    Relabels the input and output boxes, clears the output, the Iberian
	    line, the audio and the diagnosis, and shows the speech button only
	    for ES→NI. Returns six gr.update objects in the order of the outputs
	    wired below.
	    """
	    strings = LABELS[sel_lang]
	    es_to_ni = dir_label.startswith("ES")
	    suffix = "es" if es_to_ni else "ni"
	    out_key = "out_lat_esni" if es_to_ni else "out_lat_nies"
	    return (
	        gr.update(label=strings["in_label_" + suffix], placeholder=strings["in_ph_" + suffix]),
	        gr.update(label=strings[out_key], value=""),
	        gr.update(value="<div class='ib-line'></div>"),
	        gr.update(visible=bool(es_to_ni)),
	        gr.update(value=None),
	        gr.update(value=""),
	    )
	direction.change(
	    fn=switch_direction,
	    inputs=[direction, combo],
	    outputs=[es_in, ni_out, ib_out, loc_btn, audio_out, diag_out],
	)

	# ---- v123: UI utility buttons ----
	# Clear input: resets the input textbox to an empty string.
	def _clear_input_box():
	    """Return the empty value written into the input textbox."""
	    return ""
	btn_clear_in.click(fn=_clear_input_box, inputs=None, outputs=[es_in])

	# Clear output: resets the output box, Iberian line, audio and diagnosis.
	def _clear_output_block():
	    """Return blank values for the four output widgets."""
	    blank_text = ""
	    blank_ib = "<div class='ib-line'></div>"
	    return (blank_text, blank_ib, None, blank_text)
	btn_clear_out.click(
	    _clear_output_block,
	    None,
	    [ni_out, ib_out, audio_out, diag_out],
	)

	# Copy output to the clipboard: browser-side JS only, no Python callback.
	_COPY_JS = "(text) => { if (text) { navigator.clipboard.writeText(text); } return []; }"
	btn_copy_out.click(fn=None, inputs=[ni_out], outputs=None, js=_COPY_JS)

	# Cut output: the JS copies the text to the clipboard and hands it on, then
	# the Python callback blanks the same four output widgets.
	def _cut_output_block(_text):
	    """Ignore the incoming text and return blank output values."""
	    return ("", "<div class='ib-line'></div>", None, "")
	_CUT_JS = "(text) => { if (text) { navigator.clipboard.writeText(text); } return text; }"
	btn_cut_out.click(
	    fn=_cut_output_block,
	    inputs=[ni_out],
	    outputs=[ni_out, ib_out, audio_out, diag_out],
	    js=_CUT_JS,
	)

	# ====== smoke opcional ======
	def _symmetry_smoketest():
	    """Print a NI→ES→NI round trip for a handful of fixed probe strings."""
	    print("\n[SMOKE] Prueba ES↔NI (BI-estricto, determinista)…")
	    probes = (
	        "nuker-ke ni etxe-ka ?",
	        "¿Pagaste 12,75 en la cafetería?",
	        "Marta llega a las 18:30.",
	        "[SIN-LEX:Tomás]-na euŕak-ke !",
	    )
	    for probe in probes:
	        spanish = translate_ni_to_es_bi(probe)
	        # The ES→NI translator returns a pair; only the first item is shown.
	        neo_iberian, _ = translate_es_to_ni_bi(spanish)
	        for tag, value in ((" IN:", probe), (" ES:", spanish), (" NI:", neo_iberian)):
	            print(tag, value)
	        print("---")

	# The smoke test only runs when the module-level debug flag is on.
	if DEBUG_MODE:
	    _symmetry_smoketest()

	# Script entry point: when the file is executed directly, enable the Gradio
	# queue on `demo` and launch the app.
	if __name__ == "__main__":
	demo.queue().launch()