Upload 2 files
Browse files
app.py
CHANGED
|
@@ -81,6 +81,39 @@ VISIBLE_PUNCT = set(list(",.;:!?¡¿…()[]{}\"'«»—–“”‘’"))
|
|
| 81 |
# Pattern for an unsigned number: one or more digits, optionally followed by
# a single "." or "," and one or more further digits.
_num_re = re.compile(r"^\d+([.,]\d+)?$")
|
| 82 |
def is_number(tok:str)->bool:
    """Report whether *tok* fully matches the numeric pattern ``_num_re``.

    Falsy input (``None`` or the empty string) is tested as the empty string.
    """
    candidate = tok if tok else ""
    return _num_re.fullmatch(candidate) is not None
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
# --- clause separators + atomic placeholders ---
|
| 85 |
# Punctuation tokens treated as clause separators.
CLAUSE_BREAKS = {",", ";", "—", "–", ":"}
|
| 86 |
# Pattern for a string that is a single "[...]" group with no "]" inside.
# NOTE(review): presumably used to recognise placeholders such as the
# "[SIN-LEX:...]" tokens built elsewhere in this file - confirm at use sites.
PLACEHOLDER_RE = re.compile(r"^\[[^\]]+\]$")
|
|
@@ -826,7 +859,8 @@ def translate_es_to_ni_bi(text:str):
|
|
| 826 |
out.append(ni)
|
| 827 |
ib_toks.append(georgeos_keys(tokens_from_latin(ni), ni))
|
| 828 |
elif is_number(key):
|
| 829 |
-
|
|
|
|
| 830 |
else:
|
| 831 |
ph = f"[SIN-LEX:{t}]"
|
| 832 |
out.append(ph); ib_toks.append(ph)
|
|
|
|
| 81 |
# Pattern for an unsigned number: one or more digits, optionally followed by
# a single "." or "," and one or more further digits.
_num_re = re.compile(r"^\d+([.,]\d+)?$")
|
| 82 |
def is_number(tok:str)->bool:
    """Report whether *tok* fully matches the numeric pattern ``_num_re``.

    Falsy input (``None`` or the empty string) is tested as the empty string.
    """
    candidate = tok if tok else ""
    return _num_re.fullmatch(candidate) is not None
|
| 83 |
|
| 84 |
+
# --- digit -> Neo-Iberian numeral conversion (vigesimal base) ---
# Words for 0..10; 0 maps to the empty string and is never emitted on its own.
_NI_UNITS = {0:'',1:'ban',2:'bi',3:'irur',4:'laur',5:'borste',
             6:'sei',7:'sisbi',8:'sorse',9:'bedar',10:'abar'}
# Words for the multiples of twenty: 20, 40, 60 and 80.
_NI_TWENTIES = {1:'orkei',2:'binorkei',3:'irurokei',4:'laurokei'}


def _ni_below_100(n:int)->str:
    """Spell an integer in the range 1..99 using the numeral tables."""
    if n <= 10:
        return _NI_UNITS[n]
    if n < 20:
        return f"abar-ke-{_NI_UNITS[n - 10]}"
    twenties, remainder = divmod(n, 20)
    base = _NI_TWENTIES[twenties]
    if remainder == 0:
        return base
    if remainder < 10:
        return f"{base}-ke-{_NI_UNITS[remainder]}"
    # Remainders 10..19 attach their "abar[-ke-X]" form directly to the
    # twenties word, without an extra "ke" connector.
    return f"{base}-{_ni_below_100(remainder)}"


def digit_to_ni(tok:str)->str:
    """Convert a whole number written in digits to a Neo-Iberian numeral.

    Args:
        tok: The token to convert, normally a string of decimal digits.

    Returns:
        The hyphen-joined numeral for values from 1 to 999. Anything else is
        returned unchanged: zero, values above 999, decimals such as "3,5",
        and strings that are not made only of decimal digits.
    """
    # int() also accepts signs, surrounding whitespace and "_" separators
    # ("+5", " 7 ", "1_0"); those are not plain digit tokens, so leave them
    # untouched instead of silently translating them.
    if isinstance(tok, str) and not tok.isdecimal():
        return tok
    try:
        n = int(tok)
    except (ValueError, TypeError):
        return tok
    if not 0 < n <= 999:
        return tok  # out of the supported range: pass through
    hundreds, remainder = divmod(n, 100)
    if hundreds == 0:
        return _ni_below_100(remainder)
    head = "atun" if hundreds == 1 else f"{_NI_UNITS[hundreds]}-atun"
    if remainder == 0:
        return head
    return f"{head}-ke-{_ni_below_100(remainder)}"
|
| 116 |
+
|
| 117 |
# --- clause separators + atomic placeholders ---
|
| 118 |
# Punctuation tokens treated as clause separators.
CLAUSE_BREAKS = {",", ";", "—", "–", ":"}
|
| 119 |
# Pattern for a string that is a single "[...]" group with no "]" inside.
# NOTE(review): presumably used to recognise placeholders such as the
# "[SIN-LEX:...]" tokens built elsewhere in this file - confirm at use sites.
PLACEHOLDER_RE = re.compile(r"^\[[^\]]+\]$")
|
|
|
|
| 859 |
out.append(ni)
|
| 860 |
ib_toks.append(georgeos_keys(tokens_from_latin(ni), ni))
|
| 861 |
elif is_number(key):
|
| 862 |
+
ni_num = digit_to_ni(key)
|
| 863 |
+
out.append(ni_num); ib_toks.append(georgeos_keys(tokens_from_latin(ni_num), ni_num))
|
| 864 |
else:
|
| 865 |
ph = f"[SIN-LEX:{t}]"
|
| 866 |
out.append(ph); ib_toks.append(ph)
|