LoloSemper committed on
Commit
aecda8a
·
verified ·
1 Parent(s): 87630b9

Upload 2 files

Browse files
Files changed (1) hide show
  1. app.py +35 -1
app.py CHANGED
@@ -81,6 +81,39 @@ VISIBLE_PUNCT = set(list(",.;:!?¡¿…()[]{}\"'«»—–“”‘’"))
81
  _num_re = re.compile(r"^\d+([.,]\d+)?$")
82
  def is_number(tok:str)->bool: return bool(_num_re.fullmatch(tok or ""))
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  # --- clause separators + atomic placeholders ---
85
  # Punctuation strings in this set: comma, semicolon, em dash, en dash, colon.
  CLAUSE_BREAKS = {",", ";", "—", "–", ":"}
86
  # Matches a string that is, in its entirety, one bracketed token:
  # "[", one or more characters other than "]", then "]".
  # NOTE(review): presumably used to recognise markers such as "[SIN-LEX:...]" — confirm against callers.
  PLACEHOLDER_RE = re.compile(r"^\[[^\]]+\]$")
@@ -826,7 +859,8 @@ def translate_es_to_ni_bi(text:str):
826
  out.append(ni)
827
  ib_toks.append(georgeos_keys(tokens_from_latin(ni), ni))
828
  elif is_number(key):
829
- out.append(t); ib_toks.append(t)
 
830
  else:
831
  ph = f"[SIN-LEX:{t}]"
832
  out.append(ph); ib_toks.append(ph)
 
81
  _num_re = re.compile(r"^\d+([.,]\d+)?$")
82
  def is_number(tok:str)->bool: return bool(_num_re.fullmatch(tok or ""))
83
 
84
# --- digit-to-Neo-Iberian numeral conversion (vigesimal base) ---
# Numeral words for 1..10 (the empty entry for 0 is kept for compatibility).
_NI_UNITS = {0: '', 1: 'ban', 2: 'bi', 3: 'irur', 4: 'laur', 5: 'borste',
             6: 'sei', 7: 'sisbi', 8: 'sorse', 9: 'bedar', 10: 'abar'}
# Numeral words for the multiples of twenty: 20, 40, 60 and 80.
_NI_TWENTIES = {1: 'orkei', 2: 'binorkei', 3: 'irurokei', 4: 'laurokei'}


def _ni_below_100(n: int) -> str:
    """Return the numeral for an integer in the range 1..99."""
    if n <= 10:
        return _NI_UNITS[n]
    if n < 20:
        return f"abar-ke-{_NI_UNITS[n - 10]}"
    twenties, rest = divmod(n, 20)
    base = _NI_TWENTIES[twenties]
    if rest == 0:
        return base
    if rest == 10:
        # A bare ten is appended without the "ke" linker.
        return f"{base}-abar"
    if rest > 10:
        return f"{base}-abar-ke-{_NI_UNITS[rest - 10]}"
    return f"{base}-ke-{_NI_UNITS[rest]}"


def digit_to_ni(tok: str) -> str:
    """Convert a string of decimal digits into a Neo-Iberian numeral.

    Only whole numbers from 1 to 999 are converted. Every other input is
    returned unchanged: zero, values above 999, decimals such as ``"3,5"``,
    and anything that is not a string made up solely of decimal digits.

    Args:
        tok: The token to convert, e.g. ``"35"``.

    Returns:
        The hyphen-joined numeral (e.g. ``"orkei-abar-ke-borste"`` for
        ``"35"``), or ``tok`` itself when it is not convertible.
    """
    # int() alone is too permissive: it also accepts "1_0", "+5", " 7 ",
    # floats and bools, none of which are digit tokens.
    if not isinstance(tok, str) or not tok.isdecimal():
        return tok
    try:
        n = int(tok)
    except ValueError:
        # e.g. a digit string longer than the interpreter's conversion limit.
        return tok
    if not 1 <= n <= 999:
        return tok

    hundreds, rest = divmod(n, 100)
    if hundreds == 0:
        return _ni_below_100(rest)
    head = "atun" if hundreds == 1 else f"{_NI_UNITS[hundreds]}-atun"
    if rest == 0:
        return head
    return f"{head}-ke-{_ni_below_100(rest)}"
116
+
117
  # --- clause separators + atomic placeholders ---
118
  # Punctuation strings in this set: comma, semicolon, em dash, en dash, colon.
  CLAUSE_BREAKS = {",", ";", "—", "–", ":"}
119
  # Matches a string that is, in its entirety, one bracketed token:
  # "[", one or more characters other than "]", then "]".
  # NOTE(review): presumably used to recognise markers such as "[SIN-LEX:...]" — confirm against callers.
  PLACEHOLDER_RE = re.compile(r"^\[[^\]]+\]$")
 
859
  out.append(ni)
860
  ib_toks.append(georgeos_keys(tokens_from_latin(ni), ni))
861
  elif is_number(key):
862
+ ni_num = digit_to_ni(key)
863
+ out.append(ni_num); ib_toks.append(georgeos_keys(tokens_from_latin(ni_num), ni_num))
864
  else:
865
  ph = f"[SIN-LEX:{t}]"
866
  out.append(ph); ib_toks.append(ph)