Upload 2 files
Browse files
- LEXICON_v82_IBERIAN.csv.gz +3 -0
- app.py +21 -9
LEXICON_v82_IBERIAN.csv.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06bcd502995157e9d3a185e7a482e21c6647ef00d28de679ee84d34696d4390a
|
| 3 |
+
size 12673243
|
app.py
CHANGED
|
@@ -46,6 +46,13 @@ def _cand(*names):
|
|
| 46 |
|
| 47 |
# Prioriza los “master/surface-ready”; luego retrocompatibles
|
| 48 |
CSV_BI = _cand(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
"LEXICON_v75_IBERIAN.csv.gz",
|
| 50 |
"LEXICON_v74_IBERIAN.csv.gz",
|
| 51 |
"LEXICON_v73_IBERIAN.csv.gz",
|
|
@@ -503,26 +510,31 @@ def load_bi_strict_and_diagnose():
|
|
| 503 |
if ni not in NIPHRASE2ES:
|
| 504 |
NIPHRASE2ES[ni] = (es_orig, pid)
|
| 505 |
|
| 506 |
-
# ES→NI — prioridad: N > V; dentro de V: PRS/PST > IMP/SBJ
|
| 507 |
pos = (r.get(POS_COL) or "").strip() if POS_COL else ""
|
| 508 |
morph = (r.get(MORPH_COL) or "").strip() if MORPH_COL else ""
|
| 509 |
_MORPH_PRIO = {"PRS":10,"PST":9,"IPFV":8,"FUT":7,"COND":6,
|
| 510 |
"INF":5,"GER":4,"PART":3,"SBJ":2,"SBJ_IPFV":1,"IMP":0}
|
|
|
|
| 511 |
if es in ES2NI:
|
| 512 |
dup_es += 1
|
| 513 |
old_pos = ES2NI_POS.get(es, "")
|
| 514 |
old_morph = ES2NI_MORPH.get(es, "")
|
| 515 |
replace = False
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
old_p = _MORPH_PRIO.get(old_morph, -1)
|
| 522 |
-
if new_p > old_p:
|
| 523 |
ES2NI_VERB[es] = ES2NI[es] # guardar alternativa verbal
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
replace = True # PRS > IMP, etc.
|
| 525 |
-
elif pos == "V" and old_pos == "N":
|
| 526 |
ES2NI_VERB[es] = (ni_orig, pid) # el verbo es la alternativa
|
| 527 |
if replace:
|
| 528 |
ES2NI[es] = (ni_orig, pid)
|
|
|
|
| 46 |
|
| 47 |
# Prioriza los “master/surface-ready”; luego retrocompatibles
|
| 48 |
CSV_BI = _cand(
|
| 49 |
+
"LEXICON_v82_IBERIAN.csv.gz",
|
| 50 |
+
"LEXICON_v81_IBERIAN.csv.gz",
|
| 51 |
+
"LEXICON_v80_IBERIAN.csv.gz",
|
| 52 |
+
"LEXICON_v79_IBERIAN.csv.gz",
|
| 53 |
+
"LEXICON_v78_IBERIAN.csv.gz",
|
| 54 |
+
"LEXICON_v77_IBERIAN.csv.gz",
|
| 55 |
+
"LEXICON_v76_IBERIAN.csv.gz",
|
| 56 |
"LEXICON_v75_IBERIAN.csv.gz",
|
| 57 |
"LEXICON_v74_IBERIAN.csv.gz",
|
| 58 |
"LEXICON_v73_IBERIAN.csv.gz",
|
|
|
|
| 510 |
if ni not in NIPHRASE2ES:
|
| 511 |
NIPHRASE2ES[ni] = (es_orig, pid)
|
| 512 |
|
| 513 |
+
# ES→NI — prioridad: ADJ > N > V; dentro de V: PRS/PST > IMP/SBJ
|
| 514 |
pos = (r.get(POS_COL) or "").strip() if POS_COL else ""
|
| 515 |
morph = (r.get(MORPH_COL) or "").strip() if MORPH_COL else ""
|
| 516 |
_MORPH_PRIO = {"PRS":10,"PST":9,"IPFV":8,"FUT":7,"COND":6,
|
| 517 |
"INF":5,"GER":4,"PART":3,"SBJ":2,"SBJ_IPFV":1,"IMP":0}
|
| 518 |
+
_POS_PRIO = {"ADJ":3, "N":2, "V":1}
|
| 519 |
if es in ES2NI:
|
| 520 |
dup_es += 1
|
| 521 |
old_pos = ES2NI_POS.get(es, "")
|
| 522 |
old_morph = ES2NI_MORPH.get(es, "")
|
| 523 |
replace = False
|
| 524 |
+
new_p = _POS_PRIO.get(pos, 0)
|
| 525 |
+
old_p = _POS_PRIO.get(old_pos, 0)
|
| 526 |
+
if new_p > old_p:
|
| 527 |
+
# Mayor prioridad POS → reemplazar (ADJ > N > V)
|
| 528 |
+
if old_pos == "V":
|
|
|
|
|
|
|
| 529 |
ES2NI_VERB[es] = ES2NI[es] # guardar alternativa verbal
|
| 530 |
+
replace = True
|
| 531 |
+
elif pos == "V" and old_pos == "V":
|
| 532 |
+
new_m = _MORPH_PRIO.get(morph, -1)
|
| 533 |
+
old_m = _MORPH_PRIO.get(old_morph, -1)
|
| 534 |
+
if new_m > old_m:
|
| 535 |
+
ES2NI_VERB[es] = ES2NI[es]
|
| 536 |
replace = True # PRS > IMP, etc.
|
| 537 |
+
elif pos == "V" and old_pos in ("N", "ADJ"):
|
| 538 |
ES2NI_VERB[es] = (ni_orig, pid) # el verbo es la alternativa
|
| 539 |
if replace:
|
| 540 |
ES2NI[es] = (ni_orig, pid)
|