LoloSemper committed on
Commit
7fb5740
·
verified ·
1 Parent(s): 5efb67d

Upload 2 files

Browse files
Files changed (2) hide show
  1. LEXICON_v82_IBERIAN.csv.gz +3 -0
  2. app.py +21 -9
LEXICON_v82_IBERIAN.csv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06bcd502995157e9d3a185e7a482e21c6647ef00d28de679ee84d34696d4390a
3
+ size 12673243
app.py CHANGED
@@ -46,6 +46,13 @@ def _cand(*names):
46
 
47
  # Prioriza los “master/surface-ready”; luego retrocompatibles
48
  CSV_BI = _cand(
 
 
 
 
 
 
 
49
  "LEXICON_v75_IBERIAN.csv.gz",
50
  "LEXICON_v74_IBERIAN.csv.gz",
51
  "LEXICON_v73_IBERIAN.csv.gz",
@@ -503,26 +510,31 @@ def load_bi_strict_and_diagnose():
503
  if ni not in NIPHRASE2ES:
504
  NIPHRASE2ES[ni] = (es_orig, pid)
505
 
506
- # ES→NI — prioridad: N > V; dentro de V: PRS/PST > IMP/SBJ
507
  pos = (r.get(POS_COL) or "").strip() if POS_COL else ""
508
  morph = (r.get(MORPH_COL) or "").strip() if MORPH_COL else ""
509
  _MORPH_PRIO = {"PRS":10,"PST":9,"IPFV":8,"FUT":7,"COND":6,
510
  "INF":5,"GER":4,"PART":3,"SBJ":2,"SBJ_IPFV":1,"IMP":0}
 
511
  if es in ES2NI:
512
  dup_es += 1
513
  old_pos = ES2NI_POS.get(es, "")
514
  old_morph = ES2NI_MORPH.get(es, "")
515
  replace = False
516
- if pos == "N" and old_pos == "V":
517
- ES2NI_VERB[es] = ES2NI[es] # guardar alternativa verbal
518
- replace = True # sustantivo > verbo
519
- elif pos == "V" and old_pos == "V":
520
- new_p = _MORPH_PRIO.get(morph, -1)
521
- old_p = _MORPH_PRIO.get(old_morph, -1)
522
- if new_p > old_p:
523
  ES2NI_VERB[es] = ES2NI[es] # guardar alternativa verbal
 
 
 
 
 
 
524
  replace = True # PRS > IMP, etc.
525
- elif pos == "V" and old_pos == "N":
526
  ES2NI_VERB[es] = (ni_orig, pid) # el verbo es la alternativa
527
  if replace:
528
  ES2NI[es] = (ni_orig, pid)
 
46
 
47
  # Prioriza los “master/surface-ready”; luego retrocompatibles
48
  CSV_BI = _cand(
49
+ "LEXICON_v82_IBERIAN.csv.gz",
50
+ "LEXICON_v81_IBERIAN.csv.gz",
51
+ "LEXICON_v80_IBERIAN.csv.gz",
52
+ "LEXICON_v79_IBERIAN.csv.gz",
53
+ "LEXICON_v78_IBERIAN.csv.gz",
54
+ "LEXICON_v77_IBERIAN.csv.gz",
55
+ "LEXICON_v76_IBERIAN.csv.gz",
56
  "LEXICON_v75_IBERIAN.csv.gz",
57
  "LEXICON_v74_IBERIAN.csv.gz",
58
  "LEXICON_v73_IBERIAN.csv.gz",
 
510
  if ni not in NIPHRASE2ES:
511
  NIPHRASE2ES[ni] = (es_orig, pid)
512
 
513
+ # ES→NI — prioridad: ADJ > N > V; dentro de V: PRS/PST > IMP/SBJ
514
  pos = (r.get(POS_COL) or "").strip() if POS_COL else ""
515
  morph = (r.get(MORPH_COL) or "").strip() if MORPH_COL else ""
516
  _MORPH_PRIO = {"PRS":10,"PST":9,"IPFV":8,"FUT":7,"COND":6,
517
  "INF":5,"GER":4,"PART":3,"SBJ":2,"SBJ_IPFV":1,"IMP":0}
518
+ _POS_PRIO = {"ADJ":3, "N":2, "V":1}
519
  if es in ES2NI:
520
  dup_es += 1
521
  old_pos = ES2NI_POS.get(es, "")
522
  old_morph = ES2NI_MORPH.get(es, "")
523
  replace = False
524
+ new_p = _POS_PRIO.get(pos, 0)
525
+ old_p = _POS_PRIO.get(old_pos, 0)
526
+ if new_p > old_p:
527
+ # Mayor prioridad POS reemplazar (ADJ > N > V)
528
+ if old_pos == "V":
 
 
529
  ES2NI_VERB[es] = ES2NI[es] # guardar alternativa verbal
530
+ replace = True
531
+ elif pos == "V" and old_pos == "V":
532
+ new_m = _MORPH_PRIO.get(morph, -1)
533
+ old_m = _MORPH_PRIO.get(old_morph, -1)
534
+ if new_m > old_m:
535
+ ES2NI_VERB[es] = ES2NI[es]
536
  replace = True # PRS > IMP, etc.
537
+ elif pos == "V" and old_pos in ("N", "ADJ"):
538
  ES2NI_VERB[es] = (ni_orig, pid) # el verbo es la alternativa
539
  if replace:
540
  ES2NI[es] = (ni_orig, pid)