FerasMad committed on
Commit
d2a2fc9
·
verified ·
1 Parent(s): 86805fd

Initial deploy

Browse files
Files changed (3) hide show
  1. __pycache__/app.cpython-313.pyc +0 -0
  2. app.py +61 -27
  3. requirements.txt +3 -0
__pycache__/app.cpython-313.pyc CHANGED
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
 
app.py CHANGED
@@ -83,6 +83,12 @@ print(f"Loading {HF_REPO_ID} ...")
83
  device = "cuda" if torch.cuda.is_available() else "cpu"
84
  tokenizer = AutoTokenizer.from_pretrained(HF_REPO_ID)
85
  model = AutoModelForSequenceClassification.from_pretrained(HF_REPO_ID).to(device).eval()
 
 
 
 
 
 
86
  print(f"Model loaded on {device}.")
87
 
88
  HERE = Path(__file__).parent
@@ -540,8 +546,18 @@ def render_message(headline_ar: str, headline_en: str) -> str:
540
  CATEGORY_TO_ID = {cat: i for i, cat in ID2LABEL.items()}
541
 
542
  # Rescue rules: unambiguous Arabic phrases that must produce the matched
543
- # category as top-1. Mirrors app/ensemble_inference.py so the Space behaves
544
- # the same as the API. See src/audit_predictions.py for validation.
 
 
 
 
 
 
 
 
 
 
545
  RESCUE_RULES: list[tuple[str, list[str]]] = [
546
  ("النظافة", ["الحمام", "تواليت", "ذباب", "صراصير"]),
547
  ("وقت الانتظار", [
@@ -587,8 +603,13 @@ def apply_rescue(
587
  re-normalize gives a cleaner ~78%/22% split that matches user intent.
588
 
589
  Conservative: only rescues when phrase clearly indicates one category.
590
- Audit-validated: takes the model from 85% to 100% on the behavioral
591
- audit set (see src/audit_predictions.py); held-out test cost is ~0.9%.
 
 
 
 
 
592
  """
593
  out = probs.copy()
594
  rescued_idx = None
@@ -634,20 +655,38 @@ NEGATIVE_WORDS = frozenset({
634
  "مالح", "مالحه", "مالحة", "بارد", "بارده", "باردة",
635
  "تاخر", "تاخرت", "تأخر", "تأخرت", "متأخر", "متاخر",
636
  "قذر", "قذره", "قذرة", "متسخ", "متسخه", "متسخة", "وسخ", "وسخه", "وسخة",
637
- "غير", "ما", "لا", "مو", "مب", # negation particles — only match as whole words
 
 
 
 
638
  "مزعج", "مزعجه", "مزعجة", "غالي", "غاليه", "غالية",
639
  "ضاع", "ضاعت", "محروق", "محروقه", "محروقة", "نسي", "نسوا",
640
  "مشكله", "مشكلة", "مشاكل", "خربان", "خربانه", "خربانة",
641
  })
642
 
 
 
 
 
 
 
 
 
643
 
644
  def looks_like_praise(cleaned: str) -> bool:
645
  """True when the input has positive sentiment words and no negatives.
646
 
647
  Uses whole-word matching (not substring) so short negation particles like
648
  'لا' / 'ما' don't false-match inside common words such as 'الاكل'.
 
 
 
 
649
  """
650
  words = set(cleaned.split())
 
 
651
  has_praise = bool(words & PRAISE_WORDS)
652
  has_negative = bool(words & NEGATIVE_WORDS)
653
  return has_praise and not has_negative
@@ -660,6 +699,12 @@ def predict(text: str) -> str:
660
  "اكتب شكوى أطول من ٣ أحرف",
661
  "type a longer complaint (at least 3 characters)",
662
  )
 
 
 
 
 
 
663
  if not is_arabic_enough(text):
664
  return render_message(
665
  "النص ليس بالعربية",
@@ -1625,10 +1670,13 @@ body.dark .multi-aspect-badge { border-color: rgba(138, 148, 104, 0.40); }
1625
  }
1626
 
1627
  .perf-card-sub {
1628
- font-size: 0.82rem;
1629
- color: var(--ink-muted);
 
 
1630
  direction: ltr;
1631
  }
 
1632
 
1633
  .perf-card-chart {
1634
  width: 100%;
@@ -1646,13 +1694,15 @@ body.dark .chart-light { display: none; }
1646
  body.dark .chart-dark { display: block; }
1647
 
1648
  .perf-card-caption {
1649
- font-size: 0.92rem;
1650
- color: var(--ink-muted);
1651
- line-height: 1.7;
 
1652
  margin: 16px 0 0;
1653
  direction: rtl;
1654
  text-align: right;
1655
  }
 
1656
 
1657
  /* ---- Inline About section ---- */
1658
 
@@ -1866,18 +1916,6 @@ with gr.Blocks(
1866
  </div>
1867
  </section>
1868
 
1869
- <div id="headline-wrap">
1870
- <div id="headline">
1871
- <div class="headline-stat">
1872
- <span class="headline-figure">95.05%</span>
1873
- <span class="headline-unit">test accuracy</span>
1874
- </div>
1875
- <p class="headline-note">
1876
- 13,986 held-out real reviews. 8 categories, all above 80% F1.
1877
- Single best CAMeLBERT-mix from a 4-model ensemble.
1878
- </p>
1879
- </div>
1880
- </div>
1881
  """
1882
  )
1883
 
@@ -1970,10 +2008,6 @@ with gr.Blocks(
1970
  <div class="section-inner">
1971
  <h2 class="section-title">عن النموذج</h2>
1972
  {ABOUT_HTML}
1973
- <div class="categories-rail">
1974
- <span class="rail-label">الفئات</span>
1975
- {CATEGORIES_GRID_HTML}
1976
- </div>
1977
  </div>
1978
  </section>
1979
  """
@@ -1984,7 +2018,7 @@ with gr.Blocks(
1984
  f"""
1985
  <footer id="footer">
1986
  <div class="footer-inner">
1987
- <span class="made-by">Made by the NLP team at AI Club</span>
1988
  <span class="links">
1989
  <a href="{GITHUB_URL}" target="_blank" rel="noopener">Source on GitHub</a>
1990
  <a href="https://huggingface.co/{HF_REPO_ID}" target="_blank" rel="noopener">Model on HF Hub</a>
 
83
  device = "cuda" if torch.cuda.is_available() else "cpu"
84
  tokenizer = AutoTokenizer.from_pretrained(HF_REPO_ID)
85
  model = AutoModelForSequenceClassification.from_pretrained(HF_REPO_ID).to(device).eval()
86
+ # Fail loudly if a wrong-shape model is ever fetched — covers the HF_REPO_ID
87
+ # env-override case + future model-card mismatch. (Adversarial Q&A I-5.)
88
+ assert model.config.num_labels == len(CATEGORIES), (
89
+ f"loaded model has {model.config.num_labels} labels but Space expects "
90
+ f"{len(CATEGORIES)} ({CATEGORIES}). Refusing to start."
91
+ )
92
  print(f"Model loaded on {device}.")
93
 
94
  HERE = Path(__file__).parent
 
546
  CATEGORY_TO_ID = {cat: i for i, cat in ID2LABEL.items()}
547
 
548
  # Rescue rules: unambiguous Arabic phrases that must produce the matched
549
+ # category as top-1.
550
+ #
551
+ # ORDER IS LOAD-BEARING — first match wins (see `break` in apply_rescue).
552
+ # Most-specific phrases first, with the catch-all `عامة` last.
553
+ #
554
+ # NOTE: this layer has diverged from app/ensemble_inference.py::apply_keyword_priors.
555
+ # The Space adds: (a) the `عامة` rescue category, (b) first-match-only
556
+ # semantics, (c) 0.30x dampening on non-rescued classes. The "Audit-validated
557
+ # 85% → 100%" claim in `apply_rescue` was measured against the API's older
558
+ # rescue, not this one — treat the docstring as historical context, not a
559
+ # current measurement. The 500-row Space test in dataset/_audits/test_run_results.md
560
+ # is the authoritative measurement for this rescue layer.
561
  RESCUE_RULES: list[tuple[str, list[str]]] = [
562
  ("النظافة", ["الحمام", "تواليت", "ذباب", "صراصير"]),
563
  ("وقت الانتظار", [
 
603
  re-normalize gives a cleaner ~78%/22% split that matches user intent.
604
 
605
  Conservative: only rescues when phrase clearly indicates one category.
606
+
607
+ Historical context: an early version of this rescue (no dampening, 4
608
+ categories, iterate-all) was audited via src/audit_predictions.py and
609
+ moved the API model from 85% to 100% on a 20-row behavioral set. The
610
+ current Space layer has diverged from that — the canonical measurement
611
+ for THIS rescue is the 500-row Space test in
612
+ dataset/_audits/test_run_results.md, not the older 20-row audit.
613
  """
614
  out = probs.copy()
615
  rescued_idx = None
 
655
  "مالح", "مالحه", "مالحة", "بارد", "بارده", "باردة",
656
  "تاخر", "تاخرت", "تأخر", "تأخرت", "متأخر", "متاخر",
657
  "قذر", "قذره", "قذرة", "متسخ", "متسخه", "متسخة", "وسخ", "وسخه", "وسخة",
658
+ # negation particles — only match as whole words.
659
+ # Includes ليس/ليست (formal MSA), ابدا/ابداً (emphatic Saudi negator),
660
+ # and Gulf colloquial مهو/مهي plus the variants مهوب/مهوش. (Adversarial Q&A I-2.)
661
+ "غير", "ما", "لا", "مو", "مب", "ليس", "ليست", "ابدا", "ابداً",
662
+ "مهو", "مهي", "مهوب", "مهوش",
663
  "مزعج", "مزعجه", "مزعجة", "غالي", "غاليه", "غالية",
664
  "ضاع", "ضاعت", "محروق", "محروقه", "محروقة", "نسي", "نسوا",
665
  "مشكله", "مشكلة", "مشاكل", "خربان", "خربانه", "خربانة",
666
  })
667
 
668
+ # Contrastive markers — if any appear in the cleaned text, skip the praise
669
+ # screen. Inputs like "ممتاز بس الجو حار" or "الموظف رائع لكن الكاشير غلط"
670
+ # are legitimate complaints framed politely; the bare praise word should not
671
+ # swallow them. (Adversarial Q&A I-3.)
672
+ CONTRASTIVE_MARKERS = frozenset({
673
+ "بس", "لكن", "لاكن", "مع", "رغم", "بالرغم", "الا", "إلا",
674
+ })
675
+
676
 
677
  def looks_like_praise(cleaned: str) -> bool:
678
  """True when the input has positive sentiment words and no negatives.
679
 
680
  Uses whole-word matching (not substring) so short negation particles like
681
  'لا' / 'ما' don't false-match inside common words such as 'الاكل'.
682
+
683
+ Contrastive markers (بس، لكن، رغم، ...) bypass the praise screen so that
684
+ "ممتاز بس الجو حار" reaches the model — a mixed-sentiment complaint
685
+ should not be discarded just because it contains one praise word.
686
  """
687
  words = set(cleaned.split())
688
+ if words & CONTRASTIVE_MARKERS:
689
+ return False
690
  has_praise = bool(words & PRAISE_WORDS)
691
  has_negative = bool(words & NEGATIVE_WORDS)
692
  return has_praise and not has_negative
 
699
  "اكتب شكوى أطول من ٣ أحرف",
700
  "type a longer complaint (at least 3 characters)",
701
  )
702
+ # DoS guard — the model truncates at MAX_LENGTH (192) tokens anyway, but
703
+ # the regex passes in clean() are O(n) over the raw input. A 1MB string
704
+ # would pin the cpu-basic worker; cap at 4000 chars (~500 Arabic words,
705
+ # well above any real complaint). (Adversarial Q&A I-10, Code audit #3.)
706
+ if len(text) > 4000:
707
+ text = text[:4000]
708
  if not is_arabic_enough(text):
709
  return render_message(
710
  "النص ليس بالعربية",
 
1670
  }
1671
 
1672
  .perf-card-sub {
1673
+ font-size: 0.88rem;
1674
+ font-weight: 600;
1675
+ color: var(--ink);
1676
+ opacity: 0.88;
1677
  direction: ltr;
1678
  }
1679
+ body.dark .perf-card-sub { opacity: 0.92; }
1680
 
1681
  .perf-card-chart {
1682
  width: 100%;
 
1694
  body.dark .chart-dark { display: block; }
1695
 
1696
  .perf-card-caption {
1697
+ font-size: 0.95rem;
1698
+ color: var(--ink);
1699
+ opacity: 0.88;
1700
+ line-height: 1.75;
1701
  margin: 16px 0 0;
1702
  direction: rtl;
1703
  text-align: right;
1704
  }
1705
+ body.dark .perf-card-caption { opacity: 0.94; }
1706
 
1707
  /* ---- Inline About section ---- */
1708
 
 
1916
  </div>
1917
  </section>
1918
 
 
 
 
 
 
 
 
 
 
 
 
 
1919
  """
1920
  )
1921
 
 
2008
  <div class="section-inner">
2009
  <h2 class="section-title">عن النموذج</h2>
2010
  {ABOUT_HTML}
 
 
 
 
2011
  </div>
2012
  </section>
2013
  """
 
2018
  f"""
2019
  <footer id="footer">
2020
  <div class="footer-inner">
2021
+ <span class="made-by">Made by the NLP team at the AI Club of KSU</span>
2022
  <span class="links">
2023
  <a href="{GITHUB_URL}" target="_blank" rel="noopener">Source on GitHub</a>
2024
  <a href="https://huggingface.co/{HF_REPO_ID}" target="_blank" rel="noopener">Model on HF Hub</a>
requirements.txt CHANGED
@@ -4,4 +4,7 @@ huggingface_hub>=0.24
4
  sentencepiece>=0.2
5
  gradio>=5.0,<6
6
  numpy>=1.24
 
 
 
7
  pysarf @ git+https://github.com/Rashidbm/pysarf.git
 
4
  sentencepiece>=0.2
5
  gradio>=5.0,<6
6
  numpy>=1.24
7
+ # TODO(team): pin to a commit SHA — currently tracks HEAD of an external
8
+ # repo, meaning every Space rebuild is non-reproducible and a force-push
9
+ # upstream could silently change behavior. See dataset/_audits/adversarial_findings.md I-6.
10
  pysarf @ git+https://github.com/Rashidbm/pysarf.git