FerasMad committed on
Commit
5d562c6
·
verified ·
1 Parent(s): 248048f

Initial deploy

Browse files
Files changed (3) hide show
  1. README.md +50 -8
  2. app.py +109 -0
  3. requirements.txt +6 -0
README.md CHANGED
@@ -1,14 +1,56 @@
1
  ---
2
- title: Arabic Complaints Classifier
3
- emoji: 🚀
4
- colorFrom: indigo
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.14.0
8
- python_version: '3.13'
9
  app_file: app.py
10
- pinned: false
11
  license: mit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Arabic Restaurant Complaints Classifier
3
+ emoji: 🍽️
4
+ colorFrom: green
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
+ python_version: "3.11"
9
  app_file: app.py
10
+ pinned: true
11
  license: mit
12
+ short_description: Classify Arabic restaurant complaints into 8 categories
13
+ tags:
14
+ - arabic
15
+ - nlp
16
+ - classification
17
+ - bert
18
+ - saudi
19
+ - dialectal-arabic
20
+ models:
21
+ - CAMeL-Lab/bert-base-arabic-camelbert-mix
22
+ - UBC-NLP/MARBERT
23
+ - aubmindlab/bert-base-arabertv02
24
+ language:
25
+ - ar
26
  ---
27
 
28
+ # Arabic Restaurant Complaints Classifier
29
+
30
+ Classifies Arabic restaurant complaints into 8 actionable categories, with a focus on Saudi/Gulf dialect.
31
+
32
+ ## Performance
33
+
34
+ | Metric | Value |
35
+ |---|---:|
36
+ | Test accuracy | 95.05% |
37
+ | Test weighted F1 | 95.08% |
38
+ | Test macro F1 | 92.03% |
39
+ | Min class F1 (عامة) | 84.84% |
40
+
41
+ ## Categories
42
+
43
+ | Arabic | English |
44
+ |---|---|
45
+ | التوصيل | Delivery |
46
+ | السعر والقيمة | Price / value |
47
+ | النظافة | Cleanliness |
48
+ | جودة الطعام | Food quality |
49
+ | خدمة الموظفين | Staff service |
50
+ | دقة الطلب | Order accuracy |
51
+ | عامة | General |
52
+ | وقت الانتظار | Wait time |
53
+
54
+ ## Source
55
+
56
+ https://github.com/FerasMad/NLP-complaints-system
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """HuggingFace Spaces entry point — Arabic Restaurant Complaints Classifier.
2
+
3
+ Loads the single best CAMeLBERT-mix model from HuggingFace Hub and serves
4
+ a Gradio UI. Set HF_REPO_ID env var in the Space settings to point at
5
+ your model repo.
6
+ """
7
+ import os
8
+ import re
9
+
10
+ import gradio as gr
11
+ import torch
12
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
13
+
14
# Hub repository that holds the fine-tuned checkpoint. The HF_REPO_ID
# environment variable, when set, overrides the default below.
HF_REPO_ID = os.getenv("HF_REPO_ID", "FerasMad/arabic-complaints-classifier")

# Upper bound on the tokenized input length.
MAX_LENGTH = 192
# Minimum share of Arabic-block characters an input must contain.
MIN_ARABIC_RATIO = 0.30

# Category names, in label-id order.
# NOTE(review): assumes this order matches the label ids the checkpoint was
# trained with -- confirm against the model's config.
CATEGORIES = [
    "التوصيل",
    "السعر والقيمة",
    "النظافة",
    "جودة الطعام",
    "خدمة الموظفين",
    "دقة الطلب",
    "عامة",
    "وقت الانتظار",
]
ID2LABEL = {index: name for index, name in enumerate(CATEGORIES)}

# Pre-compiled patterns shared by the text helpers.
TASHKEEL = re.compile(r"[ً-ٟ]")  # diacritic marks to strip
NON_ARABIC = re.compile(r"[^؀-ۿa-zA-Z0-9٠-٩\s]")  # anything outside the kept alphabet
WHITESPACE = re.compile(r"\s+")  # runs of whitespace
ARABIC_CHAR = re.compile(r"[؀-ۿ]")  # a single Arabic-block character
36
+
37
+
38
# Letter normalization applied by clean(): the alef variants collapse to bare
# alef, alef maqsura becomes ya, and ta marbuta becomes ha. Built once at
# import time rather than on every call.
_LETTER_NORMALIZATION = str.maketrans(
    {"أ": "ا", "إ": "ا", "آ": "ا", "ٱ": "ا", "ى": "ي", "ة": "ه"}
)


def clean(text: str) -> str:
    """Normalize a raw complaint string before it is tokenized.

    Steps, in order: remove characters matched by ``TASHKEEL``, apply the
    letter normalization table, replace every character matched by
    ``NON_ARABIC`` with a space, collapse whitespace runs to one space,
    trim both ends, and lower-case any Latin letters.

    Args:
        text: Raw user input; may be empty or ``None``.

    Returns:
        The normalized string, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""
    without_marks = TASHKEEL.sub("", text)
    normalized = without_marks.translate(_LETTER_NORMALIZATION)
    filtered = NON_ARABIC.sub(" ", normalized)
    return WHITESPACE.sub(" ", filtered).strip().lower()
45
+
46
+
47
def is_arabic_enough(text: str) -> bool:
    """Return True when enough of ``text`` is written in Arabic script.

    Leading and trailing whitespace is ignored, so padding such as a pasted
    trailing newline cannot dilute the ratio and reject a valid complaint.

    Args:
        text: Raw user input; may be empty or ``None``.

    Returns:
        ``False`` when the trimmed text has fewer than 3 characters;
        otherwise whether the share of characters matched by ``ARABIC_CHAR``
        is at least ``MIN_ARABIC_RATIO``.
    """
    trimmed = text.strip() if text else ""
    if len(trimmed) < 3:
        return False
    arabic_count = len(ARABIC_CHAR.findall(trimmed))
    return arabic_count / len(trimmed) >= MIN_ARABIC_RATIO
51
+
52
+
53
# Load the tokenizer and classifier once at import time so that every
# prediction reuses the same objects.
print(f"Loading {HF_REPO_ID} ...")

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

tokenizer = AutoTokenizer.from_pretrained(HF_REPO_ID)
model = AutoModelForSequenceClassification.from_pretrained(HF_REPO_ID)
model.to(device)
model.eval()  # inference mode for layers such as dropout
print(f"Model loaded on {device}.")
58
+
59
+
60
@torch.no_grad()
def predict(text: str):
    """Classify one complaint and return its top categories.

    The input is validated first (minimum length, then Arabic ratio), then
    normalized with ``clean`` and scored by the model.

    Args:
        text: Raw complaint text from the UI; may be empty or ``None``.

    Returns:
        A dict mapping label to probability, holding at most the three most
        probable categories in descending order. For rejected input the dict
        holds a single user-facing message with value ``1.0``.
    """
    if not text or len(text.strip()) < 3:
        return {"النص قصير جدا — please type a longer Arabic complaint": 1.0}
    if not is_arabic_enough(text):
        return {"النص ليس باللغه العربيه — please use Arabic input": 1.0}

    cleaned = clean(text)
    if not cleaned:
        # Input such as a run of diacritics passes the checks above but
        # normalizes to nothing; do not score an empty sequence.
        return {"النص قصير جدا — please type a longer Arabic complaint": 1.0}

    enc = tokenizer(
        cleaned, return_tensors="pt", truncation=True, max_length=MAX_LENGTH
    ).to(device)
    probs = torch.softmax(model(**enc).logits[0], dim=-1)
    # Never ask for more entries than the model has outputs.
    top = torch.topk(probs, k=min(3, probs.shape[-1]))
    return {
        ID2LABEL[index]: score
        for score, index in zip(top.values.tolist(), top.indices.tolist())
    }
71
+
72
+
73
+ EXAMPLES = [
74
+ "وصل الطلب بارد جدا والمندوب تاخر اكثر من ساعتين",
75
+ "الاسعار مبالغ فيها لا تناسب الجوده المقدمه ابدا",
76
+ "النظافه سيئه الطاولات متسخه والارض غير نظيفه",
77
+ "طلبت برجر بدون بصل لكنهم وضعوه رغم تنبيهي",
78
+ "انتظرت ساعه كامله في المطعم قبل ان ياتي طلبي",
79
+ "الموظف اسلوبه سيء جدا وغير محترم",
80
+ "الاكل بايخ ومالح والطبخ مو متقن",
81
+ "تجربه سيئه عموما لن اعود لهذا المكان",
82
+ ]
83
+
84
+
85
+ demo = gr.Interface(
86
+ fn=predict,
87
+ inputs=gr.Textbox(
88
+ lines=4,
89
+ label="اكتب الشكوى",
90
+ placeholder="مثال: الاكل بايخ ومالح والطبخ مو متقن",
91
+ rtl=True,
92
+ ),
93
+ outputs=gr.Label(num_top_classes=3, label="التصنيف"),
94
+ examples=EXAMPLES,
95
+ title="تصنيف شكاوى المطاعم العربية — Arabic Restaurant Complaints Classifier",
96
+ description=(
97
+ "نموذج CAMeLBERT-mix مدرب على ٨ فئات من الشكاوى. لهجة سعودية / خليجية. "
98
+ "Fine-tuned CAMeLBERT-mix · 8 categories · 95% test accuracy · Saudi-Gulf dialect."
99
+ ),
100
+ article=(
101
+ "**Source:** [github.com/FerasMad/NLP-complaints-system]"
102
+ "(https://github.com/FerasMad/NLP-complaints-system)"
103
+ ),
104
+ allow_flagging="never",
105
+ )
106
+
107
+
108
+ if __name__ == "__main__":
109
+ demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch>=2.0
2
+ transformers>=4.40,<5
3
+ huggingface_hub>=0.24,<1.0
4
+ sentencepiece>=0.2
5
+ gradio==4.44.0
6
+ numpy>=1.24