anirudh-np-ds committed on
Commit
b66924a
·
1 Parent(s): 95ac500

feat: AI resume screener with scoring and ranking

Browse files
quiz_generator_app.py ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import fitz
3
+ import os
4
+ import requests
5
+ import json
6
+ import re
7
+ import random
8
+
9
+ st.set_page_config(page_title="AI Quiz Generator", page_icon="🧠", layout="wide")
10
+
11
+ st.markdown("""
12
+ <style>
13
+ @import url('https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
14
+ html, body, [class*="css"] { font-family: 'Plus Jakarta Sans', sans-serif; }
15
+ .main { background: #fafaf8; }
16
+
17
+ .hero {
18
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
19
+ border-radius: 16px; padding: 32px 36px; margin-bottom: 24px; color: white;
20
+ }
21
+ .hero h1 { font-size: 1.9rem; font-weight: 700; margin: 0 0 6px 0; }
22
+ .hero p { color: #94a3b8; margin: 0; font-size: 0.92rem; }
23
+
24
+ .quiz-card {
25
+ background: white; border: 1px solid #e8e8e4;
26
+ border-radius: 14px; padding: 24px 28px; margin: 16px 0;
27
+ box-shadow: 0 2px 8px rgba(0,0,0,0.06);
28
+ transition: box-shadow 0.2s;
29
+ }
30
+ .quiz-card:hover { box-shadow: 0 4px 16px rgba(0,0,0,0.1); }
31
+
32
+ .question-num {
33
+ font-size: 0.72rem; font-weight: 700; text-transform: uppercase;
34
+ letter-spacing: 0.08em; color: #94a3b8; margin-bottom: 8px;
35
+ }
36
+ .question-text {
37
+ font-size: 1.05rem; font-weight: 600; color: #1a1a2e;
38
+ line-height: 1.5; margin-bottom: 18px;
39
+ }
40
+ .difficulty-badge {
41
+ display: inline-block; font-size: 0.7rem; font-weight: 600;
42
+ padding: 3px 10px; border-radius: 20px; margin-left: 8px;
43
+ text-transform: uppercase; letter-spacing: 0.05em; vertical-align: middle;
44
+ }
45
+ .easy { background: #dcfce7; color: #15803d; }
46
+ .medium { background: #fef9c3; color: #854d0e; }
47
+ .hard { background: #fee2e2; color: #991b1b; }
48
+
49
+ .option-btn {
50
+ display: block; width: 100%; text-align: left;
51
+ background: #f8f8f6; border: 2px solid #e8e8e4;
52
+ border-radius: 10px; padding: 12px 16px; margin: 6px 0;
53
+ font-size: 0.9rem; color: #374151; cursor: pointer;
54
+ font-family: 'Plus Jakarta Sans', sans-serif;
55
+ transition: all 0.15s;
56
+ }
57
+ .option-correct { background: #dcfce7 !important; border-color: #22c55e !important; color: #15803d !important; font-weight: 600; }
58
+ .option-wrong { background: #fee2e2 !important; border-color: #ef4444 !important; color: #991b1b !important; }
59
+ .option-reveal { background: #dbeafe !important; border-color: #3b82f6 !important; color: #1d4ed8 !important; }
60
+
61
+ .explanation-box {
62
+ background: #f0fdf4; border: 1px solid #bbf7d0; border-left: 3px solid #22c55e;
63
+ border-radius: 10px; padding: 14px 18px; margin-top: 14px;
64
+ font-size: 0.88rem; color: #15803d; line-height: 1.6;
65
+ }
66
+
67
+ .score-display {
68
+ text-align: center; background: white; border: 1px solid #e8e8e4;
69
+ border-radius: 16px; padding: 32px; margin: 20px 0;
70
+ box-shadow: 0 4px 16px rgba(0,0,0,0.08);
71
+ }
72
+ .score-big { font-size: 4rem; font-weight: 700; color: #1a1a2e; }
73
+ .score-label { font-size: 1rem; color: #94a3b8; margin-top: 4px; }
74
+
75
+ .section-label {
76
+ font-size: 0.72rem; text-transform: uppercase; letter-spacing: 0.08em;
77
+ color: #94a3b8; font-weight: 600; margin: 20px 0 8px 0;
78
+ }
79
+ .stat-row { display: flex; gap: 12px; margin: 16px 0; }
80
+ .stat-box {
81
+ flex: 1; background: white; border: 1px solid #e8e8e4;
82
+ border-radius: 10px; padding: 14px; text-align: center;
83
+ }
84
+ .stat-val { font-size: 1.4rem; font-weight: 700; color: #1a1a2e; }
85
+ .stat-lbl { font-size: 0.7rem; color: #94a3b8; margin-top: 2px; }
86
+
87
+ .progress-bar-bg { background: #e8e8e4; border-radius: 4px; height: 6px; margin: 8px 0; }
88
+ .progress-bar-fill { height: 6px; border-radius: 4px; background: linear-gradient(90deg, #0f3460, #533483); }
89
+ </style>
90
+ """, unsafe_allow_html=True)
91
+
92
+
93
+ # ─── Session State ────────────────────────────────────────────────────────────
94
# Seed each session key once; on later reruns the stored values are left
# untouched so the user's quiz progress survives Streamlit's script re-execution.
for _state_key, _state_default in (
    ("questions", []),
    ("answers", {}),
    ("revealed", {}),
    ("quiz_submitted", False),
    ("doc_title", ""),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
104
+
105
+
106
+ # ─── Helpers ──────────────────────────────────────────────────────────────────
107
def extract_pdf_text(pdf_bytes: bytes) -> str:
    """Return the plain text of every page of a PDF as a single string.

    Args:
        pdf_bytes: Raw bytes of the PDF file (e.g. the contents of an upload).

    Returns:
        The text of all pages in order, each page followed by a newline, with
        leading and trailing whitespace stripped from the combined result.
        An empty string means no text could be extracted.
    """
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        # Build the result with one join instead of growing a string per page.
        return "".join(page.get_text("text") + "\n" for page in doc).strip()
    finally:
        # Release the document even when a page fails to extract.
        doc.close()
114
+
115
+
116
def _parse_quiz_payload(raw: str) -> list:
    """Turn the model's reply into a list of usable question dicts."""
    cleaned = re.sub(r"```json|```", "", raw).strip()
    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError:
        # The model sometimes adds prose around the JSON; retry on the
        # outermost [...] span before giving up.
        start, end = cleaned.find("["), cleaned.rfind("]")
        if start == -1 or end <= start:
            raise
        data = json.loads(cleaned[start:end + 1])

    if isinstance(data, dict):
        # Tolerate a wrapper object such as {"questions": [...]}.
        list_values = [value for value in data.values() if isinstance(value, list)]
        if len(list_values) == 1:
            data = list_values[0]

    if not isinstance(data, list):
        raise json.JSONDecodeError("Expected a JSON array of questions", cleaned, 0)

    # The UI indexes q["question"] and iterates q["options"].items(), so drop
    # entries that would crash rendering.
    questions = [
        item for item in data
        if isinstance(item, dict)
        and item.get("question")
        and isinstance(item.get("options"), dict)
    ]
    if not questions:
        raise json.JSONDecodeError("No usable questions in model response", cleaned, 0)
    return questions


def generate_quiz(text: str, num_questions: int, difficulty: str, topic_focus: str, api_key: str) -> list:
    """Ask the Groq chat-completions API for a multiple choice quiz on *text*.

    Args:
        text: Document text; only the first 4000 characters are sent.
        num_questions: Number of questions requested from the model.
        difficulty: Difficulty label inserted into the prompt.
        topic_focus: Optional topic to focus on; empty means "most important
            concepts".
        api_key: Groq API key used as the bearer token.

    Returns:
        A list of question dicts, each with at least a non-empty "question"
        and an "options" dict.

    Raises:
        ValueError: If *text* is blank (nothing to build a quiz from).
        json.JSONDecodeError: If the reply is not a JSON array of questions.
        requests.HTTPError: If the API responds with an error status.
    """
    if not text or not text.strip():
        # Scanned/image-only PDFs yield no text; don't let the model invent a quiz.
        raise ValueError("No extractable text found in the document.")

    focus_instruction = f"Focus specifically on: {topic_focus}." if topic_focus else "Cover the most important concepts."

    prompt = f"""You are an expert educator and assessment designer. Create a high-quality multiple choice quiz based on the document content below.

Document Content:
{text[:4000]}

Requirements:
- Generate exactly {num_questions} questions
- Difficulty level: {difficulty}
- {focus_instruction}
- Each question must have exactly 4 options (A, B, C, D)
- Only one option is correct
- Explanations must be educational and reference the document

Respond ONLY with a valid JSON array in exactly this format:
[
{{
"question": "<clear, specific question>",
"options": {{
"A": "<option A text>",
"B": "<option B text>",
"C": "<option C text>",
"D": "<option D text>"
}},
"correct": "<A, B, C, or D>",
"explanation": "<2-3 sentence explanation of why the answer is correct, referencing the document>",
"difficulty": "<Easy | Medium | Hard>",
"topic": "<the sub-topic this question covers>"
}}
]

Make questions test understanding, not just memorization. Vary the question types."""

    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "llama-3.3-70b-versatile",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 3000,
        "temperature": 0.4,
    }
    r = requests.post("https://api.groq.com/openai/v1/chat/completions",
                      headers=headers, json=payload, timeout=45)
    r.raise_for_status()
    raw = r.json()["choices"][0]["message"]["content"]
    return _parse_quiz_payload(raw)
164
+
165
+
166
+ # ─── Sidebar ──────────────────────────────────────────────────────────────────
167
+ with st.sidebar:
168
+ st.markdown("## 🧠 Quiz Generator")
169
+ st.markdown("<div style='color:#94a3b8;font-size:0.8rem'>AI-powered assessment builder</div>", unsafe_allow_html=True)
170
+ st.markdown("---")
171
+ env_key = os.environ.get("GROQ_API_KEY", "")
172
+ api_key = env_key if env_key else st.text_input("πŸ”‘ Groq API Key", type="password", placeholder="gsk_...")
173
+ if not env_key and not api_key:
174
+ st.caption("Free key β†’ [console.groq.com](https://console.groq.com)")
175
+ st.markdown("---")
176
+
177
+ st.markdown("<div class='section-label'>Quiz Settings</div>", unsafe_allow_html=True)
178
+ num_questions = st.slider("Number of Questions", min_value=3, max_value=15, value=5)
179
+ difficulty = st.selectbox("Difficulty", ["Mixed", "Easy", "Medium", "Hard"])
180
+ topic_focus = st.text_input("Topic Focus (optional)", placeholder="e.g. neural networks, photosynthesis")
181
+
182
+ st.markdown("---")
183
+ if st.session_state.questions:
184
+ total = len(st.session_state.questions)
185
+ answered = len(st.session_state.answers)
186
+ st.markdown(f"""
187
+ <div style='font-size:0.82rem;color:#94a3b8'>
188
+ Progress: {answered}/{total} answered
189
+ </div>
190
+ <div class='progress-bar-bg'>
191
+ <div class='progress-bar-fill' style='width:{int(answered/total*100) if total else 0}%'></div>
192
+ </div>
193
+ """, unsafe_allow_html=True)
194
+ if st.button("πŸ”„ Reset Quiz", use_container_width=True):
195
+ st.session_state.answers = {}
196
+ st.session_state.revealed = {}
197
+ st.session_state.quiz_submitted = False
198
+ st.rerun()
199
+ if st.button("πŸ—‘οΈ Clear & New Quiz", use_container_width=True):
200
+ st.session_state.questions = []
201
+ st.session_state.answers = {}
202
+ st.session_state.revealed = {}
203
+ st.session_state.quiz_submitted = False
204
+ st.session_state.doc_title = ""
205
+ st.rerun()
206
+
207
+
208
+ # ─── Main UI ──────────────────────────────────────────────────────────────────
209
+ st.markdown("""
210
+ <div class='hero'>
211
+ <h1>🧠 AI Knowledge Quiz Generator</h1>
212
+ <p>Upload any PDF β€” textbook, training doc, research paper β€” and AI generates a complete multiple choice quiz with explanations</p>
213
+ </div>
214
+ """, unsafe_allow_html=True)
215
+
216
+ if not api_key:
217
+ st.warning("πŸ‘ˆ Add your Groq API key to get started.")
218
+ st.stop()
219
+
220
+ # Upload section (only if no quiz yet)
221
+ if not st.session_state.questions:
222
+ st.markdown("<div class='section-label'>Upload a Document</div>", unsafe_allow_html=True)
223
+ uploaded_file = st.file_uploader("Upload PDF", type=["pdf"], label_visibility="collapsed")
224
+
225
+ if uploaded_file:
226
+ st.markdown(f"<div style='font-size:0.85rem;color:#475569;padding:8px 0'>πŸ“„ {uploaded_file.name} Β· {round(uploaded_file.size/1024,1)}KB</div>", unsafe_allow_html=True)
227
+
228
+ if st.button(f"🧠 Generate {num_questions} Questions", type="primary", use_container_width=True):
229
+ with st.spinner("Reading document and generating quiz..."):
230
+ try:
231
+ pdf_text = extract_pdf_text(uploaded_file.read())
232
+ questions = generate_quiz(pdf_text, num_questions, difficulty, topic_focus, api_key)
233
+ st.session_state.questions = questions
234
+ st.session_state.doc_title = uploaded_file.name.replace(".pdf", "")
235
+ st.session_state.answers = {}
236
+ st.session_state.revealed = {}
237
+ st.session_state.quiz_submitted = False
238
+ st.rerun()
239
+ except json.JSONDecodeError:
240
+ st.error("❌ AI returned unexpected format. Try again.")
241
+ except Exception as e:
242
+ st.error(f"❌ Error: {str(e)}")
243
+ else:
244
+ st.markdown("""
245
+ <div style='text-align:center;padding:48px 24px;border:2px dashed #e8e8e4;border-radius:14px;color:#94a3b8'>
246
+ <div style='font-size:2.5rem;margin-bottom:12px'>πŸ“š</div>
247
+ <p style='font-size:0.92rem;margin:0'>Upload a PDF to generate your quiz.<br>Works with textbooks, training docs, research papers, manuals.</p>
248
+ </div>""", unsafe_allow_html=True)
249
+
250
+ # ─── Quiz Display ──────────────────────────────────────────────────────────────
251
+ if st.session_state.questions:
252
+ questions = st.session_state.questions
253
+ total = len(questions)
254
+
255
+ # Stats bar
256
+ answered = len(st.session_state.answers)
257
+ correct_so_far = sum(
258
+ 1 for i, q in enumerate(questions)
259
+ if st.session_state.answers.get(i) == q.get("correct")
260
+ )
261
+ easy_c = sum(1 for q in questions if q.get("difficulty") == "Easy")
262
+ med_c = sum(1 for q in questions if q.get("difficulty") == "Medium")
263
+ hard_c = sum(1 for q in questions if q.get("difficulty") == "Hard")
264
+
265
+ st.markdown(f"### πŸ“ Quiz: {st.session_state.doc_title}")
266
+ st.markdown(f"""
267
+ <div class='stat-row'>
268
+ <div class='stat-box'><div class='stat-val'>{total}</div><div class='stat-lbl'>Questions</div></div>
269
+ <div class='stat-box'><div class='stat-val'>{answered}</div><div class='stat-lbl'>Answered</div></div>
270
+ <div class='stat-box'><div class='stat-val' style='color:#22c55e'>{easy_c}</div><div class='stat-lbl'>Easy</div></div>
271
+ <div class='stat-box'><div class='stat-val' style='color:#f59e0b'>{med_c}</div><div class='stat-lbl'>Medium</div></div>
272
+ <div class='stat-box'><div class='stat-val' style='color:#ef4444'>{hard_c}</div><div class='stat-lbl'>Hard</div></div>
273
+ </div>
274
+ """, unsafe_allow_html=True)
275
+
276
+ # ── Questions ──
277
+ for i, q in enumerate(questions):
278
+ diff = q.get("difficulty", "Medium")
279
+ diff_class = diff.lower()
280
+ user_answer = st.session_state.answers.get(i)
281
+ is_revealed = st.session_state.revealed.get(i, False)
282
+ is_submitted = st.session_state.quiz_submitted
283
+
284
+ with st.container():
285
+ st.markdown(f"""
286
+ <div class='quiz-card'>
287
+ <div class='question-num'>Question {i+1} of {total} Β· {q.get("topic","")}
288
+ <span class='difficulty-badge {diff_class}'>{diff}</span>
289
+ </div>
290
+ <div class='question-text'>{q["question"]}</div>
291
+ </div>
292
+ """, unsafe_allow_html=True)
293
+
294
+ options = q.get("options", {})
295
+ correct = q.get("correct", "A")
296
+
297
+ cols = st.columns(2)
298
+ for j, (key, val) in enumerate(options.items()):
299
+ col = cols[j % 2]
300
+ with col:
301
+ show_result = is_submitted or is_revealed
302
+ if show_result:
303
+ if key == correct:
304
+ btn_style = "option-correct"
305
+ elif key == user_answer and user_answer != correct:
306
+ btn_style = "option-wrong"
307
+ else:
308
+ btn_style = ""
309
+ st.markdown(f"<div class='option-btn {btn_style}'><b>{key}.</b> {val}</div>", unsafe_allow_html=True)
310
+ else:
311
+ if st.button(f"{key}. {val}", key=f"q{i}_opt_{key}", use_container_width=True):
312
+ st.session_state.answers[i] = key
313
+ st.rerun()
314
+
315
+ # Show selected answer indicator
316
+ if user_answer and not is_submitted and not is_revealed:
317
+ st.caption(f"βœ… Selected: **{user_answer}** β€” {options.get(user_answer, '')}")
318
+
319
+ # Reveal/explanation
320
+ if is_submitted or is_revealed:
321
+ exp = q.get("explanation", "")
322
+ is_correct = user_answer == correct
323
+ result_text = "βœ… Correct!" if is_correct else f"❌ Incorrect. Correct answer: **{correct}. {options.get(correct,'')}**"
324
+ st.markdown(f"""
325
+ <div class='explanation-box'>
326
+ <div style='margin-bottom:6px;font-weight:600'>{result_text}</div>
327
+ <div>{exp}</div>
328
+ </div>
329
+ """, unsafe_allow_html=True)
330
+ elif user_answer:
331
+ if st.button(f"πŸ’‘ Reveal Answer", key=f"reveal_{i}", use_container_width=False):
332
+ st.session_state.revealed[i] = True
333
+ st.rerun()
334
+
335
+ st.markdown("<div style='margin-bottom:8px'></div>", unsafe_allow_html=True)
336
+
337
+ st.markdown("---")
338
+
339
+ # Submit or Score
340
+ if not st.session_state.quiz_submitted:
341
+ col_sub, col_clear = st.columns([3, 1])
342
+ with col_sub:
343
+ if st.button("πŸ“Š Submit Quiz & See Results", type="primary", use_container_width=True,
344
+ disabled=answered < total):
345
+ st.session_state.quiz_submitted = True
346
+ st.rerun()
347
+ if answered < total:
348
+ st.caption(f"Answer all {total} questions to submit. ({total - answered} remaining)")
349
+ else:
350
+ score = sum(1 for i, q in enumerate(questions)
351
+ if st.session_state.answers.get(i) == q.get("correct"))
352
+ pct = round(score / total * 100)
353
+ grade = "πŸ† Excellent!" if pct >= 90 else "βœ… Good Job!" if pct >= 70 else "πŸ“š Keep Studying!" if pct >= 50 else "πŸ’ͺ Needs Work"
354
+ color = "#22c55e" if pct >= 70 else "#f59e0b" if pct >= 50 else "#ef4444"
355
+
356
+ st.markdown(f"""
357
+ <div class='score-display'>
358
+ <div class='score-big' style='color:{color}'>{pct}%</div>
359
+ <div class='score-label'>{score} out of {total} correct Β· {grade}</div>
360
+ <div style='background:#e8e8e4;border-radius:4px;height:10px;margin:16px auto;max-width:300px'>
361
+ <div style='height:10px;border-radius:4px;width:{pct}%;background:{color}'></div>
362
+ </div>
363
+ </div>
364
+ """, unsafe_allow_html=True)
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  streamlit>=1.32.0
2
- chromadb>=0.4.22
3
- sentence-transformers>=2.7.0
4
  requests>=2.31.0
5
- PyMuPDF>=1.24.0
 
 
1
  streamlit>=1.32.0
 
 
2
  requests>=2.31.0
3
+ PyMuPDF>=1.24.0
4
+ beautifulsoup4>=4.12.0
resume_screener_app.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import fitz
3
+ import os
4
+ import requests
5
+ import json
6
+ import re
7
+
8
+ st.set_page_config(page_title="AI Resume Screener", page_icon="πŸ”", layout="wide")
9
+
10
+ st.markdown("""
11
+ <style>
12
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
13
+ html, body, [class*="css"] { font-family: 'Inter', sans-serif; }
14
+ .main { background: #f8fafc; }
15
+
16
+ .hero {
17
+ background: linear-gradient(135deg, #1e3a5f 0%, #0f2027 100%);
18
+ border-radius: 16px; padding: 32px 36px; margin-bottom: 24px; color: white;
19
+ }
20
+ .hero h1 { font-size: 1.9rem; font-weight: 700; margin: 0 0 6px 0; }
21
+ .hero p { color: #94a3b8; margin: 0; font-size: 0.92rem; }
22
+
23
+ .card {
24
+ background: white; border: 1px solid #e2e8f0;
25
+ border-radius: 12px; padding: 20px 24px; margin: 12px 0;
26
+ box-shadow: 0 1px 3px rgba(0,0,0,0.05);
27
+ }
28
+
29
+ .rank-1 { border-left: 4px solid #22c55e; }
30
+ .rank-2 { border-left: 4px solid #3b82f6; }
31
+ .rank-3 { border-left: 4px solid #f59e0b; }
32
+ .rank-other { border-left: 4px solid #e2e8f0; }
33
+
34
+ .score-badge {
35
+ display: inline-block; font-size: 1.4rem; font-weight: 700;
36
+ padding: 8px 16px; border-radius: 50px; margin-bottom: 8px;
37
+ }
38
+ .score-high { background: #dcfce7; color: #15803d; }
39
+ .score-mid { background: #dbeafe; color: #1d4ed8; }
40
+ .score-low { background: #fef9c3; color: #854d0e; }
41
+
42
+ .candidate-name { font-size: 1.1rem; font-weight: 600; color: #1e293b; }
43
+ .rank-label { font-size: 0.75rem; font-weight: 600; color: #64748b; text-transform: uppercase; letter-spacing: 0.05em; }
44
+
45
+ .strength-tag {
46
+ display: inline-block; background: #dcfce7; color: #15803d;
47
+ border: 1px solid #bbf7d0; border-radius: 20px;
48
+ padding: 3px 10px; font-size: 0.78rem; margin: 2px;
49
+ }
50
+ .gap-tag {
51
+ display: inline-block; background: #fee2e2; color: #991b1b;
52
+ border: 1px solid #fecaca; border-radius: 20px;
53
+ padding: 3px 10px; font-size: 0.78rem; margin: 2px;
54
+ }
55
+
56
+ .score-bar-bg { background: #f1f5f9; border-radius: 4px; height: 8px; margin: 8px 0; }
57
+ .score-bar-fill { height: 8px; border-radius: 4px; transition: width 0.3s; }
58
+
59
+ .section-label {
60
+ font-size: 0.72rem; text-transform: uppercase; letter-spacing: 0.08em;
61
+ color: #94a3b8; font-weight: 600; margin: 20px 0 8px 0;
62
+ }
63
+ .stat-row { display: flex; gap: 12px; margin: 16px 0; }
64
+ .stat-box {
65
+ flex: 1; background: white; border: 1px solid #e2e8f0;
66
+ border-radius: 10px; padding: 14px; text-align: center;
67
+ }
68
+ .stat-val { font-size: 1.5rem; font-weight: 700; color: #1e293b; }
69
+ .stat-lbl { font-size: 0.72rem; color: #94a3b8; margin-top: 2px; }
70
+ </style>
71
+ """, unsafe_allow_html=True)
72
+
73
+
74
+ # ─── Helpers ──────────────────────────────────────────────────────────────────
75
def extract_pdf_text(pdf_bytes: bytes) -> str:
    """Return the plain text of every page of a PDF as a single string.

    Args:
        pdf_bytes: Raw bytes of the PDF file (e.g. the contents of an upload).

    Returns:
        The text of all pages in order, each page followed by a newline, with
        leading and trailing whitespace stripped from the combined result.
        An empty string means no text could be extracted.
    """
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        # Build the result with one join instead of growing a string per page.
        return "".join(page.get_text("text") + "\n" for page in doc).strip()
    finally:
        # Release the document even when a page fails to extract.
        doc.close()
82
+
83
+
84
def _to_percent(value, default: int = 0) -> int:
    """Coerce a model-supplied score (85, "85", "85%", 84.6) to an int in 0..100."""
    if isinstance(value, str):
        value = value.strip().rstrip("%")
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    if number != number:  # NaN compares unequal to itself
        return default
    return int(round(max(0.0, min(100.0, number))))


def _to_str_list(value) -> list:
    """Coerce a model-supplied list field to a list of non-empty strings."""
    if isinstance(value, str):
        # A bare string would otherwise be iterated character by character.
        value = [value]
    if not isinstance(value, list):
        return []
    return [str(item) for item in value if item not in (None, "")]


def score_resume(jd_text: str, resume_text: str, candidate_name: str, api_key: str) -> dict:
    """Ask the Groq chat-completions API to score one resume against a job description.

    Args:
        jd_text: Job description; only the first 2000 characters are sent.
        resume_text: Resume text; only the first 2500 characters are sent.
        candidate_name: Display name inserted into the prompt.
        api_key: Groq API key used as the bearer token.

    Returns:
        The model's evaluation as a dict. "score", "experience_match",
        "skills_match" and "education_match" are always ints in 0..100, and
        "strengths" / "gaps" are always lists of strings, so callers can sort,
        compare and render them without further checks.

    Raises:
        ValueError: If *resume_text* is blank (nothing to evaluate).
        json.JSONDecodeError: If the reply is not a JSON object.
        requests.HTTPError: If the API responds with an error status.
    """
    if not resume_text or not resume_text.strip():
        # Scanned/image-only PDFs yield no text; scoring them would be meaningless.
        raise ValueError("No extractable text found in the resume.")

    prompt = f"""You are an expert HR recruiter and talent evaluator. Analyze the candidate's resume against the job description and provide a detailed evaluation.

Job Description:
{jd_text[:2000]}

Candidate Resume ({candidate_name}):
{resume_text[:2500]}

Respond ONLY with a valid JSON object in exactly this format:
{{
"score": <integer 0-100>,
"verdict": "<one of: Strong Match | Good Match | Partial Match | Weak Match>",
"summary": "<2-3 sentence overall assessment>",
"strengths": ["<strength 1>", "<strength 2>", "<strength 3>"],
"gaps": ["<gap 1>", "<gap 2>"],
"recommendation": "<one sentence hiring recommendation>",
"experience_match": <integer 0-100>,
"skills_match": <integer 0-100>,
"education_match": <integer 0-100>
}}

Be objective and specific. Base scores purely on how well the resume matches the JD requirements."""

    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "llama-3.3-70b-versatile",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 800,
        "temperature": 0.1,
    }
    r = requests.post("https://api.groq.com/openai/v1/chat/completions",
                      headers=headers, json=payload, timeout=30)
    r.raise_for_status()
    raw = r.json()["choices"][0]["message"]["content"]
    raw = re.sub(r"```json|```", "", raw).strip()

    try:
        result = json.loads(raw)
    except json.JSONDecodeError:
        # The model sometimes adds prose around the JSON; retry on the
        # outermost {...} span before giving up.
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end <= start:
            raise
        result = json.loads(raw[start:end + 1])

    if not isinstance(result, dict):
        raise json.JSONDecodeError("Expected a JSON object with the evaluation", raw, 0)

    # Normalise the fields the caller sorts on, compares and renders.
    for field in ("score", "experience_match", "skills_match", "education_match"):
        result[field] = _to_percent(result.get(field))
    for field in ("strengths", "gaps"):
        result[field] = _to_str_list(result.get(field))
    return result
121
+
122
+
123
+ # ─── Sidebar ──────────────────────────────────────────────────────────────────
124
+ with st.sidebar:
125
+ st.markdown("## πŸ” Resume Screener")
126
+ st.markdown("<div style='color:#94a3b8;font-size:0.8rem'>Powered by Groq Β· Llama 3.3 70B</div>", unsafe_allow_html=True)
127
+ st.markdown("---")
128
+ env_key = os.environ.get("GROQ_API_KEY", "")
129
+ api_key = env_key if env_key else st.text_input("πŸ”‘ Groq API Key", type="password", placeholder="gsk_...")
130
+ if not env_key and not api_key:
131
+ st.caption("Free key β†’ [console.groq.com](https://console.groq.com)")
132
+ st.markdown("---")
133
+ st.markdown("""
134
+ <div style='font-size:0.78rem;color:#94a3b8;line-height:1.9'>
135
+ <b>How it works</b><br>
136
+ 1. Paste the Job Description<br>
137
+ 2. Upload candidate resumes (PDF)<br>
138
+ 3. AI scores each resume 0–100<br>
139
+ 4. Candidates ranked automatically<br><br>
140
+ <b>Scoring Dimensions</b><br>
141
+ β€’ Overall fit score<br>
142
+ β€’ Skills match %<br>
143
+ β€’ Experience match %<br>
144
+ β€’ Education match %
145
+ </div>""", unsafe_allow_html=True)
146
+
147
+
148
+ # ─── Main UI ──────────────────────────────────────────────────────────────────
149
+ st.markdown("""
150
+ <div class='hero'>
151
+ <h1>πŸ” AI Resume Screener</h1>
152
+ <p>Upload a Job Description and multiple resumes β€” AI scores, ranks, and explains each candidate automatically</p>
153
+ </div>
154
+ """, unsafe_allow_html=True)
155
+
156
+ col_jd, col_resumes = st.columns([1, 1], gap="large")
157
+
158
+ with col_jd:
159
+ st.markdown("<div class='section-label'>Step 1 β€” Job Description</div>", unsafe_allow_html=True)
160
+ jd_input = st.text_area(
161
+ "Job Description",
162
+ placeholder="Paste the full job description here including role, responsibilities, required skills, and qualifications...",
163
+ height=320,
164
+ label_visibility="collapsed"
165
+ )
166
+
167
+ with col_resumes:
168
+ st.markdown("<div class='section-label'>Step 2 β€” Upload Resumes (PDF)</div>", unsafe_allow_html=True)
169
+ uploaded_resumes = st.file_uploader(
170
+ "Upload Resumes",
171
+ type=["pdf"],
172
+ accept_multiple_files=True,
173
+ label_visibility="collapsed"
174
+ )
175
+ if uploaded_resumes:
176
+ for r in uploaded_resumes:
177
+ st.markdown(f"<div style='font-size:0.82rem;color:#475569;padding:4px 0'>πŸ“„ {r.name} Β· {round(r.size/1024,1)}KB</div>", unsafe_allow_html=True)
178
+
179
+ st.markdown("")
180
+ run_btn = st.button("πŸš€ Screen All Candidates", type="primary", use_container_width=True,
181
+ disabled=not (jd_input and uploaded_resumes and api_key))
182
+
183
+ if not api_key:
184
+ st.warning("πŸ‘ˆ Add your Groq API key to get started.")
185
+ elif not jd_input:
186
+ st.info("πŸ“‹ Paste the job description on the left to begin.")
187
+ elif not uploaded_resumes:
188
+ st.info("πŸ“‚ Upload at least one resume PDF to begin.")
189
+
190
+ if run_btn and jd_input and uploaded_resumes and api_key:
191
+ results = []
192
+ progress = st.progress(0, text="Screening candidates...")
193
+
194
+ for i, resume_file in enumerate(uploaded_resumes):
195
+ candidate_name = resume_file.name.replace(".pdf", "").replace("_", " ").replace("-", " ").title()
196
+ progress.progress(i / len(uploaded_resumes), text=f"Analyzing {candidate_name}...")
197
+
198
+ with st.spinner(f"Evaluating {candidate_name}..."):
199
+ try:
200
+ resume_text = extract_pdf_text(resume_file.read())
201
+ result = score_resume(jd_input, resume_text, candidate_name, api_key)
202
+ result["name"] = candidate_name
203
+ result["filename"] = resume_file.name
204
+ results.append(result)
205
+ except Exception as e:
206
+ st.error(f"❌ Error processing {candidate_name}: {str(e)}")
207
+
208
+ progress.progress(1.0, text="Screening complete!")
209
+
210
+ if results:
211
+ # Sort by score
212
+ results.sort(key=lambda x: x.get("score", 0), reverse=True)
213
+
214
+ st.markdown("---")
215
+ st.markdown("## πŸ“Š Screening Results")
216
+
217
+ # Summary stats
218
+ avg_score = round(sum(r.get("score", 0) for r in results) / len(results))
219
+ top_score = results[0].get("score", 0)
220
+ strong = sum(1 for r in results if r.get("score", 0) >= 70)
221
+
222
+ st.markdown(f"""
223
+ <div class='stat-row'>
224
+ <div class='stat-box'><div class='stat-val'>{len(results)}</div><div class='stat-lbl'>Candidates Screened</div></div>
225
+ <div class='stat-box'><div class='stat-val'>{top_score}</div><div class='stat-lbl'>Top Score</div></div>
226
+ <div class='stat-box'><div class='stat-val'>{avg_score}</div><div class='stat-lbl'>Average Score</div></div>
227
+ <div class='stat-box'><div class='stat-val'>{strong}</div><div class='stat-lbl'>Strong Matches</div></div>
228
+ </div>
229
+ """, unsafe_allow_html=True)
230
+
231
+ # Ranked results
232
+ for rank, result in enumerate(results, start=1):
233
+ score = result.get("score", 0)
234
+ rank_class = f"rank-{rank}" if rank <= 3 else "rank-other"
235
+ score_class = "score-high" if score >= 70 else "score-mid" if score >= 50 else "score-low"
236
+ rank_emoji = "πŸ₯‡" if rank == 1 else "πŸ₯ˆ" if rank == 2 else "πŸ₯‰" if rank == 3 else f"#{rank}"
237
+
238
+ skills_w = result.get("skills_match", 0)
239
+ exp_w = result.get("experience_match", 0)
240
+ edu_w = result.get("education_match", 0)
241
+
242
+ strengths_html = "".join([f"<span class='strength-tag'>βœ“ {s}</span>" for s in result.get("strengths", [])])
243
+ gaps_html = "".join([f"<span class='gap-tag'>βœ— {g}</span>" for g in result.get("gaps", [])])
244
+
245
+ with st.expander(f"{rank_emoji} {result['name']} β€” {score}/100 Β· {result.get('verdict', '')}", expanded=(rank <= 3)):
246
+ st.markdown(f"""
247
+ <div class='card {rank_class}'>
248
+ <div style='display:flex;justify-content:space-between;align-items:flex-start;flex-wrap:wrap;gap:12px'>
249
+ <div>
250
+ <div class='rank-label'>Rank #{rank}</div>
251
+ <div class='candidate-name'>{result['name']}</div>
252
+ <div style='color:#64748b;font-size:0.82rem;margin-top:2px'>πŸ“„ {result['filename']}</div>
253
+ </div>
254
+ <div class='score-badge {score_class}'>{score} / 100</div>
255
+ </div>
256
+
257
+ <div style='margin:16px 0;color:#334155;font-size:0.92rem;line-height:1.7'>{result.get("summary","")}</div>
258
+
259
+ <div style='display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px;margin:16px 0'>
260
+ <div>
261
+ <div style='font-size:0.75rem;color:#64748b;margin-bottom:4px'>Skills Match</div>
262
+ <div class='score-bar-bg'><div class='score-bar-fill' style='width:{skills_w}%;background:#3b82f6'></div></div>
263
+ <div style='font-size:0.78rem;font-weight:600;color:#3b82f6'>{skills_w}%</div>
264
+ </div>
265
+ <div>
266
+ <div style='font-size:0.75rem;color:#64748b;margin-bottom:4px'>Experience Match</div>
267
+ <div class='score-bar-bg'><div class='score-bar-fill' style='width:{exp_w}%;background:#22c55e'></div></div>
268
+ <div style='font-size:0.78rem;font-weight:600;color:#22c55e'>{exp_w}%</div>
269
+ </div>
270
+ <div>
271
+ <div style='font-size:0.75rem;color:#64748b;margin-bottom:4px'>Education Match</div>
272
+ <div class='score-bar-bg'><div class='score-bar-fill' style='width:{edu_w}%;background:#f59e0b'></div></div>
273
+ <div style='font-size:0.78rem;font-weight:600;color:#f59e0b'>{edu_w}%</div>
274
+ </div>
275
+ </div>
276
+
277
+ <div style='margin-bottom:10px'><div style='font-size:0.78rem;font-weight:600;color:#15803d;margin-bottom:6px'>βœ… Strengths</div>{strengths_html}</div>
278
+ <div style='margin-bottom:14px'><div style='font-size:0.78rem;font-weight:600;color:#991b1b;margin-bottom:6px'>⚠️ Gaps</div>{gaps_html}</div>
279
+ <div style='background:#f8fafc;border:1px solid #e2e8f0;border-radius:8px;padding:12px;font-size:0.88rem;color:#334155'>
280
+ πŸ’‘ <b>Recommendation:</b> {result.get("recommendation","")}
281
+ </div>
282
+ </div>
283
+ """, unsafe_allow_html=True)
sentiment_analyzer_app.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import os
4
+ import json
5
+ import re
6
+ from bs4 import BeautifulSoup
7
+ from urllib.parse import urlparse
8
+
9
+ st.set_page_config(page_title="AI Sentiment Analyzer", page_icon="🌐", layout="wide")
10
+
11
+ st.markdown("""
12
+ <style>
13
+ @import url('https://fonts.googleapis.com/css2?family=Sora:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
14
+ html, body, [class*="css"] { font-family: 'Sora', sans-serif; }
15
+ .main { background: #0a0a0f; }
16
+
17
+ .hero {
18
+ background: linear-gradient(135deg, #0d0d1a 0%, #0a0a0f 100%);
19
+ border: 1px solid #1e1e2e; border-top: 3px solid #a78bfa;
20
+ border-radius: 14px; padding: 28px 32px; margin-bottom: 24px;
21
+ }
22
+ .hero h1 { font-size: 1.8rem; font-weight: 700; color: #f1f5f9; margin: 0 0 6px 0; }
23
+ .hero p { color: #4b5563; font-size: 0.88rem; margin: 0; }
24
+
25
+ .insight-card {
26
+ background: #0d0d1a; border: 1px solid #1e1e2e;
27
+ border-radius: 12px; padding: 20px 24px; margin: 10px 0;
28
+ }
29
+
30
+ /* Sentiment meter */
31
+ .sentiment-positive { color: #4ade80; }
32
+ .sentiment-negative { color: #f87171; }
33
+ .sentiment-neutral { color: #94a3b8; }
34
+ .sentiment-mixed { color: #fbbf24; }
35
+
36
+ .big-sentiment {
37
+ font-size: 3rem; font-weight: 700; text-align: center;
38
+ padding: 20px; letter-spacing: -0.02em;
39
+ }
40
+ .sentiment-score-label {
41
+ text-align: center; font-size: 0.82rem; color: #4b5563;
42
+ font-family: 'JetBrains Mono', monospace;
43
+ }
44
+
45
+ .entity-tag {
46
+ display: inline-block; border-radius: 6px;
47
+ padding: 4px 10px; font-size: 0.78rem; margin: 3px;
48
+ font-family: 'JetBrains Mono', monospace;
49
+ }
50
+ .entity-person { background: rgba(167,139,250,0.12); color: #a78bfa; border: 1px solid rgba(167,139,250,0.25); }
51
+ .entity-org { background: rgba(59,130,246,0.1); color: #60a5fa; border: 1px solid rgba(59,130,246,0.25); }
52
+ .entity-location { background: rgba(34,197,94,0.1); color: #4ade80; border: 1px solid rgba(34,197,94,0.25); }
53
+ .entity-topic { background: rgba(251,191,36,0.1); color: #fbbf24; border: 1px solid rgba(251,191,36,0.25); }
54
+ .entity-product { background: rgba(248,113,113,0.1); color: #f87171; border: 1px solid rgba(248,113,113,0.25); }
55
+
56
+ .theme-pill {
57
+ display: inline-block; background: #1e1e2e; border: 1px solid #2d2d3e;
58
+ border-radius: 20px; padding: 5px 14px; margin: 4px;
59
+ font-size: 0.8rem; color: #94a3b8;
60
+ }
61
+
62
+ .section-label {
63
+ font-size: 0.68rem; text-transform: uppercase; letter-spacing: 0.1em;
64
+ color: #2d2d3e; font-weight: 600; margin: 18px 0 8px 0;
65
+ }
66
+
67
+ .stat-row { display: flex; gap: 10px; margin: 16px 0; }
68
+ .stat-box {
69
+ flex: 1; background: #0d0d1a; border: 1px solid #1e1e2e;
70
+ border-radius: 10px; padding: 14px; text-align: center;
71
+ }
72
+ .stat-val { font-size: 1.3rem; font-weight: 700; color: #f1f5f9; }
73
+ .stat-lbl { font-size: 0.68rem; color: #4b5563; margin-top: 2px; }
74
+
75
+ .url-chip {
76
+ background: #0d0d1a; border: 1px solid #1e1e2e; border-radius: 8px;
77
+ padding: 10px 14px; font-family: 'JetBrains Mono', monospace;
78
+ font-size: 0.78rem; color: #4b5563; word-break: break-all;
79
+ margin-bottom: 16px;
80
+ }
81
+
82
+ section[data-testid="stSidebar"] { background: #060609; border-right: 1px solid #1e1e2e; }
83
+ </style>
84
+ """, unsafe_allow_html=True)
85
+
86
+
87
def fetch_url_text(url: str) -> tuple[str, str]:
    """Download a web page and return its visible text and a display title.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``(text, title)`` tuple. ``text`` is the whitespace-normalised page
        text with script/style/navigation-like elements removed, truncated to
        4000 characters. ``title`` is the page's ``<title>`` text, or the
        URL's host name when the page has no usable title.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server answers with an HTTP error status.
    """
    headers = {"User-Agent": "Mozilla/5.0 (compatible; InsightBot/1.0)"}
    r = requests.get(url, headers=headers, timeout=15)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    # `soup.title.string` is None when <title> is empty or contains nested
    # markup, so calling .strip() on it would raise AttributeError. Read the
    # text through get_text() and fall back to the host name when it is empty.
    title = soup.title.get_text(strip=True) if soup.title else ""
    if not title:
        title = urlparse(url).netloc

    # Drop elements that carry no article content before extracting text.
    for tag in soup(["script", "style", "nav", "footer", "header", "aside", "form"]):
        tag.decompose()
    text = soup.get_text(separator=" ", strip=True)
    text = re.sub(r'\s+', ' ', text).strip()
    # Cap the amount of text handed to the caller.
    return text[:4000], title
98
+
99
+
100
def analyze_content(text: str, url: str, title: str, api_key: str) -> dict:
    """Ask the Groq chat-completions endpoint to analyse page content.

    Args:
        text: Extracted page text to analyse.
        url: Source URL, included in the prompt.
        title: Page title, included in the prompt.
        api_key: Groq API key sent as a bearer token.

    Returns:
        The JSON object produced by the model, decoded into a dict.

    Raises:
        requests.exceptions.RequestException: If the API call fails or
            returns an HTTP error status.
        json.JSONDecodeError: If the model reply does not contain a JSON
            object.
    """
    prompt = f"""You are an expert content analyst. Analyze the following webpage content and extract deep insights.

Source URL: {url}
Page Title: {title}

Content:
{text}

Respond ONLY with a valid JSON object in exactly this format:
{{
  "sentiment": "<one of: Positive | Negative | Neutral | Mixed>",
  "sentiment_score": <float between -1.0 (very negative) and 1.0 (very positive)>,
  "sentiment_explanation": "<1-2 sentences explaining the sentiment>",
  "one_line_summary": "<single sentence capturing the entire content>",
  "key_themes": ["<theme 1>", "<theme 2>", "<theme 3>", "<theme 4>", "<theme 5>"],
  "named_entities": {{
    "persons": ["<name>"],
    "organizations": ["<org>"],
    "locations": ["<location>"],
    "products": ["<product>"]
  }},
  "content_type": "<one of: News Article | Product Page | Review | Blog Post | Research | Social Media | Other>",
  "target_audience": "<who this content is written for>",
  "key_insights": ["<insight 1>", "<insight 2>", "<insight 3>"],
  "tone": "<one of: Informative | Promotional | Critical | Analytical | Emotional | Persuasive | Neutral>",
  "credibility_signals": ["<signal 1>", "<signal 2>"],
  "word_count_estimate": <integer>
}}"""

    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "llama-3.3-70b-versatile",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 1000,
        "temperature": 0.1,
    }
    r = requests.post("https://api.groq.com/openai/v1/chat/completions",
                      headers=headers, json=payload, timeout=30)
    r.raise_for_status()
    raw = r.json()["choices"][0]["message"]["content"]

    # Remove Markdown code fences the model may wrap around its answer.
    raw = re.sub(r"```json|```", "", raw).strip()

    # The model sometimes adds prose before or after the object; keep only the
    # outermost {...} span so that surrounding text does not break parsing.
    start, end = raw.find("{"), raw.rfind("}")
    if start != -1 and end > start:
        raw = raw[start:end + 1]

    parsed = json.loads(raw)
    # Callers use dict methods on the result, so reject valid JSON that is not
    # an object with the same exception type they already handle.
    if not isinstance(parsed, dict):
        raise json.JSONDecodeError("Expected a JSON object", raw, 0)
    return parsed
143
+
144
+
145
+ # ─── Sidebar ──────────────────────────────────────────────────────────────────
146
+ with st.sidebar:
147
+ st.markdown("## 🌐 Sentiment Analyzer")
148
+ st.markdown("<div style='color:#2d2d3e;font-size:0.8rem'>AI-powered content intelligence</div>", unsafe_allow_html=True)
149
+ st.markdown("---")
150
+ env_key = os.environ.get("GROQ_API_KEY", "")
151
+ api_key = env_key if env_key else st.text_input("πŸ”‘ Groq API Key", type="password", placeholder="gsk_...")
152
+ if not env_key and not api_key:
153
+ st.caption("Free key β†’ [console.groq.com](https://console.groq.com)")
154
+ st.markdown("---")
155
+ st.markdown("""
156
+ <div style='font-size:0.78rem;color:#2d2d3e;line-height:2'>
157
+ <b style='color:#4b5563'>Extracts</b><br>
158
+ 🎭 Sentiment & Score<br>
159
+ 🏷️ Key Themes<br>
160
+ πŸ‘€ Named Entities<br>
161
+ πŸ’‘ Key Insights<br>
162
+ 🎯 Target Audience<br>
163
+ πŸ—£οΈ Content Tone<br>
164
+ πŸ“° Content Type
165
+ </div>""", unsafe_allow_html=True)
166
+ st.markdown("---")
167
+ st.markdown("""
168
+ <div style='font-size:0.78rem;color:#2d2d3e;line-height:2'>
169
+ <b style='color:#4b5563'>Try these URLs</b><br>
170
+ β€’ Any news article<br>
171
+ β€’ Amazon product page<br>
172
+ β€’ Wikipedia article<br>
173
+ β€’ Company blog post<br>
174
+ β€’ G2 / Trustpilot review
175
+ </div>""", unsafe_allow_html=True)
176
+
177
+
178
+ # ─── Main UI ──────────────────────────────────────────────────────────────────
179
+ st.markdown("""
180
+ <div class='hero'>
181
+ <h1>🌐 AI Webpage Sentiment & Insight Analyzer</h1>
182
+ <p>Paste any URL β€” AI extracts sentiment, themes, entities, tone, and key insights in seconds</p>
183
+ </div>
184
+ """, unsafe_allow_html=True)
185
+
186
+ if not api_key:
187
+ st.warning("πŸ‘ˆ Add your Groq API key in the sidebar.")
188
+ st.stop()
189
+
190
+ st.markdown("<div class='section-label'>Paste a URL to analyze</div>", unsafe_allow_html=True)
191
+ col_input, col_btn = st.columns([5, 1])
192
+ with col_input:
193
+ url_input = st.text_input("URL", placeholder="https://...", label_visibility="collapsed")
194
+ with col_btn:
195
+ analyze_btn = st.button("Analyze ➀", type="primary", use_container_width=True)
196
+
197
+ # Example URLs
198
+ st.markdown("<div class='section-label'>Quick examples</div>", unsafe_allow_html=True)
199
+ examples = [
200
+ "https://en.wikipedia.org/wiki/Artificial_intelligence",
201
+ "https://techcrunch.com",
202
+ "https://www.bbc.com/news",
203
+ ]
204
+ cols = st.columns(len(examples))
205
+ clicked_url = None
206
+ for i, ex in enumerate(examples):
207
+ parsed = urlparse(ex)
208
+ label = parsed.netloc
209
+ if cols[i].button(f"πŸ”— {label}", key=f"ex_{i}", use_container_width=True):
210
+ clicked_url = ex
211
+
212
+ final_url = clicked_url or (url_input if analyze_btn else None)
213
+
214
+ if final_url:
215
+ with st.spinner(f"Fetching and analyzing {final_url}..."):
216
+ try:
217
+ content_text, page_title = fetch_url_text(final_url)
218
+ result = analyze_content(content_text, final_url, page_title, api_key)
219
+
220
+ sentiment = result.get("sentiment", "Neutral")
221
+ score = result.get("sentiment_score", 0)
222
+ sentiment_color = (
223
+ "#4ade80" if sentiment == "Positive" else
224
+ "#f87171" if sentiment == "Negative" else
225
+ "#fbbf24" if sentiment == "Mixed" else
226
+ "#94a3b8"
227
+ )
228
+ sentiment_emoji = (
229
+ "😊" if sentiment == "Positive" else
230
+ "😟" if sentiment == "Negative" else
231
+ "😐" if sentiment == "Neutral" else "πŸ€”"
232
+ )
233
+ score_pct = int((score + 1) / 2 * 100)
234
+
235
+ st.markdown(f"<div class='url-chip'>πŸ”— {final_url}</div>", unsafe_allow_html=True)
236
+ st.markdown(f"### πŸ“„ {page_title}")
237
+
238
+ # Top row
239
+ col_sent, col_summary = st.columns([1, 2])
240
+
241
+ with col_sent:
242
+ st.markdown(f"""
243
+ <div class='insight-card' style='text-align:center'>
244
+ <div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:8px'>Sentiment</div>
245
+ <div style='font-size:3.5rem'>{sentiment_emoji}</div>
246
+ <div style='font-size:1.6rem;font-weight:700;color:{sentiment_color};margin:4px 0'>{sentiment}</div>
247
+ <div style='font-family:JetBrains Mono,monospace;font-size:0.8rem;color:#4b5563'>score: {score:+.2f}</div>
248
+ <div style='background:#1e1e2e;border-radius:4px;height:6px;margin:10px 0'>
249
+ <div style='height:6px;border-radius:4px;width:{score_pct}%;background:{sentiment_color}'></div>
250
+ </div>
251
+ <div style='font-size:0.78rem;color:#4b5563;margin-top:8px'>{result.get("sentiment_explanation","")}</div>
252
+ </div>
253
+ """, unsafe_allow_html=True)
254
+
255
+ with col_summary:
256
+ entities = result.get("named_entities", {})
257
+ persons_html = "".join([f"<span class='entity-tag entity-person'>πŸ‘€ {e}</span>" for e in entities.get("persons", [])[:4]])
258
+ orgs_html = "".join([f"<span class='entity-tag entity-org'>🏒 {e}</span>" for e in entities.get("organizations", [])[:4]])
259
+ locations_html = "".join([f"<span class='entity-tag entity-location'>πŸ“ {e}</span>" for e in entities.get("locations", [])[:3]])
260
+ products_html = "".join([f"<span class='entity-tag entity-product'>πŸ“¦ {e}</span>" for e in entities.get("products", [])[:3]])
261
+ entities_html = persons_html + orgs_html + locations_html + products_html or "<span style='color:#4b5563;font-size:0.82rem'>None detected</span>"
262
+
263
+ st.markdown(f"""
264
+ <div class='insight-card'>
265
+ <div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:10px'>One-Line Summary</div>
266
+ <div style='font-size:1rem;color:#f1f5f9;font-weight:500;line-height:1.6;margin-bottom:16px'>"{result.get("one_line_summary","")}"</div>
267
+ <div style='display:flex;gap:16px;margin-bottom:14px'>
268
+ <div><span style='font-size:0.72rem;color:#4b5563'>Content Type</span><br><span style='color:#a78bfa;font-weight:600;font-size:0.88rem'>{result.get("content_type","")}</span></div>
269
+ <div><span style='font-size:0.72rem;color:#4b5563'>Tone</span><br><span style='color:#60a5fa;font-weight:600;font-size:0.88rem'>{result.get("tone","")}</span></div>
270
+ <div><span style='font-size:0.72rem;color:#4b5563'>Audience</span><br><span style='color:#4ade80;font-weight:600;font-size:0.88rem'>{result.get("target_audience","")}</span></div>
271
+ </div>
272
+ <div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:8px'>Named Entities</div>
273
+ {entities_html}
274
+ </div>
275
+ """, unsafe_allow_html=True)
276
+
277
+ # Themes + Insights
278
+ col_themes, col_insights = st.columns(2)
279
+
280
+ with col_themes:
281
+ themes_html = "".join([f"<div class='theme-pill'>#{t}</div>" for t in result.get("key_themes", [])])
282
+ st.markdown(f"""
283
+ <div class='insight-card'>
284
+ <div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:12px'>🏷️ Key Themes</div>
285
+ {themes_html}
286
+ </div>""", unsafe_allow_html=True)
287
+
288
+ with col_insights:
289
+ insights_html = "".join([f"<div style='padding:8px 0;border-bottom:1px solid #1e1e2e;font-size:0.87rem;color:#94a3b8;line-height:1.6'>β†’ {ins}</div>" for ins in result.get("key_insights", [])])
290
+ st.markdown(f"""
291
+ <div class='insight-card'>
292
+ <div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:12px'>πŸ’‘ Key Insights</div>
293
+ {insights_html}
294
+ </div>""", unsafe_allow_html=True)
295
+
296
+ # Credibility
297
+ cred = result.get("credibility_signals", [])
298
+ if cred:
299
+ cred_html = "".join([f"<span style='background:rgba(74,222,128,0.08);border:1px solid rgba(74,222,128,0.2);border-radius:6px;padding:4px 12px;margin:3px;display:inline-block;font-size:0.8rem;color:#4ade80'>βœ“ {c}</span>" for c in cred])
300
+ st.markdown(f"""
301
+ <div class='insight-card'>
302
+ <div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:10px'>πŸ›‘οΈ Credibility Signals</div>
303
+ {cred_html}
304
+ </div>""", unsafe_allow_html=True)
305
+
306
+ except requests.exceptions.ConnectionError:
307
+ st.error("❌ Could not reach that URL. Make sure it's publicly accessible.")
308
+ except json.JSONDecodeError:
309
+ st.error("❌ AI returned unexpected output. Try again.")
310
+ except Exception as e:
311
+ st.error(f"❌ Error: {str(e)}")
shared_requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit>=1.32.0
2
+ requests>=2.31.0
3
+ PyMuPDF>=1.24.0
4
+ beautifulsoup4>=4.12.0
src/streamlit_app.py CHANGED
@@ -1,468 +1,283 @@
1
  import streamlit as st
2
- import chromadb
3
- from sentence_transformers import SentenceTransformer
4
- import fitz # PyMuPDF
5
  import os
6
  import requests
 
7
  import re
8
- import hashlib
9
 
10
- # ─── Page Config ──────────────────────────────────────────────────────────────
11
- st.set_page_config(
12
- page_title="PDF RAG Β· Upload & Ask",
13
- page_icon="πŸ“‚",
14
- layout="wide",
15
- initial_sidebar_state="expanded"
16
- )
17
 
18
- # ─── CSS ──────────────────────────────────────────────────────────────────────
19
  st.markdown("""
20
  <style>
21
- @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:wght@300;400;500;600&family=IBM+Plex+Mono:wght@400;500&display=swap');
22
-
23
- html, body, [class*="css"] { font-family: 'IBM Plex Sans', sans-serif; }
24
- .main { background-color: #0b0f1a; }
25
 
26
  .hero {
27
- background: linear-gradient(160deg, #0d1424 0%, #0b0f1a 100%);
28
- border: 1px solid #1e2a3e;
29
- border-top: 3px solid #22d3ee;
30
- border-radius: 12px;
31
- padding: 28px 32px;
32
- margin-bottom: 24px;
33
  }
34
- .hero h1 { font-size: 1.8rem; font-weight: 600; color: #e2e8f0; margin: 0 0 6px 0; }
35
- .hero p { color: #64748b; font-size: 0.95rem; margin: 0; }
36
 
37
- .phase-bar {
38
- display: flex; gap: 0; margin-bottom: 28px;
39
- border: 1px solid #1e2a3e; border-radius: 10px; overflow: hidden;
40
- }
41
- .phase {
42
- flex: 1; padding: 10px 6px; text-align: center;
43
- font-size: 0.75rem; color: #4b5563; background: #0d1117;
44
- border-right: 1px solid #1e2a3e; line-height: 1.5;
45
- }
46
- .phase:last-child { border-right: none; }
47
- .phase.done { color: #22d3ee; background: rgba(34,211,238,0.05); }
48
- .phase.active { color: #f8fafc; background: rgba(34,211,238,0.1); font-weight: 600; }
49
- .phase-icon { font-size: 1.1rem; display: block; margin-bottom: 2px; }
50
-
51
- .pdf-card {
52
- background: #0d1424;
53
- border: 1px solid #1e2a3e;
54
- border-radius: 10px;
55
- padding: 14px 16px;
56
- margin: 8px 0;
57
- display: flex;
58
- align-items: center;
59
- justify-content: space-between;
60
- }
61
- .pdf-name { font-size: 0.85rem; color: #e2e8f0; font-weight: 500; }
62
- .pdf-meta { font-family: 'IBM Plex Mono', monospace; font-size: 0.72rem; color: #475569; margin-top: 3px; }
63
- .pdf-badge {
64
- font-size: 0.72rem; font-family: 'IBM Plex Mono', monospace;
65
- background: rgba(34,211,238,0.1); color: #22d3ee;
66
- border: 1px solid rgba(34,211,238,0.25); padding: 3px 10px; border-radius: 20px;
67
  }
68
 
69
- .answer-box {
70
- background: #0d1424;
71
- border: 1px solid #1e3a4a;
72
- border-left: 3px solid #22d3ee;
73
- border-radius: 10px;
74
- padding: 22px 24px;
75
- color: #e2e8f0;
76
- line-height: 1.75;
77
- font-size: 0.96rem;
78
- margin: 12px 0 20px 0;
79
- }
80
 
81
- .chunk-card {
82
- background: #0d1117;
83
- border: 1px solid #1e2a3e;
84
- border-radius: 9px;
85
- padding: 14px 18px;
86
- margin: 8px 0;
87
  }
88
- .chunk-top {
89
- display: flex; justify-content: space-between;
90
- align-items: center; margin-bottom: 8px;
 
 
 
 
 
 
 
 
91
  }
92
- .chunk-source { font-size: 0.77rem; font-weight: 600; color: #22d3ee; text-transform: uppercase; letter-spacing: 0.05em; }
93
- .chunk-page { font-family: 'IBM Plex Mono', monospace; font-size: 0.72rem; color: #475569; }
94
- .score-bar-wrap { display: flex; align-items: center; gap: 8px; }
95
- .score-bar {
96
- height: 4px; border-radius: 2px; background: #1e2a3e; width: 80px; overflow: hidden;
97
  }
98
- .score-fill { height: 100%; border-radius: 2px; background: #22d3ee; }
99
- .score-num { font-family: 'IBM Plex Mono', monospace; font-size: 0.72rem; color: #22d3ee; }
100
- .chunk-text { font-size: 0.86rem; color: #94a3b8; line-height: 1.65; }
101
 
102
- .stat-row { display: flex; gap: 10px; margin: 16px 0; }
103
- .stat-box {
104
- flex: 1; background: #0d1424; border: 1px solid #1e2a3e;
105
- border-radius: 8px; padding: 12px; text-align: center;
106
- }
107
- .stat-val { font-size: 1.35rem; font-weight: 600; color: #22d3ee; }
108
- .stat-lbl { font-size: 0.7rem; color: #475569; margin-top: 2px; }
109
 
110
  .section-label {
111
- font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.1em;
112
- color: #374151; font-weight: 600; margin: 18px 0 8px 0;
113
  }
114
-
115
- section[data-testid="stSidebar"] {
116
- background-color: #080c14; border-right: 1px solid #131c2e;
117
- }
118
-
119
- .empty-state {
120
- text-align: center; padding: 48px 24px;
121
- border: 2px dashed #1e2a3e; border-radius: 12px; color: #374151;
122
  }
123
- .empty-state .icon { font-size: 2.5rem; margin-bottom: 12px; }
124
- .empty-state p { font-size: 0.9rem; line-height: 1.6; }
125
  </style>
126
  """, unsafe_allow_html=True)
127
 
128
 
129
- # ─── Session State ────────────────────────────────────────────────────────────
130
- if "indexed_files" not in st.session_state:
131
- st.session_state.indexed_files = {} # filename β†’ {chunks, pages, size}
132
- if "chroma_collection" not in st.session_state:
133
- st.session_state.chroma_collection = None
134
- if "chroma_client" not in st.session_state:
135
- st.session_state.chroma_client = None
136
- if "total_chunks" not in st.session_state:
137
- st.session_state.total_chunks = 0
138
-
139
-
140
- # ─── Load embedding model (cached globally) ───────────────────────────────────
141
- @st.cache_resource(show_spinner=False)
142
- def load_embed_model():
143
- return SentenceTransformer('all-MiniLM-L6-v2')
144
-
145
-
146
- # ─── PDF Extraction ───────────────────────────────────────────────────────────
147
- def extract_text_from_pdf(pdf_bytes: bytes) -> list[dict]:
148
- """Returns list of {page, text} dicts."""
149
  doc = fitz.open(stream=pdf_bytes, filetype="pdf")
150
- pages = []
151
- for page_num, page in enumerate(doc, start=1):
152
- text = page.get_text("text").strip()
153
- if text:
154
- pages.append({"page": page_num, "text": text})
155
  doc.close()
156
- return pages
157
-
158
-
159
- # ─── Chunking ─────────────────────────────────────────────────────────────────
160
- def chunk_text(pages: list[dict], chunk_size: int = 400, overlap: int = 60) -> list[dict]:
161
- """Splits page text into overlapping word-based chunks."""
162
- chunks = []
163
- for p in pages:
164
- words = p["text"].split()
165
- start = 0
166
- while start < len(words):
167
- end = start + chunk_size
168
- chunk_words = words[start:end]
169
- chunk_text_str = " ".join(chunk_words).strip()
170
- if len(chunk_text_str) > 60:
171
- chunks.append({"text": chunk_text_str, "page": p["page"]})
172
- start += chunk_size - overlap
173
- return chunks
174
-
175
-
176
- # ─── Index PDF into ChromaDB ──────────────────────────────────────────────────
177
- def index_pdf(filename: str, pdf_bytes: bytes, embed_model):
178
- # Init or reuse ChromaDB
179
- if st.session_state.chroma_client is None:
180
- st.session_state.chroma_client = chromadb.Client()
181
- st.session_state.chroma_collection = st.session_state.chroma_client.get_or_create_collection(
182
- name="pdf_rag", metadata={"hnsw:space": "cosine"}
183
- )
184
-
185
- collection = st.session_state.chroma_collection
186
-
187
- # Extract & chunk
188
- pages = extract_text_from_pdf(pdf_bytes)
189
- chunks = chunk_text(pages)
190
-
191
- if not chunks:
192
- return 0, 0
193
-
194
- # Embed & add
195
- texts = [c["text"] for c in chunks]
196
- embeddings = embed_model.encode(texts, batch_size=32, show_progress_bar=False).tolist()
197
-
198
- ids, docs, metas, embeds = [], [], [], []
199
- for i, (chunk, emb) in enumerate(zip(chunks, embeddings)):
200
- chunk_id = f"{hashlib.md5(filename.encode()).hexdigest()[:8]}_chunk_{i}"
201
- ids.append(chunk_id)
202
- docs.append(chunk["text"])
203
- metas.append({"filename": filename, "page": chunk["page"]})
204
- embeds.append(emb)
205
-
206
- collection.add(ids=ids, embeddings=embeds, documents=docs, metadatas=metas)
207
-
208
- st.session_state.indexed_files[filename] = {
209
- "chunks": len(chunks),
210
- "pages": len(pages),
211
- "size_kb": round(len(pdf_bytes) / 1024, 1)
212
- }
213
- st.session_state.total_chunks += len(chunks)
214
- return len(chunks), len(pages)
215
 
216
 
217
- # ─── RAG Query ────────────────────────────────────────────────────────────────
218
- def rag_query(question: str, embed_model, top_k: int, api_key: str):
219
- collection = st.session_state.chroma_collection
220
- q_emb = embed_model.encode(question).tolist()
221
- results = collection.query(query_embeddings=[q_emb], n_results=top_k)
222
 
223
- chunks = []
224
- for i in range(len(results["documents"][0])):
225
- distance = results["distances"][0][i]
226
- chunks.append({
227
- "text": results["documents"][0][i],
228
- "filename": results["metadatas"][0][i]["filename"],
229
- "page": results["metadatas"][0][i]["page"],
230
- "relevance": round((1 - distance) * 100, 1),
231
- })
232
 
233
- context = "\n\n".join([
234
- f"[Source: {c['filename']}, Page {c['page']}]\n{c['text']}" for c in chunks
235
- ])
236
 
237
- prompt = f"""You are a helpful assistant. Answer the user's question using ONLY the document context provided below. Be concise and clear. Always mention the source filename and page number when referencing specific information. If the answer cannot be found in the provided context, say "I couldn't find that information in the uploaded documents."
 
 
 
 
 
 
 
 
 
 
 
238
 
239
- Document Context:
240
- {context}
241
-
242
- Question: {question}
243
-
244
- Answer:"""
245
 
246
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
247
  payload = {
248
  "model": "llama-3.3-70b-versatile",
249
  "messages": [{"role": "user", "content": prompt}],
250
- "max_tokens": 600,
251
- "temperature": 0.2,
252
  }
253
- r = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=headers, json=payload, timeout=30)
 
254
  r.raise_for_status()
255
- answer = r.json()["choices"][0]["message"]["content"]
256
- return answer, chunks
257
-
258
-
259
- # ─── Determine current phase ──────────────────────────────────────────────────
260
- has_docs = len(st.session_state.indexed_files) > 0
261
- phase = 1 if not has_docs else 2
262
 
263
 
264
  # ─── Sidebar ──────────────────────────────────────────────────────────────────
265
  with st.sidebar:
266
- st.markdown("## πŸ“‚ PDF RAG Demo")
267
- st.markdown("<div style='color:#374151;font-size:0.8rem'>Upload β†’ Extract β†’ Index β†’ Ask</div>", unsafe_allow_html=True)
268
  st.markdown("---")
269
-
270
  env_key = os.environ.get("GROQ_API_KEY", "")
271
- if env_key:
272
- api_key = env_key
273
- st.success("βœ… Groq key loaded from secrets")
274
- else:
275
- api_key = st.text_input("πŸ”‘ Groq API Key", type="password", placeholder="gsk_...", help="Free at console.groq.com")
276
- if not api_key:
277
- st.caption("Get free key β†’ [console.groq.com](https://console.groq.com)")
278
-
279
- st.markdown("---")
280
- st.markdown("<div class='section-label'>Indexed Documents</div>", unsafe_allow_html=True)
281
-
282
- if st.session_state.indexed_files:
283
- for fname, info in st.session_state.indexed_files.items():
284
- st.markdown(f"""
285
- <div style='padding:6px 0;border-bottom:1px solid #131c2e'>
286
- <div style='font-size:0.8rem;color:#e2e8f0'>πŸ“„ {fname}</div>
287
- <div style='font-size:0.72rem;color:#475569;font-family:IBM Plex Mono,monospace'>
288
- {info["pages"]}p Β· {info["chunks"]} chunks Β· {info["size_kb"]}KB
289
- </div>
290
- </div>""", unsafe_allow_html=True)
291
-
292
- st.markdown("---")
293
- if st.button("πŸ—‘οΈ Clear all & reset", use_container_width=True):
294
- for key in ["indexed_files", "chroma_collection", "chroma_client", "total_chunks"]:
295
- del st.session_state[key]
296
- st.rerun()
297
- else:
298
- st.markdown("<div style='color:#374151;font-size:0.82rem'>No documents indexed yet.</div>", unsafe_allow_html=True)
299
-
300
  st.markdown("---")
301
  st.markdown("""
302
- <div style='font-size:0.77rem;color:#374151;line-height:1.9'>
303
- <b style='color:#4b5563'>Stack</b><br>
304
- πŸ“„ PDF parsing: PyMuPDF<br>
305
- βœ‚οΈ Chunking: word-overlap (400w)<br>
306
- πŸ”’ Embeddings: all-MiniLM-L6-v2<br>
307
- πŸ—„οΈ Vector DB: ChromaDB in-memory<br>
308
- 🧠 LLM: Groq · Llama 3.3 70B<br>
309
- 🌐 Hosting: HuggingFace Spaces
310
- </div>
311
- """, unsafe_allow_html=True)
 
 
312
 
313
 
314
- # ─── Hero ─────────────────────────────────────────────────────────────────────
315
  st.markdown("""
316
  <div class='hero'>
317
- <h1>πŸ“‚ PDF RAG β€” Upload & Ask</h1>
318
- <p>Upload any PDF documents Β· They get extracted, chunked, embedded, and indexed Β· Then ask questions across all of them</p>
319
- </div>
320
- """, unsafe_allow_html=True)
321
-
322
- # Phase bar
323
- st.markdown(f"""
324
- <div class='phase-bar'>
325
- <div class='phase {"done" if phase > 1 else "active"}'>
326
- <span class='phase-icon'>πŸ“€</span>Upload PDFs
327
- </div>
328
- <div class='phase {"active" if phase == 1 else "done"}'>
329
- <span class='phase-icon'>πŸ“‘</span>Extract Text
330
- </div>
331
- <div class='phase {"active" if phase == 1 else "done"}'>
332
- <span class='phase-icon'>βœ‚οΈ</span>Chunk
333
- </div>
334
- <div class='phase {"active" if phase == 1 else "done"}'>
335
- <span class='phase-icon'>πŸ”’</span>Embed
336
- </div>
337
- <div class='phase {"active" if phase == 1 else "done"}'>
338
- <span class='phase-icon'>πŸ—„οΈ</span>Index
339
- </div>
340
- <div class='phase {"active" if phase == 2 else ""}'>
341
- <span class='phase-icon'>πŸ’¬</span>Ask Questions
342
- </div>
343
  </div>
344
  """, unsafe_allow_html=True)
345
 
346
- # ─── Load model ───────────────────────────────────────────────────────────────
347
- with st.spinner("βš™οΈ Loading embedding model..."):
348
- embed_model = load_embed_model()
349
-
350
-
351
- # ════════════════════════════════════════════════════════════
352
- # PHASE 1 β€” Upload & Index
353
- # ════════════════════════════════════════════════════════════
354
- st.markdown("<div class='section-label'>Step 1 β€” Upload PDF Documents</div>", unsafe_allow_html=True)
355
-
356
- uploaded_files = st.file_uploader(
357
- "Drop your PDF files here",
358
- type=["pdf"],
359
- accept_multiple_files=True,
360
- label_visibility="collapsed"
361
- )
362
-
363
- if uploaded_files:
364
- new_files = [f for f in uploaded_files if f.name not in st.session_state.indexed_files]
365
-
366
- if new_files:
367
- st.markdown(f"**{len(new_files)} new file(s) ready to index:**")
368
- for f in new_files:
369
- st.markdown(f"<div class='pdf-card'><div><div class='pdf-name'>πŸ“„ {f.name}</div><div class='pdf-meta'>{round(f.size/1024,1)} KB</div></div><div class='pdf-badge'>ready</div></div>", unsafe_allow_html=True)
370
-
371
- if st.button(f"⚑ Extract & Index {len(new_files)} PDF(s)", type="primary", use_container_width=True):
372
- progress = st.progress(0, text="Starting...")
373
- for idx, f in enumerate(new_files):
374
- progress.progress((idx) / len(new_files), text=f"Processing: {f.name}")
375
- pdf_bytes = f.read()
376
-
377
- with st.spinner(f"Extracting & indexing **{f.name}**..."):
378
- n_chunks, n_pages = index_pdf(f.name, pdf_bytes, embed_model)
379
-
380
- st.success(f"βœ… **{f.name}** β†’ {n_pages} pages Β· {n_chunks} chunks indexed")
381
-
382
- progress.progress(1.0, text="Done!")
383
- st.balloons()
384
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
- else:
387
- st.info("All uploaded files are already indexed. Upload new files or ask questions below.")
388
 
389
- elif not has_docs:
390
- st.markdown("""
391
- <div class='empty-state'>
392
- <div class='icon'>πŸ“‚</div>
393
- <p><b style='color:#94a3b8'>No documents uploaded yet</b><br>
394
- Upload one or more PDF files above to get started.<br>
395
- Any topic works β€” reports, manuals, research papers, policies.</p>
396
- </div>
397
- """, unsafe_allow_html=True)
398
 
 
 
399
 
400
- # ════════════════════════════════════════════════════════════
401
- # PHASE 2 β€” Stats & Query
402
- # ════════════════════════════════════════════════════════════
403
- if has_docs:
404
- total_pages = sum(v["pages"] for v in st.session_state.indexed_files.values())
405
 
406
- st.markdown("<div class='section-label' style='margin-top:24px'>Index Summary</div>", unsafe_allow_html=True)
407
- st.markdown(f"""
408
  <div class='stat-row'>
409
- <div class='stat-box'><div class='stat-val'>{len(st.session_state.indexed_files)}</div><div class='stat-lbl'>Documents</div></div>
410
- <div class='stat-box'><div class='stat-val'>{total_pages}</div><div class='stat-lbl'>Pages Parsed</div></div>
411
- <div class='stat-box'><div class='stat-val'>{st.session_state.total_chunks}</div><div class='stat-lbl'>Chunks Indexed</div></div>
412
- <div class='stat-box'><div class='stat-val'>384</div><div class='stat-lbl'>Embedding Dims</div></div>
413
  </div>
414
  """, unsafe_allow_html=True)
415
 
416
- if not api_key:
417
- st.warning("πŸ‘ˆ Enter your Groq API key in the sidebar to start asking questions.")
418
- st.stop()
419
-
420
- st.markdown("---")
421
- st.markdown("<div class='section-label'>Step 2 β€” Ask a Question</div>", unsafe_allow_html=True)
422
 
423
- col1, col2 = st.columns([5, 1])
424
- with col1:
425
- question = st.text_input("", placeholder="What does the document say about...?", label_visibility="collapsed")
426
- with col2:
427
- top_k = st.selectbox("Top K", [2, 3, 4, 5], index=1, help="Number of chunks to retrieve")
428
 
429
- ask_btn = st.button("πŸ” Search & Answer", type="primary", use_container_width=True)
 
430
 
431
- if ask_btn and question:
432
- with st.spinner("πŸ” Searching index and generating answer..."):
433
- try:
434
- answer, chunks = rag_query(question, embed_model, top_k, api_key)
435
-
436
- st.markdown(f"<div class='section-label'>Answer</div>", unsafe_allow_html=True)
437
- st.markdown(f"<div class='answer-box'>{answer}</div>", unsafe_allow_html=True)
 
 
 
 
438
 
439
- st.markdown("<div class='section-label'>Retrieved Chunks (context sent to LLM)</div>", unsafe_allow_html=True)
440
 
441
- for i, chunk in enumerate(chunks):
442
- bar_width = int(chunk['relevance'])
443
- st.markdown(f"""
444
- <div class='chunk-card'>
445
- <div class='chunk-top'>
446
  <div>
447
- <div class='chunk-source'>πŸ“„ {chunk['filename']}</div>
448
- <div class='chunk-page'>Page {chunk['page']}</div>
 
449
  </div>
450
- <div class='score-bar-wrap'>
451
- <div class='score-bar'><div class='score-fill' style='width:{bar_width}%'></div></div>
452
- <div class='score-num'>{chunk['relevance']}%</div>
 
 
 
 
 
 
453
  </div>
454
  </div>
455
- <div class='chunk-text'>{chunk['text']}</div>
 
 
 
 
 
456
  </div>
457
  """, unsafe_allow_html=True)
458
-
459
- except requests.HTTPError as e:
460
- if e.response.status_code == 401:
461
- st.error("❌ Invalid Groq API key.")
462
- else:
463
- st.error(f"❌ API error: {str(e)}")
464
- except Exception as e:
465
- st.error(f"❌ Error: {str(e)}")
466
-
467
- elif ask_btn and not question:
468
- st.warning("Please enter a question.")
 
1
  import streamlit as st
2
+ import fitz
 
 
3
  import os
4
  import requests
5
+ import json
6
  import re
 
7
 
8
+ st.set_page_config(page_title="AI Resume Screener", page_icon="πŸ”", layout="wide")
 
 
 
 
 
 
9
 
 
10
  st.markdown("""
11
  <style>
12
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
13
+ html, body, [class*="css"] { font-family: 'Inter', sans-serif; }
14
+ .main { background: #f8fafc; }
 
15
 
16
  .hero {
17
+ background: linear-gradient(135deg, #1e3a5f 0%, #0f2027 100%);
18
+ border-radius: 16px; padding: 32px 36px; margin-bottom: 24px; color: white;
 
 
 
 
19
  }
20
+ .hero h1 { font-size: 1.9rem; font-weight: 700; margin: 0 0 6px 0; }
21
+ .hero p { color: #94a3b8; margin: 0; font-size: 0.92rem; }
22
 
23
+ .card {
24
+ background: white; border: 1px solid #e2e8f0;
25
+ border-radius: 12px; padding: 20px 24px; margin: 12px 0;
26
+ box-shadow: 0 1px 3px rgba(0,0,0,0.05);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
 
29
+ .rank-1 { border-left: 4px solid #22c55e; }
30
+ .rank-2 { border-left: 4px solid #3b82f6; }
31
+ .rank-3 { border-left: 4px solid #f59e0b; }
32
+ .rank-other { border-left: 4px solid #e2e8f0; }
 
 
 
 
 
 
 
33
 
34
+ .score-badge {
35
+ display: inline-block; font-size: 1.4rem; font-weight: 700;
36
+ padding: 8px 16px; border-radius: 50px; margin-bottom: 8px;
 
 
 
37
  }
38
+ .score-high { background: #dcfce7; color: #15803d; }
39
+ .score-mid { background: #dbeafe; color: #1d4ed8; }
40
+ .score-low { background: #fef9c3; color: #854d0e; }
41
+
42
+ .candidate-name { font-size: 1.1rem; font-weight: 600; color: #1e293b; }
43
+ .rank-label { font-size: 0.75rem; font-weight: 600; color: #64748b; text-transform: uppercase; letter-spacing: 0.05em; }
44
+
45
+ .strength-tag {
46
+ display: inline-block; background: #dcfce7; color: #15803d;
47
+ border: 1px solid #bbf7d0; border-radius: 20px;
48
+ padding: 3px 10px; font-size: 0.78rem; margin: 2px;
49
  }
50
+ .gap-tag {
51
+ display: inline-block; background: #fee2e2; color: #991b1b;
52
+ border: 1px solid #fecaca; border-radius: 20px;
53
+ padding: 3px 10px; font-size: 0.78rem; margin: 2px;
 
54
  }
 
 
 
55
 
56
+ .score-bar-bg { background: #f1f5f9; border-radius: 4px; height: 8px; margin: 8px 0; }
57
+ .score-bar-fill { height: 8px; border-radius: 4px; transition: width 0.3s; }
 
 
 
 
 
58
 
59
  .section-label {
60
+ font-size: 0.72rem; text-transform: uppercase; letter-spacing: 0.08em;
61
+ color: #94a3b8; font-weight: 600; margin: 20px 0 8px 0;
62
  }
63
+ .stat-row { display: flex; gap: 12px; margin: 16px 0; }
64
+ .stat-box {
65
+ flex: 1; background: white; border: 1px solid #e2e8f0;
66
+ border-radius: 10px; padding: 14px; text-align: center;
 
 
 
 
67
  }
68
+ .stat-val { font-size: 1.5rem; font-weight: 700; color: #1e293b; }
69
+ .stat-lbl { font-size: 0.72rem; color: #94a3b8; margin-top: 2px; }
70
  </style>
71
  """, unsafe_allow_html=True)
72
 
73
 
74
+ # ─── Helpers ──────────────────────────────────────────────────────────────────
75
def extract_pdf_text(pdf_bytes: bytes) -> str:
    """Return the plain text of every page in a PDF.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        The text of all pages in order, each page followed by a newline,
        with leading and trailing whitespace stripped from the result.
    """
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        # Join once instead of growing a string with += on every page.
        return "".join(page.get_text("text") + "\n" for page in doc).strip()
    finally:
        # Release the document even when text extraction raises.
        doc.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
 
84
# Numeric fields of the evaluation that downstream code compares and sorts on.
_SCORE_FIELDS = ("score", "experience_match", "skills_match", "education_match")


def _parse_llm_json(raw: str) -> dict:
    """Decode the JSON object embedded in an LLM reply, ignoring fences and prose around it."""
    # Models often wrap the object in ``` fences or add a sentence before/after
    # it; slicing from the first "{" to the last "}" tolerates both.
    start, end = raw.find("{"), raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError("Model response did not contain a JSON object.")
    return json.loads(raw[start:end + 1])


def _coerce_score(value: object) -> int:
    """Convert a model-supplied score to an int clamped to 0-100 (0 when unusable)."""
    try:
        number = round(float(str(value).strip().rstrip("%")))
    except (ValueError, OverflowError):
        # Non-numeric text, NaN or infinity: treat as no score.
        return 0
    return max(0, min(100, number))


def score_resume(jd_text: str, resume_text: str, candidate_name: str, api_key: str) -> dict:
    """Ask the Groq chat-completions API to evaluate a resume against a job description.

    Only the first 2000 characters of the job description and the first 2500
    characters of the resume are sent in the prompt.

    Args:
        jd_text: Job description text.
        resume_text: Plain text extracted from the candidate's resume.
        candidate_name: Display name inserted into the prompt.
        api_key: Groq API key used as the bearer token.

    Returns:
        The evaluation dict decoded from the model's JSON reply. Any of the
        "score", "experience_match", "skills_match" and "education_match"
        fields present in the reply are normalized to ints in the range 0-100.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        ValueError: If the reply contains no decodable JSON object
            (json.JSONDecodeError is a ValueError subclass).
    """
    prompt = f"""You are an expert HR recruiter and talent evaluator. Analyze the candidate's resume against the job description and provide a detailed evaluation.

Job Description:
{jd_text[:2000]}

Candidate Resume ({candidate_name}):
{resume_text[:2500]}

Respond ONLY with a valid JSON object in exactly this format:
{{
"score": <integer 0-100>,
"verdict": "<one of: Strong Match | Good Match | Partial Match | Weak Match>",
"summary": "<2-3 sentence overall assessment>",
"strengths": ["<strength 1>", "<strength 2>", "<strength 3>"],
"gaps": ["<gap 1>", "<gap 2>"],
"recommendation": "<one sentence hiring recommendation>",
"experience_match": <integer 0-100>,
"skills_match": <integer 0-100>,
"education_match": <integer 0-100>
}}

Be objective and specific. Base scores purely on how well the resume matches the JD requirements."""

    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "llama-3.3-70b-versatile",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 800,
        "temperature": 0.1,
    }
    r = requests.post("https://api.groq.com/openai/v1/chat/completions",
                      headers=headers, json=payload, timeout=30)
    r.raise_for_status()
    raw = r.json()["choices"][0]["message"]["content"]

    result = _parse_llm_json(raw)
    # The caller compares and sorts on these values, so make sure they are
    # real integers even if the model returned "85", 85.0 or "85%".
    for field in _SCORE_FIELDS:
        if field in result:
            result[field] = _coerce_score(result[field])
    return result
 
 
 
 
121
 
122
 
123
  # ─── Sidebar ──────────────────────────────────────────────────────────────────
124
  with st.sidebar:
125
+ st.markdown("## πŸ” Resume Screener")
126
+ st.markdown("<div style='color:#94a3b8;font-size:0.8rem'>Powered by Groq Β· Llama 3.3 70B</div>", unsafe_allow_html=True)
127
  st.markdown("---")
 
128
  env_key = os.environ.get("GROQ_API_KEY", "")
129
+ api_key = env_key if env_key else st.text_input("πŸ”‘ Groq API Key", type="password", placeholder="gsk_...")
130
+ if not env_key and not api_key:
131
+ st.caption("Free key β†’ [console.groq.com](https://console.groq.com)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  st.markdown("---")
133
  st.markdown("""
134
+ <div style='font-size:0.78rem;color:#94a3b8;line-height:1.9'>
135
+ <b>How it works</b><br>
136
+ 1. Paste the Job Description<br>
137
+ 2. Upload candidate resumes (PDF)<br>
138
+ 3. AI scores each resume 0–100<br>
139
+ 4. Candidates ranked automatically<br><br>
140
+ <b>Scoring Dimensions</b><br>
141
+ β€’ Overall fit score<br>
142
+ β€’ Skills match %<br>
143
+ β€’ Experience match %<br>
144
+ β€’ Education match %
145
+ </div>""", unsafe_allow_html=True)
146
 
147
 
148
+ # ─── Main UI ──────────────────────────────────────────────────────────────────
149
  st.markdown("""
150
  <div class='hero'>
151
+ <h1>πŸ” AI Resume Screener</h1>
152
+ <p>Upload a Job Description and multiple resumes β€” AI scores, ranks, and explains each candidate automatically</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  </div>
154
  """, unsafe_allow_html=True)
155
 
156
+ col_jd, col_resumes = st.columns([1, 1], gap="large")
157
+
158
+ with col_jd:
159
+ st.markdown("<div class='section-label'>Step 1 β€” Job Description</div>", unsafe_allow_html=True)
160
+ jd_input = st.text_area(
161
+ "Job Description",
162
+ placeholder="Paste the full job description here including role, responsibilities, required skills, and qualifications...",
163
+ height=320,
164
+ label_visibility="collapsed"
165
+ )
166
+
167
+ with col_resumes:
168
+ st.markdown("<div class='section-label'>Step 2 β€” Upload Resumes (PDF)</div>", unsafe_allow_html=True)
169
+ uploaded_resumes = st.file_uploader(
170
+ "Upload Resumes",
171
+ type=["pdf"],
172
+ accept_multiple_files=True,
173
+ label_visibility="collapsed"
174
+ )
175
+ if uploaded_resumes:
176
+ for r in uploaded_resumes:
177
+ st.markdown(f"<div style='font-size:0.82rem;color:#475569;padding:4px 0'>πŸ“„ {r.name} Β· {round(r.size/1024,1)}KB</div>", unsafe_allow_html=True)
178
+
179
+ st.markdown("")
180
+ run_btn = st.button("πŸš€ Screen All Candidates", type="primary", use_container_width=True,
181
+ disabled=not (jd_input and uploaded_resumes and api_key))
182
+
183
+ if not api_key:
184
+ st.warning("πŸ‘ˆ Add your Groq API key to get started.")
185
+ elif not jd_input:
186
+ st.info("πŸ“‹ Paste the job description on the left to begin.")
187
+ elif not uploaded_resumes:
188
+ st.info("πŸ“‚ Upload at least one resume PDF to begin.")
189
+
190
+ if run_btn and jd_input and uploaded_resumes and api_key:
191
+ results = []
192
+ progress = st.progress(0, text="Screening candidates...")
193
+
194
+ for i, resume_file in enumerate(uploaded_resumes):
195
+ candidate_name = resume_file.name.replace(".pdf", "").replace("_", " ").replace("-", " ").title()
196
+ progress.progress(i / len(uploaded_resumes), text=f"Analyzing {candidate_name}...")
197
+
198
+ with st.spinner(f"Evaluating {candidate_name}..."):
199
+ try:
200
+ resume_text = extract_pdf_text(resume_file.read())
201
+ result = score_resume(jd_input, resume_text, candidate_name, api_key)
202
+ result["name"] = candidate_name
203
+ result["filename"] = resume_file.name
204
+ results.append(result)
205
+ except Exception as e:
206
+ st.error(f"❌ Error processing {candidate_name}: {str(e)}")
207
 
208
+ progress.progress(1.0, text="Screening complete!")
 
209
 
210
+ if results:
211
+ # Sort by score
212
+ results.sort(key=lambda x: x.get("score", 0), reverse=True)
 
 
 
 
 
 
213
 
214
+ st.markdown("---")
215
+ st.markdown("## πŸ“Š Screening Results")
216
 
217
+ # Summary stats
218
+ avg_score = round(sum(r.get("score", 0) for r in results) / len(results))
219
+ top_score = results[0].get("score", 0)
220
+ strong = sum(1 for r in results if r.get("score", 0) >= 70)
 
221
 
222
+ st.markdown(f"""
 
223
  <div class='stat-row'>
224
+ <div class='stat-box'><div class='stat-val'>{len(results)}</div><div class='stat-lbl'>Candidates Screened</div></div>
225
+ <div class='stat-box'><div class='stat-val'>{top_score}</div><div class='stat-lbl'>Top Score</div></div>
226
+ <div class='stat-box'><div class='stat-val'>{avg_score}</div><div class='stat-lbl'>Average Score</div></div>
227
+ <div class='stat-box'><div class='stat-val'>{strong}</div><div class='stat-lbl'>Strong Matches</div></div>
228
  </div>
229
  """, unsafe_allow_html=True)
230
 
231
+ # Ranked results
232
+ for rank, result in enumerate(results, start=1):
233
+ score = result.get("score", 0)
234
+ rank_class = f"rank-{rank}" if rank <= 3 else "rank-other"
235
+ score_class = "score-high" if score >= 70 else "score-mid" if score >= 50 else "score-low"
236
+ rank_emoji = "πŸ₯‡" if rank == 1 else "πŸ₯ˆ" if rank == 2 else "πŸ₯‰" if rank == 3 else f"#{rank}"
237
 
238
+ skills_w = result.get("skills_match", 0)
239
+ exp_w = result.get("experience_match", 0)
240
+ edu_w = result.get("education_match", 0)
 
 
241
 
242
+ strengths_html = "".join([f"<span class='strength-tag'>βœ“ {s}</span>" for s in result.get("strengths", [])])
243
+ gaps_html = "".join([f"<span class='gap-tag'>βœ— {g}</span>" for g in result.get("gaps", [])])
244
 
245
+ with st.expander(f"{rank_emoji} {result['name']} β€” {score}/100 Β· {result.get('verdict', '')}", expanded=(rank <= 3)):
246
+ st.markdown(f"""
247
+ <div class='card {rank_class}'>
248
+ <div style='display:flex;justify-content:space-between;align-items:flex-start;flex-wrap:wrap;gap:12px'>
249
+ <div>
250
+ <div class='rank-label'>Rank #{rank}</div>
251
+ <div class='candidate-name'>{result['name']}</div>
252
+ <div style='color:#64748b;font-size:0.82rem;margin-top:2px'>πŸ“„ {result['filename']}</div>
253
+ </div>
254
+ <div class='score-badge {score_class}'>{score} / 100</div>
255
+ </div>
256
 
257
+ <div style='margin:16px 0;color:#334155;font-size:0.92rem;line-height:1.7'>{result.get("summary","")}</div>
258
 
259
+ <div style='display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px;margin:16px 0'>
 
 
 
 
260
  <div>
261
+ <div style='font-size:0.75rem;color:#64748b;margin-bottom:4px'>Skills Match</div>
262
+ <div class='score-bar-bg'><div class='score-bar-fill' style='width:{skills_w}%;background:#3b82f6'></div></div>
263
+ <div style='font-size:0.78rem;font-weight:600;color:#3b82f6'>{skills_w}%</div>
264
  </div>
265
+ <div>
266
+ <div style='font-size:0.75rem;color:#64748b;margin-bottom:4px'>Experience Match</div>
267
+ <div class='score-bar-bg'><div class='score-bar-fill' style='width:{exp_w}%;background:#22c55e'></div></div>
268
+ <div style='font-size:0.78rem;font-weight:600;color:#22c55e'>{exp_w}%</div>
269
+ </div>
270
+ <div>
271
+ <div style='font-size:0.75rem;color:#64748b;margin-bottom:4px'>Education Match</div>
272
+ <div class='score-bar-bg'><div class='score-bar-fill' style='width:{edu_w}%;background:#f59e0b'></div></div>
273
+ <div style='font-size:0.78rem;font-weight:600;color:#f59e0b'>{edu_w}%</div>
274
  </div>
275
  </div>
276
+
277
+ <div style='margin-bottom:10px'><div style='font-size:0.78rem;font-weight:600;color:#15803d;margin-bottom:6px'>βœ… Strengths</div>{strengths_html}</div>
278
+ <div style='margin-bottom:14px'><div style='font-size:0.78rem;font-weight:600;color:#991b1b;margin-bottom:6px'>⚠️ Gaps</div>{gaps_html}</div>
279
+ <div style='background:#f8fafc;border:1px solid #e2e8f0;border-radius:8px;padding:12px;font-size:0.88rem;color:#334155'>
280
+ πŸ’‘ <b>Recommendation:</b> {result.get("recommendation","")}
281
+ </div>
282
  </div>
283
  """, unsafe_allow_html=True)