anir-1995 committed on
Commit
622a72f
·
verified ·
1 Parent(s): 4434777

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +466 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,468 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import hashlib
import html
import os
import re

import chromadb
import fitz  # PyMuPDF
import requests
import streamlit as st
from sentence_transformers import SentenceTransformer
9
 
10
+ # ─── Page Config ──────────────────────────────────────────────────────────────
11
# Configure the browser tab (title and icon), request the wide layout, and
# start with the sidebar expanded.
st.set_page_config(
    page_title="PDF RAG Β· Upload & Ask",
    page_icon="πŸ“‚",
    layout="wide",
    initial_sidebar_state="expanded"
)
17
+
18
+ # ─── CSS ──────────────────────────────────────────────────────────────────────
19
+ st.markdown("""
20
+ <style>
21
+ @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:wght@300;400;500;600&family=IBM+Plex+Mono:wght@400;500&display=swap');
22
+
23
+ html, body, [class*="css"] { font-family: 'IBM Plex Sans', sans-serif; }
24
+ .main { background-color: #0b0f1a; }
25
+
26
+ .hero {
27
+ background: linear-gradient(160deg, #0d1424 0%, #0b0f1a 100%);
28
+ border: 1px solid #1e2a3e;
29
+ border-top: 3px solid #22d3ee;
30
+ border-radius: 12px;
31
+ padding: 28px 32px;
32
+ margin-bottom: 24px;
33
+ }
34
+ .hero h1 { font-size: 1.8rem; font-weight: 600; color: #e2e8f0; margin: 0 0 6px 0; }
35
+ .hero p { color: #64748b; font-size: 0.95rem; margin: 0; }
36
+
37
+ .phase-bar {
38
+ display: flex; gap: 0; margin-bottom: 28px;
39
+ border: 1px solid #1e2a3e; border-radius: 10px; overflow: hidden;
40
+ }
41
+ .phase {
42
+ flex: 1; padding: 10px 6px; text-align: center;
43
+ font-size: 0.75rem; color: #4b5563; background: #0d1117;
44
+ border-right: 1px solid #1e2a3e; line-height: 1.5;
45
+ }
46
+ .phase:last-child { border-right: none; }
47
+ .phase.done { color: #22d3ee; background: rgba(34,211,238,0.05); }
48
+ .phase.active { color: #f8fafc; background: rgba(34,211,238,0.1); font-weight: 600; }
49
+ .phase-icon { font-size: 1.1rem; display: block; margin-bottom: 2px; }
50
+
51
+ .pdf-card {
52
+ background: #0d1424;
53
+ border: 1px solid #1e2a3e;
54
+ border-radius: 10px;
55
+ padding: 14px 16px;
56
+ margin: 8px 0;
57
+ display: flex;
58
+ align-items: center;
59
+ justify-content: space-between;
60
+ }
61
+ .pdf-name { font-size: 0.85rem; color: #e2e8f0; font-weight: 500; }
62
+ .pdf-meta { font-family: 'IBM Plex Mono', monospace; font-size: 0.72rem; color: #475569; margin-top: 3px; }
63
+ .pdf-badge {
64
+ font-size: 0.72rem; font-family: 'IBM Plex Mono', monospace;
65
+ background: rgba(34,211,238,0.1); color: #22d3ee;
66
+ border: 1px solid rgba(34,211,238,0.25); padding: 3px 10px; border-radius: 20px;
67
+ }
68
+
69
+ .answer-box {
70
+ background: #0d1424;
71
+ border: 1px solid #1e3a4a;
72
+ border-left: 3px solid #22d3ee;
73
+ border-radius: 10px;
74
+ padding: 22px 24px;
75
+ color: #e2e8f0;
76
+ line-height: 1.75;
77
+ font-size: 0.96rem;
78
+ margin: 12px 0 20px 0;
79
+ }
80
+
81
+ .chunk-card {
82
+ background: #0d1117;
83
+ border: 1px solid #1e2a3e;
84
+ border-radius: 9px;
85
+ padding: 14px 18px;
86
+ margin: 8px 0;
87
+ }
88
+ .chunk-top {
89
+ display: flex; justify-content: space-between;
90
+ align-items: center; margin-bottom: 8px;
91
+ }
92
+ .chunk-source { font-size: 0.77rem; font-weight: 600; color: #22d3ee; text-transform: uppercase; letter-spacing: 0.05em; }
93
+ .chunk-page { font-family: 'IBM Plex Mono', monospace; font-size: 0.72rem; color: #475569; }
94
+ .score-bar-wrap { display: flex; align-items: center; gap: 8px; }
95
+ .score-bar {
96
+ height: 4px; border-radius: 2px; background: #1e2a3e; width: 80px; overflow: hidden;
97
+ }
98
+ .score-fill { height: 100%; border-radius: 2px; background: #22d3ee; }
99
+ .score-num { font-family: 'IBM Plex Mono', monospace; font-size: 0.72rem; color: #22d3ee; }
100
+ .chunk-text { font-size: 0.86rem; color: #94a3b8; line-height: 1.65; }
101
+
102
+ .stat-row { display: flex; gap: 10px; margin: 16px 0; }
103
+ .stat-box {
104
+ flex: 1; background: #0d1424; border: 1px solid #1e2a3e;
105
+ border-radius: 8px; padding: 12px; text-align: center;
106
+ }
107
+ .stat-val { font-size: 1.35rem; font-weight: 600; color: #22d3ee; }
108
+ .stat-lbl { font-size: 0.7rem; color: #475569; margin-top: 2px; }
109
+
110
+ .section-label {
111
+ font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.1em;
112
+ color: #374151; font-weight: 600; margin: 18px 0 8px 0;
113
+ }
114
+
115
+ section[data-testid="stSidebar"] {
116
+ background-color: #080c14; border-right: 1px solid #131c2e;
117
+ }
118
+
119
+ .empty-state {
120
+ text-align: center; padding: 48px 24px;
121
+ border: 2px dashed #1e2a3e; border-radius: 12px; color: #374151;
122
+ }
123
+ .empty-state .icon { font-size: 2.5rem; margin-bottom: 12px; }
124
+ .empty-state p { font-size: 0.9rem; line-height: 1.6; }
125
+ </style>
126
+ """, unsafe_allow_html=True)
127
+
128
+
129
+ # ─── Session State ────────────────────────────────────────────────────────────
130
# Seed the per-session values on the first script run only; later reruns
# find the keys already present and keep whatever they hold.
_session_defaults = (
    ("indexed_files", {}),        # filename -> {chunks, pages, size_kb}
    ("chroma_collection", None),  # created lazily by index_pdf
    ("chroma_client", None),      # created lazily by index_pdf
    ("total_chunks", 0),          # running total across all indexed files
)
for _state_key, _initial_value in _session_defaults:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
138
+
139
+
140
+ # ─── Load embedding model (cached globally) ───────────────────────────────────
141
@st.cache_resource(show_spinner=False)
def load_embed_model():
    """Build the sentence-embedding model used for both indexing and querying.

    Wrapped in ``st.cache_resource`` so the returned object is reused instead
    of being rebuilt on every script rerun; Streamlit's own spinner is
    disabled because the caller shows one.
    """
    model_name = 'all-MiniLM-L6-v2'
    return SentenceTransformer(model_name)
144
+
145
+
146
+ # ─── PDF Extraction ───────────────────────────────────────────────────────────
147
def extract_text_from_pdf(pdf_bytes: bytes) -> list[dict]:
    """Extract the plain text of every non-empty page of a PDF.

    Args:
        pdf_bytes: Raw bytes of the PDF file.

    Returns:
        A list of ``{"page": <1-based page number>, "text": <stripped text>}``
        dicts, in page order. Pages whose extracted text is empty after
        stripping (for example image-only pages) are omitted, but they still
        count towards the page numbering of later pages.

    Raises:
        Whatever ``fitz.open`` / ``page.get_text`` raise for unreadable input;
        the document handle is closed in that case as well.
    """
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        pages = []
        for page_num, page in enumerate(doc, start=1):
            text = page.get_text("text").strip()
            if text:
                pages.append({"page": page_num, "text": text})
        return pages
    finally:
        # Close even when a page fails to parse, so the handle is not leaked.
        doc.close()
157
+
158
+
159
+ # ─── Chunking ─────────────────────────────────────────────────────────────────
160
def chunk_text(pages: list[dict], chunk_size: int = 400, overlap: int = 60) -> list[dict]:
    """Split page text into overlapping, word-based chunks.

    Each page is chunked independently, so a chunk never spans two pages.
    Consecutive chunks of a page share ``overlap`` words.

    Args:
        pages: Dicts with a ``"text"`` string and a ``"page"`` number.
        chunk_size: Maximum number of whitespace-separated words per chunk.
        overlap: Number of words repeated between consecutive chunks.

    Returns:
        A list of ``{"text": ..., "page": ...}`` dicts. Chunks whose text is
        60 characters or shorter are dropped as too small to be useful.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not in
            ``[0, chunk_size)``. A step of zero or less would never advance
            through the page.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    chunks = []
    for p in pages:
        words = p["text"].split()
        for start in range(0, len(words), step):
            end = start + chunk_size
            chunk_text_str = " ".join(words[start:end])
            if len(chunk_text_str) > 60:
                chunks.append({"text": chunk_text_str, "page": p["page"]})
            if end >= len(words):
                # This chunk already reached the end of the page; another
                # window would contain only words repeated from this one.
                break
    return chunks
174
+
175
+
176
+ # ─── Index PDF into ChromaDB ──────────────────────────────────────────────────
177
def index_pdf(filename: str, pdf_bytes: bytes, embed_model):
    """Extract, chunk, embed and store one PDF in the session's Chroma collection.

    On first use this creates the Chroma client and the ``pdf_rag`` collection
    (cosine space) and keeps both in ``st.session_state``. On success it also
    records the file under ``st.session_state.indexed_files`` and adds the
    chunk count to ``st.session_state.total_chunks``.

    Args:
        filename: Display name of the PDF; stored in each chunk's metadata and
            used (hashed) as the prefix of the chunk ids.
        pdf_bytes: Raw bytes of the PDF.
        embed_model: Object whose ``encode(texts, batch_size=..., show_progress_bar=...)``
            returns something with ``.tolist()``.

    Returns:
        ``(chunk_count, page_count)``. When no chunk survives chunking nothing
        is stored and ``(0, page_count)`` is returned, so the caller can still
        report how many pages were parsed.
    """
    # Init or reuse ChromaDB
    if st.session_state.chroma_client is None:
        st.session_state.chroma_client = chromadb.Client()
        st.session_state.chroma_collection = st.session_state.chroma_client.get_or_create_collection(
            name="pdf_rag", metadata={"hnsw:space": "cosine"}
        )

    collection = st.session_state.chroma_collection

    # Extract & chunk
    pages = extract_text_from_pdf(pdf_bytes)
    chunks = chunk_text(pages)

    if not chunks:
        return 0, len(pages)

    # Embed & add
    texts = [c["text"] for c in chunks]
    embeddings = embed_model.encode(texts, batch_size=32, show_progress_bar=False).tolist()

    # The id prefix depends only on the filename, so hash it once.
    id_prefix = hashlib.md5(filename.encode()).hexdigest()[:8]
    collection.add(
        ids=[f"{id_prefix}_chunk_{i}" for i in range(len(chunks))],
        embeddings=embeddings,
        documents=texts,
        metadatas=[{"filename": filename, "page": c["page"]} for c in chunks],
    )

    st.session_state.indexed_files[filename] = {
        "chunks": len(chunks),
        "pages": len(pages),
        "size_kb": round(len(pdf_bytes) / 1024, 1)
    }
    st.session_state.total_chunks += len(chunks)
    return len(chunks), len(pages)
215
+
216
+
217
+ # ─── RAG Query ────────────────────────────────────────────────────────────────
218
def rag_query(question: str, embed_model, top_k: int, api_key: str):
    """Retrieve the most similar chunks and ask the Groq chat API to answer.

    Args:
        question: The user's question.
        embed_model: Object whose ``encode(question)`` returns something with
            ``.tolist()``.
        top_k: Maximum number of chunks to retrieve.
        api_key: Groq API key, sent as a bearer token.

    Returns:
        ``(answer, chunks)`` where ``answer`` is the model's reply text and
        ``chunks`` is a list of dicts with ``text``, ``filename``, ``page`` and
        ``relevance`` (a percentage derived from ``1 - distance``, limited to
        the range 0-100).

    Raises:
        requests.HTTPError: If the Groq endpoint answers with an error status.
        requests.RequestException: For other transport failures (e.g. timeout).
    """
    collection = st.session_state.chroma_collection
    q_emb = embed_model.encode(question).tolist()

    # Do not ask for more neighbours than the collection holds.
    n_results = max(1, min(top_k, collection.count()))
    results = collection.query(query_embeddings=[q_emb], n_results=n_results)

    chunks = []
    hits = zip(results["documents"][0], results["metadatas"][0], results["distances"][0])
    for text, meta, distance in hits:
        # The collection is created with cosine space; a distance above 1
        # would give a negative percentage, so clamp for display.
        relevance = round((1 - distance) * 100, 1)
        chunks.append({
            "text": text,
            "filename": meta["filename"],
            "page": meta["page"],
            "relevance": min(100.0, max(0.0, relevance)),
        })

    context = "\n\n".join(
        f"[Source: {c['filename']}, Page {c['page']}]\n{c['text']}" for c in chunks
    )

    prompt = f"""You are a helpful assistant. Answer the user's question using ONLY the document context provided below. Be concise and clear. Always mention the source filename and page number when referencing specific information. If the answer cannot be found in the provided context, say "I couldn't find that information in the uploaded documents."

Document Context:
{context}

Question: {question}

Answer:"""

    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "llama-3.3-70b-versatile",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 600,
        "temperature": 0.2,
    }
    r = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=headers, json=payload, timeout=30)
    r.raise_for_status()
    answer = r.json()["choices"][0]["message"]["content"]
    return answer, chunks
257
+
258
+
259
+ # ─── Determine current phase ──────────────────────────────────────────────────
260
# Phase 1 = nothing indexed yet (upload step); phase 2 = at least one document
# indexed, so the question step is available.
has_docs = bool(st.session_state.indexed_files)
phase = 2 if has_docs else 1
262
+
263
+
264
+ # ─── Sidebar ──────────────────────────────────────────────────────────────────
265
# Sidebar: API-key entry, the list of indexed documents with a reset button,
# and a static description of the stack.
with st.sidebar:
    st.markdown("## πŸ“‚ PDF RAG Demo")
    st.markdown("<div style='color:#374151;font-size:0.8rem'>Upload β†’ Extract β†’ Index β†’ Ask</div>", unsafe_allow_html=True)
    st.markdown("---")

    # Prefer a key provided through the environment; otherwise ask for one.
    env_key = os.environ.get("GROQ_API_KEY", "")
    if env_key:
        api_key = env_key
        st.success("βœ… Groq key loaded from secrets")
    else:
        api_key = st.text_input("πŸ”‘ Groq API Key", type="password", placeholder="gsk_...", help="Free at console.groq.com")
        if not api_key:
            st.caption("Get free key β†’ [console.groq.com](https://console.groq.com)")

    st.markdown("---")
    st.markdown("<div class='section-label'>Indexed Documents</div>", unsafe_allow_html=True)

    if st.session_state.indexed_files:
        for fname, info in st.session_state.indexed_files.items():
            # The filename comes from the uploaded file and is rendered as raw
            # HTML, so it must be escaped before interpolation.
            safe_name = html.escape(fname)
            st.markdown(f"""
<div style='padding:6px 0;border-bottom:1px solid #131c2e'>
<div style='font-size:0.8rem;color:#e2e8f0'>πŸ“„ {safe_name}</div>
<div style='font-size:0.72rem;color:#475569;font-family:IBM Plex Mono,monospace'>
{info["pages"]}p Β· {info["chunks"]} chunks Β· {info["size_kb"]}KB
</div>
</div>""", unsafe_allow_html=True)

        st.markdown("---")
        if st.button("πŸ—‘οΈ Clear all & reset", use_container_width=True):
            # Dropping the keys lets the session-state defaults be re-seeded
            # on the rerun triggered below.
            for key in ["indexed_files", "chroma_collection", "chroma_client", "total_chunks"]:
                del st.session_state[key]
            st.rerun()
    else:
        st.markdown("<div style='color:#374151;font-size:0.82rem'>No documents indexed yet.</div>", unsafe_allow_html=True)

    st.markdown("---")
    st.markdown("""
<div style='font-size:0.77rem;color:#374151;line-height:1.9'>
<b style='color:#4b5563'>Stack</b><br>
πŸ“„ PDF parsing: PyMuPDF<br>
βœ‚οΈ Chunking: word-overlap (400w)<br>
πŸ”’ Embeddings: all-MiniLM-L6-v2<br>
πŸ—„οΈ Vector DB: ChromaDB in-memory<br>
🧠 LLM: Groq · Llama 3.3 70B<br>
🌐 Hosting: HuggingFace Spaces
</div>
""", unsafe_allow_html=True)
312
+
313
+
314
+ # ─── Hero ─────────────────────────────────────────────────────────────────────
315
# Page header.
st.markdown("""
<div class='hero'>
<h1>πŸ“‚ PDF RAG β€” Upload & Ask</h1>
<p>Upload any PDF documents Β· They get extracted, chunked, embedded, and indexed Β· Then ask questions across all of them</p>
</div>
""", unsafe_allow_html=True)

# Phase bar: work out the CSS state of each step once, then render.
# The four middle steps (extract, chunk, embed, index) always share one state.
upload_state = "done" if phase > 1 else "active"
pipeline_state = "active" if phase == 1 else "done"
ask_state = "active" if phase == 2 else ""
st.markdown(f"""
<div class='phase-bar'>
<div class='phase {upload_state}'>
<span class='phase-icon'>πŸ“€</span>Upload PDFs
</div>
<div class='phase {pipeline_state}'>
<span class='phase-icon'>πŸ“‘</span>Extract Text
</div>
<div class='phase {pipeline_state}'>
<span class='phase-icon'>βœ‚οΈ</span>Chunk
</div>
<div class='phase {pipeline_state}'>
<span class='phase-icon'>πŸ”’</span>Embed
</div>
<div class='phase {pipeline_state}'>
<span class='phase-icon'>πŸ—„οΈ</span>Index
</div>
<div class='phase {ask_state}'>
<span class='phase-icon'>πŸ’¬</span>Ask Questions
</div>
</div>
""", unsafe_allow_html=True)
345
+
346
+ # ─── Load model ───────────────────────────────────────────────────────────────
347
# Obtain the embedding model, showing a spinner while load_embed_model() runs.
with st.spinner("βš™οΈ Loading embedding model..."):
    embed_model = load_embed_model()
349
+
350
+
351
+ # ════════════════════════════════════════════════════════════
352
+ # PHASE 1 — Upload & Index
353
+ # ════════════════════════════════════════════════════════════
354
# Step 1 UI: accept PDF uploads, list the ones not indexed yet, and index them
# on demand. Files are considered "already indexed" purely by filename.
st.markdown("<div class='section-label'>Step 1 β€” Upload PDF Documents</div>", unsafe_allow_html=True)

uploaded_files = st.file_uploader(
    "Drop your PDF files here",
    type=["pdf"],
    accept_multiple_files=True,
    label_visibility="collapsed"
)

if uploaded_files:
    new_files = [f for f in uploaded_files if f.name not in st.session_state.indexed_files]

    if new_files:
        st.markdown(f"**{len(new_files)} new file(s) ready to index:**")
        for f in new_files:
            # The filename is user-controlled and rendered as raw HTML: escape it.
            safe_name = html.escape(f.name)
            st.markdown(f"<div class='pdf-card'><div><div class='pdf-name'>πŸ“„ {safe_name}</div><div class='pdf-meta'>{round(f.size/1024,1)} KB</div></div><div class='pdf-badge'>ready</div></div>", unsafe_allow_html=True)

        if st.button(f"⚑ Extract & Index {len(new_files)} PDF(s)", type="primary", use_container_width=True):
            progress = st.progress(0, text="Starting...")
            for idx, f in enumerate(new_files):
                progress.progress(idx / len(new_files), text=f"Processing: {f.name}")
                # getvalue() returns the whole buffer regardless of the read
                # position, so a file that was read before is not seen as empty.
                pdf_bytes = f.getvalue()

                with st.spinner(f"Extracting & indexing **{f.name}**..."):
                    n_chunks, n_pages = index_pdf(f.name, pdf_bytes, embed_model)

                st.success(f"βœ… **{f.name}** β†’ {n_pages} pages Β· {n_chunks} chunks indexed")

            progress.progress(1.0, text="Done!")
            st.balloons()
            # Rerun so the sidebar, phase bar and query section pick up the new index.
            st.rerun()

    else:
        st.info("All uploaded files are already indexed. Upload new files or ask questions below.")

elif not has_docs:
    st.markdown("""
<div class='empty-state'>
<div class='icon'>πŸ“‚</div>
<p><b style='color:#94a3b8'>No documents uploaded yet</b><br>
Upload one or more PDF files above to get started.<br>
Any topic works β€” reports, manuals, research papers, policies.</p>
</div>
""", unsafe_allow_html=True)
398
+
399
+
400
+ # ════════════════════════════════════════════════════════════
401
+ # PHASE 2 — Stats & Query
402
+ # ════════════════════════════════════════════════════════════
403
# Step 2 UI: index statistics, question input, and rendering of the answer
# together with the retrieved chunks. Only shown once something is indexed.
if has_docs:
    total_pages = sum(v["pages"] for v in st.session_state.indexed_files.values())

    st.markdown("<div class='section-label' style='margin-top:24px'>Index Summary</div>", unsafe_allow_html=True)
    st.markdown(f"""
<div class='stat-row'>
<div class='stat-box'><div class='stat-val'>{len(st.session_state.indexed_files)}</div><div class='stat-lbl'>Documents</div></div>
<div class='stat-box'><div class='stat-val'>{total_pages}</div><div class='stat-lbl'>Pages Parsed</div></div>
<div class='stat-box'><div class='stat-val'>{st.session_state.total_chunks}</div><div class='stat-lbl'>Chunks Indexed</div></div>
<div class='stat-box'><div class='stat-val'>384</div><div class='stat-lbl'>Embedding Dims</div></div>
</div>
""", unsafe_allow_html=True)

    if not api_key:
        st.warning("πŸ‘ˆ Enter your Groq API key in the sidebar to start asking questions.")
        # Without a key nothing below can work; end this script run here.
        st.stop()

    st.markdown("---")
    st.markdown("<div class='section-label'>Step 2 β€” Ask a Question</div>", unsafe_allow_html=True)

    col1, col2 = st.columns([5, 1])
    with col1:
        question = st.text_input("", placeholder="What does the document say about...?", label_visibility="collapsed")
    with col2:
        top_k = st.selectbox("Top K", [2, 3, 4, 5], index=1, help="Number of chunks to retrieve")

    ask_btn = st.button("πŸ” Search & Answer", type="primary", use_container_width=True)

    if ask_btn and question:
        with st.spinner("πŸ” Searching index and generating answer..."):
            try:
                answer, chunks = rag_query(question, embed_model, top_k, api_key)
            except requests.HTTPError as e:
                if e.response.status_code == 401:
                    st.error("❌ Invalid Groq API key.")
                else:
                    st.error(f"❌ API error: {str(e)}")
            except Exception as e:
                # Top-level UI boundary: surface any other failure to the user.
                st.error(f"❌ Error: {str(e)}")
            else:
                # The answer comes from the LLM and the chunk fields from the
                # uploaded PDFs; both are rendered as raw HTML, so escape them.
                # Newlines become <br> so a blank line in the answer does not
                # terminate the surrounding <div> block.
                safe_answer = html.escape(str(answer)).replace("\n", "<br>")

                st.markdown("<div class='section-label'>Answer</div>", unsafe_allow_html=True)
                st.markdown(f"<div class='answer-box'>{safe_answer}</div>", unsafe_allow_html=True)

                st.markdown("<div class='section-label'>Retrieved Chunks (context sent to LLM)</div>", unsafe_allow_html=True)

                for chunk in chunks:
                    # Keep the CSS width a valid percentage.
                    bar_width = max(0, min(100, int(chunk['relevance'])))
                    safe_filename = html.escape(str(chunk['filename']))
                    safe_text = html.escape(str(chunk['text']))
                    st.markdown(f"""
<div class='chunk-card'>
<div class='chunk-top'>
<div>
<div class='chunk-source'>πŸ“„ {safe_filename}</div>
<div class='chunk-page'>Page {chunk['page']}</div>
</div>
<div class='score-bar-wrap'>
<div class='score-bar'><div class='score-fill' style='width:{bar_width}%'></div></div>
<div class='score-num'>{chunk['relevance']}%</div>
</div>
</div>
<div class='chunk-text'>{safe_text}</div>
</div>
""", unsafe_allow_html=True)

    elif ask_btn and not question:
        st.warning("Please enter a question.")