"""
answer_generator.py

What it does:
Takes retrieved context chunks and the user's question, and uses a local LLM
(e.g. Mistral via Ollama) to generate a medically accurate answer with
citations.

How it works:
It constructs a prompt containing the medical sources and the user's question.
It sends an HTTP POST request to the local Ollama API endpoint.
It asks the LLM to format the response as JSON to easily extract the answer
and confidence; the sources list is built from the retrieved chunks.

Example input:
- chunks: [("Pemetrexed dose is 500mg...", 0.85)]
- question: "What is the dosage for Pemetrexed?"

Example output:
{
    "answer": "The recommended dosage for Pemetrexed is 500mg... [Source 1]",
    "sources": [
        {"source_id": "Source 1", "text": "Pemetrexed dose is 500mg...", "confidence": 0.85}
    ],
    "confidence": 0.95
}
"""

import json
import math
from typing import Any, Dict, List, Tuple

import requests

from src.config import LLM_MODEL, LLM_HOST

# Seconds to wait for the LLM service before giving up.
_REQUEST_TIMEOUT_SECONDS = 60

# Maximum number of characters of a chunk shown in the "sources" list.
_PREVIEW_CHARS = 100

# Confidence reported when the LLM gives none, or gives an unusable value.
_DEFAULT_CONFIDENCE = 0.5


def _build_prompt(query: str, retrieved_chunks: List[Tuple[str, float]]) -> str:
    """Build the LLM prompt from the numbered source chunks and the question."""
    context_str = "\n\n".join(
        f"[Source {i}] (Relevance: {score:.2f}):\n{chunk}"
        for i, (chunk, score) in enumerate(retrieved_chunks, start=1)
    )
    return f"""
You are a highly capable medical AI assistant. Based strictly on the following medical sources, answer the question.
If the sources do not contain the answer, say "I don't know based on the provided sources."
Always cite the source number (e.g., [Source 1]) in your answer.

MEDICAL SOURCES:
{context_str}

QUESTION:
{query}

You MUST return your response in the following JSON format:
{{
  "answer": "Your detailed answer here with citations like [Source 1].",
  "confidence": 0.95
}}
"""


def _coerce_confidence(value: Any, default: float = _DEFAULT_CONFIDENCE) -> float:
    """Convert an LLM-supplied confidence into a finite float within [0, 1]."""
    # bool is a subclass of int; True/False is not a meaningful confidence.
    if isinstance(value, bool):
        return default
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    # json.loads accepts NaN/Infinity literals; neither is a usable score.
    if not math.isfinite(number):
        return default
    return min(1.0, max(0.0, number))


def _parse_llm_output(raw_output: Any) -> Tuple[Any, float]:
    """Extract (answer, confidence) from the text the LLM generated.

    The text is expected to be a JSON object with "answer" and "confidence"
    keys. If it is not decodable JSON, or decodes to something other than an
    object, the raw text itself is returned as the answer (or a placeholder
    when there is no usable text) with the default confidence.
    """
    try:
        parsed = json.loads(raw_output)
    except (TypeError, ValueError):
        # TypeError: payload was not text (e.g. null); ValueError: bad JSON.
        parsed = None

    if not isinstance(parsed, dict):
        print("Error: LLM did not return valid JSON.")
        if isinstance(raw_output, str) and raw_output.strip():
            return raw_output, _DEFAULT_CONFIDENCE
        return "Error parsing output.", _DEFAULT_CONFIDENCE

    answer = parsed.get("answer")
    if answer is None:
        answer = "Error parsing answer."
    elif not isinstance(answer, str):
        # Keep whatever the model produced, but always hand back text.
        answer = str(answer)

    confidence = _coerce_confidence(parsed.get("confidence", _DEFAULT_CONFIDENCE))
    return answer, confidence


def _preview(chunk: str, limit: int = _PREVIEW_CHARS) -> str:
    """Return the chunk shortened for display, with "..." only if it was cut."""
    if len(chunk) <= limit:
        return chunk
    return chunk[:limit] + "..."


def generate_answer(query: str, retrieved_chunks: List[Tuple[str, float]]) -> Dict[str, Any]:
    """
    Generates an answer using the local Ollama LLM based on provided context.

    Args:
        query (str): The user's question.
        retrieved_chunks (List[Tuple[str, float]]): The context chunks and
            their retrieval scores.

    Returns:
        Dict: A dictionary with three keys:
            "answer": the generated answer text, or an error/fallback message.
            "sources": one entry per retrieved chunk, each with "source_id"
                ("Source N", 1-based), "text" (chunk preview) and
                "confidence" (the chunk's retrieval score).
            "confidence": a float in [0, 1]; 0.0 when there are no chunks or
                the LLM service could not be reached, 0.5 when the LLM output
                could not be interpreted or carried no usable confidence.

    This function does not raise on network or parsing failures; they are
    reported on stdout and reflected in the returned answer and confidence.
    """
    if not retrieved_chunks:
        return {
            "answer": "I could not find any relevant medical sources to answer your question.",
            "sources": [],
            "confidence": 0.0,
        }

    prompt = _build_prompt(query, retrieved_chunks)

    # Only the HTTP round trip lives in this try block, so the handler below
    # never has to touch a response object that was not assigned.
    try:
        response = requests.post(
            f"{LLM_HOST}/api/generate",
            json={
                "model": LLM_MODEL,
                "prompt": prompt,
                "stream": False,
                "format": "json",
            },
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        result_json = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers requests versions whose response.json() raises a
        # plain JSON decode error rather than a RequestException subclass.
        print(f"Error communicating with Ollama: {e}")
        answer = "Sorry, I encountered an error connecting to the LLM service."
        confidence = 0.0
    else:
        # The generated text is itself a JSON document nested in "response".
        if isinstance(result_json, dict):
            raw_output = result_json.get("response", "{}")
        else:
            raw_output = None
        answer, confidence = _parse_llm_output(raw_output)

    # Format the sources list
    sources_out = [
        {
            "source_id": f"Source {i}",
            "text": _preview(chunk),  # truncate for display
            "confidence": score,
        }
        for i, (chunk, score) in enumerate(retrieved_chunks, start=1)
    ]

    return {
        "answer": answer,
        "sources": sources_out,
        "confidence": confidence,
    }


if __name__ == "__main__":
    # Test block (requires Ollama running)
    print("Answer generator module ready.")