| import json |
| import requests |
| from typing import List, Tuple, Dict, Any |
| from src.config import LLM_MODEL, LLM_HOST |
|
|
| """ |
| answer_generator.py |
| |
| What it does: |
| Takes retrieved context chunks and the user's question, and uses a local LLM (Mistral via Ollama) |
| to generate a medically accurate answer with citations. |
| |
| How it works: |
| It constructs a prompt containing the medical sources and the user's question. |
| It sends an HTTP POST request to the local Ollama API endpoint. |
| It asks the LLM to format the response as JSON to easily extract the answer, citations, and confidence. |
| |
| Example input: |
| - chunks: [("Pemetrexed dose is 500mg...", 0.85)] |
| - question: "What is the dosage for Pemetrexed?" |
| |
| Example output: |
| { |
| "answer": "The recommended dosage for Pemetrexed is 500mg...", |
| "sources": [{"source_id": "Source 1", "text": "Pemetrexed dose is 500mg...", "confidence": 0.85}], |
| "confidence": 0.95 |
| } |
| """ |
|
|
# Maximum number of characters of each chunk echoed back in the "sources" list.
_PREVIEW_CHARS = 100

# Seconds to wait for the LLM endpoint before giving up.
_REQUEST_TIMEOUT_S = 60

# Confidence reported when the LLM reply cannot be interpreted reliably.
_FALLBACK_CONFIDENCE = 0.5


def _build_prompt(query: str, retrieved_chunks: List[Tuple[str, float]]) -> str:
    """Render the numbered sources and the question into the LLM prompt."""
    context_str = "\n\n".join(
        f"[Source {number}] (Relevance: {score:.2f}):\n{chunk}"
        for number, (chunk, score) in enumerate(retrieved_chunks, start=1)
    )
    # The doubled braces are f-string escapes for the literal JSON example.
    return f"""
You are a highly capable medical AI assistant.
Based strictly on the following medical sources, answer the question.
If the sources do not contain the answer, say "I don't know based on the provided sources."
Always cite the source number (e.g., [Source 1]) in your answer.

MEDICAL SOURCES:
{context_str}

QUESTION:
{query}

You MUST return your response in the following JSON format:
{{
"answer": "Your detailed answer here with citations like [Source 1].",
"confidence": 0.95
}}
"""


def _coerce_confidence(raw: Any, default: float = _FALLBACK_CONFIDENCE) -> float:
    """Convert a model-supplied confidence to a float clamped to [0.0, 1.0]."""
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return default
    if value != value:  # NaN is the only float that is not equal to itself.
        return default
    return min(1.0, max(0.0, value))


def _parse_llm_payload(result_json: Any) -> Tuple[Any, float]:
    """Extract ``(answer, confidence)`` from the decoded HTTP response body."""
    raw = result_json.get("response", "{}") if isinstance(result_json, dict) else None
    if not isinstance(raw, str):
        # No usable text came back at all (missing body object or null field).
        print("Error: LLM did not return valid JSON.")
        return "Error parsing output.", _FALLBACK_CONFIDENCE

    try:
        llm_output = json.loads(raw)
    except json.JSONDecodeError:
        llm_output = None

    # Valid JSON is not necessarily a JSON *object*; anything else is treated
    # like unparseable output and the raw text is surfaced as the answer.
    if not isinstance(llm_output, dict):
        print("Error: LLM did not return valid JSON.")
        return raw, _FALLBACK_CONFIDENCE

    answer = llm_output.get("answer", "Error parsing answer.")
    confidence = _coerce_confidence(llm_output.get("confidence", _FALLBACK_CONFIDENCE))
    return answer, confidence


def _preview(chunk: str) -> str:
    """Return at most ``_PREVIEW_CHARS`` characters, marking real truncation."""
    if len(chunk) <= _PREVIEW_CHARS:
        return chunk
    return chunk[:_PREVIEW_CHARS] + "..."


def generate_answer(query: str, retrieved_chunks: List[Tuple[str, float]]) -> Dict[str, Any]:
    """
    Generate an answer to ``query`` from the retrieved context via the LLM endpoint.

    The chunks are numbered ("Source 1", "Source 2", ...) and embedded in a
    prompt that is POSTed to ``{LLM_HOST}/api/generate``. The reply is expected
    to be a JSON object with ``answer`` and ``confidence`` keys.

    Args:
        query: The user's question.
        retrieved_chunks: ``(chunk_text, retrieval_score)`` pairs used as context.

    Returns:
        A dictionary with three keys:
            - ``"answer"``: the generated answer, or a fallback message when
              there is no context, the request fails, or the reply is unusable.
            - ``"sources"``: one entry per chunk holding ``source_id``, a text
              preview of at most 100 characters (suffixed with ``...`` only
              when truncated) and the retrieval score under ``confidence``.
            - ``"confidence"``: the model-reported confidence clamped to
              [0.0, 1.0]; 0.0 when there is no context or the request fails,
              0.5 when the reply cannot be parsed.

    Network, HTTP and malformed-reply errors are reported on stdout and turned
    into fallback answers rather than raised.
    """
    if not retrieved_chunks:
        return {
            "answer": "I could not find any relevant medical sources to answer your question.",
            "sources": [],
            "confidence": 0.0,
        }

    prompt = _build_prompt(query, retrieved_chunks)

    try:
        response = requests.post(
            f"{LLM_HOST}/api/generate",
            json={
                "model": LLM_MODEL,
                "prompt": prompt,
                "stream": False,
                "format": "json",
            },
            timeout=_REQUEST_TIMEOUT_S,
        )
        response.raise_for_status()
        result_json = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers requests versions whose ``response.json()`` raises a
        # plain JSON decode error that is not a RequestException subclass.
        print(f"Error communicating with Ollama: {e}")
        answer = "Sorry, I encountered an error connecting to the LLM service."
        confidence = 0.0
    else:
        answer, confidence = _parse_llm_payload(result_json)

    sources_out = [
        {
            "source_id": f"Source {number}",
            "text": _preview(chunk),
            "confidence": score,
        }
        for number, (chunk, score) in enumerate(retrieved_chunks, start=1)
    ]

    return {
        "answer": answer,
        "sources": sources_out,
        "confidence": confidence,
    }
|
|
if __name__ == "__main__":
    # Executing the file directly only confirms that the module imports
    # cleanly; no request is sent to the LLM.
    print("Answer generator module ready.")
|
|