File size: 3,856 Bytes
08fd094
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import json
import requests
from typing import List, Tuple, Dict, Any
from src.config import LLM_MODEL, LLM_HOST

"""
answer_generator.py

What it does:
Takes retrieved context chunks and the user's question, and uses a local LLM (Mistral via Ollama)
to generate a medically accurate answer with citations.

How it works:
It constructs a prompt containing the medical sources and the user's question.
It sends an HTTP POST request to the local Ollama API endpoint.
It asks the LLM to format the response as JSON to easily extract the answer, citations, and confidence.

Example input:
- retrieved_chunks: [("Pemetrexed dose is 500mg...", 0.85)]
- query: "What is the dosage for Pemetrexed?"

Example output:
{
  "answer": "The recommended dosage for Pemetrexed is 500mg...",
  "sources": [{"source_id": "Source 1", "text": "Pemetrexed dose is 500mg...", "confidence": 0.85}],
  "confidence": 0.95
}
"""

def generate_answer(query: str, retrieved_chunks: List[Tuple[str, float]]) -> Dict[str, Any]:
    """
    Generates an answer using the local Ollama LLM based on provided context.

    The retrieved chunks are numbered from 1 and embedded in a prompt together
    with the question. The prompt is POSTed to ``{LLM_HOST}/api/generate`` and
    the model is asked to reply with a JSON object holding ``answer`` and
    ``confidence``.

    Failures never propagate to the caller: transport/HTTP errors and
    malformed model output are reported with ``print`` and replaced by a
    fallback answer, so the returned dictionary always has the same shape.

    Args:
        query (str): The user's question.
        retrieved_chunks (List[Tuple[str, float]]): The context chunks and their retrieval scores.

    Returns:
        Dict: A dictionary with three keys:
            - "answer": the model's answer, or a fallback message on failure.
            - "sources": one entry per chunk with "source_id", "text"
              (first 100 characters followed by "...") and "confidence"
              (the chunk's retrieval score).
            - "confidence": the model-reported confidence as a float in
              [0.0, 1.0]; 0.0 when the service could not be reached and 0.5
              when the output could not be interpreted.
    """
    if not retrieved_chunks:
        return {
            "answer": "I could not find any relevant medical sources to answer your question.",
            "sources": [],
            "confidence": 0.0
        }

    # Number the sources from 1 so the model can cite them as [Source N].
    context_str = "\n\n".join(
        f"[Source {i}] (Relevance: {score:.2f}):\n{chunk}"
        for i, (chunk, score) in enumerate(retrieved_chunks, start=1)
    )

    prompt = f"""
You are a highly capable medical AI assistant.
Based strictly on the following medical sources, answer the question.
If the sources do not contain the answer, say "I don't know based on the provided sources."
Always cite the source number (e.g., [Source 1]) in your answer.

MEDICAL SOURCES:
{context_str}

QUESTION:
{query}

You MUST return your response in the following JSON format:
{{
  "answer": "Your detailed answer here with citations like [Source 1].",
  "confidence": 0.95
}}
"""

    # Phase 1: talk to the service. Only the HTTP round trip lives in this
    # ``try`` so that each handler knows exactly which names are bound.
    try:
        response = requests.post(
            f"{LLM_HOST}/api/generate",
            json={
                "model": LLM_MODEL,
                "prompt": prompt,
                "stream": False,
                "format": "json"
            },
            timeout=60
        )
        response.raise_for_status()
        payload = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error communicating with Ollama: {e}")
        answer = "Sorry, I encountered an error connecting to the LLM service."
        confidence = 0.0
    except ValueError:
        # Older requests releases report a non-JSON HTTP body as a plain
        # ValueError instead of a RequestException subclass. There is no
        # payload to fall back on in that case.
        print("Error: LLM did not return valid JSON.")
        answer = "Error parsing output."
        confidence = 0.5
    else:
        # Phase 2: interpret the model's own JSON text. A missing "response"
        # field is treated as an empty JSON object.
        raw_text = payload.get("response", "{}") if isinstance(payload, dict) else None
        answer, confidence = _parse_llm_output(raw_text)

    # Format the sources list
    sources_out = [
        {
            "source_id": f"Source {i}",
            "text": chunk[:100] + "...",  # truncate for display
            "confidence": score
        }
        for i, (chunk, score) in enumerate(retrieved_chunks, start=1)
    ]

    return {
        "answer": answer,
        "sources": sources_out,
        "confidence": confidence
    }


def _parse_llm_output(raw_text: Any) -> Tuple[str, float]:
    """Extract (answer, confidence) from the LLM's JSON text, falling back safely on bad output."""
    try:
        parsed = json.loads(raw_text)
    except (TypeError, ValueError):
        # TypeError: the field was null / not text. ValueError covers
        # json.JSONDecodeError: the text was not valid JSON.
        print("Error: LLM did not return valid JSON.")
        fallback = raw_text if isinstance(raw_text, str) else "Error parsing output."
        return fallback, 0.5

    if not isinstance(parsed, dict):
        # Valid JSON but not an object (e.g. a list or bare string), so there
        # are no "answer"/"confidence" keys to read; surface the raw text.
        print("Error: LLM did not return a JSON object.")
        return raw_text, 0.5

    answer = parsed.get("answer", "Error parsing answer.")
    if not isinstance(answer, str):
        answer = "Error parsing answer."
    return answer, _coerce_confidence(parsed.get("confidence", 0.5))


def _coerce_confidence(value: Any, default: float = 0.5) -> float:
    """Convert a model-reported confidence to a float clamped to [0.0, 1.0], or return ``default``."""
    if isinstance(value, bool):
        # bool is an int subclass; True/False is not a meaningful confidence.
        return default
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    if number != number:  # NaN is the only float that is not equal to itself
        return default
    return min(1.0, max(0.0, number))

if __name__ == "__main__":
    # Import smoke check: running this file directly only confirms that the
    # module loads; it does not contact the LLM service.
    ready_message = "Answer generator module ready."
    print(ready_message)