Spaces:
Paused
Paused
Commit ·
8eb4114
1
Parent(s): 3add352
summary fixes
Browse files
- TODO.md +11 -5
- ai_med_extract/__pycache__/app.cpython-311.pyc +0 -0
- ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc +0 -0
- ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc +0 -0
- ai_med_extract/api/__pycache__/routes.cpython-311.pyc +0 -0
- ai_med_extract/api/routes.py +9 -3
- ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc +0 -0
- ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc +0 -0
- ai_med_extract/utils/model_loader_gguf.py +14 -5
- test_clinical_assessment_header.py +0 -73
- test_generate_full_summary.py +0 -279
- test_gguf.py +0 -137
TODO.md
CHANGED
|
@@ -1,7 +1,13 @@
|
|
| 1 |
-
# TODO:
|
| 2 |
|
| 3 |
## Tasks
|
| 4 |
-
- [x]
|
| 5 |
-
- [x]
|
| 6 |
-
- [
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# TODO: Fix GGUF Model Context Window Error and Optimize Speed
|
| 2 |
|
| 3 |
## Tasks
|
| 4 |
+
- [x] Modify generate method in model_loader_gguf.py to dynamically adjust max_tokens based on prompt length
|
| 5 |
+
- [x] Tune n_threads in model initialization for maximum speed
|
| 6 |
+
- [ ] Test the changes to ensure nothing breaks
|
| 7 |
+
|
| 8 |
+
## Details
|
| 9 |
+
- Approximate prompt tokens by word count (split on whitespace)
|
| 10 |
+
- Calculate allowed max_tokens = 2048 - prompt_tokens
|
| 11 |
+
- Reduce max_tokens if necessary and log a warning
|
| 12 |
+
- Raise an error if the prompt is too long
|
| 13 |
+
- Set n_threads to os.cpu_count() for speed
|
ai_med_extract/__pycache__/app.cpython-311.pyc
CHANGED
|
Binary files a/ai_med_extract/__pycache__/app.cpython-311.pyc and b/ai_med_extract/__pycache__/app.cpython-311.pyc differ
|
|
|
ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc
CHANGED
|
Binary files a/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc differ
|
|
|
ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc
CHANGED
|
Binary files a/ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc differ
|
|
|
ai_med_extract/api/__pycache__/routes.cpython-311.pyc
CHANGED
|
Binary files a/ai_med_extract/api/__pycache__/routes.cpython-311.pyc and b/ai_med_extract/api/__pycache__/routes.cpython-311.pyc differ
|
|
|
ai_med_extract/api/routes.py
CHANGED
|
@@ -1057,9 +1057,15 @@ def register_routes(app, agents):
|
|
| 1057 |
|
| 1058 |
|
| 1059 |
|
| 1060 |
-
|
| 1061 |
-
|
| 1062 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1063 |
@app.route('/generate_patient_summary', methods=['POST'])
|
| 1064 |
def generate_patient_summary():
|
| 1065 |
"""
|
|
|
|
| 1057 |
|
| 1058 |
|
| 1059 |
|
| 1060 |
+
# Initialize GGUF pipeline with proper model name handling
|
| 1061 |
+
gguf_model_name = "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf"
|
| 1062 |
+
if gguf_model_name.endswith('.gguf') and '/' in gguf_model_name:
|
| 1063 |
+
repo_id, filename = gguf_model_name.rsplit('/', 1)
|
| 1064 |
+
PIPELINE = get_gguf_pipeline(repo_id, filename)
|
| 1065 |
+
else:
|
| 1066 |
+
PIPELINE = get_gguf_pipeline(gguf_model_name)
|
| 1067 |
+
_ = PIPELINE.generate("Hello", max_tokens=5)
|
| 1068 |
+
|
| 1069 |
@app.route('/generate_patient_summary', methods=['POST'])
|
| 1070 |
def generate_patient_summary():
|
| 1071 |
"""
|
ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc
CHANGED
|
Binary files a/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc differ
|
|
|
ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc
CHANGED
|
Binary files a/ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc differ
|
|
|
ai_med_extract/utils/model_loader_gguf.py
CHANGED
|
@@ -53,9 +53,8 @@ class GGUFModelPipeline:
|
|
| 53 |
# Performance tuning and CPU-friendly defaults for Spaces
|
| 54 |
try:
|
| 55 |
cpu_count = os.cpu_count() or 2
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
n_batch = int(os.environ.get("GGUF_N_BATCH", "64")) # Reduced from 128
|
| 59 |
|
| 60 |
# Memory-optimized settings for Hugging Face Spaces
|
| 61 |
self.model = Llama(
|
|
@@ -87,8 +86,18 @@ class GGUFModelPipeline:
|
|
| 87 |
text = re.sub(p, "", text, flags=re.IGNORECASE)
|
| 88 |
return text.strip()
|
| 89 |
|
| 90 |
-
def _generate_with_timeout(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=
|
| 91 |
"""Generate text with timeout using threading"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
def _generate():
|
| 93 |
try:
|
| 94 |
output = self.model(
|
|
@@ -114,7 +123,7 @@ class GGUFModelPipeline:
|
|
| 114 |
def generate(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95):
|
| 115 |
t0 = time.time()
|
| 116 |
try:
|
| 117 |
-
output = self._generate_with_timeout(prompt, max_tokens, temperature, top_p, timeout=
|
| 118 |
dt = time.time() - t0
|
| 119 |
text = output["choices"][0]["text"].strip()
|
| 120 |
text = self._strip_special_tokens(text)
|
|
|
|
| 53 |
# Performance tuning and CPU-friendly defaults for Spaces
|
| 54 |
try:
|
| 55 |
cpu_count = os.cpu_count() or 2
|
| 56 |
+
n_threads = cpu_count # Set to max CPU cores for speed
|
| 57 |
+
n_batch = int(os.environ.get("GGUF_N_BATCH", "64")) # Keep batch size reasonable
|
|
|
|
| 58 |
|
| 59 |
# Memory-optimized settings for Hugging Face Spaces
|
| 60 |
self.model = Llama(
|
|
|
|
| 86 |
text = re.sub(p, "", text, flags=re.IGNORECASE)
|
| 87 |
return text.strip()
|
| 88 |
|
| 89 |
+
def _generate_with_timeout(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=500):
|
| 90 |
"""Generate text with timeout using threading"""
|
| 91 |
+
# Approximate token count by splitting on whitespace
|
| 92 |
+
prompt_tokens = len(prompt.split())
|
| 93 |
+
n_ctx = 2048
|
| 94 |
+
allowed_max_tokens = n_ctx - prompt_tokens
|
| 95 |
+
if allowed_max_tokens <= 0:
|
| 96 |
+
raise ValueError(f"Prompt too long: {prompt_tokens} tokens exceed context window of {n_ctx}")
|
| 97 |
+
if max_tokens > allowed_max_tokens:
|
| 98 |
+
logger.warning(f"Requested max_tokens {max_tokens} exceeds allowed {allowed_max_tokens}, reducing max_tokens")
|
| 99 |
+
max_tokens = allowed_max_tokens
|
| 100 |
+
|
| 101 |
def _generate():
|
| 102 |
try:
|
| 103 |
output = self.model(
|
|
|
|
| 123 |
def generate(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95):
|
| 124 |
t0 = time.time()
|
| 125 |
try:
|
| 126 |
+
output = self._generate_with_timeout(prompt, max_tokens, temperature, top_p, timeout=500)
|
| 127 |
dt = time.time() - t0
|
| 128 |
text = output["choices"][0]["text"].strip()
|
| 129 |
text = self._strip_special_tokens(text)
|
test_clinical_assessment_header.py
DELETED
|
@@ -1,73 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Test script to verify that the Clinical Assessment header is present in the formatted summary
|
| 4 |
-
"""
|
| 5 |
-
|
| 6 |
-
import sys
|
| 7 |
-
import os
|
| 8 |
-
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 9 |
-
|
| 10 |
-
from ai_med_extract.agents.patient_summary_agent import PatientSummarizerAgent
|
| 11 |
-
|
| 12 |
-
def test_clinical_assessment_header():
|
| 13 |
-
"""Test that Clinical Assessment header is present in formatted summary"""
|
| 14 |
-
|
| 15 |
-
# Create a mock patient data
|
| 16 |
-
patient_data = {
|
| 17 |
-
"result": {
|
| 18 |
-
"patientname": "John Doe",
|
| 19 |
-
"patientnumber": "12345",
|
| 20 |
-
"agey": "65",
|
| 21 |
-
"gender": "Male",
|
| 22 |
-
"allergies": ["Penicillin"],
|
| 23 |
-
"social_history": "Retired, former smoker",
|
| 24 |
-
"past_medical_history": ["Hypertension", "Diabetes"],
|
| 25 |
-
"encounters": [
|
| 26 |
-
{
|
| 27 |
-
"visit_date": "2024-01-15",
|
| 28 |
-
"chief_complaint": "Chest pain",
|
| 29 |
-
"symptoms": "Shortness of breath",
|
| 30 |
-
"diagnosis": ["Acute coronary syndrome"],
|
| 31 |
-
"dr_notes": "Patient presents with chest pain",
|
| 32 |
-
"vitals": {"BP": "150/90", "HR": "85"},
|
| 33 |
-
"lab_results": {"Troponin": "0.5"},
|
| 34 |
-
"medications": ["Aspirin", "Metoprolol"],
|
| 35 |
-
"treatment": "Medical management"
|
| 36 |
-
}
|
| 37 |
-
]
|
| 38 |
-
}
|
| 39 |
-
}
|
| 40 |
-
|
| 41 |
-
# Create agent with fallback loader (since we don't have actual models)
|
| 42 |
-
agent = PatientSummarizerAgent(model_name="test", model_type="test")
|
| 43 |
-
|
| 44 |
-
# Generate summary
|
| 45 |
-
summary = agent.generate_clinical_summary(patient_data)
|
| 46 |
-
|
| 47 |
-
# Check if Clinical Assessment header is present
|
| 48 |
-
has_clinical_assessment = "## Clinical Assessment" in summary
|
| 49 |
-
|
| 50 |
-
print("Test Results:")
|
| 51 |
-
print("=" * 50)
|
| 52 |
-
print(f"Clinical Assessment header present: {has_clinical_assessment}")
|
| 53 |
-
|
| 54 |
-
if has_clinical_assessment:
|
| 55 |
-
print("✅ SUCCESS: Clinical Assessment header is present in the summary")
|
| 56 |
-
else:
|
| 57 |
-
print("❌ FAILURE: Clinical Assessment header is missing from the summary")
|
| 58 |
-
|
| 59 |
-
print("\nSummary excerpt:")
|
| 60 |
-
print("-" * 30)
|
| 61 |
-
# Find the AI-Generated Narrative section
|
| 62 |
-
narrative_start = summary.find("--- AI-GENERATED CLINICAL NARRATIVE ---")
|
| 63 |
-
if narrative_start != -1:
|
| 64 |
-
excerpt = summary[narrative_start:narrative_start + 500]
|
| 65 |
-
print(excerpt + "...")
|
| 66 |
-
else:
|
| 67 |
-
print("Could not find AI-Generated Narrative section")
|
| 68 |
-
|
| 69 |
-
return has_clinical_assessment
|
| 70 |
-
|
| 71 |
-
if __name__ == "__main__":
|
| 72 |
-
success = test_clinical_assessment_header()
|
| 73 |
-
sys.exit(0 if success else 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test_generate_full_summary.py
DELETED
|
@@ -1,279 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Test script for the enhanced generate_full_summary method
|
| 4 |
-
Tests the logic for ensuring complete summaries with all 4 sections
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import re
|
| 8 |
-
import time
|
| 9 |
-
import logging
|
| 10 |
-
|
| 11 |
-
# Mock GGUFModelPipeline for testing
|
| 12 |
-
class MockGGUFModelPipeline:
|
| 13 |
-
def __init__(self, responses=None):
|
| 14 |
-
self.responses = responses or []
|
| 15 |
-
self.call_count = 0
|
| 16 |
-
self.generated_sections = set()
|
| 17 |
-
|
| 18 |
-
def generate(self, prompt, max_tokens=2048, temperature=0.5, top_p=0.95):
|
| 19 |
-
"""Mock generate method that returns predefined responses"""
|
| 20 |
-
if self.call_count < len(self.responses):
|
| 21 |
-
response = self.responses[self.call_count]
|
| 22 |
-
self.call_count += 1
|
| 23 |
-
print(f"[MOCK] Generate call {self.call_count}: returning {len(response)} chars")
|
| 24 |
-
return response
|
| 25 |
-
else:
|
| 26 |
-
# Default response for additional calls
|
| 27 |
-
return "## Clinical Assessment\nMock assessment content."
|
| 28 |
-
|
| 29 |
-
def generate_full_summary(self, prompt, max_tokens=2048, max_loops=5):
|
| 30 |
-
"""Copy of the enhanced generate_full_summary method for testing"""
|
| 31 |
-
def is_complete(text):
|
| 32 |
-
required_sections = [
|
| 33 |
-
'Clinical Assessment',
|
| 34 |
-
'Key Trends & Changes',
|
| 35 |
-
'Plan & Suggested Actions',
|
| 36 |
-
'Direct Guidance for Physician'
|
| 37 |
-
]
|
| 38 |
-
missing_sections = [s for s in required_sections if s not in text]
|
| 39 |
-
if missing_sections:
|
| 40 |
-
print(f"[TEST] Missing sections: {missing_sections}")
|
| 41 |
-
return False, missing_sections
|
| 42 |
-
ends_with_punct = bool(re.search(r'[.!?][\s\n]*$', text))
|
| 43 |
-
if not ends_with_punct:
|
| 44 |
-
print("[TEST] Summary does not end with a full sentence")
|
| 45 |
-
return ends_with_punct, []
|
| 46 |
-
|
| 47 |
-
def generate_missing_section(section_name, base_prompt, existing_output):
|
| 48 |
-
"""Generate a specific missing section using targeted prompt"""
|
| 49 |
-
section_prompts = {
|
| 50 |
-
'Clinical Assessment': f"Based on the patient data provided, generate only the Clinical Assessment section in markdown format. Focus on the current clinical status, key findings, and overall patient condition.\n\nPatient Data:\n{base_prompt}\n\n## Clinical Assessment\n",
|
| 51 |
-
'Key Trends & Changes': f"Based on the patient data provided, generate only the Key Trends & Changes section in markdown format. Analyze trends in vitals, labs, diagnoses, and medications over time.\n\nPatient Data:\n{base_prompt}\n\n## Key Trends & Changes\n",
|
| 52 |
-
'Plan & Suggested Actions': f"Based on the patient data provided, generate only the Plan & Suggested Actions section in markdown format. Recommend next steps, follow-up actions, and treatment considerations.\n\nPatient Data:\n{base_prompt}\n\n## Plan & Suggested Actions\n",
|
| 53 |
-
'Direct Guidance for Physician': f"Based on the patient data provided, generate only the Direct Guidance for Physician section in markdown format. Provide specific recommendations for the treating physician.\n\nPatient Data:\n{base_prompt}\n\n## Direct Guidance for Physician\n"
|
| 54 |
-
}
|
| 55 |
-
|
| 56 |
-
targeted_prompt = section_prompts.get(section_name, f"Generate the {section_name} section based on the patient data.\n\n{base_prompt}\n\n## {section_name}\n")
|
| 57 |
-
|
| 58 |
-
try:
|
| 59 |
-
section_output = self.generate(targeted_prompt, max_tokens=max_tokens//2)
|
| 60 |
-
# Clean up the output to extract just the section content
|
| 61 |
-
if f"## {section_name}" in section_output:
|
| 62 |
-
section_content = section_output.split(f"## {section_name}", 1)[1].strip()
|
| 63 |
-
# Remove any subsequent section headers
|
| 64 |
-
section_content = re.split(r'##\s+', section_content, 1)[0].strip()
|
| 65 |
-
return f"## {section_name}\n{section_content}"
|
| 66 |
-
else:
|
| 67 |
-
# If the model didn't follow the format, use the raw output
|
| 68 |
-
return f"## {section_name}\n{section_output.strip()}"
|
| 69 |
-
except Exception as e:
|
| 70 |
-
print(f"[TEST] Failed to generate {section_name} section: {e}")
|
| 71 |
-
# Return a minimal section if generation fails
|
| 72 |
-
return f"## {section_name}\nUnable to generate this section due to processing error. Please review patient data manually."
|
| 73 |
-
|
| 74 |
-
full_output = ""
|
| 75 |
-
current_prompt = prompt
|
| 76 |
-
total_start = time.time()
|
| 77 |
-
|
| 78 |
-
try:
|
| 79 |
-
print(f"[TEST] Starting enhanced full summary generation with max_loops={max_loops}")
|
| 80 |
-
print(f"[TEST] Prompt length: {len(prompt)} characters")
|
| 81 |
-
|
| 82 |
-
# Main generation loops
|
| 83 |
-
for loop_idx in range(max_loops):
|
| 84 |
-
loop_start = time.time()
|
| 85 |
-
print(f"[TEST] Starting loop {loop_idx+1}/{max_loops}")
|
| 86 |
-
print(f"[TEST] Current prompt length: {len(current_prompt)} characters")
|
| 87 |
-
|
| 88 |
-
output = self.generate(current_prompt, max_tokens=max_tokens)
|
| 89 |
-
|
| 90 |
-
if output.startswith(prompt):
|
| 91 |
-
output = output[len(prompt):].strip()
|
| 92 |
-
|
| 93 |
-
full_output += output
|
| 94 |
-
loop_time = time.time() - loop_start
|
| 95 |
-
|
| 96 |
-
print(f"[TEST] loop {loop_idx+1}/{max_loops}: {loop_time:.2f}s, cumulative {time.time()-total_start:.2f}s, length={len(full_output)} chars")
|
| 97 |
-
print(f"[TEST] Generated {len(output)} characters in this loop")
|
| 98 |
-
|
| 99 |
-
complete, missing_sections = is_complete(full_output)
|
| 100 |
-
|
| 101 |
-
if complete:
|
| 102 |
-
print(f"[TEST] All required sections found after loop {loop_idx+1}")
|
| 103 |
-
break
|
| 104 |
-
|
| 105 |
-
# If not complete and this is not the last loop, prepare next prompt
|
| 106 |
-
if loop_idx < max_loops - 1:
|
| 107 |
-
if missing_sections:
|
| 108 |
-
missing_list = ", ".join(missing_sections)
|
| 109 |
-
current_prompt = f"{prompt}\n\n{full_output}\n\nThe summary is missing these sections: {missing_list}. Please continue and complete all sections in markdown format:"
|
| 110 |
-
else:
|
| 111 |
-
current_prompt = f"{prompt}\n\n{full_output}\n\nContinue the summary and ensure it ends with a complete sentence:"
|
| 112 |
-
print(f"[TEST] Preparing next prompt for loop {loop_idx+2}")
|
| 113 |
-
|
| 114 |
-
# Post-processing: Generate any remaining missing sections
|
| 115 |
-
complete, missing_sections = is_complete(full_output)
|
| 116 |
-
|
| 117 |
-
if missing_sections:
|
| 118 |
-
print(f"[TEST] Generating {len(missing_sections)} missing sections post-processing")
|
| 119 |
-
generated_sections = []
|
| 120 |
-
|
| 121 |
-
for section in missing_sections:
|
| 122 |
-
print(f"[TEST] Generating missing section: {section}")
|
| 123 |
-
section_content = generate_missing_section(section, prompt, full_output)
|
| 124 |
-
generated_sections.append(section_content)
|
| 125 |
-
|
| 126 |
-
# Append generated sections to the main output
|
| 127 |
-
if generated_sections:
|
| 128 |
-
full_output += "\n\n" + "\n\n".join(generated_sections)
|
| 129 |
-
|
| 130 |
-
total_time = time.time() - total_start
|
| 131 |
-
print(f"[TEST] generate_full_summary completed in {total_time:.2f}s")
|
| 132 |
-
print(f"[TEST] Final summary length: {len(full_output)} characters")
|
| 133 |
-
|
| 134 |
-
# Final validation
|
| 135 |
-
final_complete, final_missing = is_complete(full_output)
|
| 136 |
-
if not final_complete:
|
| 137 |
-
print(f"[TEST] Final summary still incomplete. Missing: {final_missing}")
|
| 138 |
-
# As a last resort, ensure at least basic structure
|
| 139 |
-
if final_missing:
|
| 140 |
-
fallback_sections = []
|
| 141 |
-
for section in final_missing:
|
| 142 |
-
fallback_sections.append(f"## {section}\nPlease review the patient data for this section.")
|
| 143 |
-
full_output += "\n\n" + "\n\n".join(fallback_sections)
|
| 144 |
-
|
| 145 |
-
return full_output.strip()
|
| 146 |
-
|
| 147 |
-
except Exception as e:
|
| 148 |
-
print(f"[TEST] Full summary generation failed: {e}")
|
| 149 |
-
# Instead of raising error, return a minimal complete summary
|
| 150 |
-
minimal_sections = [
|
| 151 |
-
"## Clinical Assessment\nPatient data processing encountered an error. Please review the raw patient information manually.",
|
| 152 |
-
"## Key Trends & Changes\nUnable to analyze trends due to processing error. Manual review recommended.",
|
| 153 |
-
"## Plan & Suggested Actions\nError in generating action plan. Consult with healthcare provider for appropriate next steps.",
|
| 154 |
-
"## Direct Guidance for Physician\nProcessing error occurred. Please conduct a thorough manual review of all patient data."
|
| 155 |
-
]
|
| 156 |
-
return "\n\n".join(minimal_sections)
|
| 157 |
-
|
| 158 |
-
def test_complete_summary():
|
| 159 |
-
"""Test case: Model generates complete summary in first attempt"""
|
| 160 |
-
print("\n" + "="*60)
|
| 161 |
-
print("TEST 1: Complete Summary in First Attempt")
|
| 162 |
-
print("="*60)
|
| 163 |
-
|
| 164 |
-
mock_responses = [
|
| 165 |
-
"""## Clinical Assessment
|
| 166 |
-
The patient presents with Type 1 diabetes mellitus with ketoacidosis. Current vitals show elevated blood pressure and recent lab results indicate abnormal thyroid function.
|
| 167 |
-
|
| 168 |
-
## Key Trends & Changes
|
| 169 |
-
Weight has decreased from 73kg to current levels. Blood pressure shows systolic readings around 135 mmHg. Thyroid panel has increased significantly from baseline.
|
| 170 |
-
|
| 171 |
-
## Plan & Suggested Actions
|
| 172 |
-
Continue current medication regimen. Schedule follow-up appointment in 2 weeks. Monitor blood glucose levels closely.
|
| 173 |
-
|
| 174 |
-
## Direct Guidance for Physician
|
| 175 |
-
Consider adjusting antihypertensive therapy. Evaluate thyroid function further with additional testing."""
|
| 176 |
-
]
|
| 177 |
-
|
| 178 |
-
mock_pipeline = MockGGUFModelPipeline(mock_responses)
|
| 179 |
-
prompt = "Generate a patient summary with all 4 sections."
|
| 180 |
-
|
| 181 |
-
result = mock_pipeline.generate_full_summary(prompt, max_loops=3)
|
| 182 |
-
|
| 183 |
-
# Check if all sections are present
|
| 184 |
-
sections_present = all(section in result for section in [
|
| 185 |
-
'Clinical Assessment', 'Key Trends & Changes',
|
| 186 |
-
'Plan & Suggested Actions', 'Direct Guidance for Physician'
|
| 187 |
-
])
|
| 188 |
-
|
| 189 |
-
print(f"\nResult has all sections: {sections_present}")
|
| 190 |
-
print(f"Result length: {len(result)} characters")
|
| 191 |
-
return sections_present
|
| 192 |
-
|
| 193 |
-
def test_incomplete_summary_fixed():
|
| 194 |
-
"""Test case: Model generates incomplete summary, then sections are added"""
|
| 195 |
-
print("\n" + "="*60)
|
| 196 |
-
print("TEST 2: Incomplete Summary Fixed by Post-processing")
|
| 197 |
-
print("="*60)
|
| 198 |
-
|
| 199 |
-
mock_responses = [
|
| 200 |
-
# First response: missing some sections
|
| 201 |
-
"""## Clinical Assessment
|
| 202 |
-
The patient has diabetes and hypertension.
|
| 203 |
-
|
| 204 |
-
## Key Trends & Changes
|
| 205 |
-
Blood pressure has been elevated.""",
|
| 206 |
-
# Second response for missing section
|
| 207 |
-
"""## Plan & Suggested Actions
|
| 208 |
-
Continue medications and follow up.""",
|
| 209 |
-
# Third response for missing section
|
| 210 |
-
"""## Direct Guidance for Physician
|
| 211 |
-
Monitor closely and adjust therapy as needed."""
|
| 212 |
-
]
|
| 213 |
-
|
| 214 |
-
mock_pipeline = MockGGUFModelPipeline(mock_responses)
|
| 215 |
-
prompt = "Generate a patient summary with all 4 sections."
|
| 216 |
-
|
| 217 |
-
result = mock_pipeline.generate_full_summary(prompt, max_loops=2)
|
| 218 |
-
|
| 219 |
-
# Check if all sections are present
|
| 220 |
-
sections_present = all(section in result for section in [
|
| 221 |
-
'Clinical Assessment', 'Key Trends & Changes',
|
| 222 |
-
'Plan & Suggested Actions', 'Direct Guidance for Physician'
|
| 223 |
-
])
|
| 224 |
-
|
| 225 |
-
print(f"\nResult has all sections: {sections_present}")
|
| 226 |
-
print(f"Result length: {len(result)} characters")
|
| 227 |
-
print(f"Number of generate calls made: {mock_pipeline.call_count}")
|
| 228 |
-
return sections_present
|
| 229 |
-
|
| 230 |
-
def test_error_handling():
|
| 231 |
-
"""Test case: Model fails, but method still returns complete summary"""
|
| 232 |
-
print("\n" + "="*60)
|
| 233 |
-
print("TEST 3: Error Handling - Complete Summary Returned")
|
| 234 |
-
print("="*60)
|
| 235 |
-
|
| 236 |
-
# Mock pipeline that raises exceptions
|
| 237 |
-
class FailingMockPipeline(MockGGUFModelPipeline):
|
| 238 |
-
def generate(self, prompt, **kwargs):
|
| 239 |
-
raise Exception("Model generation failed")
|
| 240 |
-
|
| 241 |
-
mock_pipeline = FailingMockPipeline()
|
| 242 |
-
prompt = "Generate a patient summary."
|
| 243 |
-
|
| 244 |
-
result = mock_pipeline.generate_full_summary(prompt, max_loops=2)
|
| 245 |
-
|
| 246 |
-
# Check if all sections are present even after error
|
| 247 |
-
sections_present = all(section in result for section in [
|
| 248 |
-
'Clinical Assessment', 'Key Trends & Changes',
|
| 249 |
-
'Plan & Suggested Actions', 'Direct Guidance for Physician'
|
| 250 |
-
])
|
| 251 |
-
|
| 252 |
-
print(f"\nResult has all sections despite error: {sections_present}")
|
| 253 |
-
print(f"Result length: {len(result)} characters")
|
| 254 |
-
return sections_present
|
| 255 |
-
|
| 256 |
-
if __name__ == "__main__":
|
| 257 |
-
print("Testing Enhanced generate_full_summary Method")
|
| 258 |
-
print("="*60)
|
| 259 |
-
|
| 260 |
-
# Run all tests
|
| 261 |
-
test1_pass = test_complete_summary()
|
| 262 |
-
test2_pass = test_incomplete_summary_fixed()
|
| 263 |
-
test3_pass = test_error_handling()
|
| 264 |
-
|
| 265 |
-
print("\n" + "="*60)
|
| 266 |
-
print("TEST SUMMARY")
|
| 267 |
-
print("="*60)
|
| 268 |
-
print(f"Test 1 (Complete Summary): {'PASS' if test1_pass else 'FAIL'}")
|
| 269 |
-
print(f"Test 2 (Incomplete Fixed): {'PASS' if test2_pass else 'FAIL'}")
|
| 270 |
-
print(f"Test 3 (Error Handling): {'PASS' if test3_pass else 'FAIL'}")
|
| 271 |
-
|
| 272 |
-
all_pass = all([test1_pass, test2_pass, test3_pass])
|
| 273 |
-
print(f"\nOverall Result: {'ALL TESTS PASS' if all_pass else 'SOME TESTS FAILED'}")
|
| 274 |
-
|
| 275 |
-
if all_pass:
|
| 276 |
-
print("\n✅ Enhanced generate_full_summary method is working correctly!")
|
| 277 |
-
print("The method ensures complete summaries with all 4 sections are always generated.")
|
| 278 |
-
else:
|
| 279 |
-
print("\n❌ Some tests failed. The method needs further refinement.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test_gguf.py
DELETED
|
@@ -1,137 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Test script for GGUF model loading in Hugging Face Spaces
|
| 4 |
-
This helps identify issues before they cause 500 errors in production
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import os
|
| 8 |
-
import sys
|
| 9 |
-
import time
|
| 10 |
-
import logging
|
| 11 |
-
|
| 12 |
-
# Configure logging
|
| 13 |
-
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 14 |
-
logger = logging.getLogger(__name__)
|
| 15 |
-
|
| 16 |
-
def test_gguf_loading():
|
| 17 |
-
"""Test GGUF model loading with the same parameters used in production"""
|
| 18 |
-
|
| 19 |
-
# Set environment variables for Hugging Face Spaces
|
| 20 |
-
os.environ['HF_HOME'] = '/tmp/huggingface'
|
| 21 |
-
os.environ['GGUF_N_THREADS'] = '2'
|
| 22 |
-
os.environ['GGUF_N_BATCH'] = '64'
|
| 23 |
-
|
| 24 |
-
try:
|
| 25 |
-
logger.info("Testing GGUF model loading...")
|
| 26 |
-
|
| 27 |
-
# Test the exact model name from your API call
|
| 28 |
-
model_name = "microsoft/Phi-3-mini-4k-instruct-gguf"
|
| 29 |
-
filename = "Phi-3-mini-4k-instruct-q4.gguf"
|
| 30 |
-
|
| 31 |
-
logger.info(f"Model: {model_name}")
|
| 32 |
-
logger.info(f"Filename: {filename}")
|
| 33 |
-
|
| 34 |
-
# Test import
|
| 35 |
-
try:
|
| 36 |
-
from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline
|
| 37 |
-
logger.info("✓ GGUFModelPipeline import successful")
|
| 38 |
-
except ImportError as e:
|
| 39 |
-
logger.error(f"✗ Failed to import GGUFModelPipeline: {e}")
|
| 40 |
-
return False
|
| 41 |
-
|
| 42 |
-
# Test model loading with timeout
|
| 43 |
-
start_time = time.time()
|
| 44 |
-
try:
|
| 45 |
-
pipeline = GGUFModelPipeline(model_name, filename, timeout=300)
|
| 46 |
-
load_time = time.time() - start_time
|
| 47 |
-
logger.info(f"✓ Model loaded successfully in {load_time:.2f}s")
|
| 48 |
-
except Exception as e:
|
| 49 |
-
load_time = time.time() - start_time
|
| 50 |
-
logger.error(f"✗ Model loading failed after {load_time:.2f}s: {e}")
|
| 51 |
-
return False
|
| 52 |
-
|
| 53 |
-
# Test basic generation
|
| 54 |
-
try:
|
| 55 |
-
test_prompt = "Generate a brief medical summary: Patient has fever and cough."
|
| 56 |
-
logger.info("Testing basic generation...")
|
| 57 |
-
|
| 58 |
-
start_gen = time.time()
|
| 59 |
-
result = pipeline.generate(test_prompt, max_tokens=100)
|
| 60 |
-
gen_time = time.time() - start_gen
|
| 61 |
-
|
| 62 |
-
logger.info(f"✓ Generation successful in {gen_time:.2f}s")
|
| 63 |
-
logger.info(f"Generated text length: {len(result)} characters")
|
| 64 |
-
logger.info(f"Sample output: {result[:200]}...")
|
| 65 |
-
|
| 66 |
-
except Exception as e:
|
| 67 |
-
logger.error(f"✗ Generation failed: {e}")
|
| 68 |
-
return False
|
| 69 |
-
|
| 70 |
-
# Test full summary generation
|
| 71 |
-
try:
|
| 72 |
-
logger.info("Testing full summary generation...")
|
| 73 |
-
|
| 74 |
-
start_summary = time.time()
|
| 75 |
-
summary = pipeline.generate_full_summary(test_prompt, max_tokens=200, max_loops=1)
|
| 76 |
-
summary_time = time.time() - start_summary
|
| 77 |
-
|
| 78 |
-
logger.info(f"✓ Full summary generation successful in {summary_time:.2f}s")
|
| 79 |
-
logger.info(f"Summary length: {len(summary)} characters")
|
| 80 |
-
|
| 81 |
-
except Exception as e:
|
| 82 |
-
logger.error(f"✗ Full summary generation failed: {e}")
|
| 83 |
-
return False
|
| 84 |
-
|
| 85 |
-
logger.info("🎉 All tests passed! GGUF model is working correctly.")
|
| 86 |
-
return True
|
| 87 |
-
|
| 88 |
-
except Exception as e:
|
| 89 |
-
logger.error(f"✗ Test failed with unexpected error: {e}")
|
| 90 |
-
return False
|
| 91 |
-
|
| 92 |
-
def test_fallback_pipeline():
|
| 93 |
-
"""Test the fallback pipeline when GGUF fails"""
|
| 94 |
-
try:
|
| 95 |
-
logger.info("Testing fallback pipeline...")
|
| 96 |
-
|
| 97 |
-
from ai_med_extract.utils.model_loader_gguf import create_fallback_pipeline
|
| 98 |
-
|
| 99 |
-
fallback = create_fallback_pipeline()
|
| 100 |
-
result = fallback.generate("Test prompt")
|
| 101 |
-
|
| 102 |
-
logger.info(f"✓ Fallback pipeline working: {len(result)} characters generated")
|
| 103 |
-
return True
|
| 104 |
-
|
| 105 |
-
except Exception as e:
|
| 106 |
-
logger.error(f"✗ Fallback pipeline failed: {e}")
|
| 107 |
-
return False
|
| 108 |
-
|
| 109 |
-
def main():
|
| 110 |
-
"""Main test function"""
|
| 111 |
-
logger.info("Starting GGUF model tests...")
|
| 112 |
-
|
| 113 |
-
# Test 1: GGUF model loading
|
| 114 |
-
gguf_success = test_gguf_loading()
|
| 115 |
-
|
| 116 |
-
# Test 2: Fallback pipeline
|
| 117 |
-
fallback_success = test_fallback_pipeline()
|
| 118 |
-
|
| 119 |
-
# Summary
|
| 120 |
-
logger.info("\n" + "="*50)
|
| 121 |
-
logger.info("TEST SUMMARY")
|
| 122 |
-
logger.info("="*50)
|
| 123 |
-
logger.info(f"GGUF Model Loading: {'✓ PASS' if gguf_success else '✗ FAIL'}")
|
| 124 |
-
logger.info(f"Fallback Pipeline: {'✓ PASS' if fallback_success else '✗ PASS'}")
|
| 125 |
-
|
| 126 |
-
if gguf_success:
|
| 127 |
-
logger.info("🎉 GGUF model is working correctly!")
|
| 128 |
-
logger.info("Your API should work without 500 errors.")
|
| 129 |
-
else:
|
| 130 |
-
logger.warning("⚠️ GGUF model has issues. The fallback will be used.")
|
| 131 |
-
logger.info("Your API will still work but with reduced functionality.")
|
| 132 |
-
|
| 133 |
-
return gguf_success
|
| 134 |
-
|
| 135 |
-
if __name__ == "__main__":
|
| 136 |
-
success = main()
|
| 137 |
-
sys.exit(0 if success else 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|