sachinchandrankallar committed on
Commit
8eb4114
·
1 Parent(s): 3add352

summary fixes

Browse files
TODO.md CHANGED
@@ -1,7 +1,13 @@
1
- # TODO: Rewrite generate_patient_summary for parallel section generation
2
 
3
  ## Tasks
4
- - [x] Add import for concurrent.futures in patient_summary_agent.py
5
- - [x] Create generate_summary_sections method to generate 4 sections in parallel
6
- - [x] Create generate_patient_summary method that stitches sections together
7
- - [x] Test the new method to ensure correct output (skipped by user)
 
 
 
 
 
 
 
1
+ # TODO: Fix GGUF Model Context Window Error and Optimize Speed
2
 
3
  ## Tasks
4
+ - [x] Modify generate method in model_loader_gguf.py to dynamically adjust max_tokens based on prompt length
5
+ - [x] Tune n_threads in model initialization for maximum speed
6
+ - [ ] Test the changes to ensure nothing breaks
7
+
8
+ ## Details
9
+ - Approximate prompt tokens by word count (split on whitespace)
10
+ - Calculate allowed max_tokens = n_ctx - prompt_tokens, where n_ctx = 2048 is the context window
11
+ - Reduce max_tokens if necessary, log warning
12
+ - Raise an error if the prompt is too long to fit in the context window
13
+ - Set n_threads to os.cpu_count() for speed
ai_med_extract/__pycache__/app.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/__pycache__/app.cpython-311.pyc and b/ai_med_extract/__pycache__/app.cpython-311.pyc differ
 
ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc differ
 
ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc differ
 
ai_med_extract/api/__pycache__/routes.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/api/__pycache__/routes.cpython-311.pyc and b/ai_med_extract/api/__pycache__/routes.cpython-311.pyc differ
 
ai_med_extract/api/routes.py CHANGED
@@ -1057,9 +1057,15 @@ def register_routes(app, agents):
1057
 
1058
 
1059
 
1060
-
1061
-
1062
-
 
 
 
 
 
 
1063
  @app.route('/generate_patient_summary', methods=['POST'])
1064
  def generate_patient_summary():
1065
  """
 
1057
 
1058
 
1059
 
1060
+ # Initialize GGUF pipeline with proper model name handling
1061
+ gguf_model_name = "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf"
1062
+ if gguf_model_name.endswith('.gguf') and '/' in gguf_model_name:
1063
+ repo_id, filename = gguf_model_name.rsplit('/', 1)
1064
+ PIPELINE = get_gguf_pipeline(repo_id, filename)
1065
+ else:
1066
+ PIPELINE = get_gguf_pipeline(gguf_model_name)
1067
+ _ = PIPELINE.generate("Hello", max_tokens=5)
1068
+
1069
  @app.route('/generate_patient_summary', methods=['POST'])
1070
  def generate_patient_summary():
1071
  """
ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc differ
 
ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc differ
 
ai_med_extract/utils/model_loader_gguf.py CHANGED
@@ -53,9 +53,8 @@ class GGUFModelPipeline:
53
  # Performance tuning and CPU-friendly defaults for Spaces
54
  try:
55
  cpu_count = os.cpu_count() or 2
56
- default_threads = max(2, min(4, cpu_count))
57
- n_threads = int(os.environ.get("GGUF_N_THREADS", str(default_threads)))
58
- n_batch = int(os.environ.get("GGUF_N_BATCH", "64")) # Reduced from 128
59
 
60
  # Memory-optimized settings for Hugging Face Spaces
61
  self.model = Llama(
@@ -87,8 +86,18 @@ class GGUFModelPipeline:
87
  text = re.sub(p, "", text, flags=re.IGNORECASE)
88
  return text.strip()
89
 
90
- def _generate_with_timeout(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=120):
91
  """Generate text with timeout using threading"""
 
 
 
 
 
 
 
 
 
 
92
  def _generate():
93
  try:
94
  output = self.model(
@@ -114,7 +123,7 @@ class GGUFModelPipeline:
114
  def generate(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95):
115
  t0 = time.time()
116
  try:
117
- output = self._generate_with_timeout(prompt, max_tokens, temperature, top_p, timeout=120)
118
  dt = time.time() - t0
119
  text = output["choices"][0]["text"].strip()
120
  text = self._strip_special_tokens(text)
 
53
  # Performance tuning and CPU-friendly defaults for Spaces
54
  try:
55
  cpu_count = os.cpu_count() or 2
56
+ n_threads = cpu_count # Set to max CPU cores for speed
57
+ n_batch = int(os.environ.get("GGUF_N_BATCH", "64")) # Keep batch size reasonable
 
58
 
59
  # Memory-optimized settings for Hugging Face Spaces
60
  self.model = Llama(
 
86
  text = re.sub(p, "", text, flags=re.IGNORECASE)
87
  return text.strip()
88
 
89
+ def _generate_with_timeout(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=500):
90
  """Generate text with timeout using threading"""
91
+ # Approximate token count by splitting on whitespace
92
+ prompt_tokens = len(prompt.split())
93
+ n_ctx = 2048
94
+ allowed_max_tokens = n_ctx - prompt_tokens
95
+ if allowed_max_tokens <= 0:
96
+ raise ValueError(f"Prompt too long: {prompt_tokens} tokens exceed context window of {n_ctx}")
97
+ if max_tokens > allowed_max_tokens:
98
+ logger.warning(f"Requested max_tokens {max_tokens} exceeds allowed {allowed_max_tokens}, reducing max_tokens")
99
+ max_tokens = allowed_max_tokens
100
+
101
  def _generate():
102
  try:
103
  output = self.model(
 
123
  def generate(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95):
124
  t0 = time.time()
125
  try:
126
+ output = self._generate_with_timeout(prompt, max_tokens, temperature, top_p, timeout=500)
127
  dt = time.time() - t0
128
  text = output["choices"][0]["text"].strip()
129
  text = self._strip_special_tokens(text)
test_clinical_assessment_header.py DELETED
@@ -1,73 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test script to verify that the Clinical Assessment header is present in the formatted summary
4
- """
5
-
6
- import sys
7
- import os
8
- sys.path.append(os.path.dirname(os.path.abspath(__file__)))
9
-
10
- from ai_med_extract.agents.patient_summary_agent import PatientSummarizerAgent
11
-
12
- def test_clinical_assessment_header():
13
- """Test that Clinical Assessment header is present in formatted summary"""
14
-
15
- # Create a mock patient data
16
- patient_data = {
17
- "result": {
18
- "patientname": "John Doe",
19
- "patientnumber": "12345",
20
- "agey": "65",
21
- "gender": "Male",
22
- "allergies": ["Penicillin"],
23
- "social_history": "Retired, former smoker",
24
- "past_medical_history": ["Hypertension", "Diabetes"],
25
- "encounters": [
26
- {
27
- "visit_date": "2024-01-15",
28
- "chief_complaint": "Chest pain",
29
- "symptoms": "Shortness of breath",
30
- "diagnosis": ["Acute coronary syndrome"],
31
- "dr_notes": "Patient presents with chest pain",
32
- "vitals": {"BP": "150/90", "HR": "85"},
33
- "lab_results": {"Troponin": "0.5"},
34
- "medications": ["Aspirin", "Metoprolol"],
35
- "treatment": "Medical management"
36
- }
37
- ]
38
- }
39
- }
40
-
41
- # Create agent with fallback loader (since we don't have actual models)
42
- agent = PatientSummarizerAgent(model_name="test", model_type="test")
43
-
44
- # Generate summary
45
- summary = agent.generate_clinical_summary(patient_data)
46
-
47
- # Check if Clinical Assessment header is present
48
- has_clinical_assessment = "## Clinical Assessment" in summary
49
-
50
- print("Test Results:")
51
- print("=" * 50)
52
- print(f"Clinical Assessment header present: {has_clinical_assessment}")
53
-
54
- if has_clinical_assessment:
55
- print("✅ SUCCESS: Clinical Assessment header is present in the summary")
56
- else:
57
- print("❌ FAILURE: Clinical Assessment header is missing from the summary")
58
-
59
- print("\nSummary excerpt:")
60
- print("-" * 30)
61
- # Find the AI-Generated Narrative section
62
- narrative_start = summary.find("--- AI-GENERATED CLINICAL NARRATIVE ---")
63
- if narrative_start != -1:
64
- excerpt = summary[narrative_start:narrative_start + 500]
65
- print(excerpt + "...")
66
- else:
67
- print("Could not find AI-Generated Narrative section")
68
-
69
- return has_clinical_assessment
70
-
71
- if __name__ == "__main__":
72
- success = test_clinical_assessment_header()
73
- sys.exit(0 if success else 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test_generate_full_summary.py DELETED
@@ -1,279 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test script for the enhanced generate_full_summary method
4
- Tests the logic for ensuring complete summaries with all 4 sections
5
- """
6
-
7
- import re
8
- import time
9
- import logging
10
-
11
- # Mock GGUFModelPipeline for testing
12
- class MockGGUFModelPipeline:
13
- def __init__(self, responses=None):
14
- self.responses = responses or []
15
- self.call_count = 0
16
- self.generated_sections = set()
17
-
18
- def generate(self, prompt, max_tokens=2048, temperature=0.5, top_p=0.95):
19
- """Mock generate method that returns predefined responses"""
20
- if self.call_count < len(self.responses):
21
- response = self.responses[self.call_count]
22
- self.call_count += 1
23
- print(f"[MOCK] Generate call {self.call_count}: returning {len(response)} chars")
24
- return response
25
- else:
26
- # Default response for additional calls
27
- return "## Clinical Assessment\nMock assessment content."
28
-
29
- def generate_full_summary(self, prompt, max_tokens=2048, max_loops=5):
30
- """Copy of the enhanced generate_full_summary method for testing"""
31
- def is_complete(text):
32
- required_sections = [
33
- 'Clinical Assessment',
34
- 'Key Trends & Changes',
35
- 'Plan & Suggested Actions',
36
- 'Direct Guidance for Physician'
37
- ]
38
- missing_sections = [s for s in required_sections if s not in text]
39
- if missing_sections:
40
- print(f"[TEST] Missing sections: {missing_sections}")
41
- return False, missing_sections
42
- ends_with_punct = bool(re.search(r'[.!?][\s\n]*$', text))
43
- if not ends_with_punct:
44
- print("[TEST] Summary does not end with a full sentence")
45
- return ends_with_punct, []
46
-
47
- def generate_missing_section(section_name, base_prompt, existing_output):
48
- """Generate a specific missing section using targeted prompt"""
49
- section_prompts = {
50
- 'Clinical Assessment': f"Based on the patient data provided, generate only the Clinical Assessment section in markdown format. Focus on the current clinical status, key findings, and overall patient condition.\n\nPatient Data:\n{base_prompt}\n\n## Clinical Assessment\n",
51
- 'Key Trends & Changes': f"Based on the patient data provided, generate only the Key Trends & Changes section in markdown format. Analyze trends in vitals, labs, diagnoses, and medications over time.\n\nPatient Data:\n{base_prompt}\n\n## Key Trends & Changes\n",
52
- 'Plan & Suggested Actions': f"Based on the patient data provided, generate only the Plan & Suggested Actions section in markdown format. Recommend next steps, follow-up actions, and treatment considerations.\n\nPatient Data:\n{base_prompt}\n\n## Plan & Suggested Actions\n",
53
- 'Direct Guidance for Physician': f"Based on the patient data provided, generate only the Direct Guidance for Physician section in markdown format. Provide specific recommendations for the treating physician.\n\nPatient Data:\n{base_prompt}\n\n## Direct Guidance for Physician\n"
54
- }
55
-
56
- targeted_prompt = section_prompts.get(section_name, f"Generate the {section_name} section based on the patient data.\n\n{base_prompt}\n\n## {section_name}\n")
57
-
58
- try:
59
- section_output = self.generate(targeted_prompt, max_tokens=max_tokens//2)
60
- # Clean up the output to extract just the section content
61
- if f"## {section_name}" in section_output:
62
- section_content = section_output.split(f"## {section_name}", 1)[1].strip()
63
- # Remove any subsequent section headers
64
- section_content = re.split(r'##\s+', section_content, 1)[0].strip()
65
- return f"## {section_name}\n{section_content}"
66
- else:
67
- # If the model didn't follow the format, use the raw output
68
- return f"## {section_name}\n{section_output.strip()}"
69
- except Exception as e:
70
- print(f"[TEST] Failed to generate {section_name} section: {e}")
71
- # Return a minimal section if generation fails
72
- return f"## {section_name}\nUnable to generate this section due to processing error. Please review patient data manually."
73
-
74
- full_output = ""
75
- current_prompt = prompt
76
- total_start = time.time()
77
-
78
- try:
79
- print(f"[TEST] Starting enhanced full summary generation with max_loops={max_loops}")
80
- print(f"[TEST] Prompt length: {len(prompt)} characters")
81
-
82
- # Main generation loops
83
- for loop_idx in range(max_loops):
84
- loop_start = time.time()
85
- print(f"[TEST] Starting loop {loop_idx+1}/{max_loops}")
86
- print(f"[TEST] Current prompt length: {len(current_prompt)} characters")
87
-
88
- output = self.generate(current_prompt, max_tokens=max_tokens)
89
-
90
- if output.startswith(prompt):
91
- output = output[len(prompt):].strip()
92
-
93
- full_output += output
94
- loop_time = time.time() - loop_start
95
-
96
- print(f"[TEST] loop {loop_idx+1}/{max_loops}: {loop_time:.2f}s, cumulative {time.time()-total_start:.2f}s, length={len(full_output)} chars")
97
- print(f"[TEST] Generated {len(output)} characters in this loop")
98
-
99
- complete, missing_sections = is_complete(full_output)
100
-
101
- if complete:
102
- print(f"[TEST] All required sections found after loop {loop_idx+1}")
103
- break
104
-
105
- # If not complete and this is not the last loop, prepare next prompt
106
- if loop_idx < max_loops - 1:
107
- if missing_sections:
108
- missing_list = ", ".join(missing_sections)
109
- current_prompt = f"{prompt}\n\n{full_output}\n\nThe summary is missing these sections: {missing_list}. Please continue and complete all sections in markdown format:"
110
- else:
111
- current_prompt = f"{prompt}\n\n{full_output}\n\nContinue the summary and ensure it ends with a complete sentence:"
112
- print(f"[TEST] Preparing next prompt for loop {loop_idx+2}")
113
-
114
- # Post-processing: Generate any remaining missing sections
115
- complete, missing_sections = is_complete(full_output)
116
-
117
- if missing_sections:
118
- print(f"[TEST] Generating {len(missing_sections)} missing sections post-processing")
119
- generated_sections = []
120
-
121
- for section in missing_sections:
122
- print(f"[TEST] Generating missing section: {section}")
123
- section_content = generate_missing_section(section, prompt, full_output)
124
- generated_sections.append(section_content)
125
-
126
- # Append generated sections to the main output
127
- if generated_sections:
128
- full_output += "\n\n" + "\n\n".join(generated_sections)
129
-
130
- total_time = time.time() - total_start
131
- print(f"[TEST] generate_full_summary completed in {total_time:.2f}s")
132
- print(f"[TEST] Final summary length: {len(full_output)} characters")
133
-
134
- # Final validation
135
- final_complete, final_missing = is_complete(full_output)
136
- if not final_complete:
137
- print(f"[TEST] Final summary still incomplete. Missing: {final_missing}")
138
- # As a last resort, ensure at least basic structure
139
- if final_missing:
140
- fallback_sections = []
141
- for section in final_missing:
142
- fallback_sections.append(f"## {section}\nPlease review the patient data for this section.")
143
- full_output += "\n\n" + "\n\n".join(fallback_sections)
144
-
145
- return full_output.strip()
146
-
147
- except Exception as e:
148
- print(f"[TEST] Full summary generation failed: {e}")
149
- # Instead of raising error, return a minimal complete summary
150
- minimal_sections = [
151
- "## Clinical Assessment\nPatient data processing encountered an error. Please review the raw patient information manually.",
152
- "## Key Trends & Changes\nUnable to analyze trends due to processing error. Manual review recommended.",
153
- "## Plan & Suggested Actions\nError in generating action plan. Consult with healthcare provider for appropriate next steps.",
154
- "## Direct Guidance for Physician\nProcessing error occurred. Please conduct a thorough manual review of all patient data."
155
- ]
156
- return "\n\n".join(minimal_sections)
157
-
158
- def test_complete_summary():
159
- """Test case: Model generates complete summary in first attempt"""
160
- print("\n" + "="*60)
161
- print("TEST 1: Complete Summary in First Attempt")
162
- print("="*60)
163
-
164
- mock_responses = [
165
- """## Clinical Assessment
166
- The patient presents with Type 1 diabetes mellitus with ketoacidosis. Current vitals show elevated blood pressure and recent lab results indicate abnormal thyroid function.
167
-
168
- ## Key Trends & Changes
169
- Weight has decreased from 73kg to current levels. Blood pressure shows systolic readings around 135 mmHg. Thyroid panel has increased significantly from baseline.
170
-
171
- ## Plan & Suggested Actions
172
- Continue current medication regimen. Schedule follow-up appointment in 2 weeks. Monitor blood glucose levels closely.
173
-
174
- ## Direct Guidance for Physician
175
- Consider adjusting antihypertensive therapy. Evaluate thyroid function further with additional testing."""
176
- ]
177
-
178
- mock_pipeline = MockGGUFModelPipeline(mock_responses)
179
- prompt = "Generate a patient summary with all 4 sections."
180
-
181
- result = mock_pipeline.generate_full_summary(prompt, max_loops=3)
182
-
183
- # Check if all sections are present
184
- sections_present = all(section in result for section in [
185
- 'Clinical Assessment', 'Key Trends & Changes',
186
- 'Plan & Suggested Actions', 'Direct Guidance for Physician'
187
- ])
188
-
189
- print(f"\nResult has all sections: {sections_present}")
190
- print(f"Result length: {len(result)} characters")
191
- return sections_present
192
-
193
- def test_incomplete_summary_fixed():
194
- """Test case: Model generates incomplete summary, then sections are added"""
195
- print("\n" + "="*60)
196
- print("TEST 2: Incomplete Summary Fixed by Post-processing")
197
- print("="*60)
198
-
199
- mock_responses = [
200
- # First response: missing some sections
201
- """## Clinical Assessment
202
- The patient has diabetes and hypertension.
203
-
204
- ## Key Trends & Changes
205
- Blood pressure has been elevated.""",
206
- # Second response for missing section
207
- """## Plan & Suggested Actions
208
- Continue medications and follow up.""",
209
- # Third response for missing section
210
- """## Direct Guidance for Physician
211
- Monitor closely and adjust therapy as needed."""
212
- ]
213
-
214
- mock_pipeline = MockGGUFModelPipeline(mock_responses)
215
- prompt = "Generate a patient summary with all 4 sections."
216
-
217
- result = mock_pipeline.generate_full_summary(prompt, max_loops=2)
218
-
219
- # Check if all sections are present
220
- sections_present = all(section in result for section in [
221
- 'Clinical Assessment', 'Key Trends & Changes',
222
- 'Plan & Suggested Actions', 'Direct Guidance for Physician'
223
- ])
224
-
225
- print(f"\nResult has all sections: {sections_present}")
226
- print(f"Result length: {len(result)} characters")
227
- print(f"Number of generate calls made: {mock_pipeline.call_count}")
228
- return sections_present
229
-
230
- def test_error_handling():
231
- """Test case: Model fails, but method still returns complete summary"""
232
- print("\n" + "="*60)
233
- print("TEST 3: Error Handling - Complete Summary Returned")
234
- print("="*60)
235
-
236
- # Mock pipeline that raises exceptions
237
- class FailingMockPipeline(MockGGUFModelPipeline):
238
- def generate(self, prompt, **kwargs):
239
- raise Exception("Model generation failed")
240
-
241
- mock_pipeline = FailingMockPipeline()
242
- prompt = "Generate a patient summary."
243
-
244
- result = mock_pipeline.generate_full_summary(prompt, max_loops=2)
245
-
246
- # Check if all sections are present even after error
247
- sections_present = all(section in result for section in [
248
- 'Clinical Assessment', 'Key Trends & Changes',
249
- 'Plan & Suggested Actions', 'Direct Guidance for Physician'
250
- ])
251
-
252
- print(f"\nResult has all sections despite error: {sections_present}")
253
- print(f"Result length: {len(result)} characters")
254
- return sections_present
255
-
256
- if __name__ == "__main__":
257
- print("Testing Enhanced generate_full_summary Method")
258
- print("="*60)
259
-
260
- # Run all tests
261
- test1_pass = test_complete_summary()
262
- test2_pass = test_incomplete_summary_fixed()
263
- test3_pass = test_error_handling()
264
-
265
- print("\n" + "="*60)
266
- print("TEST SUMMARY")
267
- print("="*60)
268
- print(f"Test 1 (Complete Summary): {'PASS' if test1_pass else 'FAIL'}")
269
- print(f"Test 2 (Incomplete Fixed): {'PASS' if test2_pass else 'FAIL'}")
270
- print(f"Test 3 (Error Handling): {'PASS' if test3_pass else 'FAIL'}")
271
-
272
- all_pass = all([test1_pass, test2_pass, test3_pass])
273
- print(f"\nOverall Result: {'ALL TESTS PASS' if all_pass else 'SOME TESTS FAILED'}")
274
-
275
- if all_pass:
276
- print("\n✅ Enhanced generate_full_summary method is working correctly!")
277
- print("The method ensures complete summaries with all 4 sections are always generated.")
278
- else:
279
- print("\n❌ Some tests failed. The method needs further refinement.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test_gguf.py DELETED
@@ -1,137 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test script for GGUF model loading in Hugging Face Spaces
4
- This helps identify issues before they cause 500 errors in production
5
- """
6
-
7
- import os
8
- import sys
9
- import time
10
- import logging
11
-
12
- # Configure logging
13
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
- logger = logging.getLogger(__name__)
15
-
16
- def test_gguf_loading():
17
- """Test GGUF model loading with the same parameters used in production"""
18
-
19
- # Set environment variables for Hugging Face Spaces
20
- os.environ['HF_HOME'] = '/tmp/huggingface'
21
- os.environ['GGUF_N_THREADS'] = '2'
22
- os.environ['GGUF_N_BATCH'] = '64'
23
-
24
- try:
25
- logger.info("Testing GGUF model loading...")
26
-
27
- # Test the exact model name from your API call
28
- model_name = "microsoft/Phi-3-mini-4k-instruct-gguf"
29
- filename = "Phi-3-mini-4k-instruct-q4.gguf"
30
-
31
- logger.info(f"Model: {model_name}")
32
- logger.info(f"Filename: {filename}")
33
-
34
- # Test import
35
- try:
36
- from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline
37
- logger.info("✓ GGUFModelPipeline import successful")
38
- except ImportError as e:
39
- logger.error(f"✗ Failed to import GGUFModelPipeline: {e}")
40
- return False
41
-
42
- # Test model loading with timeout
43
- start_time = time.time()
44
- try:
45
- pipeline = GGUFModelPipeline(model_name, filename, timeout=300)
46
- load_time = time.time() - start_time
47
- logger.info(f"✓ Model loaded successfully in {load_time:.2f}s")
48
- except Exception as e:
49
- load_time = time.time() - start_time
50
- logger.error(f"✗ Model loading failed after {load_time:.2f}s: {e}")
51
- return False
52
-
53
- # Test basic generation
54
- try:
55
- test_prompt = "Generate a brief medical summary: Patient has fever and cough."
56
- logger.info("Testing basic generation...")
57
-
58
- start_gen = time.time()
59
- result = pipeline.generate(test_prompt, max_tokens=100)
60
- gen_time = time.time() - start_gen
61
-
62
- logger.info(f"✓ Generation successful in {gen_time:.2f}s")
63
- logger.info(f"Generated text length: {len(result)} characters")
64
- logger.info(f"Sample output: {result[:200]}...")
65
-
66
- except Exception as e:
67
- logger.error(f"✗ Generation failed: {e}")
68
- return False
69
-
70
- # Test full summary generation
71
- try:
72
- logger.info("Testing full summary generation...")
73
-
74
- start_summary = time.time()
75
- summary = pipeline.generate_full_summary(test_prompt, max_tokens=200, max_loops=1)
76
- summary_time = time.time() - start_summary
77
-
78
- logger.info(f"✓ Full summary generation successful in {summary_time:.2f}s")
79
- logger.info(f"Summary length: {len(summary)} characters")
80
-
81
- except Exception as e:
82
- logger.error(f"✗ Full summary generation failed: {e}")
83
- return False
84
-
85
- logger.info("🎉 All tests passed! GGUF model is working correctly.")
86
- return True
87
-
88
- except Exception as e:
89
- logger.error(f"✗ Test failed with unexpected error: {e}")
90
- return False
91
-
92
- def test_fallback_pipeline():
93
- """Test the fallback pipeline when GGUF fails"""
94
- try:
95
- logger.info("Testing fallback pipeline...")
96
-
97
- from ai_med_extract.utils.model_loader_gguf import create_fallback_pipeline
98
-
99
- fallback = create_fallback_pipeline()
100
- result = fallback.generate("Test prompt")
101
-
102
- logger.info(f"✓ Fallback pipeline working: {len(result)} characters generated")
103
- return True
104
-
105
- except Exception as e:
106
- logger.error(f"✗ Fallback pipeline failed: {e}")
107
- return False
108
-
109
- def main():
110
- """Main test function"""
111
- logger.info("Starting GGUF model tests...")
112
-
113
- # Test 1: GGUF model loading
114
- gguf_success = test_gguf_loading()
115
-
116
- # Test 2: Fallback pipeline
117
- fallback_success = test_fallback_pipeline()
118
-
119
- # Summary
120
- logger.info("\n" + "="*50)
121
- logger.info("TEST SUMMARY")
122
- logger.info("="*50)
123
- logger.info(f"GGUF Model Loading: {'✓ PASS' if gguf_success else '✗ FAIL'}")
124
- logger.info(f"Fallback Pipeline: {'✓ PASS' if fallback_success else '✗ PASS'}")
125
-
126
- if gguf_success:
127
- logger.info("🎉 GGUF model is working correctly!")
128
- logger.info("Your API should work without 500 errors.")
129
- else:
130
- logger.warning("⚠️ GGUF model has issues. The fallback will be used.")
131
- logger.info("Your API will still work but with reduced functionality.")
132
-
133
- return gguf_success
134
-
135
- if __name__ == "__main__":
136
- success = main()
137
- sys.exit(0 if success else 1)