Spaces:

salvinjose
/

HNTAI

Paused

App Files Files Community

sachinchandrankallar committed on Sep 9, 2025

Commit

8eb4114

1 Parent(s): 3add352

summary fixes

Browse files

Files changed (12) hide show

TODO.md +11 -5
ai_med_extract/__pycache__/app.cpython-311.pyc +0 -0
ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc +0 -0
ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc +0 -0
ai_med_extract/api/__pycache__/routes.cpython-311.pyc +0 -0
ai_med_extract/api/routes.py +9 -3
ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc +0 -0
ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc +0 -0
ai_med_extract/utils/model_loader_gguf.py +14 -5
test_clinical_assessment_header.py +0 -73
test_generate_full_summary.py +0 -279
test_gguf.py +0 -137

TODO.md CHANGED Viewed

@@ -1,7 +1,13 @@
-# TODO: Rewrite generate_patient_summary for parallel section generation
 ## Tasks
-- [x] Add import for concurrent.futures in patient_summary_agent.py
-- [x] Create generate_summary_sections method to generate 4 sections in parallel
-- [x] Create generate_patient_summary method that stitches sections together
-- [x] Test the new method to ensure correct output (skipped by user)

+# TODO: Fix GGUF Model Context Window Error and Optimize Speed
 ## Tasks
+- [x] Modify generate method in model_loader_gguf.py to dynamically adjust max_tokens based on prompt length
+- [x] Tune n_threads in model initialization for maximum speed
+- [ ] Test the changes to ensure nothing breaks
+## Details
+- Approximate prompt tokens by word count (split on whitespace)
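+- Calculate allowed max_tokens = context window (n_ctx = 2048) - prompt_tokens
+- Reduce max_tokens to the allowed value if the request exceeds it, and log a warning
+- Raise a ValueError if the prompt alone fills or exceeds the context window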
+- Set n_threads to os.cpu_count() for speed

ai_med_extract/__pycache__/app.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/__pycache__/app.cpython-311.pyc and b/ai_med_extract/__pycache__/app.cpython-311.pyc differ

ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc differ

ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc differ

ai_med_extract/api/__pycache__/routes.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/api/__pycache__/routes.cpython-311.pyc and b/ai_med_extract/api/__pycache__/routes.cpython-311.pyc differ

ai_med_extract/api/routes.py CHANGED Viewed

@@ -1057,9 +1057,15 @@ def register_routes(app, agents):
     @app.route('/generate_patient_summary', methods=['POST'])
     def generate_patient_summary():
         """

+    # Initialize GGUF pipeline with proper model name handling
+    gguf_model_name = "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf"
+    if gguf_model_name.endswith('.gguf') and '/' in gguf_model_name:
+        repo_id, filename = gguf_model_name.rsplit('/', 1)
+        PIPELINE = get_gguf_pipeline(repo_id, filename)
+    else:
+        PIPELINE = get_gguf_pipeline(gguf_model_name)
+    _ = PIPELINE.generate("Hello", max_tokens=5)
     @app.route('/generate_patient_summary', methods=['POST'])
     def generate_patient_summary():
         """

ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc differ

ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc differ

ai_med_extract/utils/model_loader_gguf.py CHANGED Viewed

@@ -53,9 +53,8 @@ class GGUFModelPipeline:
         # Performance tuning and CPU-friendly defaults for Spaces
         try:
             cpu_count = os.cpu_count() or 2
-            default_threads = max(2, min(4, cpu_count))
-            n_threads = int(os.environ.get("GGUF_N_THREADS", str(default_threads)))
-            n_batch = int(os.environ.get("GGUF_N_BATCH", "64"))  # Reduced from 128
             # Memory-optimized settings for Hugging Face Spaces
             self.model = Llama(
@@ -87,8 +86,18 @@ class GGUFModelPipeline:
             text = re.sub(p, "", text, flags=re.IGNORECASE)
         return text.strip()
-    def _generate_with_timeout(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=120):
         """Generate text with timeout using threading"""
         def _generate():
             try:
                 output = self.model(
@@ -114,7 +123,7 @@ class GGUFModelPipeline:
     def generate(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95):
         t0 = time.time()
         try:
-            output = self._generate_with_timeout(prompt, max_tokens, temperature, top_p, timeout=120)
             dt = time.time() - t0
             text = output["choices"][0]["text"].strip()
             text = self._strip_special_tokens(text)

         # Performance tuning and CPU-friendly defaults for Spaces
         try:
             cpu_count = os.cpu_count() or 2
+            n_threads = cpu_count  # Set to max CPU cores for speed
+            n_batch = int(os.environ.get("GGUF_N_BATCH", "64"))  # Keep batch size reasonable
             # Memory-optimized settings for Hugging Face Spaces
             self.model = Llama(
             text = re.sub(p, "", text, flags=re.IGNORECASE)
         return text.strip()
+    def _generate_with_timeout(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=500):
         """Generate text with timeout using threading"""
+        # Approximate token count by splitting on whitespace
+        prompt_tokens = len(prompt.split())
+        n_ctx = 2048
+        allowed_max_tokens = n_ctx - prompt_tokens
+        if allowed_max_tokens <= 0:
+            raise ValueError(f"Prompt too long: {prompt_tokens} tokens exceed context window of {n_ctx}")
+        if max_tokens > allowed_max_tokens:
+            logger.warning(f"Requested max_tokens {max_tokens} exceeds allowed {allowed_max_tokens}, reducing max_tokens")
+            max_tokens = allowed_max_tokens
         def _generate():
             try:
                 output = self.model(
     def generate(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95):
         t0 = time.time()
         try:
+            output = self._generate_with_timeout(prompt, max_tokens, temperature, top_p, timeout=500)
             dt = time.time() - t0
             text = output["choices"][0]["text"].strip()
             text = self._strip_special_tokens(text)

test_clinical_assessment_header.py DELETED Viewed

@@ -1,73 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script to verify that the Clinical Assessment header is present in the formatted summary
-"""
-import sys
-import os
-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from ai_med_extract.agents.patient_summary_agent import PatientSummarizerAgent
-def test_clinical_assessment_header():
-    """Test that Clinical Assessment header is present in formatted summary"""
-    # Create a mock patient data
-    patient_data = {
-        "result": {
-            "patientname": "John Doe",
-            "patientnumber": "12345",
-            "agey": "65",
-            "gender": "Male",
-            "allergies": ["Penicillin"],
-            "social_history": "Retired, former smoker",
-            "past_medical_history": ["Hypertension", "Diabetes"],
-            "encounters": [
-                {
-                    "visit_date": "2024-01-15",
-                    "chief_complaint": "Chest pain",
-                    "symptoms": "Shortness of breath",
-                    "diagnosis": ["Acute coronary syndrome"],
-                    "dr_notes": "Patient presents with chest pain",
-                    "vitals": {"BP": "150/90", "HR": "85"},
-                    "lab_results": {"Troponin": "0.5"},
-                    "medications": ["Aspirin", "Metoprolol"],
-                    "treatment": "Medical management"
-                }
-            ]
-        }
-    }
-    # Create agent with fallback loader (since we don't have actual models)
-    agent = PatientSummarizerAgent(model_name="test", model_type="test")
-    # Generate summary
-    summary = agent.generate_clinical_summary(patient_data)
-    # Check if Clinical Assessment header is present
-    has_clinical_assessment = "## Clinical Assessment" in summary
-    print("Test Results:")
-    print("=" * 50)
-    print(f"Clinical Assessment header present: {has_clinical_assessment}")
-    if has_clinical_assessment:
-        print("✅ SUCCESS: Clinical Assessment header is present in the summary")
-    else:
-        print("❌ FAILURE: Clinical Assessment header is missing from the summary")
-    print("\nSummary excerpt:")
-    print("-" * 30)
-    # Find the AI-Generated Narrative section
-    narrative_start = summary.find("--- AI-GENERATED CLINICAL NARRATIVE ---")
-    if narrative_start != -1:
-        excerpt = summary[narrative_start:narrative_start + 500]
-        print(excerpt + "...")
-    else:
-        print("Could not find AI-Generated Narrative section")
-    return has_clinical_assessment
-if __name__ == "__main__":
-    success = test_clinical_assessment_header()
-    sys.exit(0 if success else 1)

test_generate_full_summary.py DELETED Viewed

@@ -1,279 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for the enhanced generate_full_summary method
-Tests the logic for ensuring complete summaries with all 4 sections
-"""
-import re
-import time
-import logging
-# Mock GGUFModelPipeline for testing
-class MockGGUFModelPipeline:
-    def __init__(self, responses=None):
-        self.responses = responses or []
-        self.call_count = 0
-        self.generated_sections = set()
-    def generate(self, prompt, max_tokens=2048, temperature=0.5, top_p=0.95):
-        """Mock generate method that returns predefined responses"""
-        if self.call_count < len(self.responses):
-            response = self.responses[self.call_count]
-            self.call_count += 1
-            print(f"[MOCK] Generate call {self.call_count}: returning {len(response)} chars")
-            return response
-        else:
-            # Default response for additional calls
-            return "## Clinical Assessment\nMock assessment content."
-    def generate_full_summary(self, prompt, max_tokens=2048, max_loops=5):
-        """Copy of the enhanced generate_full_summary method for testing"""
-        def is_complete(text):
-            required_sections = [
-                'Clinical Assessment',
-                'Key Trends & Changes',
-                'Plan & Suggested Actions',
-                'Direct Guidance for Physician'
-            ]
-            missing_sections = [s for s in required_sections if s not in text]
-            if missing_sections:
-                print(f"[TEST] Missing sections: {missing_sections}")
-                return False, missing_sections
-            ends_with_punct = bool(re.search(r'[.!?][\s\n]*$', text))
-            if not ends_with_punct:
-                print("[TEST] Summary does not end with a full sentence")
-            return ends_with_punct, []
-        def generate_missing_section(section_name, base_prompt, existing_output):
-            """Generate a specific missing section using targeted prompt"""
-            section_prompts = {
-                'Clinical Assessment': f"Based on the patient data provided, generate only the Clinical Assessment section in markdown format. Focus on the current clinical status, key findings, and overall patient condition.\n\nPatient Data:\n{base_prompt}\n\n## Clinical Assessment\n",
-                'Key Trends & Changes': f"Based on the patient data provided, generate only the Key Trends & Changes section in markdown format. Analyze trends in vitals, labs, diagnoses, and medications over time.\n\nPatient Data:\n{base_prompt}\n\n## Key Trends & Changes\n",
-                'Plan & Suggested Actions': f"Based on the patient data provided, generate only the Plan & Suggested Actions section in markdown format. Recommend next steps, follow-up actions, and treatment considerations.\n\nPatient Data:\n{base_prompt}\n\n## Plan & Suggested Actions\n",
-                'Direct Guidance for Physician': f"Based on the patient data provided, generate only the Direct Guidance for Physician section in markdown format. Provide specific recommendations for the treating physician.\n\nPatient Data:\n{base_prompt}\n\n## Direct Guidance for Physician\n"
-            }
-            targeted_prompt = section_prompts.get(section_name, f"Generate the {section_name} section based on the patient data.\n\n{base_prompt}\n\n## {section_name}\n")
-            try:
-                section_output = self.generate(targeted_prompt, max_tokens=max_tokens//2)
-                # Clean up the output to extract just the section content
-                if f"## {section_name}" in section_output:
-                    section_content = section_output.split(f"## {section_name}", 1)[1].strip()
-                    # Remove any subsequent section headers
-                    section_content = re.split(r'##\s+', section_content, 1)[0].strip()
-                    return f"## {section_name}\n{section_content}"
-                else:
-                    # If the model didn't follow the format, use the raw output
-                    return f"## {section_name}\n{section_output.strip()}"
-            except Exception as e:
-                print(f"[TEST] Failed to generate {section_name} section: {e}")
-                # Return a minimal section if generation fails
-                return f"## {section_name}\nUnable to generate this section due to processing error. Please review patient data manually."
-        full_output = ""
-        current_prompt = prompt
-        total_start = time.time()
-        try:
-            print(f"[TEST] Starting enhanced full summary generation with max_loops={max_loops}")
-            print(f"[TEST] Prompt length: {len(prompt)} characters")
-            # Main generation loops
-            for loop_idx in range(max_loops):
-                loop_start = time.time()
-                print(f"[TEST] Starting loop {loop_idx+1}/{max_loops}")
-                print(f"[TEST] Current prompt length: {len(current_prompt)} characters")
-                output = self.generate(current_prompt, max_tokens=max_tokens)
-                if output.startswith(prompt):
-                    output = output[len(prompt):].strip()
-                full_output += output
-                loop_time = time.time() - loop_start
-                print(f"[TEST] loop {loop_idx+1}/{max_loops}: {loop_time:.2f}s, cumulative {time.time()-total_start:.2f}s, length={len(full_output)} chars")
-                print(f"[TEST] Generated {len(output)} characters in this loop")
-                complete, missing_sections = is_complete(full_output)
-                if complete:
-                    print(f"[TEST] All required sections found after loop {loop_idx+1}")
-                    break
-                # If not complete and this is not the last loop, prepare next prompt
-                if loop_idx < max_loops - 1:
-                    if missing_sections:
-                        missing_list = ", ".join(missing_sections)
-                        current_prompt = f"{prompt}\n\n{full_output}\n\nThe summary is missing these sections: {missing_list}. Please continue and complete all sections in markdown format:"
-                    else:
-                        current_prompt = f"{prompt}\n\n{full_output}\n\nContinue the summary and ensure it ends with a complete sentence:"
-                    print(f"[TEST] Preparing next prompt for loop {loop_idx+2}")
-            # Post-processing: Generate any remaining missing sections
-            complete, missing_sections = is_complete(full_output)
-            if missing_sections:
-                print(f"[TEST] Generating {len(missing_sections)} missing sections post-processing")
-                generated_sections = []
-                for section in missing_sections:
-                    print(f"[TEST] Generating missing section: {section}")
-                    section_content = generate_missing_section(section, prompt, full_output)
-                    generated_sections.append(section_content)
-                # Append generated sections to the main output
-                if generated_sections:
-                    full_output += "\n\n" + "\n\n".join(generated_sections)
-            total_time = time.time() - total_start
-            print(f"[TEST] generate_full_summary completed in {total_time:.2f}s")
-            print(f"[TEST] Final summary length: {len(full_output)} characters")
-            # Final validation
-            final_complete, final_missing = is_complete(full_output)
-            if not final_complete:
-                print(f"[TEST] Final summary still incomplete. Missing: {final_missing}")
-                # As a last resort, ensure at least basic structure
-                if final_missing:
-                    fallback_sections = []
-                    for section in final_missing:
-                        fallback_sections.append(f"## {section}\nPlease review the patient data for this section.")
-                    full_output += "\n\n" + "\n\n".join(fallback_sections)
-            return full_output.strip()
-        except Exception as e:
-            print(f"[TEST] Full summary generation failed: {e}")
-            # Instead of raising error, return a minimal complete summary
-            minimal_sections = [
-                "## Clinical Assessment\nPatient data processing encountered an error. Please review the raw patient information manually.",
-                "## Key Trends & Changes\nUnable to analyze trends due to processing error. Manual review recommended.",
-                "## Plan & Suggested Actions\nError in generating action plan. Consult with healthcare provider for appropriate next steps.",
-                "## Direct Guidance for Physician\nProcessing error occurred. Please conduct a thorough manual review of all patient data."
-            ]
-            return "\n\n".join(minimal_sections)
-def test_complete_summary():
-    """Test case: Model generates complete summary in first attempt"""
-    print("\n" + "="*60)
-    print("TEST 1: Complete Summary in First Attempt")
-    print("="*60)
-    mock_responses = [
-        """## Clinical Assessment
-The patient presents with Type 1 diabetes mellitus with ketoacidosis. Current vitals show elevated blood pressure and recent lab results indicate abnormal thyroid function.
-## Key Trends & Changes
-Weight has decreased from 73kg to current levels. Blood pressure shows systolic readings around 135 mmHg. Thyroid panel has increased significantly from baseline.
-## Plan & Suggested Actions
-Continue current medication regimen. Schedule follow-up appointment in 2 weeks. Monitor blood glucose levels closely.
-## Direct Guidance for Physician
-Consider adjusting antihypertensive therapy. Evaluate thyroid function further with additional testing."""
-    ]
-    mock_pipeline = MockGGUFModelPipeline(mock_responses)
-    prompt = "Generate a patient summary with all 4 sections."
-    result = mock_pipeline.generate_full_summary(prompt, max_loops=3)
-    # Check if all sections are present
-    sections_present = all(section in result for section in [
-        'Clinical Assessment', 'Key Trends & Changes',
-        'Plan & Suggested Actions', 'Direct Guidance for Physician'
-    ])
-    print(f"\nResult has all sections: {sections_present}")
-    print(f"Result length: {len(result)} characters")
-    return sections_present
-def test_incomplete_summary_fixed():
-    """Test case: Model generates incomplete summary, then sections are added"""
-    print("\n" + "="*60)
-    print("TEST 2: Incomplete Summary Fixed by Post-processing")
-    print("="*60)
-    mock_responses = [
-        # First response: missing some sections
-        """## Clinical Assessment
-The patient has diabetes and hypertension.
-## Key Trends & Changes
-Blood pressure has been elevated.""",
-        # Second response for missing section
-        """## Plan & Suggested Actions
-Continue medications and follow up.""",
-        # Third response for missing section
-        """## Direct Guidance for Physician
-Monitor closely and adjust therapy as needed."""
-    ]
-    mock_pipeline = MockGGUFModelPipeline(mock_responses)
-    prompt = "Generate a patient summary with all 4 sections."
-    result = mock_pipeline.generate_full_summary(prompt, max_loops=2)
-    # Check if all sections are present
-    sections_present = all(section in result for section in [
-        'Clinical Assessment', 'Key Trends & Changes',
-        'Plan & Suggested Actions', 'Direct Guidance for Physician'
-    ])
-    print(f"\nResult has all sections: {sections_present}")
-    print(f"Result length: {len(result)} characters")
-    print(f"Number of generate calls made: {mock_pipeline.call_count}")
-    return sections_present
-def test_error_handling():
-    """Test case: Model fails, but method still returns complete summary"""
-    print("\n" + "="*60)
-    print("TEST 3: Error Handling - Complete Summary Returned")
-    print("="*60)
-    # Mock pipeline that raises exceptions
-    class FailingMockPipeline(MockGGUFModelPipeline):
-        def generate(self, prompt, **kwargs):
-            raise Exception("Model generation failed")
-    mock_pipeline = FailingMockPipeline()
-    prompt = "Generate a patient summary."
-    result = mock_pipeline.generate_full_summary(prompt, max_loops=2)
-    # Check if all sections are present even after error
-    sections_present = all(section in result for section in [
-        'Clinical Assessment', 'Key Trends & Changes',
-        'Plan & Suggested Actions', 'Direct Guidance for Physician'
-    ])
-    print(f"\nResult has all sections despite error: {sections_present}")
-    print(f"Result length: {len(result)} characters")
-    return sections_present
-if __name__ == "__main__":
-    print("Testing Enhanced generate_full_summary Method")
-    print("="*60)
-    # Run all tests
-    test1_pass = test_complete_summary()
-    test2_pass = test_incomplete_summary_fixed()
-    test3_pass = test_error_handling()
-    print("\n" + "="*60)
-    print("TEST SUMMARY")
-    print("="*60)
-    print(f"Test 1 (Complete Summary): {'PASS' if test1_pass else 'FAIL'}")
-    print(f"Test 2 (Incomplete Fixed): {'PASS' if test2_pass else 'FAIL'}")
-    print(f"Test 3 (Error Handling): {'PASS' if test3_pass else 'FAIL'}")
-    all_pass = all([test1_pass, test2_pass, test3_pass])
-    print(f"\nOverall Result: {'ALL TESTS PASS' if all_pass else 'SOME TESTS FAILED'}")
-    if all_pass:
-        print("\n✅ Enhanced generate_full_summary method is working correctly!")
-        print("The method ensures complete summaries with all 4 sections are always generated.")
-    else:
-        print("\n❌ Some tests failed. The method needs further refinement.")

test_gguf.py DELETED Viewed

@@ -1,137 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for GGUF model loading in Hugging Face Spaces
-This helps identify issues before they cause 500 errors in production
-"""
-import os
-import sys
-import time
-import logging
-# Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-def test_gguf_loading():
-    """Test GGUF model loading with the same parameters used in production"""
-    # Set environment variables for Hugging Face Spaces
-    os.environ['HF_HOME'] = '/tmp/huggingface'
-    os.environ['GGUF_N_THREADS'] = '2'
-    os.environ['GGUF_N_BATCH'] = '64'
-    try:
-        logger.info("Testing GGUF model loading...")
-        # Test the exact model name from your API call
-        model_name = "microsoft/Phi-3-mini-4k-instruct-gguf"
-        filename = "Phi-3-mini-4k-instruct-q4.gguf"
-        logger.info(f"Model: {model_name}")
-        logger.info(f"Filename: {filename}")
-        # Test import
-        try:
-            from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline
-            logger.info("✓ GGUFModelPipeline import successful")
-        except ImportError as e:
-            logger.error(f"✗ Failed to import GGUFModelPipeline: {e}")
-            return False
-        # Test model loading with timeout
-        start_time = time.time()
-        try:
-            pipeline = GGUFModelPipeline(model_name, filename, timeout=300)
-            load_time = time.time() - start_time
-            logger.info(f"✓ Model loaded successfully in {load_time:.2f}s")
-        except Exception as e:
-            load_time = time.time() - start_time
-            logger.error(f"✗ Model loading failed after {load_time:.2f}s: {e}")
-            return False
-        # Test basic generation
-        try:
-            test_prompt = "Generate a brief medical summary: Patient has fever and cough."
-            logger.info("Testing basic generation...")
-            start_gen = time.time()
-            result = pipeline.generate(test_prompt, max_tokens=100)
-            gen_time = time.time() - start_gen
-            logger.info(f"✓ Generation successful in {gen_time:.2f}s")
-            logger.info(f"Generated text length: {len(result)} characters")
-            logger.info(f"Sample output: {result[:200]}...")
-        except Exception as e:
-            logger.error(f"✗ Generation failed: {e}")
-            return False
-        # Test full summary generation
-        try:
-            logger.info("Testing full summary generation...")
-            start_summary = time.time()
-            summary = pipeline.generate_full_summary(test_prompt, max_tokens=200, max_loops=1)
-            summary_time = time.time() - start_summary
-            logger.info(f"✓ Full summary generation successful in {summary_time:.2f}s")
-            logger.info(f"Summary length: {len(summary)} characters")
-        except Exception as e:
-            logger.error(f"✗ Full summary generation failed: {e}")
-            return False
-        logger.info("🎉 All tests passed! GGUF model is working correctly.")
-        return True
-    except Exception as e:
-        logger.error(f"✗ Test failed with unexpected error: {e}")
-        return False
-def test_fallback_pipeline():
-    """Test the fallback pipeline when GGUF fails"""
-    try:
-        logger.info("Testing fallback pipeline...")
-        from ai_med_extract.utils.model_loader_gguf import create_fallback_pipeline
-        fallback = create_fallback_pipeline()
-        result = fallback.generate("Test prompt")
-        logger.info(f"✓ Fallback pipeline working: {len(result)} characters generated")
-        return True
-    except Exception as e:
-        logger.error(f"✗ Fallback pipeline failed: {e}")
-        return False
-def main():
-    """Main test function"""
-    logger.info("Starting GGUF model tests...")
-    # Test 1: GGUF model loading
-    gguf_success = test_gguf_loading()
-    # Test 2: Fallback pipeline
-    fallback_success = test_fallback_pipeline()
-    # Summary
-    logger.info("\n" + "="*50)
-    logger.info("TEST SUMMARY")
-    logger.info("="*50)
-    logger.info(f"GGUF Model Loading: {'✓ PASS' if gguf_success else '✗ FAIL'}")
-    logger.info(f"Fallback Pipeline: {'✓ PASS' if fallback_success else '✗ PASS'}")
-    if gguf_success:
-        logger.info("🎉 GGUF model is working correctly!")
-        logger.info("Your API should work without 500 errors.")
-    else:
-        logger.warning("⚠️  GGUF model has issues. The fallback will be used.")
-        logger.info("Your API will still work but with reduced functionality.")
-    return gguf_success
-if __name__ == "__main__":
-    success = main()
-    sys.exit(0 if success else 1)