Spaces:
Paused
Paused
Commit ·
aba0d25
1
Parent(s): 7d153bf
Revert "Merge branch 'FT-DEV-17/09/2025'"
Browse files
This reverts commit 715f2878b980fc505d7114dbecf45bcd9357f2ea, reversing
changes made to 11a4a59e6aa7a42c5e9997141b04dc042803ca0c.
This view is limited to 50 files because it contains too many changes. See raw diff
- .env +1 -7
- .env.example +0 -16
- .huggingface.yaml +0 -1
- .vscode/launch.json +0 -22
- .vscode/settings.json +1 -2
- 0.41.0' +0 -0
- DEPLOYMENT.md +10 -103
- DEVELOPMENT.md +0 -377
- Dockerfile +1 -1
- FINAL_PROGRESS.md +72 -0
- GGUF_TROUBLESHOOTING.md +178 -0
- PROGRESS_UPDATE.md +32 -0
- README.md +12 -56
- README_SPACES.md +46 -0
- REFACTORED_README.md +463 -0
- TODO.md +12 -10
- __pycache__/ai_med_extract.cpython-311.pyc +0 -0
- __pycache__/test_chunking.cpython-311.pyc +0 -0
- __pycache__/test_summary_consistency.cpython-311.pyc +0 -0
- ai_med_extract.py +0 -15
- ai_med_extract/__init__.py +1 -0
- ai_med_extract/__main__.py +5 -0
- ai_med_extract/__pycache__/__init__.cpython-311.pyc +0 -0
- ai_med_extract/__pycache__/app.cpython-311.pyc +0 -0
- ai_med_extract/agents/__init__.py +1 -0
- ai_med_extract/agents/__pycache__/__init__.cpython-311.pyc +0 -0
- {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/medical_data_extractor.cpython-311.pyc +0 -0
- {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/patient_summary_agent.cpython-311.pyc +0 -0
- {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/phi_scrubber.cpython-311.pyc +0 -0
- {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/summarizer.cpython-311.pyc +0 -0
- {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/text_extractor.cpython-311.pyc +0 -0
- {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/medical_data_extractor.py +1 -1
- {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/patient_summary_agent.py +34 -40
- {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/phi_scrubber.py +6 -7
- {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/summarizer.py +1 -1
- {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/text_extractor.py +119 -9
- ai_med_extract/api/__init__.py +1 -0
- ai_med_extract/api/__pycache__/__init__.cpython-311.pyc +0 -0
- ai_med_extract/api/__pycache__/routes.cpython-311.pyc +0 -0
- ai_med_extract/api/model_management.py +397 -0
- services/ai-service/src/ai_med_extract/api/routes_fastapi.py → ai_med_extract/api/routes.py +0 -0
- ai_med_extract/app.py +175 -0
- ai_med_extract/gradio_app.py +70 -0
- ai_med_extract/utils/__init__.py +1 -0
- ai_med_extract/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- {services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/file_utils.cpython-311.pyc +0 -0
- {services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/json_slimmer.cpython-311.pyc +0 -0
- {services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/model_loader_gguf.cpython-311.pyc +0 -0
- ai_med_extract/utils/__pycache__/model_loader_spaces.cpython-311.pyc +0 -0
- {services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/model_manager.cpython-311.pyc +0 -0
.env
CHANGED
|
@@ -3,10 +3,4 @@ HF_HOME=/tmp/huggingface
|
|
| 3 |
XDG_CACHE_HOME=/tmp
|
| 4 |
TORCH_HOME=/tmp/torch
|
| 5 |
WHISPER_CACHE=/tmp/whisper
|
| 6 |
-
UPLOAD_DIR=/tmp/uploads
|
| 7 |
-
|
| 8 |
-
# Redis configuration (uncomment to enable scalable features)
|
| 9 |
-
# REDIS_URL=redis://localhost:6379/0
|
| 10 |
-
|
| 11 |
-
# Database configuration for audit logging (optional)
|
| 12 |
-
# DATABASE_URL=postgresql://username:password@localhost:5432/audit_db
|
|
|
|
| 3 |
XDG_CACHE_HOME=/tmp
|
| 4 |
TORCH_HOME=/tmp/torch
|
| 5 |
WHISPER_CACHE=/tmp/whisper
|
| 6 |
+
UPLOAD_DIR=/tmp/uploads
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.env.example
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
# .env.example - Environment configuration for local development
|
| 2 |
-
|
| 3 |
-
# Database configuration
|
| 4 |
-
DATABASE_URL=postgresql://user:password@localhost:5432/hnai_db
|
| 5 |
-
|
| 6 |
-
# API Key for external services
|
| 7 |
-
API_KEY=your-api-key-here
|
| 8 |
-
|
| 9 |
-
# Secret key for cryptographic signing
|
| 10 |
-
SECRET_KEY=your-secret-key-here
|
| 11 |
-
|
| 12 |
-
# Debug mode
|
| 13 |
-
DEBUG=True
|
| 14 |
-
|
| 15 |
-
# Application port
|
| 16 |
-
PORT=8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.huggingface.yaml
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
runtime: docker
|
| 2 |
sdk: docker
|
| 3 |
python_version: "3.10"
|
| 4 |
-
app: services/ai-service/src/ai_med_extract/app.py
|
| 5 |
build:
|
| 6 |
system_packages:
|
| 7 |
- tesseract-ocr
|
|
|
|
| 1 |
runtime: docker
|
| 2 |
sdk: docker
|
| 3 |
python_version: "3.10"
|
|
|
|
| 4 |
build:
|
| 5 |
system_packages:
|
| 6 |
- tesseract-ocr
|
.vscode/launch.json
DELETED
|
@@ -1,22 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"version": "0.2.0",
|
| 3 |
-
"configurations": [
|
| 4 |
-
{
|
| 5 |
-
"name": "Python: FastAPI (Localhost)",
|
| 6 |
-
"type": "debugpy",
|
| 7 |
-
"request": "launch",
|
| 8 |
-
"module": "uvicorn",
|
| 9 |
-
"args": [
|
| 10 |
-
"src.ai_med_extract.app:create_app",
|
| 11 |
-
"--factory",
|
| 12 |
-
"--host", "127.0.0.1",
|
| 13 |
-
"--port", "8000",
|
| 14 |
-
"--reload"
|
| 15 |
-
],
|
| 16 |
-
"cwd": "${workspaceFolder}/services/ai-service",
|
| 17 |
-
"env": {
|
| 18 |
-
"PYTHONPATH": "${workspaceFolder}/services/ai-service"
|
| 19 |
-
}
|
| 20 |
-
}
|
| 21 |
-
]
|
| 22 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.vscode/settings.json
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
{
|
| 2 |
"python.analysis.extraPaths": [
|
| 3 |
-
"./ai_med_extract/utils"
|
| 4 |
-
"./services/ai-service/src"
|
| 5 |
]
|
| 6 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"python.analysis.extraPaths": [
|
| 3 |
+
"./ai_med_extract/utils"
|
|
|
|
| 4 |
]
|
| 5 |
}
|
0.41.0'
ADDED
|
File without changes
|
DEPLOYMENT.md
CHANGED
|
@@ -1,106 +1,13 @@
|
|
| 1 |
-
#
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
#
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
#
|
| 8 |
-
- Python 3.10+
|
| 9 |
-
- Docker (optional, for containerized testing)
|
| 10 |
-
|
| 11 |
-
### Setup
|
| 12 |
-
1. Clone the repository
|
| 13 |
-
2. Install dependencies: `pip install -r requirements.txt`
|
| 14 |
-
3. Set environment variables (see Configuration section)
|
| 15 |
-
4. Run the application: `python -m uvicorn ai_med_extract.app:create_app --host 0.0.0.0 --port 7860`
|
| 16 |
-
|
| 17 |
-
### Testing
|
| 18 |
-
- Health check: `curl http://localhost:7860/health/live`
|
| 19 |
-
- API docs: `http://localhost:7860/docs` (FastAPI Swagger UI)
|
| 20 |
-
|
| 21 |
-
## Docker Deployment
|
| 22 |
-
|
| 23 |
-
### Build and Run
|
| 24 |
-
```bash
|
| 25 |
-
docker build -t medical-ai-service .
|
| 26 |
-
docker run -p 7860:7860 -e SECRET_KEY=your-secret -e DATABASE_URL=your-db medical-ai-service
|
| 27 |
-
```
|
| 28 |
-
|
| 29 |
-
### Configuration
|
| 30 |
-
- Exposes port 7860
|
| 31 |
-
- Runs FastAPI app with uvicorn
|
| 32 |
-
- Includes model caching optimizations
|
| 33 |
-
|
| 34 |
-
## Kubernetes Deployment
|
| 35 |
-
|
| 36 |
-
### Prerequisites
|
| 37 |
-
- Kubernetes cluster
|
| 38 |
-
- kubectl configured
|
| 39 |
-
- Secrets created for database, Redis, and JWT keys
|
| 40 |
-
|
| 41 |
-
### Deploy
|
| 42 |
-
```bash
|
| 43 |
-
kubectl apply -f infra/k8s/secure_deployment.yaml
|
| 44 |
-
```
|
| 45 |
-
|
| 46 |
-
### Features
|
| 47 |
-
- Horizontal Pod Autoscaler (2-10 replicas based on CPU/memory)
|
| 48 |
-
- Resource limits: 1-4 CPU, 4-8Gi memory
|
| 49 |
-
- Prometheus monitoring annotations
|
| 50 |
-
- Security contexts and network policies
|
| 51 |
-
|
| 52 |
-
### Scaling
|
| 53 |
-
The HPA automatically scales based on:
|
| 54 |
-
- CPU utilization > 70%
|
| 55 |
-
- Memory utilization > 80%
|
| 56 |
-
|
| 57 |
-
## Hugging Face Spaces Deployment
|
| 58 |
-
|
| 59 |
-
### Prerequisites
|
| 60 |
-
- Hugging Face account
|
| 61 |
-
- Space created with Docker runtime
|
| 62 |
-
|
| 63 |
-
### Configuration
|
| 64 |
-
1. Dockerfile exposes port 7860
|
| 65 |
-
2. FastAPI app listens on 0.0.0.0:7860
|
| 66 |
-
3. requirements.txt includes all dependencies
|
| 67 |
-
4. .huggingface.yaml with `runtime: docker`
|
| 68 |
-
5. .dockerignore and .gitignore present
|
| 69 |
-
|
| 70 |
-
### Deploy
|
| 71 |
-
```bash
|
| 72 |
-
# Test locally
|
| 73 |
-
docker build -t hntai-app .
|
| 74 |
-
docker run -p 7860:7860 hntai-app
|
| 75 |
-
|
| 76 |
-
# Push to HF Spaces
|
| 77 |
-
# App available at your-space-name.hf.space
|
| 78 |
-
```
|
| 79 |
-
|
| 80 |
-
## Configuration
|
| 81 |
-
|
| 82 |
-
### Required Environment Variables
|
| 83 |
-
- `SECRET_KEY`: Application secret key
|
| 84 |
-
- `JWT_SECRET_KEY`: JWT signing key
|
| 85 |
-
- `DATABASE_URL`: PostgreSQL connection string
|
| 86 |
-
- `REDIS_URL`: Redis connection string
|
| 87 |
-
|
| 88 |
-
### Optional
|
| 89 |
-
- `ENVIRONMENT`: prod/dev (default: prod)
|
| 90 |
-
- `PORT`: Service port (default: 7860)
|
| 91 |
-
- `CORS_ORIGINS`: Allowed CORS origins (default: *)
|
| 92 |
-
- Model cache directories and other settings in config_settings.py
|
| 93 |
-
|
| 94 |
-
## Monitoring
|
| 95 |
-
|
| 96 |
-
### Health Checks
|
| 97 |
-
- `/health/live`: Liveness probe
|
| 98 |
-
- `/health/ready`: Readiness probe
|
| 99 |
-
|
| 100 |
-
### Metrics
|
| 101 |
-
- `/metrics`: Prometheus metrics endpoint
|
| 102 |
-
- Includes performance metrics, model loading status
|
| 103 |
-
|
| 104 |
-
### Logging
|
| 105 |
-
- Structured JSON logs for production
|
| 106 |
-
- Configurable log levels
|
|
|
|
| 1 |
+
# Hugging Face Spaces Docker deployment instructions
|
| 2 |
|
| 3 |
+
# 1. Make sure your Dockerfile exposes port 7860 and runs your app on 0.0.0.0:7860
|
| 4 |
+
# 2. Your Flask app should listen on host='0.0.0.0' and port=7860
|
| 5 |
+
# 3. requirements.txt should include all dependencies
|
| 6 |
+
# 4. .huggingface.yaml with 'runtime: docker' is present
|
| 7 |
+
# 5. .dockerignore and .gitignore are present
|
| 8 |
|
| 9 |
+
# To test locally:
|
| 10 |
+
# docker build -t hntai-app .
|
| 11 |
+
# docker run -p 7860:7860 hntai-app
|
| 12 |
|
| 13 |
+
# Your app will be available at http://localhost:7860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
DEVELOPMENT.md
DELETED
|
@@ -1,377 +0,0 @@
|
|
| 1 |
-
# HNTAI - Scalable Medical Data Extraction API - Development Guide
|
| 2 |
-
|
| 3 |
-
## Overview
|
| 4 |
-
|
| 5 |
-
This FastAPI-based application provides scalable medical data extraction services, fully aligned with the "ChatGPT Version 3 - Scalable" architecture. It features async processing, Redis caching, PostgreSQL persistence, and enterprise-grade security.
|
| 6 |
-
|
| 7 |
-
## Architecture
|
| 8 |
-
|
| 9 |
-
### Core Components
|
| 10 |
-
|
| 11 |
-
1. **FastAPI Application** (`app.py`)
|
| 12 |
-
- Main application factory with lifespan events
|
| 13 |
-
- CORS middleware for cross-origin requests
|
| 14 |
-
- Centralized agent initialization
|
| 15 |
-
- Route registration from APIRouter
|
| 16 |
-
|
| 17 |
-
2. **Configuration** (`config_settings.py`)
|
| 18 |
-
- Pydantic-based settings with validation
|
| 19 |
-
- Environment variable loading
|
| 20 |
-
- Database and Redis URL configuration
|
| 21 |
-
|
| 22 |
-
3. **Inference Service** (`inference_service.py`)
|
| 23 |
-
- Async text summarization using thread pools
|
| 24 |
-
- Model caching for performance
|
| 25 |
-
- Chunking for long text processing
|
| 26 |
-
|
| 27 |
-
4. **PHI Scrubber Service** (`phi_scrubber_service.py`)
|
| 28 |
-
- Regex-based PHI detection and redaction
|
| 29 |
-
- Audit logging to PostgreSQL
|
| 30 |
-
- Redis-based statistics tracking
|
| 31 |
-
|
| 32 |
-
5. **API Routes** (`api/routes_fastapi.py`)
|
| 33 |
-
- FastAPI APIRouter with async endpoints
|
| 34 |
-
- Health checks (/live, /ready)
|
| 35 |
-
- Placeholder routes for full migration
|
| 36 |
-
|
| 37 |
-
### Data Flow
|
| 38 |
-
|
| 39 |
-
```
|
| 40 |
-
Client Request → FastAPI → Route Handler → Agent/Service → Redis Cache → PostgreSQL → Response
|
| 41 |
-
```
|
| 42 |
-
|
| 43 |
-
## Development Setup
|
| 44 |
-
|
| 45 |
-
### Prerequisites
|
| 46 |
-
|
| 47 |
-
- Python 3.10+
|
| 48 |
-
- PostgreSQL 13+
|
| 49 |
-
- Redis 6+
|
| 50 |
-
- Docker (optional)
|
| 51 |
-
|
| 52 |
-
### Local Development
|
| 53 |
-
|
| 54 |
-
1. **Clone and Setup Virtual Environment**
|
| 55 |
-
```bash
|
| 56 |
-
git clone <repository>
|
| 57 |
-
cd hntai
|
| 58 |
-
python -m venv venv
|
| 59 |
-
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 60 |
-
```
|
| 61 |
-
|
| 62 |
-
2. **Install Dependencies**
|
| 63 |
-
```bash
|
| 64 |
-
pip install -r requirements.txt
|
| 65 |
-
```
|
| 66 |
-
|
| 67 |
-
3. **Setup Database and Redis**
|
| 68 |
-
```bash
|
| 69 |
-
# Start PostgreSQL (using Docker)
|
| 70 |
-
docker run -d --name postgres -e POSTGRES_PASSWORD=password -p 5432:5432 postgres:13
|
| 71 |
-
|
| 72 |
-
# Start Redis (using Docker)
|
| 73 |
-
docker run -d --name redis -p 6379:6379 redis:6
|
| 74 |
-
|
| 75 |
-
# Create database
|
| 76 |
-
createdb medical_ai
|
| 77 |
-
```
|
| 78 |
-
|
| 79 |
-
4. **Environment Variables**
|
| 80 |
-
Create `.env` file:
|
| 81 |
-
```bash
|
| 82 |
-
DATABASE_URL=postgresql://postgres:password@localhost:5432/medical_ai
|
| 83 |
-
REDIS_URL=redis://localhost:6379/0
|
| 84 |
-
SECRET_KEY=your-secret-key-here
|
| 85 |
-
JWT_SECRET_KEY=your-jwt-secret-key-here
|
| 86 |
-
```
|
| 87 |
-
|
| 88 |
-
5. **Run Database Migrations**
|
| 89 |
-
```bash
|
| 90 |
-
# Apply schema
|
| 91 |
-
psql -d medical_ai -f database/postgresql/001_schema.sql
|
| 92 |
-
```
|
| 93 |
-
|
| 94 |
-
6. **Run the Application**
|
| 95 |
-
```bash
|
| 96 |
-
# Development mode
|
| 97 |
-
python -m ai_med_extract.main
|
| 98 |
-
|
| 99 |
-
# Or directly
|
| 100 |
-
uvicorn ai_med_extract.app:create_app --reload --host 0.0.0.0 --port 7860
|
| 101 |
-
```
|
| 102 |
-
|
| 103 |
-
7. **Access the Application**
|
| 104 |
-
- API: http://localhost:7860
|
| 105 |
-
- Docs: http://localhost:7860/docs (FastAPI auto-generated)
|
| 106 |
-
- Health: http://localhost:7860/live
|
| 107 |
-
|
| 108 |
-
### Debugging
|
| 109 |
-
|
| 110 |
-
1. **Enable Debug Logging**
|
| 111 |
-
```python
|
| 112 |
-
import logging
|
| 113 |
-
logging.basicConfig(level=logging.DEBUG)
|
| 114 |
-
```
|
| 115 |
-
|
| 116 |
-
2. **Use FastAPI Debug Mode**
|
| 117 |
-
```bash
|
| 118 |
-
uvicorn ai_med_extract.app:create_app --reload --log-level debug --host 0.0.0.0 --port 7860
|
| 119 |
-
```
|
| 120 |
-
|
| 121 |
-
3. **Test Endpoints**
|
| 122 |
-
```bash
|
| 123 |
-
# Health check
|
| 124 |
-
curl http://localhost:7860/live
|
| 125 |
-
|
| 126 |
-
# API docs
|
| 127 |
-
curl http://localhost:7860/openapi.json
|
| 128 |
-
```
|
| 129 |
-
|
| 130 |
-
4. **Database Debugging**
|
| 131 |
-
```bash
|
| 132 |
-
# Connect to PostgreSQL
|
| 133 |
-
psql -d medical_ai
|
| 134 |
-
|
| 135 |
-
# Check PHI audit logs
|
| 136 |
-
SELECT * FROM phi_audit_log LIMIT 10;
|
| 137 |
-
```
|
| 138 |
-
|
| 139 |
-
5. **Redis Debugging**
|
| 140 |
-
```bash
|
| 141 |
-
# Connect to Redis CLI
|
| 142 |
-
redis-cli
|
| 143 |
-
|
| 144 |
-
# Check keys
|
| 145 |
-
KEYS *
|
| 146 |
-
```
|
| 147 |
-
|
| 148 |
-
## Production Deployment
|
| 149 |
-
|
| 150 |
-
### Option 1: Docker Deployment
|
| 151 |
-
|
| 152 |
-
1. **Build Docker Image**
|
| 153 |
-
```bash
|
| 154 |
-
docker build -t hntai-api .
|
| 155 |
-
```
|
| 156 |
-
|
| 157 |
-
2. **Run Container**
|
| 158 |
-
```bash
|
| 159 |
-
docker run -d \
|
| 160 |
-
--name hntai-api \
|
| 161 |
-
-p 7860:7860 \
|
| 162 |
-
-e DATABASE_URL=postgresql://... \
|
| 163 |
-
-e REDIS_URL=redis://... \
|
| 164 |
-
-e SECRET_KEY=... \
|
| 165 |
-
-e JWT_SECRET_KEY=... \
|
| 166 |
-
hntai-api
|
| 167 |
-
```
|
| 168 |
-
|
| 169 |
-
### Option 2: Kubernetes Deployment
|
| 170 |
-
|
| 171 |
-
1. **Prerequisites**
|
| 172 |
-
- Kubernetes cluster
|
| 173 |
-
- kubectl configured
|
| 174 |
-
- PostgreSQL and Redis services running
|
| 175 |
-
|
| 176 |
-
2. **Create Secrets**
|
| 177 |
-
```bash
|
| 178 |
-
kubectl create secret generic medical-ai-secrets \
|
| 179 |
-
--from-literal=DATABASE_URL=postgresql://... \
|
| 180 |
-
--from-literal=REDIS_URL=redis://... \
|
| 181 |
-
--from-literal=SECRET_KEY=... \
|
| 182 |
-
--from-literal=JWT_SECRET_KEY=...
|
| 183 |
-
```
|
| 184 |
-
|
| 185 |
-
3. **Deploy to Kubernetes**
|
| 186 |
-
```bash
|
| 187 |
-
kubectl apply -f infra/k8s/secure_deployment.yaml
|
| 188 |
-
```
|
| 189 |
-
|
| 190 |
-
4. **Verify Deployment**
|
| 191 |
-
```bash
|
| 192 |
-
kubectl get pods -n medical-ai
|
| 193 |
-
kubectl logs -n medical-ai deployment/medical-ai-service
|
| 194 |
-
```
|
| 195 |
-
|
| 196 |
-
### Option 3: Hugging Face Spaces (Legacy)
|
| 197 |
-
|
| 198 |
-
The application still supports HF Spaces deployment for lightweight use cases.
|
| 199 |
-
|
| 200 |
-
1. **Update app.py** for HF Spaces compatibility
|
| 201 |
-
2. **Deploy via HF Spaces** with Docker SDK
|
| 202 |
-
|
| 203 |
-
## Monitoring and Observability
|
| 204 |
-
|
| 205 |
-
### Prometheus Metrics
|
| 206 |
-
|
| 207 |
-
The application exposes metrics at `/metrics` endpoint.
|
| 208 |
-
|
| 209 |
-
1. **Setup Prometheus**
|
| 210 |
-
```bash
|
| 211 |
-
kubectl apply -f monitoring/prometheus.yml
|
| 212 |
-
```
|
| 213 |
-
|
| 214 |
-
2. **Access Metrics**
|
| 215 |
-
```bash
|
| 216 |
-
curl http://ai-service.medical-ai.svc.cluster.local:80/metrics
|
| 217 |
-
```
|
| 218 |
-
|
| 219 |
-
### Health Checks
|
| 220 |
-
|
| 221 |
-
- **Liveness** (`/live`): Basic health check
|
| 222 |
-
- **Readiness** (`/ready`): Checks if agents are initialized
|
| 223 |
-
|
| 224 |
-
### Logging
|
| 225 |
-
|
| 226 |
-
- Structured JSON logging
|
| 227 |
-
- PHI operations logged to database
|
| 228 |
-
- Error tracking with stack traces
|
| 229 |
-
|
| 230 |
-
## Security Features
|
| 231 |
-
|
| 232 |
-
### HIPAA Compliance
|
| 233 |
-
|
| 234 |
-
- PHI scrubbing with audit trails
|
| 235 |
-
- Non-root container execution
|
| 236 |
-
- Secrets management via Kubernetes
|
| 237 |
-
- Network policies restricting traffic
|
| 238 |
-
|
| 239 |
-
### Authentication
|
| 240 |
-
|
| 241 |
-
- JWT-based authentication (framework ready)
|
| 242 |
-
- API key support (configurable)
|
| 243 |
-
|
| 244 |
-
## API Usage
|
| 245 |
-
|
| 246 |
-
### Health Endpoints
|
| 247 |
-
|
| 248 |
-
```bash
|
| 249 |
-
GET /live
|
| 250 |
-
GET /ready
|
| 251 |
-
```
|
| 252 |
-
|
| 253 |
-
### PHI Scrubbing
|
| 254 |
-
|
| 255 |
-
```bash
|
| 256 |
-
POST /phi/scrub
|
| 257 |
-
Content-Type: application/json
|
| 258 |
-
|
| 259 |
-
{
|
| 260 |
-
"text": "Patient John Doe, SSN 123-45-6789, diagnosed with diabetes."
|
| 261 |
-
}
|
| 262 |
-
```
|
| 263 |
-
|
| 264 |
-
Response:
|
| 265 |
-
```json
|
| 266 |
-
{
|
| 267 |
-
"scrubbed_text": "Patient [REDACTED], SSN [REDACTED], diagnosed with diabetes.",
|
| 268 |
-
"phi_found": ["NAME", "SSN"],
|
| 269 |
-
"redaction_count": 2
|
| 270 |
-
}
|
| 271 |
-
```
|
| 272 |
-
|
| 273 |
-
### Text Summarization
|
| 274 |
-
|
| 275 |
-
```bash
|
| 276 |
-
POST /api/generate_summary
|
| 277 |
-
Content-Type: application/json
|
| 278 |
-
|
| 279 |
-
{
|
| 280 |
-
"text": "Long medical text...",
|
| 281 |
-
"max_length": 150,
|
| 282 |
-
"min_length": 50
|
| 283 |
-
}
|
| 284 |
-
```
|
| 285 |
-
|
| 286 |
-
### Generate Patient Summary
|
| 287 |
-
|
| 288 |
-
The `generate_patient_summary` endpoint has been migrated from the original Flask implementation to FastAPI. It generates a comprehensive 4-section patient summary from EHR data, with support for streaming (SSE) to handle long-running tasks and prevent timeouts.
|
| 289 |
-
|
| 290 |
-
**Endpoint**: `POST /generate_patient_summary`
|
| 291 |
-
|
| 292 |
-
**Query Parameters**:
|
| 293 |
-
- `stream` (optional, default: `false`): Set to `true` for Server-Sent Events (SSE) streaming updates.
|
| 294 |
-
|
| 295 |
-
**Request Body** (JSON):
|
| 296 |
-
```json
|
| 297 |
-
{
|
| 298 |
-
"patientid": "12345",
|
| 299 |
-
"token": "your-auth-token",
|
| 300 |
-
"key": "your-api-key",
|
| 301 |
-
"patient_summarizer_model_name": "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf",
|
| 302 |
-
"patient_summarizer_model_type": "gguf",
|
| 303 |
-
"generation_mode": "hq", // Options: "hq" (high-quality), "fast", "rule" (deterministic)
|
| 304 |
-
"timeout_mode": "fast" // Options: "fast" (8s EHR timeout), "extended" (30s)
|
| 305 |
-
}
|
| 306 |
-
```
|
| 307 |
-
|
| 308 |
-
**Synchronous Response** (when `stream=false`):
|
| 309 |
-
```json
|
| 310 |
-
{
|
| 311 |
-
"summary": "## Clinical Assessment\n- Patient details...\n\n## Key Trends & Changes\n- Changes detected...\n\n## Plan & Suggested Actions\n- Recommendations...\n\n## Direct Guidance for Physician\n- Clinical insights...",
|
| 312 |
-
"baseline": "Patient baseline data...",
|
| 313 |
-
"delta": "Changes from previous visits...",
|
| 314 |
-
"timing": {"ehr_api": 2.5, "generation": 15.3, "total": 17.8},
|
| 315 |
-
"model_used": "microsoft/Phi-3-mini-4k-instruct (gguf)",
|
| 316 |
-
"timeout_mode_used": "fast"
|
| 317 |
-
}
|
| 318 |
-
```
|
| 319 |
-
|
| 320 |
-
**Streaming Response** (when `stream=true`):
|
| 321 |
-
- Returns a `text/event-stream` response with SSE events:
|
| 322 |
-
- `type: progress` - Progress updates (e.g., 10%, 50%)
|
| 323 |
-
- `type: complete` - Final result with full summary
|
| 324 |
-
- `type: error` - Error details if failed
|
| 325 |
-
- `type: heartbeat` - Keep-alive signals
|
| 326 |
-
|
| 327 |
-
**Notes**:
|
| 328 |
-
- The endpoint integrates with an external EHR API to fetch patient data.
|
| 329 |
-
- Supports multiple model types: GGUF, text-generation, summarization, seq2seq.
|
| 330 |
-
- Includes fallbacks for timeouts, API errors, and model failures.
|
| 331 |
-
- PHI scrubbing is applied automatically.
|
| 332 |
-
- Full implementation includes delta computation, baseline building, and 4-section markdown output.
|
| 333 |
-
|
| 334 |
-
### Other Endpoints (Migration in Progress)
|
| 335 |
-
- `POST /upload` - File upload and text extraction
|
| 336 |
-
- `POST /transcribe` - Audio transcription
|
| 337 |
-
- `POST /extract_medical_data` - Structured medical data extraction
|
| 338 |
-
- `POST /api/extract_medical_data_from_audio` - Audio-based medical extraction
|
| 339 |
-
|
| 340 |
-
## Troubleshooting
|
| 341 |
-
|
| 342 |
-
### Common Issues
|
| 343 |
-
|
| 344 |
-
1. **Model Loading Failures**
|
| 345 |
-
- Check HF_HOME and cache directories
|
| 346 |
-
- Ensure sufficient memory
|
| 347 |
-
- Verify internet connectivity for model downloads
|
| 348 |
-
|
| 349 |
-
2. **Database Connection Errors**
|
| 350 |
-
- Verify DATABASE_URL format
|
| 351 |
-
- Check PostgreSQL service status
|
| 352 |
-
- Ensure database exists and schema applied
|
| 353 |
-
|
| 354 |
-
3. **Redis Connection Issues**
|
| 355 |
-
- Verify REDIS_URL format
|
| 356 |
-
- Check Redis service availability
|
| 357 |
-
- Monitor Redis memory usage
|
| 358 |
-
|
| 359 |
-
4. **PHI Scrubbing Not Working**
|
| 360 |
-
- Check regex patterns in phi_scrubber_service.py
|
| 361 |
-
- Verify Redis connection for stats
|
| 362 |
-
- Check database audit logs
|
| 363 |
-
|
| 364 |
-
### Performance Tuning
|
| 365 |
-
|
| 366 |
-
- Adjust thread pools in inference_service.py
|
| 367 |
-
- Configure Redis connection pooling
|
| 368 |
-
- Set appropriate resource limits in K8s
|
| 369 |
-
- Monitor memory usage for model caching
|
| 370 |
-
|
| 371 |
-
## Contributing
|
| 372 |
-
|
| 373 |
-
1. Follow async/await patterns for new endpoints
|
| 374 |
-
2. Add proper error handling and logging
|
| 375 |
-
3. Update tests for new functionality
|
| 376 |
-
4. Ensure HIPAA compliance for PHI handling
|
| 377 |
-
5. Document API changes in this guide
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Dockerfile
CHANGED
|
@@ -219,4 +219,4 @@ ENTRYPOINT ["/entrypoint.sh"]
|
|
| 219 |
|
| 220 |
EXPOSE 7860
|
| 221 |
|
| 222 |
-
CMD ["
|
|
|
|
| 219 |
|
| 220 |
EXPOSE 7860
|
| 221 |
|
| 222 |
+
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "2", "--timeout", "0", "ai_med_extract.app:app"]
|
FINAL_PROGRESS.md
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# GGUF Timeout Fix - Complete Implementation
|
| 2 |
+
|
| 3 |
+
## ✅ All Steps Completed:
|
| 4 |
+
|
| 5 |
+
### 1. Increased GGUF Timeout
|
| 6 |
+
- Changed from 120s to 300s for Hugging Face Spaces
|
| 7 |
+
- Maintained 120s for local development
|
| 8 |
+
- Made timeout configurable via `GGUF_GENERATION_TIMEOUT` environment variable
|
| 9 |
+
|
| 10 |
+
### 2. Enhanced Error Handling
|
| 11 |
+
- Added comprehensive timeout handling in `routes.py`
|
| 12 |
+
- Implemented fallback mechanisms when GGUF model fails
|
| 13 |
+
- Added better logging for debugging timeout issues
|
| 14 |
+
- Created robust fallback pipeline for graceful degradation
|
| 15 |
+
|
| 16 |
+
### 3. Optimized GGUF Model Parameters
|
| 17 |
+
- Added CPU-specific optimizations for Hugging Face Spaces:
|
| 18 |
+
- `use_mlock=False` for better container compatibility
|
| 19 |
+
- `vocab_only=False` for full model loading
|
| 20 |
+
- `n_threads_batch=n_threads` for consistent threading
|
| 21 |
+
- `mmap=True` for memory mapping optimizations
|
| 22 |
+
- Cache type optimizations for better performance
|
| 23 |
+
|
| 24 |
+
### 4. Added Progress Logging
|
| 25 |
+
- Enhanced logging throughout the generation process
|
| 26 |
+
- Added detailed timing information for each generation loop
|
| 27 |
+
- Added validation checks for summary completeness
|
| 28 |
+
- Improved debugging capabilities
|
| 29 |
+
|
| 30 |
+
## 🔧 Files Modified:
|
| 31 |
+
|
| 32 |
+
### `ai_med_extract/utils/model_loader_gguf.py`
|
| 33 |
+
- Updated timeout handling with environment variable support
|
| 34 |
+
- Optimized model initialization parameters for Spaces
|
| 35 |
+
- Enhanced logging throughout the generation process
|
| 36 |
+
- Added detailed progress monitoring
|
| 37 |
+
|
| 38 |
+
### `ai_med_extract/api/routes.py`
|
| 39 |
+
- Added comprehensive error handling for GGUF timeouts
|
| 40 |
+
- Implemented fallback mechanisms when GGUF fails
|
| 41 |
+
- Improved logging and error responses
|
| 42 |
+
- Added graceful degradation to template-based fallback
|
| 43 |
+
|
| 44 |
+
## ⚙️ Configuration Options:
|
| 45 |
+
|
| 46 |
+
### Environment Variables:
|
| 47 |
+
- `GGUF_GENERATION_TIMEOUT`: Custom timeout in seconds (default: 300 for Spaces, 120 for local)
|
| 48 |
+
- `GGUF_N_THREADS`: Number of CPU threads to use
|
| 49 |
+
- `GGUF_N_BATCH`: Batch size for processing
|
| 50 |
+
|
| 51 |
+
### Performance Settings:
|
| 52 |
+
- **Hugging Face Spaces**: Ultra-conservative settings (1 thread, 16 batch, 512 context)
|
| 53 |
+
- **Local Development**: Normal settings (2 threads, 32 batch, 1024 context)
|
| 54 |
+
|
| 55 |
+
## 🚀 Ready for Testing:
|
| 56 |
+
|
| 57 |
+
The implementation is now complete and ready for testing. The changes include:
|
| 58 |
+
|
| 59 |
+
1. **Increased timeout** from 120s to 300s for Hugging Face Spaces
|
| 60 |
+
2. **Configurable timeout** via environment variable
|
| 61 |
+
3. **Better error handling** with fallback mechanisms
|
| 62 |
+
4. **Optimized parameters** for CPU performance on Spaces
|
| 63 |
+
5. **Enhanced logging** for debugging and monitoring
|
| 64 |
+
|
| 65 |
+
## 📋 Testing Checklist:
|
| 66 |
+
- [ ] Test GGUF model with Phi-3 model on Spaces
|
| 67 |
+
- [ ] Verify timeout is sufficient for generation
|
| 68 |
+
- [ ] Test fallback mechanisms when GGUF fails
|
| 69 |
+
- [ ] Monitor memory usage and performance
|
| 70 |
+
- [ ] Verify logging provides useful debugging information
|
| 71 |
+
|
| 72 |
+
The implementation should now handle the GGUF timeout issues effectively while providing graceful degradation when the model fails.
|
GGUF_TROUBLESHOOTING.md
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# GGUF Model Troubleshooting Guide for Hugging Face Spaces
|
| 2 |
+
|
| 3 |
+
## Problem Description
|
| 4 |
+
Your Hugging Face Space is throwing 500 errors when calling the `generate_patient_summary` API with GGUF models, specifically with:
|
| 5 |
+
- `"patient_summarizer_model_name": "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf"`
|
| 6 |
+
- `"patient_summarizer_model_type": "gguf"`
|
| 7 |
+
|
| 8 |
+
## Root Causes Identified
|
| 9 |
+
|
| 10 |
+
### 1. **Memory Constraints**
|
| 11 |
+
- Phi-3-mini-4k-instruct model is ~2.4GB
|
| 12 |
+
- Hugging Face Spaces have limited memory (Basic: 16GB RAM, Pro: 32GB RAM)
|
| 13 |
+
- Model loading + inference may exceed available memory
|
| 14 |
+
|
| 15 |
+
### 2. **Model Download Timeouts**
|
| 16 |
+
- Large model downloads can timeout in Spaces environment
|
| 17 |
+
- Network issues during model fetching
|
| 18 |
+
- Insufficient timeout handling
|
| 19 |
+
|
| 20 |
+
### 3. **Missing Dependencies**
|
| 21 |
+
- `llama-cpp-python` requires specific system libraries
|
| 22 |
+
- CPU optimization flags may not be set correctly
|
| 23 |
+
|
| 24 |
+
## Solutions Implemented
|
| 25 |
+
|
| 26 |
+
### 1. **Enhanced Error Handling**
|
| 27 |
+
- Added comprehensive logging throughout the pipeline
|
| 28 |
+
- Implemented fallback mechanisms when GGUF fails
|
| 29 |
+
- Better error messages for debugging
|
| 30 |
+
|
| 31 |
+
### 2. **Timeout Management**
|
| 32 |
+
- 5-minute timeout for model loading
|
| 33 |
+
- 2-minute timeout for text generation
|
| 34 |
+
- Threading-based timeout (more reliable than signals)
|
| 35 |
+
|
| 36 |
+
### 3. **Memory Optimization**
|
| 37 |
+
- Reduced context window from 4096 to 4000 tokens
|
| 38 |
+
- Reduced batch size from 128 to 64
|
| 39 |
+
- CPU-only mode with optimized thread usage
|
| 40 |
+
|
| 41 |
+
### 4. **Fallback Pipeline**
|
| 42 |
+
- Template-based response when GGUF fails
|
| 43 |
+
- Ensures API always returns a response
|
| 44 |
+
- Maintains API contract even during failures
|
| 45 |
+
|
| 46 |
+
## Testing Your Fix
|
| 47 |
+
|
| 48 |
+
### Run the Test Script
|
| 49 |
+
```bash
|
| 50 |
+
cd HNTAI
|
| 51 |
+
python test_gguf.py
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
This will test:
|
| 55 |
+
- Model loading
|
| 56 |
+
- Basic generation
|
| 57 |
+
- Full summary generation
|
| 58 |
+
- Fallback pipeline
|
| 59 |
+
|
| 60 |
+
### Expected Output
|
| 61 |
+
```
|
| 62 |
+
✓ Model loaded successfully in X.XXs
|
| 63 |
+
✓ Generation successful in X.XXs
|
| 64 |
+
✓ Full summary generation successful in X.XXs
|
| 65 |
+
🎉 All tests passed! GGUF model is working correctly.
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
## Deployment Steps
|
| 69 |
+
|
| 70 |
+
### 1. **Update Your Space**
|
| 71 |
+
```bash
|
| 72 |
+
git add .
|
| 73 |
+
git commit -m "Fix GGUF model 500 errors with enhanced error handling and fallbacks"
|
| 74 |
+
git push
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
### 2. **Monitor Logs**
|
| 78 |
+
Check your Hugging Face Space logs for:
|
| 79 |
+
- Model loading times
|
| 80 |
+
- Memory usage
|
| 81 |
+
- Error messages
|
| 82 |
+
- Fallback activations
|
| 83 |
+
|
| 84 |
+
### 3. **Test the API**
|
| 85 |
+
```bash
|
| 86 |
+
curl -X POST "https://your-space.hf.space/generate_patient_summary" \
|
| 87 |
+
-H "Content-Type: application/json" \
|
| 88 |
+
-d '{
|
| 89 |
+
"patientid": "test123",
|
| 90 |
+
"token": "your_token",
|
| 91 |
+
"key": "your_key",
|
| 92 |
+
"patient_summarizer_model_name": "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf",
|
| 93 |
+
"patient_summarizer_model_type": "gguf"
|
| 94 |
+
}'
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
## Environment Variables
|
| 98 |
+
|
| 99 |
+
Set these in your Hugging Face Space:
|
| 100 |
+
|
| 101 |
+
```bash
|
| 102 |
+
# Memory optimization
|
| 103 |
+
GGUF_N_THREADS=2
|
| 104 |
+
GGUF_N_BATCH=64
|
| 105 |
+
|
| 106 |
+
# Cache directories
|
| 107 |
+
HF_HOME=/tmp/huggingface
|
| 108 |
+
XDG_CACHE_HOME=/tmp
|
| 109 |
+
TORCH_HOME=/tmp/torch
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
## Alternative Models
|
| 113 |
+
|
| 114 |
+
If Phi-3-mini-4k-instruct (q4) still fails, try a smaller quantization or a different model type:
|
| 115 |
+
|
| 116 |
+
### Smaller GGUF Models
|
| 117 |
+
```json
|
| 118 |
+
{
|
| 119 |
+
"patient_summarizer_model_name": "TheBloke/Phi-3-mini-4k-instruct-GGUF/phi-3-mini-4k-instruct-q2_k.gguf",
|
| 120 |
+
"patient_summarizer_model_type": "gguf"
|
| 121 |
+
}
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
### Fallback to HuggingFace Models
|
| 125 |
+
```json
|
| 126 |
+
{
|
| 127 |
+
"patient_summarizer_model_name": "microsoft/Phi-3-mini-4k-instruct",
|
| 128 |
+
"patient_summarizer_model_type": "text-generation"
|
| 129 |
+
}
|
| 130 |
+
```
|
| 131 |
+
|
| 132 |
+
## Monitoring and Debugging
|
| 133 |
+
|
| 134 |
+
### 1. **Check Space Logs**
|
| 135 |
+
- Look for "GGUF" prefixed log messages
|
| 136 |
+
- Monitor memory usage patterns
|
| 137 |
+
- Check for timeout errors
|
| 138 |
+
|
| 139 |
+
### 2. **API Response Codes**
|
| 140 |
+
- `200`: Success
|
| 141 |
+
- `408`: Generation timeout
|
| 142 |
+
- `500`: Model loading failure (will use fallback)
|
| 143 |
+
|
| 144 |
+
### 3. **Performance Metrics**
|
| 145 |
+
- Model loading time: Should be < 5 minutes
|
| 146 |
+
- Generation time: Should be < 2 minutes
|
| 147 |
+
- Memory usage: Should stay within Space limits
|
| 148 |
+
|
| 149 |
+
## Common Issues and Solutions
|
| 150 |
+
|
| 151 |
+
### Issue: "Model download failed"
|
| 152 |
+
**Solution**: Check network connectivity and model availability
|
| 153 |
+
|
| 154 |
+
### Issue: "Failed to initialize GGUF model"
|
| 155 |
+
**Solution**: Verify llama-cpp-python installation and system dependencies
|
| 156 |
+
|
| 157 |
+
### Issue: "Generation timed out"
|
| 158 |
+
**Solution**: Reduce max_tokens or use smaller model
|
| 159 |
+
|
| 160 |
+
### Issue: "Out of memory"
|
| 161 |
+
**Solution**: Use smaller model variant (q2_k instead of q4)
|
| 162 |
+
|
| 163 |
+
## Support
|
| 164 |
+
|
| 165 |
+
If issues persist:
|
| 166 |
+
1. Run `test_gguf.py` and share output
|
| 167 |
+
2. Check Hugging Face Space logs
|
| 168 |
+
3. Verify model availability in the Hub
|
| 169 |
+
4. Consider upgrading to Pro tier for more resources
|
| 170 |
+
|
| 171 |
+
## Expected Behavior After Fix
|
| 172 |
+
|
| 173 |
+
❌ **Before**: 500 errors after 5 minutes
|
| 174 |
+
✅ **After**:
|
| 175 |
+
- Successful model loading with detailed logging
|
| 176 |
+
- Graceful fallback if model fails
|
| 177 |
+
- Proper timeout handling
|
| 178 |
+
- Always returns a response (either real or fallback)
|
PROGRESS_UPDATE.md
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# GGUF Timeout Fix - Progress Update
|
| 2 |
+
|
| 3 |
+
## ✅ Completed Steps:
|
| 4 |
+
|
| 5 |
+
1. **Increased GGUF timeout**: Changed from 120s to 300s for Hugging Face Spaces
|
| 6 |
+
2. **Configurable timeout**: Added GGUF_GENERATION_TIMEOUT environment variable support
|
| 7 |
+
3. **Better error handling**: Enhanced timeout and fallback mechanisms in routes.py
|
| 8 |
+
4. **Fallback pipeline**: Added robust fallback when GGUF model fails to load or times out
|
| 9 |
+
|
| 10 |
+
## 🔧 Changes Made:
|
| 11 |
+
|
| 12 |
+
### model_loader_gguf.py:
|
| 13 |
+
- Updated `_generate_with_timeout()` to use 300s default for Spaces, 120s for local
|
| 14 |
+
- Made timeout configurable via environment variable
|
| 15 |
+
- Updated `generate()` to use configurable timeout
|
| 16 |
+
|
| 17 |
+
### routes.py:
|
| 18 |
+
- Added fallback pipeline usage when GGUF times out
|
| 19 |
+
- Added better logging for timeout errors
|
| 20 |
+
- Added fallback for GGUF model loading failures
|
| 21 |
+
- Improved error messages and response handling
|
| 22 |
+
|
| 23 |
+
## 🚀 Next Steps:
|
| 24 |
+
- Test the changes with the GGUF model
|
| 25 |
+
- Verify timeout is sufficient for Phi-3 model
|
| 26 |
+
- Test fallback mechanisms
|
| 27 |
+
- Add progress logging for generation
|
| 28 |
+
|
| 29 |
+
## ⚙️ Configuration:
|
| 30 |
+
- Default timeout: 300s (Spaces) / 120s (local)
|
| 31 |
+
- Environment variable: `GGUF_GENERATION_TIMEOUT`
|
| 32 |
+
- Fallback: Template-based summary when GGUF fails
|
README.md
CHANGED
|
@@ -8,76 +8,32 @@ app_port: 7860
|
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
-
# HNTAI -
|
| 12 |
|
| 13 |
-
This is a
|
| 14 |
|
| 15 |
## Features
|
| 16 |
- Document text extraction (PDF, DOCX, Images)
|
| 17 |
- Audio transcription
|
| 18 |
- Medical data extraction
|
| 19 |
-
- PHI (Protected Health Information) scrubbing
|
| 20 |
-
- Text summarization
|
| 21 |
-
- PostgreSQL database integration for persistence
|
| 22 |
-
- Async processing for scalability
|
| 23 |
-
- Health endpoints (/live, /ready)
|
| 24 |
-
- Security features (non-root containers, secrets management, HIPAA compliance)
|
| 25 |
|
| 26 |
-
##
|
| 27 |
-
|
| 28 |
-
-
|
| 29 |
-
-
|
| 30 |
-
-
|
| 31 |
-
-
|
| 32 |
-
- Network policies and HIPAA compliance
|
| 33 |
-
- Prometheus monitoring
|
| 34 |
-
- Proper resource limits and health probes
|
| 35 |
-
|
| 36 |
-
## Deployment Options
|
| 37 |
-
- **Hugging Face Spaces**: Lightweight Docker deployment (legacy)
|
| 38 |
-
- **Kubernetes**: Scalable production deployment with security features
|
| 39 |
|
| 40 |
## Environment Variables
|
| 41 |
-
-
|
| 42 |
-
- `REDIS_URL`: Redis connection string
|
| 43 |
-
- `SECRET_KEY`: Application secret key
|
| 44 |
-
- `JWT_SECRET_KEY`: JWT signing key
|
| 45 |
|
| 46 |
## API Endpoints
|
| 47 |
-
- GET /health/live - Liveness health check
|
| 48 |
-
- GET /health/ready - Readiness health check
|
| 49 |
-
- GET /metrics - Prometheus metrics
|
| 50 |
-
- POST /generate_patient_summary - Generate comprehensive patient summaries (with streaming support)
|
| 51 |
- POST /upload - Upload and process medical documents
|
| 52 |
-
- GET /get_updated_medical_data - Retrieve processed medical data
|
| 53 |
-
- PUT /update_medical_data - Update medical data fields
|
| 54 |
- POST /transcribe - Transcribe audio files
|
| 55 |
- POST /extract_medical_data - Extract structured medical data
|
| 56 |
- POST /api/generate_summary - Generate text summaries
|
| 57 |
- POST /api/extract_medical_data_from_audio - Process audio recordings
|
| 58 |
-
- POST /api/patient_summary_openvino - Generate patient summaries using OpenVINO
|
| 59 |
-
|
| 60 |
-
## Development
|
| 61 |
-
|
| 62 |
-
### Code Quality
|
| 63 |
-
This project uses the following tools for code quality:
|
| 64 |
-
- **Black**: Code formatting
|
| 65 |
-
- **isort**: Import sorting
|
| 66 |
-
- **flake8**: Linting
|
| 67 |
-
- **mypy**: Type checking
|
| 68 |
-
|
| 69 |
-
Run quality checks:
|
| 70 |
-
```bash
|
| 71 |
-
black .
|
| 72 |
-
isort .
|
| 73 |
-
flake8 .
|
| 74 |
-
mypy .
|
| 75 |
-
```
|
| 76 |
-
|
| 77 |
-
### Testing
|
| 78 |
-
Run tests with:
|
| 79 |
-
```bash
|
| 80 |
-
python -m pytest
|
| 81 |
-
```
|
| 82 |
|
| 83 |
-
For more details, check the API documentation
|
|
|
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# HNTAI - Medical Data Extraction API
|
| 12 |
|
| 13 |
+
This is a Flask-based API for extracting and processing medical data from various document formats.
|
| 14 |
|
| 15 |
## Features
|
| 16 |
- Document text extraction (PDF, DOCX, Images)
|
| 17 |
- Audio transcription
|
| 18 |
- Medical data extraction
|
| 19 |
+
- PHI (Protected Health Information) scrubbing
|
| 20 |
+
- Text summarization
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
+
## Deployment on Hugging Face Spaces
|
| 23 |
+
- Uses Docker for deployment
|
| 24 |
+
- All models and data are cached in /tmp
|
| 25 |
+
- Optimized for memory usage
|
| 26 |
+
- Auto-retries for model loading
|
| 27 |
+
- Proper error handling
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
## Environment Variables
|
| 30 |
+
All necessary environment variables are pre-configured for Hugging Face Spaces deployment.
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
## API Endpoints
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
- POST /upload - Upload and process medical documents
|
|
|
|
|
|
|
| 34 |
- POST /transcribe - Transcribe audio files
|
| 35 |
- POST /extract_medical_data - Extract structured medical data
|
| 36 |
- POST /api/generate_summary - Generate text summaries
|
| 37 |
- POST /api/extract_medical_data_from_audio - Process audio recordings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
+
For more details, check the API documentation.
|
README_SPACES.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Medical Document Processing Space
|
| 2 |
+
|
| 3 |
+
This Hugging Face Space provides an AI-powered medical document processing system that can:
|
| 4 |
+
- Extract text from various medical documents (PDFs, images)
|
| 5 |
+
- Transcribe medical audio recordings
|
| 6 |
+
- Scrub PHI (Protected Health Information)
|
| 7 |
+
- Extract medical data and generate summaries
|
| 8 |
+
- Process medical reports and clinical notes
|
| 9 |
+
|
| 10 |
+
## Features
|
| 11 |
+
|
| 12 |
+
- **Document Processing**: Handles PDFs, images, and audio files
|
| 13 |
+
- **PHI Scrubbing**: Automatically removes sensitive health information
|
| 14 |
+
- **Medical Data Extraction**: Extracts key medical information using MedAlpaca
|
| 15 |
+
- **Summarization**: Generates concise summaries of medical documents
|
| 16 |
+
- **Audio Transcription**: Transcribes medical audio using Whisper
|
| 17 |
+
|
| 18 |
+
## Usage
|
| 19 |
+
|
| 20 |
+
1. Upload your medical document (PDF, image, or audio file)
|
| 21 |
+
2. Select the processing options you need
|
| 22 |
+
3. Get the processed results with extracted information and summaries
|
| 23 |
+
|
| 24 |
+
## Technical Details
|
| 25 |
+
|
| 26 |
+
- Built with Flask and modern AI models
|
| 27 |
+
- Uses Hugging Face's infrastructure for model hosting
|
| 28 |
+
- Implements secure file handling and processing
|
| 29 |
+
- Optimized for medical document processing
|
| 30 |
+
|
| 31 |
+
## Model Information
|
| 32 |
+
|
| 33 |
+
- Text Generation: MedAlpaca-13B
|
| 34 |
+
- Summarization: BART-large-CNN
|
| 35 |
+
- Speech-to-Text: Whisper Base
|
| 36 |
+
- Text Extraction: Custom OCR pipeline
|
| 37 |
+
|
| 38 |
+
## Limitations
|
| 39 |
+
|
| 40 |
+
- Maximum file size: 16GB
|
| 41 |
+
- Processing time may vary based on document size and complexity
|
| 42 |
+
- Some features may require specific file formats
|
| 43 |
+
|
| 44 |
+
## Privacy
|
| 45 |
+
|
| 46 |
+
All processing is done securely within the Hugging Face Space environment. No data is stored permanently.
|
REFACTORED_README.md
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HNTAI Medical Data Extraction - Refactored System
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
This project has been completely refactored to provide a unified, flexible model management system that supports **any model name and type**, including GGUF models for patient summary generation. The system now offers dynamic model loading, runtime model switching, and robust fallback mechanisms.
|
| 6 |
+
|
| 7 |
+
## 🚀 Key Features
|
| 8 |
+
|
| 9 |
+
### ✨ **Universal Model Support**
|
| 10 |
+
- **Any Model Name**: Use any Hugging Face model, local model, or custom model
|
| 11 |
+
- **Any Model Type**: Support for text-generation, summarization, NER, GGUF, OpenVINO, and more
|
| 12 |
+
- **Automatic Type Detection**: The system automatically detects model types from names
|
| 13 |
+
- **Dynamic Loading**: Load models at runtime without restarting the application
|
| 14 |
+
|
| 15 |
+
### 🔄 **GGUF Model Integration**
|
| 16 |
+
- **Seamless GGUF Support**: Full integration with llama.cpp for GGUF models
|
| 17 |
+
- **Patient Summary Generation**: Optimized for medical text summarization
|
| 18 |
+
- **Memory Efficient**: Ultra-conservative settings for Hugging Face Spaces
|
| 19 |
+
- **Fallback Mechanisms**: Automatic fallback when GGUF models fail
|
| 20 |
+
|
| 21 |
+
### 🧠 **Unified Model Manager**
|
| 22 |
+
- **Single Interface**: One manager handles all model types
|
| 23 |
+
- **Smart Caching**: Intelligent model caching with memory management
|
| 24 |
+
- **Fallback Chains**: Multiple fallback options for robustness
|
| 25 |
+
- **Performance Monitoring**: Built-in timing and memory tracking
|
| 26 |
+
|
| 27 |
+
## 🏗️ Architecture
|
| 28 |
+
|
| 29 |
+
### Core Components
|
| 30 |
+
|
| 31 |
+
1. **`UnifiedModelManager`** - Central model management system
|
| 32 |
+
2. **`BaseModelLoader`** - Abstract interface for all model loaders
|
| 33 |
+
3. **`TransformersModelLoader`** - Hugging Face Transformers models
|
| 34 |
+
4. **`GGUFModelLoader`** - GGUF models via llama.cpp
|
| 35 |
+
5. **`OpenVINOModelLoader`** - OpenVINO optimized models
|
| 36 |
+
6. **`PatientSummarizerAgent`** - Enhanced patient summary generation
|
| 37 |
+
|
| 38 |
+
### Model Type Support
|
| 39 |
+
|
| 40 |
+
| Model Type | Description | Example Models |
|
| 41 |
+
|------------|-------------|----------------|
|
| 42 |
+
| `text-generation` | Causal language models | `facebook/bart-base`, `microsoft/DialoGPT-medium` |
|
| 43 |
+
| `summarization` | Text summarization models | `Falconsai/medical_summarization`, `facebook/bart-large-cnn` |
|
| 44 |
+
| `ner` | Named Entity Recognition | `dslim/bert-base-NER`, `Jean-Baptiste/roberta-large-ner-english` |
|
| 45 |
+
| `gguf` | GGUF format models | `microsoft/Phi-3-mini-4k-instruct-gguf` |
|
| 46 |
+
| `openvino` | OpenVINO optimized models | `microsoft/Phi-3-mini-4k-instruct` |
|
| 47 |
+
|
| 48 |
+
## 🚀 Quick Start
|
| 49 |
+
|
| 50 |
+
### 1. Basic Usage
|
| 51 |
+
|
| 52 |
+
```python
|
| 53 |
+
from ai_med_extract.utils.model_manager import model_manager
|
| 54 |
+
|
| 55 |
+
# Load any model dynamically
|
| 56 |
+
loader = model_manager.get_model_loader(
|
| 57 |
+
model_name="microsoft/Phi-3-mini-4k-instruct-gguf",
|
| 58 |
+
model_type="gguf",
|
| 59 |
+
filename="Phi-3-mini-4k-instruct-q4.gguf"
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
# Generate text
|
| 63 |
+
result = loader.generate("Generate a medical summary for...")
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
### 2. Patient Summary Generation
|
| 67 |
+
|
| 68 |
+
```python
|
| 69 |
+
from ai_med_extract.agents.patient_summary_agent import PatientSummarizerAgent
|
| 70 |
+
|
| 71 |
+
# Create agent with any model
|
| 72 |
+
agent = PatientSummarizerAgent(
|
| 73 |
+
model_name="microsoft/Phi-3-mini-4k-instruct-gguf",
|
| 74 |
+
model_type="gguf"
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
# Generate clinical summary
|
| 78 |
+
summary = agent.generate_clinical_summary(patient_data)
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
### 3. Runtime Model Switching
|
| 82 |
+
|
| 83 |
+
```python
|
| 84 |
+
# Switch models at runtime
|
| 85 |
+
agent.update_model(
|
| 86 |
+
model_name="Falconsai/medical_summarization",
|
| 87 |
+
model_type="summarization"
|
| 88 |
+
)
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
## 📡 API Endpoints
|
| 92 |
+
|
| 93 |
+
### Model Management API
|
| 94 |
+
|
| 95 |
+
#### Load Model
|
| 96 |
+
```http
|
| 97 |
+
POST /api/models/load
|
| 98 |
+
Content-Type: application/json
|
| 99 |
+
|
| 100 |
+
{
|
| 101 |
+
"model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
|
| 102 |
+
"model_type": "gguf",
|
| 103 |
+
"filename": "Phi-3-mini-4k-instruct-q4.gguf",
|
| 104 |
+
"force_reload": false
|
| 105 |
+
}
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
#### Generate Text
|
| 109 |
+
```http
|
| 110 |
+
POST /api/models/generate
|
| 111 |
+
Content-Type: application/json
|
| 112 |
+
|
| 113 |
+
{
|
| 114 |
+
"model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
|
| 115 |
+
"model_type": "gguf",
|
| 116 |
+
"prompt": "Generate a medical summary for...",
|
| 117 |
+
"max_tokens": 512,
|
| 118 |
+
"temperature": 0.7
|
| 119 |
+
}
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
#### Switch Agent Model
|
| 123 |
+
```http
|
| 124 |
+
POST /api/models/switch
|
| 125 |
+
Content-Type: application/json
|
| 126 |
+
|
| 127 |
+
{
|
| 128 |
+
"agent_name": "patient_summarizer",
|
| 129 |
+
"model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
|
| 130 |
+
"model_type": "gguf"
|
| 131 |
+
}
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
#### Get Model Information
|
| 135 |
+
```http
|
| 136 |
+
GET /api/models/info?model_name=microsoft/Phi-3-mini-4k-instruct-gguf
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
#### Health Check
|
| 140 |
+
```http
|
| 141 |
+
GET /api/models/health
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
### Patient Summary API
|
| 145 |
+
|
| 146 |
+
#### Generate Patient Summary
|
| 147 |
+
```http
|
| 148 |
+
POST /generate_patient_summary
|
| 149 |
+
Content-Type: application/json
|
| 150 |
+
|
| 151 |
+
{
|
| 152 |
+
"patientid": "12345",
|
| 153 |
+
"token": "your_token",
|
| 154 |
+
"key": "your_api_key",
|
| 155 |
+
"patient_summarizer_model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
|
| 156 |
+
"patient_summarizer_model_type": "gguf"
|
| 157 |
+
}
|
| 158 |
+
```
|
| 159 |
+
|
| 160 |
+
## 🔧 Configuration
|
| 161 |
+
|
| 162 |
+
### Environment Variables
|
| 163 |
+
|
| 164 |
+
```bash
|
| 165 |
+
# Cache directories
|
| 166 |
+
HF_HOME=/tmp/huggingface
|
| 167 |
+
XDG_CACHE_HOME=/tmp
|
| 168 |
+
TORCH_HOME=/tmp/torch
|
| 169 |
+
WHISPER_CACHE=/tmp/whisper
|
| 170 |
+
|
| 171 |
+
# GGUF optimization
|
| 172 |
+
GGUF_N_THREADS=2
|
| 173 |
+
GGUF_N_BATCH=64
|
| 174 |
+
```
|
| 175 |
+
|
| 176 |
+
### Model Configuration
|
| 177 |
+
|
| 178 |
+
The system automatically uses optimized models for different environments:
|
| 179 |
+
|
| 180 |
+
- **Local Development**: Full model capabilities
|
| 181 |
+
- **Hugging Face Spaces**: Memory-optimized models
|
| 182 |
+
- **Production**: Configurable based on resources
|
| 183 |
+
|
| 184 |
+
## 🎯 Use Cases
|
| 185 |
+
|
| 186 |
+
### 1. **Medical Document Processing**
|
| 187 |
+
```python
|
| 188 |
+
# Extract medical data with any model
|
| 189 |
+
medical_data = model_manager.generate_text(
|
| 190 |
+
model_name="facebook/bart-base",
|
| 191 |
+
model_type="text-generation",
|
| 192 |
+
prompt="Extract medical entities from: " + document_text
|
| 193 |
+
)
|
| 194 |
+
```
|
| 195 |
+
|
| 196 |
+
### 2. **Patient Summary Generation**
|
| 197 |
+
```python
|
| 198 |
+
# Use GGUF model for patient summaries
|
| 199 |
+
summary = model_manager.generate_text(
|
| 200 |
+
model_name="microsoft/Phi-3-mini-4k-instruct-gguf",
|
| 201 |
+
model_type="gguf",
|
| 202 |
+
prompt=patient_data_prompt,
|
| 203 |
+
max_tokens=512
|
| 204 |
+
)
|
| 205 |
+
```
|
| 206 |
+
|
| 207 |
+
### 3. **Dynamic Model Switching**
|
| 208 |
+
```python
|
| 209 |
+
# Switch between models based on task requirements
|
| 210 |
+
if task == "summarization":
|
| 211 |
+
    model_name = "Falconsai/medical_summarization"
|
| 212 |
+
    model_type = "summarization"
|
| 213 |
+
elif task == "extraction":
|
| 214 |
+
    model_name = "facebook/bart-base"
|
| 215 |
+
    model_type = "text-generation"
|
| 216 |
+
|
| 217 |
+
loader = model_manager.get_model_loader(model_name, model_type)
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
## 🔒 Memory Management
|
| 221 |
+
|
| 222 |
+
### Hugging Face Spaces Optimization
|
| 223 |
+
|
| 224 |
+
The system automatically detects Hugging Face Spaces and applies ultra-conservative memory settings:
|
| 225 |
+
|
| 226 |
+
- **GGUF Models**: 1 thread, 16 batch size, 512 context
|
| 227 |
+
- **Transformers**: Float32 precision, minimal memory usage
|
| 228 |
+
- **Automatic Fallbacks**: Graceful degradation when memory is limited
|
| 229 |
+
|
| 230 |
+
### Memory Monitoring
|
| 231 |
+
|
| 232 |
+
```python
|
| 233 |
+
# Check memory usage
|
| 234 |
+
health = requests.get("http://localhost:7860/api/models/health").json()
|
| 235 |
+
print(f"GPU Memory: {health['gpu_info']['memory_allocated']}")
|
| 236 |
+
print(f"Loaded Models: {health['loaded_models_count']}")
|
| 237 |
+
```
|
| 238 |
+
|
| 239 |
+
## 🧪 Testing
|
| 240 |
+
|
| 241 |
+
### Test GGUF Models
|
| 242 |
+
|
| 243 |
+
```bash
|
| 244 |
+
# Test GGUF model loading
|
| 245 |
+
python test_gguf.py
|
| 246 |
+
|
| 247 |
+
# Test specific model
|
| 248 |
+
python -c "
|
| 249 |
+
from ai_med_extract.utils.model_manager import model_manager
|
| 250 |
+
loader = model_manager.get_model_loader('microsoft/Phi-3-mini-4k-instruct-gguf', 'gguf')
|
| 251 |
+
result = loader.generate('Test prompt')
|
| 252 |
+
print(f'Success: {len(result)} characters generated')
|
| 253 |
+
"
|
| 254 |
+
```
|
| 255 |
+
|
| 256 |
+
### Model Validation
|
| 257 |
+
|
| 258 |
+
```python
|
| 259 |
+
from ai_med_extract.utils.model_config import validate_model_config
|
| 260 |
+
|
| 261 |
+
# Validate model configuration
|
| 262 |
+
validation = validate_model_config(
|
| 263 |
+
model_name="microsoft/Phi-3-mini-4k-instruct-gguf",
|
| 264 |
+
model_type="gguf"
|
| 265 |
+
)
|
| 266 |
+
|
| 267 |
+
print(f"Valid: {validation['valid']}")
|
| 268 |
+
print(f"Warnings: {validation['warnings']}")
|
| 269 |
+
```
|
| 270 |
+
|
| 271 |
+
## 🚨 Error Handling
|
| 272 |
+
|
| 273 |
+
### Fallback Mechanisms
|
| 274 |
+
|
| 275 |
+
1. **Primary Model**: Attempts to load the specified model
|
| 276 |
+
2. **Fallback Model**: Uses predefined fallback for the model type
|
| 277 |
+
3. **Text Fallback**: Generates structured text responses
|
| 278 |
+
4. **Graceful Degradation**: Continues operation with reduced functionality
|
| 279 |
+
|
| 280 |
+
### Common Issues
|
| 281 |
+
|
| 282 |
+
#### GGUF Model Loading Fails
|
| 283 |
+
```python
|
| 284 |
+
# Check model file
|
| 285 |
+
if not os.path.exists(model_path):
|
| 286 |
+
    # Download from Hugging Face
|
| 287 |
+
    from huggingface_hub import hf_hub_download
|
| 288 |
+
    model_path = hf_hub_download(repo_id, filename)
|
| 289 |
+
```
|
| 290 |
+
|
| 291 |
+
#### Memory Issues
|
| 292 |
+
```python
|
| 293 |
+
# Clear cache and reload
|
| 294 |
+
model_manager.clear_cache()
|
| 295 |
+
torch.cuda.empty_cache()
|
| 296 |
+
|
| 297 |
+
# Use smaller model
|
| 298 |
+
loader = model_manager.get_model_loader(
|
| 299 |
+
model_name="facebook/bart-base", # Smaller model
|
| 300 |
+
model_type="text-generation"
|
| 301 |
+
)
|
| 302 |
+
```
|
| 303 |
+
|
| 304 |
+
## 📊 Performance
|
| 305 |
+
|
| 306 |
+
### Benchmarking
|
| 307 |
+
|
| 308 |
+
```python
|
| 309 |
+
import time
|
| 310 |
+
|
| 311 |
+
# Time model loading
|
| 312 |
+
start = time.time()
|
| 313 |
+
loader = model_manager.get_model_loader(model_name, model_type)
|
| 314 |
+
load_time = time.time() - start
|
| 315 |
+
|
| 316 |
+
# Time generation
|
| 317 |
+
start = time.time()
|
| 318 |
+
result = loader.generate(prompt)
|
| 319 |
+
gen_time = time.time() - start
|
| 320 |
+
|
| 321 |
+
print(f"Load: {load_time:.2f}s, Generate: {gen_time:.2f}s")
|
| 322 |
+
```
|
| 323 |
+
|
| 324 |
+
### Optimization Tips
|
| 325 |
+
|
| 326 |
+
1. **Use Appropriate Model Size**: Smaller models for limited resources
|
| 327 |
+
2. **Enable Caching**: Models are cached after first load
|
| 328 |
+
3. **Batch Processing**: Process multiple requests together
|
| 329 |
+
4. **Memory Monitoring**: Regular health checks
|
| 330 |
+
|
| 331 |
+
## 🔮 Future Enhancements
|
| 332 |
+
|
| 333 |
+
### Planned Features
|
| 334 |
+
|
| 335 |
+
- **Model Quantization**: Automatic model optimization
|
| 336 |
+
- **Distributed Loading**: Load models across multiple devices
|
| 337 |
+
- **Model Versioning**: Track and manage model versions
|
| 338 |
+
- **Performance Analytics**: Detailed performance metrics
|
| 339 |
+
- **Auto-scaling**: Automatic model scaling based on load
|
| 340 |
+
|
| 341 |
+
### Extensibility
|
| 342 |
+
|
| 343 |
+
The system is designed for easy extension:
|
| 344 |
+
|
| 345 |
+
```python
|
| 346 |
+
class CustomModelLoader(BaseModelLoader):
|
| 347 |
+
def __init__(self, model_name: str):
|
| 348 |
+
self.model_name = model_name
|
| 349 |
+
|
| 350 |
+
def load(self):
|
| 351 |
+
# Custom loading logic
|
| 352 |
+
pass
|
| 353 |
+
|
| 354 |
+
def generate(self, prompt: str, **kwargs):
|
| 355 |
+
# Custom generation logic
|
| 356 |
+
pass
|
| 357 |
+
```
|
| 358 |
+
|
| 359 |
+
## 📝 Migration Guide
|
| 360 |
+
|
| 361 |
+
### From Old System
|
| 362 |
+
|
| 363 |
+
1. **Replace Hardcoded Models**:
|
| 364 |
+
```python
|
| 365 |
+
# Old
|
| 366 |
+
model = LazyModelLoader("facebook/bart-base", "text-generation")
|
| 367 |
+
|
| 368 |
+
# New
|
| 369 |
+
model = model_manager.get_model_loader("facebook/bart-base", "text-generation")
|
| 370 |
+
```
|
| 371 |
+
|
| 372 |
+
2. **Update Patient Summarizer**:
|
| 373 |
+
```python
|
| 374 |
+
# Old
|
| 375 |
+
agent = PatientSummarizerAgent()
|
| 376 |
+
|
| 377 |
+
# New
|
| 378 |
+
agent = PatientSummarizerAgent(
|
| 379 |
+
model_name="microsoft/Phi-3-mini-4k-instruct-gguf",
|
| 380 |
+
model_type="gguf"
|
| 381 |
+
)
|
| 382 |
+
```
|
| 383 |
+
|
| 384 |
+
3. **Use Dynamic Model Selection**:
|
| 385 |
+
```python
|
| 386 |
+
# Old: Fixed model types
|
| 387 |
+
# New: Dynamic model selection
|
| 388 |
+
model_type = request.form.get("model_type", "text-generation")
|
| 389 |
+
model_name = request.form.get("model_name", "facebook/bart-base")
|
| 390 |
+
```
|
| 391 |
+
|
| 392 |
+
## 🤝 Contributing
|
| 393 |
+
|
| 394 |
+
### Development Setup
|
| 395 |
+
|
| 396 |
+
```bash
|
| 397 |
+
# Clone repository
|
| 398 |
+
git clone <repository-url>
|
| 399 |
+
cd HNTAI
|
| 400 |
+
|
| 401 |
+
# Install dependencies
|
| 402 |
+
pip install -r requirements.txt
|
| 403 |
+
|
| 404 |
+
# Run tests
|
| 405 |
+
python -m pytest tests/
|
| 406 |
+
|
| 407 |
+
# Start development server
|
| 408 |
+
python -m ai_med_extract.app
|
| 409 |
+
```
|
| 410 |
+
|
| 411 |
+
### Adding New Model Types
|
| 412 |
+
|
| 413 |
+
1. **Create Loader Class**:
|
| 414 |
+
```python
|
| 415 |
+
class CustomModelLoader(BaseModelLoader):
|
| 416 |
+
# Implement required methods
|
| 417 |
+
pass
|
| 418 |
+
```
|
| 419 |
+
|
| 420 |
+
2. **Update Model Manager**:
|
| 421 |
+
```python
|
| 422 |
+
if model_type == "custom":
|
| 423 |
+
loader = CustomModelLoader(model_name)
|
| 424 |
+
```
|
| 425 |
+
|
| 426 |
+
3. **Add Configuration**:
|
| 427 |
+
```python
|
| 428 |
+
DEFAULT_MODELS["custom"] = {
|
| 429 |
+
"primary": "default/custom-model",
|
| 430 |
+
"fallback": "fallback/custom-model"
|
| 431 |
+
}
|
| 432 |
+
```
|
| 433 |
+
|
| 434 |
+
## 📄 License
|
| 435 |
+
|
| 436 |
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
| 437 |
+
|
| 438 |
+
## 🆘 Support
|
| 439 |
+
|
| 440 |
+
### Getting Help
|
| 441 |
+
|
| 442 |
+
- **Documentation**: This README and inline code comments
|
| 443 |
+
- **Issues**: GitHub Issues for bug reports
|
| 444 |
+
- **Discussions**: GitHub Discussions for questions
|
| 445 |
+
- **Examples**: See `test_gguf.py` and other test files
|
| 446 |
+
|
| 447 |
+
### Common Questions
|
| 448 |
+
|
| 449 |
+
**Q: Can I use my own GGUF model?**
|
| 450 |
+
A: Yes! Just provide the path to your .gguf file or upload it to Hugging Face.
|
| 451 |
+
|
| 452 |
+
**Q: How do I optimize for memory?**
|
| 453 |
+
A: Use smaller models, enable caching, and monitor memory usage via `/api/models/health`.
|
| 454 |
+
|
| 455 |
+
**Q: Can I switch models without restarting?**
|
| 456 |
+
A: Yes! Use the `/api/models/switch` endpoint to change models at runtime.
|
| 457 |
+
|
| 458 |
+
**Q: What if a model fails to load?**
|
| 459 |
+
A: The system automatically falls back to alternative models and provides detailed error information.
|
| 460 |
+
|
| 461 |
+
---
|
| 462 |
+
|
| 463 |
+
**🎉 Congratulations!** You now have a powerful, flexible system that can work with any model name and type, including GGUF models for patient summary generation. The system is designed to be robust, efficient, and easy to use while maintaining backward compatibility.
|
TODO.md
CHANGED
|
@@ -1,12 +1,14 @@
|
|
| 1 |
-
# TODO:
|
| 2 |
|
| 3 |
-
##
|
| 4 |
-
- [x] Create quantization_utils.py with Sinkhorn-Normalized Quantization implementation
|
| 5 |
-
- [x] Modify model_manager.py to support optional quantization during model loading
|
| 6 |
-
- [x] Add configuration options for quantization in model_config.py
|
| 7 |
-
- [x] Test quantization on a sample model without affecting existing workflows
|
| 8 |
-
- [x] Verify that existing model loading and inference still work
|
| 9 |
-
- [ ] Update documentation if needed
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# TODO: Fix None Type Handling and Stream Ending on Generation Failure
|
| 2 |
|
| 3 |
+
## Tasks to Complete
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
- [x] Update `active_set` in `openvino_summarizer_utils.py` to handle None visits
|
| 6 |
+
- [x] Update `compute_deltas` in `openvino_summarizer_utils.py` to ensure old_visits and new_visits are lists
|
| 7 |
+
- [x] Update `visits_sorted` in `openvino_summarizer_utils.py` to handle None input
|
| 8 |
+
- [x] Update `latest_value` and related functions in `openvino_summarizer_utils.py` to handle None visits
|
| 9 |
+
- [x] Update `parse_ehr_chartsummarydtl` in `openvino_summarizer_utils.py` to return [] if chartsummarydtl is None
|
| 10 |
+
- [ ] Ensure `visits` is always a list in `background_patient_summary` in `routes.py`
|
| 11 |
+
- [ ] Wrap `delta = compute_deltas([], visits)` in try-except in `routes.py`
|
| 12 |
+
- [ ] On generation failure in `background_patient_summary`, update the job to 'error' and provide a fallback
|
| 13 |
+
- [ ] Verify that `sse_generator` ends the stream properly on error
|
| 14 |
+
- [ ] Test the fixes to ensure no None type errors and proper stream ending
|
__pycache__/ai_med_extract.cpython-311.pyc
DELETED
|
Binary file (898 Bytes)
|
|
|
__pycache__/test_chunking.cpython-311.pyc
ADDED
|
Binary file (4.61 kB). View file
|
|
|
__pycache__/test_summary_consistency.cpython-311.pyc
ADDED
|
Binary file (12.3 kB). View file
|
|
|
ai_med_extract.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
| 1 |
-
"""Compatibility shim for older imports that expect a top-level `ai_med_extract`.
|
| 2 |
-
|
| 3 |
-
This module re-exports the package located at `services/ai-service/src/ai_med_extract`.
|
| 4 |
-
It keeps older tests and imports working while the canonical package lives under services/ai-service/src.
|
| 5 |
-
"""
|
| 6 |
-
import sys
|
| 7 |
-
import os
|
| 8 |
-
|
| 9 |
-
# Compute path to the migrated package
|
| 10 |
-
ROOT = os.path.dirname(__file__)
|
| 11 |
-
SERVICE_SRC = os.path.join(ROOT, 'services', 'ai-service', 'src')
|
| 12 |
-
if SERVICE_SRC not in sys.path:
|
| 13 |
-
sys.path.insert(0, SERVICE_SRC)
|
| 14 |
-
|
| 15 |
-
from ai_med_extract import * # re-export everything
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ai_med_extract/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# ai_med_extract/__init__.py
|
ai_med_extract/__main__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .app import app
|
| 2 |
+
|
| 3 |
+
# Entrypoint for running the app as a module
|
| 4 |
+
if __name__ == "__main__":
|
| 5 |
+
app.run(host="0.0.0.0", port=7860, debug=True)
|
ai_med_extract/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (173 Bytes). View file
|
|
|
ai_med_extract/__pycache__/app.cpython-311.pyc
ADDED
|
Binary file (9.15 kB). View file
|
|
|
ai_med_extract/agents/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# ai_med_extract/agents/__init__.py
|
ai_med_extract/agents/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (180 Bytes). View file
|
|
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/medical_data_extractor.cpython-311.pyc
RENAMED
|
Binary files a/services/ai-service/src/ai_med_extract/agents/__pycache__/medical_data_extractor.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/medical_data_extractor.cpython-311.pyc differ
|
|
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/patient_summary_agent.cpython-311.pyc
RENAMED
|
Binary files a/services/ai-service/src/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc differ
|
|
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/phi_scrubber.cpython-311.pyc
RENAMED
|
Binary files a/services/ai-service/src/ai_med_extract/agents/__pycache__/phi_scrubber.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/phi_scrubber.cpython-311.pyc differ
|
|
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/summarizer.cpython-311.pyc
RENAMED
|
Binary files a/services/ai-service/src/ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc differ
|
|
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/text_extractor.cpython-311.pyc
RENAMED
|
Binary files a/services/ai-service/src/ai_med_extract/agents/__pycache__/text_extractor.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/text_extractor.cpython-311.pyc differ
|
|
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/medical_data_extractor.py
RENAMED
|
@@ -3,7 +3,7 @@ import logging
|
|
| 3 |
import json
|
| 4 |
import torch
|
| 5 |
|
| 6 |
-
from .phi_scrubber import MedicalTextUtils
|
| 7 |
|
| 8 |
class MedicalDataExtractorAgent:
|
| 9 |
def __init__(self, generator):
|
|
|
|
| 3 |
import json
|
| 4 |
import torch
|
| 5 |
|
| 6 |
+
from ai_med_extract.agents.phi_scrubber import MedicalTextUtils
|
| 7 |
|
| 8 |
class MedicalDataExtractorAgent:
|
| 9 |
def __init__(self, generator):
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/patient_summary_agent.py
RENAMED
|
@@ -6,7 +6,6 @@ import json
|
|
| 6 |
from typing import List, Dict, Union, Optional
|
| 7 |
from textwrap import fill
|
| 8 |
import concurrent.futures
|
| 9 |
-
import logging
|
| 10 |
|
| 11 |
# Suppress non-critical warnings
|
| 12 |
warnings.filterwarnings("ignore", category=UserWarning)
|
|
@@ -28,12 +27,9 @@ class PatientSummarizerAgent:
|
|
| 28 |
|
| 29 |
# Initialize model loader through unified model manager
|
| 30 |
self.model_loader = None
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
logging.getLogger(__name__).info(
|
| 35 |
-
f"PatientSummarizerAgent created for {model_name} ({model_type}) on {self.device} (loader deferred)"
|
| 36 |
-
)
|
| 37 |
|
| 38 |
def _initialize_model_loader(self):
|
| 39 |
"""Initialize the model loader using the unified model manager"""
|
|
@@ -72,11 +68,10 @@ class PatientSummarizerAgent:
|
|
| 72 |
self.model_type
|
| 73 |
)
|
| 74 |
|
| 75 |
-
|
| 76 |
-
logging.getLogger(__name__).info(f"Model loader initialized: {self.model_name} ({self.model_type})")
|
| 77 |
|
| 78 |
except Exception as e:
|
| 79 |
-
|
| 80 |
# Create a fallback loader
|
| 81 |
self._create_fallback_loader()
|
| 82 |
|
|
@@ -102,18 +97,16 @@ class PatientSummarizerAgent:
|
|
| 102 |
return self.generate(prompt, **kwargs)
|
| 103 |
|
| 104 |
self.model_loader = FallbackLoader(self.model_name, self.model_type)
|
| 105 |
-
|
| 106 |
-
logging.getLogger(__name__).warning(f"Using fallback loader for {self.model_name}")
|
| 107 |
|
| 108 |
def generate_clinical_summary(self, patient_data: Union[List[str], Dict]) -> str:
|
| 109 |
"""Generate a comprehensive clinical summary using the unified model manager"""
|
| 110 |
-
|
| 111 |
-
logging.getLogger(__name__).info(f"Generating clinical summary using model: {self.model_name} ({self.model_type})...")
|
| 112 |
|
| 113 |
try:
|
| 114 |
# Build the narrative prompt
|
| 115 |
narrative_history = self.build_chronological_narrative(patient_data)
|
| 116 |
-
|
| 117 |
|
| 118 |
# Generate summary using the model loader
|
| 119 |
if hasattr(self.model_loader, 'generate_full_summary'):
|
|
@@ -132,7 +125,7 @@ class PatientSummarizerAgent:
|
|
| 132 |
top_p=0.9
|
| 133 |
)
|
| 134 |
|
| 135 |
-
|
| 136 |
|
| 137 |
# Format the output
|
| 138 |
formatted_report = self.format_clinical_output(raw_summary_text, patient_data)
|
|
@@ -152,7 +145,7 @@ class PatientSummarizerAgent:
|
|
| 152 |
return final_output
|
| 153 |
|
| 154 |
except Exception as e:
|
| 155 |
-
|
| 156 |
import traceback
|
| 157 |
traceback.print_exc()
|
| 158 |
return f"Error generating summary: {str(e)}"
|
|
@@ -174,17 +167,24 @@ class PatientSummarizerAgent:
|
|
| 174 |
)
|
| 175 |
|
| 176 |
def _generate_section_with_instance(self, prompt: str, section_name: str) -> tuple:
|
| 177 |
-
"""Generate a section using
|
| 178 |
try:
|
| 179 |
-
#
|
| 180 |
-
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
prompt,
|
| 183 |
max_tokens=256, # Reduced for speed
|
| 184 |
max_loops=1
|
| 185 |
)
|
| 186 |
else:
|
| 187 |
-
result =
|
| 188 |
prompt,
|
| 189 |
max_new_tokens=256, # Reduced for speed
|
| 190 |
temperature=0.7,
|
|
@@ -194,10 +194,8 @@ class PatientSummarizerAgent:
|
|
| 194 |
except Exception as e:
|
| 195 |
return section_name, f"Error generating section {section_name}: {e}"
|
| 196 |
|
| 197 |
-
def generate_summary_sections(self, patient_data: Union[List[str], Dict]
|
| 198 |
"""Generate the 4 summary sections in parallel using dedicated model instances."""
|
| 199 |
-
if self.model_loader is None:
|
| 200 |
-
self._initialize_model_loader()
|
| 201 |
narrative_history = self.build_chronological_narrative(patient_data)
|
| 202 |
|
| 203 |
# Define prompts for each section with markdown format instruction
|
|
@@ -220,21 +218,17 @@ class PatientSummarizerAgent:
|
|
| 220 |
try:
|
| 221 |
section_name, result = future.result()
|
| 222 |
results[section_name] = result
|
| 223 |
-
if callback:
|
| 224 |
-
callback(section_name, result)
|
| 225 |
except Exception as exc:
|
| 226 |
results[section] = f"Error generating section {section}: {exc}"
|
| 227 |
-
if callback:
|
| 228 |
-
callback(section, f"Error generating section {section}: {exc}")
|
| 229 |
|
| 230 |
return results
|
| 231 |
|
| 232 |
-
def generate_patient_summary(self, patient_data: Union[List[str], Dict]
|
| 233 |
"""Generate the complete patient summary by stitching together 4 sections generated in parallel."""
|
| 234 |
-
|
| 235 |
|
| 236 |
try:
|
| 237 |
-
sections = self.generate_summary_sections(patient_data
|
| 238 |
|
| 239 |
# Stitch sections together
|
| 240 |
final_summary = "\n\n".join(
|
|
@@ -259,7 +253,7 @@ class PatientSummarizerAgent:
|
|
| 259 |
return final_output
|
| 260 |
|
| 261 |
except Exception as e:
|
| 262 |
-
|
| 263 |
import traceback
|
| 264 |
traceback.print_exc()
|
| 265 |
return f"Error generating patient summary: {str(e)}"
|
|
@@ -414,20 +408,20 @@ class PatientSummarizerAgent:
|
|
| 414 |
has_afib = any("atrial fibrillation" in dx.lower() for dx in last_enc.get('diagnosis', []))
|
| 415 |
on_anticoag = any("warfarin" in med.lower() or "apixaban" in med.lower() for med in last_enc.get('medications', []))
|
| 416 |
if has_afib:
|
| 417 |
-
evaluation += " -
|
| 418 |
-
else " -
|
| 419 |
|
| 420 |
has_mi = any("myocardial infarction" in hx.lower() for hx in result.get('past_medical_history', []))
|
| 421 |
on_statin = any("atorvastatin" in med.lower() or "statin" in med.lower() for med in last_enc.get('medications', []))
|
| 422 |
if has_mi:
|
| 423 |
-
evaluation += " -
|
| 424 |
-
else " -
|
| 425 |
|
| 426 |
has_aki = any("acute kidney injury" in dx.lower() for dx in last_enc.get('diagnosis', []))
|
| 427 |
acei_held = "hold" in last_enc.get('dr_notes', '').lower() and "lisinopril" in last_enc.get('dr_notes', '')
|
| 428 |
if has_aki:
|
| 429 |
-
evaluation += " -
|
| 430 |
-
else " -
|
| 431 |
|
| 432 |
evaluation += (
|
| 433 |
"\nDisclaimer: This is a simulated evaluation and not a substitute for clinical judgment.\n"
|
|
@@ -439,7 +433,7 @@ class PatientSummarizerAgent:
|
|
| 439 |
self.model_name = model_name
|
| 440 |
self.model_type = model_type
|
| 441 |
self._initialize_model_loader()
|
| 442 |
-
|
| 443 |
|
| 444 |
def get_model_info(self) -> dict:
|
| 445 |
"""Get information about the current model"""
|
|
|
|
| 6 |
from typing import List, Dict, Union, Optional
|
| 7 |
from textwrap import fill
|
| 8 |
import concurrent.futures
|
|
|
|
| 9 |
|
| 10 |
# Suppress non-critical warnings
|
| 11 |
warnings.filterwarnings("ignore", category=UserWarning)
|
|
|
|
| 27 |
|
| 28 |
# Initialize model loader through unified model manager
|
| 29 |
self.model_loader = None
|
| 30 |
+
self._initialize_model_loader()
|
| 31 |
+
|
| 32 |
+
print(f"✅ PatientSummarizerAgent initialized with {model_name} ({model_type}) on {self.device}")
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
def _initialize_model_loader(self):
|
| 35 |
"""Initialize the model loader using the unified model manager"""
|
|
|
|
| 68 |
self.model_type
|
| 69 |
)
|
| 70 |
|
| 71 |
+
print(f"✅ Model loader initialized: {self.model_name} ({self.model_type})")
|
|
|
|
| 72 |
|
| 73 |
except Exception as e:
|
| 74 |
+
print(f"❌ Failed to initialize model loader: {e}")
|
| 75 |
# Create a fallback loader
|
| 76 |
self._create_fallback_loader()
|
| 77 |
|
|
|
|
| 97 |
return self.generate(prompt, **kwargs)
|
| 98 |
|
| 99 |
self.model_loader = FallbackLoader(self.model_name, self.model_type)
|
| 100 |
+
print(f"⚠️ Using fallback loader for {self.model_name}")
|
|
|
|
| 101 |
|
| 102 |
def generate_clinical_summary(self, patient_data: Union[List[str], Dict]) -> str:
|
| 103 |
"""Generate a comprehensive clinical summary using the unified model manager"""
|
| 104 |
+
print(f"✨ Generating clinical summary using model: {self.model_name} ({self.model_type})...")
|
|
|
|
| 105 |
|
| 106 |
try:
|
| 107 |
# Build the narrative prompt
|
| 108 |
narrative_history = self.build_chronological_narrative(patient_data)
|
| 109 |
+
print(f"\n--- Prompt Sent to Model (truncated) ---\n{fill(narrative_history, width=80)[:1000]}...")
|
| 110 |
|
| 111 |
# Generate summary using the model loader
|
| 112 |
if hasattr(self.model_loader, 'generate_full_summary'):
|
|
|
|
| 125 |
top_p=0.9
|
| 126 |
)
|
| 127 |
|
| 128 |
+
print(f"\n--- Raw Model Output ---\n{fill(raw_summary_text, width=80)}")
|
| 129 |
|
| 130 |
# Format the output
|
| 131 |
formatted_report = self.format_clinical_output(raw_summary_text, patient_data)
|
|
|
|
| 145 |
return final_output
|
| 146 |
|
| 147 |
except Exception as e:
|
| 148 |
+
print(f"❌ Error during summary generation: {e}")
|
| 149 |
import traceback
|
| 150 |
traceback.print_exc()
|
| 151 |
return f"Error generating summary: {str(e)}"
|
|
|
|
| 167 |
)
|
| 168 |
|
| 169 |
def _generate_section_with_instance(self, prompt: str, section_name: str) -> tuple:
|
| 170 |
+
"""Generate a section using a dedicated model instance."""
|
| 171 |
try:
|
| 172 |
+
# Create a dedicated model loader instance for this section
|
| 173 |
+
from ..utils.model_manager import model_manager
|
| 174 |
+
model_loader = model_manager.get_model_loader(
|
| 175 |
+
self.model_name,
|
| 176 |
+
self.model_type
|
| 177 |
+
)
|
| 178 |
+
|
| 179 |
+
# Generate the section
|
| 180 |
+
if hasattr(model_loader, 'generate_full_summary'):
|
| 181 |
+
result = model_loader.generate_full_summary(
|
| 182 |
prompt,
|
| 183 |
max_tokens=256, # Reduced for speed
|
| 184 |
max_loops=1
|
| 185 |
)
|
| 186 |
else:
|
| 187 |
+
result = model_loader.generate(
|
| 188 |
prompt,
|
| 189 |
max_new_tokens=256, # Reduced for speed
|
| 190 |
temperature=0.7,
|
|
|
|
| 194 |
except Exception as e:
|
| 195 |
return section_name, f"Error generating section {section_name}: {e}"
|
| 196 |
|
| 197 |
+
def generate_summary_sections(self, patient_data: Union[List[str], Dict]) -> Dict[str, str]:
|
| 198 |
"""Generate the 4 summary sections in parallel using dedicated model instances."""
|
|
|
|
|
|
|
| 199 |
narrative_history = self.build_chronological_narrative(patient_data)
|
| 200 |
|
| 201 |
# Define prompts for each section with markdown format instruction
|
|
|
|
| 218 |
try:
|
| 219 |
section_name, result = future.result()
|
| 220 |
results[section_name] = result
|
|
|
|
|
|
|
| 221 |
except Exception as exc:
|
| 222 |
results[section] = f"Error generating section {section}: {exc}"
|
|
|
|
|
|
|
| 223 |
|
| 224 |
return results
|
| 225 |
|
| 226 |
+
def generate_patient_summary(self, patient_data: Union[List[str], Dict]) -> str:
|
| 227 |
"""Generate the complete patient summary by stitching together 4 sections generated in parallel."""
|
| 228 |
+
print(f"✨ Generating patient summary in parallel sections using model: {self.model_name} ({self.model_type})...")
|
| 229 |
|
| 230 |
try:
|
| 231 |
+
sections = self.generate_summary_sections(patient_data)
|
| 232 |
|
| 233 |
# Stitch sections together
|
| 234 |
final_summary = "\n\n".join(
|
|
|
|
| 253 |
return final_output
|
| 254 |
|
| 255 |
except Exception as e:
|
| 256 |
+
print(f"❌ Error during parallel summary generation: {e}")
|
| 257 |
import traceback
|
| 258 |
traceback.print_exc()
|
| 259 |
return f"Error generating patient summary: {str(e)}"
|
|
|
|
| 408 |
has_afib = any("atrial fibrillation" in dx.lower() for dx in last_enc.get('diagnosis', []))
|
| 409 |
on_anticoag = any("warfarin" in med.lower() or "apixaban" in med.lower() for med in last_enc.get('medications', []))
|
| 410 |
if has_afib:
|
| 411 |
+
evaluation += " - ✅ Patient with Atrial Fibrillation is on anticoagulation.\n" if on_anticoag \
|
| 412 |
+
else " - ❌ Atrial Fibrillation present but no anticoagulant prescribed.\n"
|
| 413 |
|
| 414 |
has_mi = any("myocardial infarction" in hx.lower() for hx in result.get('past_medical_history', []))
|
| 415 |
on_statin = any("atorvastatin" in med.lower() or "statin" in med.lower() for med in last_enc.get('medications', []))
|
| 416 |
if has_mi:
|
| 417 |
+
evaluation += " - ✅ Patient with MI history is on statin therapy.\n" if on_statin \
|
| 418 |
+
else " - ❌ Patient with MI history is not on statin therapy.\n"
|
| 419 |
|
| 420 |
has_aki = any("acute kidney injury" in dx.lower() for dx in last_enc.get('diagnosis', []))
|
| 421 |
acei_held = "hold" in last_enc.get('dr_notes', '').lower() and "lisinopril" in last_enc.get('dr_notes', '')
|
| 422 |
if has_aki:
|
| 423 |
+
evaluation += " - ✅ AKI noted and ACE inhibitor was appropriately held.\n" if acei_held \
|
| 424 |
+
else " - ⚠️ AKI present but ACE inhibitor not documented as held.\n"
|
| 425 |
|
| 426 |
evaluation += (
|
| 427 |
"\nDisclaimer: This is a simulated evaluation and not a substitute for clinical judgment.\n"
|
|
|
|
| 433 |
self.model_name = model_name
|
| 434 |
self.model_type = model_type
|
| 435 |
self._initialize_model_loader()
|
| 436 |
+
print(f"✅ Model updated to: {model_name} ({model_type})")
|
| 437 |
|
| 438 |
def get_model_info(self) -> dict:
|
| 439 |
"""Get information about the current model"""
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/phi_scrubber.py
RENAMED
|
@@ -7,14 +7,13 @@ from functools import wraps
|
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
| 10 |
-
|
| 11 |
def log_execution_time():
|
| 12 |
def decorator(func):
|
| 13 |
@wraps(func)
|
| 14 |
def wrapper(*args, **kwargs):
|
| 15 |
start = time.time()
|
| 16 |
result = func(*args, **kwargs)
|
| 17 |
-
logger.debug(f"
|
| 18 |
return result
|
| 19 |
return wrapper
|
| 20 |
return decorator
|
|
@@ -92,7 +91,7 @@ class MedicalTextUtils:
|
|
| 92 |
category_times[cat] = category_times.get(cat, 0) + elapsed
|
| 93 |
|
| 94 |
for cat, details in grouped.items():
|
| 95 |
-
logger.info(f"
|
| 96 |
|
| 97 |
return [{"category": k, "detail": v} for k, v in grouped.items()]
|
| 98 |
|
|
@@ -141,7 +140,7 @@ class MedicalTextUtils:
|
|
| 141 |
json_start = text.index('[')
|
| 142 |
json_text = text[json_start:]
|
| 143 |
except ValueError:
|
| 144 |
-
logger.warning("
|
| 145 |
return []
|
| 146 |
|
| 147 |
try:
|
|
@@ -166,7 +165,7 @@ class MedicalTextUtils:
|
|
| 166 |
obj = json.loads(obj_str)
|
| 167 |
extracted.append(obj)
|
| 168 |
except Exception as e:
|
| 169 |
-
logger.error(f"
|
| 170 |
obj_start = None
|
| 171 |
return extracted
|
| 172 |
|
|
@@ -200,7 +199,7 @@ class MedicalTextUtils:
|
|
| 200 |
- Radiology
|
| 201 |
- Doctor Note
|
| 202 |
|
| 203 |
-
If it doesn
|
| 204 |
|
| 205 |
Text:
|
| 206 |
{chunk}
|
|
@@ -215,7 +214,7 @@ class MedicalTextUtils:
|
|
| 215 |
do_sample=True,
|
| 216 |
temperature=0.3
|
| 217 |
)[0]["generated_text"]
|
| 218 |
-
logger.info(f"
|
| 219 |
return idx, output
|
| 220 |
except Exception as e:
|
| 221 |
logger.error("Error processing chunk %d: %s", idx, e)
|
|
|
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
|
|
|
| 10 |
def log_execution_time():
|
| 11 |
def decorator(func):
|
| 12 |
@wraps(func)
|
| 13 |
def wrapper(*args, **kwargs):
|
| 14 |
start = time.time()
|
| 15 |
result = func(*args, **kwargs)
|
| 16 |
+
logger.debug(f"⏱ {func.__name__} executed in {time.time() - start:.4f}s")
|
| 17 |
return result
|
| 18 |
return wrapper
|
| 19 |
return decorator
|
|
|
|
| 91 |
category_times[cat] = category_times.get(cat, 0) + elapsed
|
| 92 |
|
| 93 |
for cat, details in grouped.items():
|
| 94 |
+
logger.info(f"📂 Category '{cat}': {len(details)} items, time taken: {category_times[cat]:.4f}s")
|
| 95 |
|
| 96 |
return [{"category": k, "detail": v} for k, v in grouped.items()]
|
| 97 |
|
|
|
|
| 140 |
json_start = text.index('[')
|
| 141 |
json_text = text[json_start:]
|
| 142 |
except ValueError:
|
| 143 |
+
logger.warning("⚠ '[' not found in output")
|
| 144 |
return []
|
| 145 |
|
| 146 |
try:
|
|
|
|
| 165 |
obj = json.loads(obj_str)
|
| 166 |
extracted.append(obj)
|
| 167 |
except Exception as e:
|
| 168 |
+
logger.error(f"❌ Invalid JSON object: {e}")
|
| 169 |
obj_start = None
|
| 170 |
return extracted
|
| 171 |
|
|
|
|
| 199 |
- Radiology
|
| 200 |
- Doctor Note
|
| 201 |
|
| 202 |
+
If it doesn’t fit, create a new category.
|
| 203 |
|
| 204 |
Text:
|
| 205 |
{chunk}
|
|
|
|
| 214 |
do_sample=True,
|
| 215 |
temperature=0.3
|
| 216 |
)[0]["generated_text"]
|
| 217 |
+
logger.info(f"📤 Output from chunk {idx}: {output}...")
|
| 218 |
return idx, output
|
| 219 |
except Exception as e:
|
| 220 |
logger.error("Error processing chunk %d: %s", idx, e)
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/summarizer.py
RENAMED
|
@@ -124,7 +124,7 @@ class SummarizerAgent:
|
|
| 124 |
summary_text = summary_text.strip()
|
| 125 |
|
| 126 |
full_summary += summary_text + "\n\n" # Concatenate summaries with spacing
|
| 127 |
-
|
| 128 |
summary = full_summary.strip()
|
| 129 |
|
| 130 |
# Ensure required fields are included
|
|
|
|
| 124 |
summary_text = summary_text.strip()
|
| 125 |
|
| 126 |
full_summary += summary_text + "\n\n" # Concatenate summaries with spacing
|
| 127 |
+
|
| 128 |
summary = full_summary.strip()
|
| 129 |
|
| 130 |
# Ensure required fields are included
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/text_extractor.py
RENAMED
|
@@ -38,20 +38,13 @@ class TextExtractorAgent:
|
|
| 38 |
@staticmethod
|
| 39 |
def extract_text_from_image(filepath):
|
| 40 |
image = cv2.imread(filepath)
|
| 41 |
-
if image is None:
|
| 42 |
-
return None
|
| 43 |
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
| 44 |
_, processed = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
| 45 |
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
|
| 46 |
processed_path = temp_file.name
|
| 47 |
cv2.imwrite(processed_path, processed)
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
finally:
|
| 51 |
-
try:
|
| 52 |
-
os.remove(processed_path)
|
| 53 |
-
except Exception:
|
| 54 |
-
pass
|
| 55 |
return text.strip() or None
|
| 56 |
|
| 57 |
@staticmethod
|
|
@@ -71,3 +64,120 @@ class TextExtractorAgent:
|
|
| 71 |
for df in dfs.values()
|
| 72 |
])
|
| 73 |
return text.strip() or None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
@staticmethod
|
| 39 |
def extract_text_from_image(filepath):
|
| 40 |
image = cv2.imread(filepath)
|
|
|
|
|
|
|
| 41 |
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
| 42 |
_, processed = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
| 43 |
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
|
| 44 |
processed_path = temp_file.name
|
| 45 |
cv2.imwrite(processed_path, processed)
|
| 46 |
+
text = pytesseract.image_to_string(Image.open(processed_path), lang='eng')
|
| 47 |
+
os.remove(processed_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
return text.strip() or None
|
| 49 |
|
| 50 |
@staticmethod
|
|
|
|
| 64 |
for df in dfs.values()
|
| 65 |
])
|
| 66 |
return text.strip() or None
|
| 67 |
+
|
| 68 |
+
# import pytesseract
|
| 69 |
+
# import cv2
|
| 70 |
+
# from PIL import Image
|
| 71 |
+
# from docx import Document
|
| 72 |
+
# from PyPDF2 import PdfReader
|
| 73 |
+
# from pdf2image import convert_from_path
|
| 74 |
+
# from concurrent.futures import ThreadPoolExecutor
|
| 75 |
+
# import tempfile
|
| 76 |
+
# import os
|
| 77 |
+
# import logging
|
| 78 |
+
# import numpy as np
|
| 79 |
+
|
| 80 |
+
# logger = logging.getLogger(__name__)
|
| 81 |
+
|
| 82 |
+
# class TextExtractorAgent:
|
| 83 |
+
# @staticmethod
|
| 84 |
+
# def extract_text(filepath, ext, password=None):
|
| 85 |
+
# try:
|
| 86 |
+
# ext = ext.lower()
|
| 87 |
+
# if ext == "pdf":
|
| 88 |
+
# return TextExtractorAgent.extract_text_from_pdf(filepath, password)
|
| 89 |
+
# elif ext in {"jpg", "jpeg", "png"}:
|
| 90 |
+
# return TextExtractorAgent.extract_text_from_image(filepath)
|
| 91 |
+
# elif ext == "docx":
|
| 92 |
+
# return TextExtractorAgent.extract_text_from_docx(filepath)
|
| 93 |
+
# return None
|
| 94 |
+
# except Exception as e:
|
| 95 |
+
# logger.error(f"Text extraction failed: {e}")
|
| 96 |
+
# return None
|
| 97 |
+
|
| 98 |
+
# @staticmethod
|
| 99 |
+
# def is_blurred(image_path, variance_threshold=150):
|
| 100 |
+
# try:
|
| 101 |
+
# image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
|
| 102 |
+
# if image is None:
|
| 103 |
+
# logger.error(f"Unable to read image: {image_path}")
|
| 104 |
+
# return True
|
| 105 |
+
|
| 106 |
+
# laplacian_var = cv2.Laplacian(image, cv2.CV_64F).var()
|
| 107 |
+
# edges = cv2.Canny(image, 50, 150)
|
| 108 |
+
# edge_density = np.mean(edges)
|
| 109 |
+
|
| 110 |
+
# logger.info(f"Laplacian: {laplacian_var:.2f}, Edge Density: {edge_density:.2f}")
|
| 111 |
+
# is_blurry = laplacian_var < variance_threshold and edge_density < 10
|
| 112 |
+
|
| 113 |
+
# if is_blurry:
|
| 114 |
+
# logger.warning(f"Image '{image_path}' flagged as blurry.")
|
| 115 |
+
# return is_blurry
|
| 116 |
+
# except Exception as e:
|
| 117 |
+
# logger.exception(f"Error checking blur for '{image_path}': {e}")
|
| 118 |
+
# return True
|
| 119 |
+
|
| 120 |
+
# @staticmethod
|
| 121 |
+
# def extract_text_from_image(filepath):
|
| 122 |
+
# try:
|
| 123 |
+
# if TextExtractorAgent.is_blurred(filepath):
|
| 124 |
+
# logger.warning(f"OCR skipped: '{filepath}' is too blurry.")
|
| 125 |
+
# return "Image is too blurry, OCR failed."
|
| 126 |
+
|
| 127 |
+
# image = cv2.imread(filepath)
|
| 128 |
+
# gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
| 129 |
+
# gray = cv2.GaussianBlur(gray, (5, 5), 0)
|
| 130 |
+
# gray = cv2.adaptiveThreshold(
|
| 131 |
+
# gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
|
| 132 |
+
# )
|
| 133 |
+
# gray = cv2.dilate(gray, np.ones((2, 2), np.uint8), iterations=1)
|
| 134 |
+
|
| 135 |
+
# with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
|
| 136 |
+
# processed_path = temp_file.name
|
| 137 |
+
# cv2.imwrite(processed_path, gray)
|
| 138 |
+
|
| 139 |
+
# text = pytesseract.image_to_string(Image.open(processed_path), lang="eng").strip()
|
| 140 |
+
# os.remove(processed_path)
|
| 141 |
+
|
| 142 |
+
# if len(text.split()) < 5:
|
| 143 |
+
# logger.warning(f"Too little OCR output from '{filepath}'.")
|
| 144 |
+
# return "OCR failed to extract meaningful text."
|
| 145 |
+
|
| 146 |
+
# return text
|
| 147 |
+
# except Exception as e:
|
| 148 |
+
# logger.exception(f"OCR failed for image '{filepath}': {e}")
|
| 149 |
+
# return "Failed to extract text"
|
| 150 |
+
|
| 151 |
+
# @staticmethod
|
| 152 |
+
# def extract_text_from_pdf(filepath, password=None):
|
| 153 |
+
# try:
|
| 154 |
+
# reader = PdfReader(filepath)
|
| 155 |
+
# if reader.is_encrypted:
|
| 156 |
+
# if not password:
|
| 157 |
+
# return {"error": "File is password-protected."}, 401
|
| 158 |
+
# if reader.decrypt(password) == 0:
|
| 159 |
+
# return {"error": "Invalid password."}, 403
|
| 160 |
+
|
| 161 |
+
# text = "\n".join([page.extract_text() or "" for page in reader.pages])
|
| 162 |
+
# if text.strip():
|
| 163 |
+
# return text.strip(), 200
|
| 164 |
+
|
| 165 |
+
# logger.info("Falling back to OCR for PDF.")
|
| 166 |
+
# images = convert_from_path(filepath)
|
| 167 |
+
# with ThreadPoolExecutor(max_workers=5) as pool:
|
| 168 |
+
# ocr_text = list(pool.map(lambda img: pytesseract.image_to_string(img, lang="eng"), images))
|
| 169 |
+
# full_text = "\n".join(ocr_text).strip()
|
| 170 |
+
# return (full_text, 200) if full_text else ("No text found", 415)
|
| 171 |
+
# except Exception as e:
|
| 172 |
+
# logger.exception(f"PDF processing error: {filepath}")
|
| 173 |
+
# return "Failed to extract text"
|
| 174 |
+
|
| 175 |
+
# @staticmethod
|
| 176 |
+
# def extract_text_from_docx(filepath):
|
| 177 |
+
# try:
|
| 178 |
+
# doc = Document(filepath)
|
| 179 |
+
# text = "\n".join([para.text for para in doc.paragraphs])
|
| 180 |
+
# return text.strip() or None
|
| 181 |
+
# except Exception as e:
|
| 182 |
+
# logger.exception(f"Failed to extract text from DOCX: {filepath}")
|
| 183 |
+
# return None
|
ai_med_extract/api/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# ai_med_extract/api/__init__.py
|
ai_med_extract/api/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (177 Bytes). View file
|
|
|
ai_med_extract/api/__pycache__/routes.cpython-311.pyc
ADDED
|
Binary file (90 kB). View file
|
|
|
ai_med_extract/api/model_management.py
ADDED
|
@@ -0,0 +1,397 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Dynamic Model Management API
|
| 3 |
+
Allows runtime loading, switching, and management of different model types
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from flask import Blueprint, request, jsonify
|
| 7 |
+
import logging
|
| 8 |
+
from typing import Dict, Any, Optional
|
| 9 |
+
import torch
|
| 10 |
+
|
| 11 |
+
from ..utils.model_manager import model_manager
|
| 12 |
+
from ..utils.model_config import (
|
| 13 |
+
get_default_model,
|
| 14 |
+
get_fallback_model,
|
| 15 |
+
detect_model_type,
|
| 16 |
+
validate_model_config,
|
| 17 |
+
get_model_info
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
# Configure logging
|
| 21 |
+
logging.basicConfig(level=logging.INFO)
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
# Create Blueprint
|
| 25 |
+
model_management_bp = Blueprint('model_management', __name__, url_prefix='/api/models')
|
| 26 |
+
|
| 27 |
+
@model_management_bp.route('/load', methods=['POST'])
def load_model():
    """
    Load a new model with specified name and type

    Request body:
    {
        "model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
        "model_type": "gguf",
        "filename": "Phi-3-mini-4k-instruct-q4.gguf",  # Optional for GGUF
        "force_reload": false  # Optional, force reload even if cached
    }

    Returns:
        (response, status): 200 with ``model_info`` and ``validation`` on
        success; 400 for a missing/invalid body, a missing ``model_name`` or
        a failed validation; 500 when loading raises.
    """
    import time  # function-scope import: this module does not import time

    try:
        # silent=True turns a malformed or non-JSON body into None, so it is
        # answered with 400 below instead of raising inside this try block
        # and being reported as a 500 by the generic handler.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "No data provided"}), 400
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        model_name = data.get("model_name")
        model_type = data.get("model_type")
        filename = data.get("filename")
        force_reload = data.get("force_reload", False)

        if not model_name:
            return jsonify({"error": "model_name is required"}), 400

        # Auto-detect model type if not provided
        if not model_type:
            model_type = detect_model_type(model_name)
            logger.info(f"Auto-detected model type: {model_type} for {model_name}")

        # Validate model configuration
        validation = validate_model_config(model_name, model_type)
        if not validation["valid"]:
            return jsonify({
                "error": "Invalid model configuration",
                "validation": validation
            }), 400

        # Time the load with a wall clock so the figure is also reported on
        # CPU-only hosts (CUDA events left it as None there).
        started = time.perf_counter()
        loader = model_manager.get_model_loader(model_name, model_type, filename, force_reload)
        if torch.cuda.is_available():
            # Wait for queued GPU work so it is included in the measurement.
            torch.cuda.synchronize()
        load_time = time.perf_counter() - started

        # Copy before annotating so the loader's own info mapping is not
        # mutated (assumes get_model_info() returns a mapping - the original
        # code already assigned a key on it).
        model_info = dict(loader.get_model_info())
        model_info["load_time_seconds"] = load_time

        return jsonify({
            "success": True,
            "message": f"Model {model_name} ({model_type}) loaded successfully",
            "model_info": model_info,
            "validation": validation
        }), 200

    except Exception as e:
        logger.error(f"Failed to load model: {str(e)}", exc_info=True)
        return jsonify({
            "success": False,
            "error": f"Model loading failed: {str(e)}"
        }), 500
|
| 99 |
+
|
| 100 |
+
@model_management_bp.route('/generate', methods=['POST'])
def generate_text():
    """
    Generate text using a specific model

    Request body:
    {
        "model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
        "model_type": "gguf",
        "filename": "Phi-3-mini-4k-instruct-q4.gguf",  # Optional for GGUF
        "prompt": "Generate a medical summary for...",
        "max_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.95
    }

    Returns:
        (response, status): 200 with the generated text and its timing on
        success; 400 for a missing/invalid body or missing required fields;
        500 when generation raises.
    """
    import time  # function-scope import: this module does not import time

    try:
        # silent=True: a malformed or non-JSON body becomes None and gets a
        # 400 below rather than raising here and surfacing as a 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "No data provided"}), 400
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        model_name = data.get("model_name")
        model_type = data.get("model_type")
        filename = data.get("filename")
        prompt = data.get("prompt")

        if not (model_name and prompt):
            return jsonify({"error": "model_name and prompt are required"}), 400

        # Auto-detect model type if not provided
        if not model_type:
            model_type = detect_model_type(model_name)

        # Every remaining body key is forwarded as a generation keyword.
        # NOTE(review): these keys come straight from the client; consider an
        # allow-list once the accepted generation parameters are settled.
        reserved = ("model_name", "model_type", "filename", "prompt")
        generation_kwargs = {k: v for k, v in data.items() if k not in reserved}

        # Wall-clock timing so the duration is reported on CPU-only hosts too
        # (CUDA events left it as None there).
        started = time.perf_counter()
        generated_text = model_manager.generate_text(
            model_name,
            model_type,
            prompt,
            filename,
            **generation_kwargs
        )
        if torch.cuda.is_available():
            # Wait for queued GPU work so it is included in the measurement.
            torch.cuda.synchronize()
        generation_time = time.perf_counter() - started

        return jsonify({
            "success": True,
            "generated_text": generated_text,
            "model_name": model_name,
            "model_type": model_type,
            "generation_time_seconds": generation_time,
            "text_length": len(generated_text)
        }), 200

    except Exception as e:
        logger.error(f"Text generation failed: {str(e)}", exc_info=True)
        return jsonify({
            "success": False,
            "error": f"Text generation failed: {str(e)}"
        }), 500
|
| 170 |
+
|
| 171 |
+
@model_management_bp.route('/info', methods=['GET'])
def get_model_information():
    """
    Describe one model, or list the models currently loaded.

    Query parameters:
    - model_name: optional; when given, info and validation for that model
    - model_type: optional; the type of ``model_name`` (auto-detected when
      absent), or a filter on the listing when no name is given
    """
    try:
        requested_name = request.args.get("model_name")
        requested_type = request.args.get("model_type")

        if not requested_name:
            # No specific model asked for: report everything loaded,
            # optionally narrowed to one model type.
            models = model_manager.list_loaded_models()
            if requested_type:
                models = {
                    key: entry for key, entry in models.items()
                    if entry.get("model_type") == requested_type
                }
            return jsonify({
                "success": True,
                "loaded_models": models,
                "total_models": len(models)
            }), 200

        # Specific model: fall back to auto-detection for its type.
        resolved_type = requested_type or detect_model_type(requested_name)
        validation = validate_model_config(requested_name, resolved_type)
        model_info = get_model_info(requested_name, resolved_type)

        return jsonify({
            "success": True,
            "model_info": model_info,
            "validation": validation
        }), 200

    except Exception as e:
        logger.error(f"Failed to get model information: {str(e)}", exc_info=True)
        return jsonify({
            "success": False,
            "error": f"Failed to get model information: {str(e)}"
        }), 500
|
| 220 |
+
|
| 221 |
+
@model_management_bp.route('/defaults', methods=['GET'])
def get_default_models():
    """
    Return the configured default and Spaces-optimized model tables.
    """
    try:
        # Imported on demand, inside the try, so an import failure is
        # reported through the JSON error response below.
        from ..utils.model_config import DEFAULT_MODELS, SPACES_OPTIMIZED_MODELS

        payload = {
            "success": True,
            "default_models": DEFAULT_MODELS,
            "spaces_optimized_models": SPACES_OPTIMIZED_MODELS
        }
        return jsonify(payload), 200

    except Exception as e:
        logger.error(f"Failed to get default models: {str(e)}", exc_info=True)
        failure = {
            "success": False,
            "error": f"Failed to get default models: {str(e)}"
        }
        return jsonify(failure), 500
|
| 241 |
+
|
| 242 |
+
@model_management_bp.route('/clear_cache', methods=['POST'])
def clear_model_cache():
    """
    Empty the model cache and report how many models it held.
    """
    try:
        # Count the cached models first; the figure is reported back after
        # the cache has been emptied.
        models_before = len(model_manager.list_loaded_models())

        model_manager.clear_cache()

        payload = {
            "success": True,
            "message": "Model cache cleared successfully",
            "cleared_models": models_before,
            "memory_freed": "GPU and CPU memory cleared"
        }
        return jsonify(payload), 200

    except Exception as e:
        logger.error(f"Failed to clear cache: {str(e)}", exc_info=True)
        failure = {
            "success": False,
            "error": f"Failed to clear cache: {str(e)}"
        }
        return jsonify(failure), 500
|
| 268 |
+
|
| 269 |
+
@model_management_bp.route('/switch', methods=['POST'])
def switch_model():
    """
    Switch the model used by a specific agent

    Request body:
    {
        "agent_name": "patient_summarizer",
        "model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
        "model_type": "gguf",
        "filename": "Phi-3-mini-4k-instruct-q4.gguf"  # Optional for GGUF
    }

    Returns:
        (response, status): 200 when the agent was updated; 400 for a
        missing/invalid body, missing fields, failed validation or an agent
        that cannot switch models; 404 for an unknown agent; 500 on failure.
    """
    try:
        # silent=True: a malformed or non-JSON body becomes None and gets a
        # 400 below rather than raising here and surfacing as a 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "No data provided"}), 400
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        agent_name = data.get("agent_name")
        model_name = data.get("model_name")
        model_type = data.get("model_type")
        filename = data.get("filename")

        if not (agent_name and model_name):
            return jsonify({"error": "agent_name and model_name are required"}), 400

        # Auto-detect model type if not provided
        if not model_type:
            model_type = detect_model_type(model_name)

        # Validate model configuration
        validation = validate_model_config(model_name, model_type)
        if not validation["valid"]:
            return jsonify({
                "error": "Invalid model configuration",
                "validation": validation
            }), 400

        # Get the agent from the current app context
        from flask import current_app
        agents = getattr(current_app, 'agents', {})

        if agent_name not in agents:
            return jsonify({
                "error": f"Agent '{agent_name}' not found",
                "available_agents": list(agents.keys())
            }), 404

        agent = agents[agent_name]

        # Update the agent's model if it supports it
        if hasattr(agent, 'update_model'):
            # NOTE(review): `filename` is not forwarded on this path; confirm
            # whether update_model() should receive it for GGUF models.
            agent.update_model(model_name, model_type)
            message = f"Agent '{agent_name}' model updated to {model_name} ({model_type})"
        elif hasattr(agent, 'model_loader'):
            # Fall back to swapping the loader object directly; the
            # module-level model_manager is used (no local re-import needed).
            try:
                agent.model_loader = model_manager.get_model_loader(model_name, model_type, filename)
                message = f"Agent '{agent_name}' model loader updated to {model_name} ({model_type})"
            except Exception as e:
                # Log with traceback; previously this failure was only
                # returned to the client and left no server-side trace.
                logger.error(f"Failed to update agent model loader: {str(e)}", exc_info=True)
                return jsonify({
                    "error": f"Failed to update agent model loader: {str(e)}"
                }), 500
        else:
            return jsonify({
                "error": f"Agent '{agent_name}' does not support model switching"
            }), 400

        return jsonify({
            "success": True,
            "message": message,
            "agent_name": agent_name,
            "model_name": model_name,
            "model_type": model_type,
            "validation": validation
        }), 200

    except Exception as e:
        logger.error(f"Failed to switch model: {str(e)}", exc_info=True)
        return jsonify({
            "success": False,
            "error": f"Failed to switch model: {str(e)}"
        }), 500
|
| 353 |
+
|
| 354 |
+
@model_management_bp.route('/health', methods=['GET'])
def model_health_check():
    """
    Health check for the model management system

    Returns:
        (response, status): 200 with the loaded-model count, GPU details and
        a Unix timestamp when the model manager responds; 500 with the error
        text otherwise.
    """
    import time  # function-scope import: this module does not import time

    try:
        # Check if model manager is accessible
        loaded_models = model_manager.list_loaded_models()

        # Check GPU memory if available
        if torch.cuda.is_available():
            gpu_info = {
                "available": True,
                "device_count": torch.cuda.device_count(),
                "current_device": torch.cuda.current_device(),
                "memory_allocated": f"{torch.cuda.memory_allocated() / 1024**3:.2f} GB",
                "memory_reserved": f"{torch.cuda.memory_reserved() / 1024**3:.2f} GB"
            }
        else:
            gpu_info = {"available": False}

        return jsonify({
            "success": True,
            "status": "healthy",
            "model_manager": "operational",
            "loaded_models_count": len(loaded_models),
            "gpu_info": gpu_info,
            # Seconds since the Unix epoch. The previous value asked for the
            # elapsed time between two CUDA events that were never recorded,
            # which is not a timestamp and made the check fail on GPU hosts.
            "timestamp": time.time()
        }), 200

    except Exception as e:
        logger.error(f"Health check failed: {str(e)}", exc_info=True)
        return jsonify({
            "success": False,
            "status": "unhealthy",
            "error": f"Health check failed: {str(e)}"
        }), 500
|
| 392 |
+
|
| 393 |
+
# Register the blueprint
|
| 394 |
+
def register_model_management_routes(app):
    """Attach the model management blueprint to ``app`` and log that it was done."""
    blueprint = model_management_bp
    app.register_blueprint(blueprint)
    logger.info("Model management routes registered successfully")
|
services/ai-service/src/ai_med_extract/api/routes_fastapi.py → ai_med_extract/api/routes.py
RENAMED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ai_med_extract/app.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import logging
|
| 3 |
+
from flask import Flask, jsonify
|
| 4 |
+
from flask_cors import CORS
|
| 5 |
+
import whisper
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
from .agents.text_extractor import TextExtractorAgent
|
| 8 |
+
from .agents.phi_scrubber import PHIScrubberAgent
|
| 9 |
+
from .agents.phi_scrubber import MedicalTextUtils
|
| 10 |
+
from .agents.summarizer import SummarizerAgent
|
| 11 |
+
from .agents.medical_data_extractor import MedicalDataExtractorAgent
|
| 12 |
+
from .agents.medical_data_extractor import MedicalDocDataExtractorAgent
|
| 13 |
+
from .agents.patient_summary_agent import PatientSummarizerAgent
|
| 14 |
+
from .utils.model_manager import model_manager
|
| 15 |
+
import torch
|
| 16 |
+
torch.set_num_threads(1) # CPU efficiency for HF Spaces
|
| 17 |
+
|
| 18 |
+
# Load environment variables
|
| 19 |
+
load_dotenv()
|
| 20 |
+
|
| 21 |
+
# Configure logging
|
| 22 |
+
logging.basicConfig(
|
| 23 |
+
level=logging.INFO,
|
| 24 |
+
format="%(asctime)s - %(levelname)s - %(message)s",
|
| 25 |
+
handlers=[
|
| 26 |
+
logging.StreamHandler(),
|
| 27 |
+
logging.FileHandler('/tmp/app.log')
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
app = Flask(__name__)
|
| 32 |
+
CORS(app)
|
| 33 |
+
|
| 34 |
+
# Configure upload directory with safe fallbacks (avoid creating /data at import time)
|
| 35 |
+
def _resolve_upload_dir() -> str:
|
| 36 |
+
try:
|
| 37 |
+
# Prefer /data/uploads if it already exists and is writable
|
| 38 |
+
data_dir = '/data/uploads'
|
| 39 |
+
if os.path.isdir('/data') and (os.path.isdir(data_dir) or os.access('/data', os.W_OK)):
|
| 40 |
+
os.makedirs(data_dir, exist_ok=True)
|
| 41 |
+
return data_dir
|
| 42 |
+
except Exception:
|
| 43 |
+
pass
|
| 44 |
+
# Fallback to /tmp/uploads which is always writable on Spaces
|
| 45 |
+
tmp_dir = '/tmp/uploads'
|
| 46 |
+
os.makedirs(tmp_dir, exist_ok=True)
|
| 47 |
+
return tmp_dir
|
| 48 |
+
|
| 49 |
+
app.config['UPLOAD_FOLDER'] = _resolve_upload_dir()
|
| 50 |
+
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100 MB max file size
|
| 51 |
+
|
| 52 |
+
# Set cache directories
|
| 53 |
+
CACHE_DIRS = {
|
| 54 |
+
'HF_HOME': '/tmp/huggingface',
|
| 55 |
+
'XDG_CACHE_HOME': '/tmp',
|
| 56 |
+
'TORCH_HOME': '/tmp/torch',
|
| 57 |
+
'WHISPER_CACHE': '/tmp/whisper'
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
for env_var, path in CACHE_DIRS.items():
|
| 61 |
+
os.environ[env_var] = path
|
| 62 |
+
os.makedirs(path, exist_ok=True)
|
| 63 |
+
|
| 64 |
+
# WhisperModelLoader for audio transcription (CPU-only)
|
| 65 |
+
class WhisperModelLoader:
    """Process-wide holder for a lazily loaded Whisper "tiny" model on CPU."""

    _instance = None

    def __init__(self):
        # The model is loaded on the first load()/transcribe() call.
        self._model = None

    @staticmethod
    def get_instance():
        """Return the shared loader, creating it on first use."""
        shared = WhisperModelLoader._instance
        if shared is None:
            shared = WhisperModelLoader()
            WhisperModelLoader._instance = shared
        return shared

    def load(self):
        """Return the Whisper model, loading it once; load errors are logged and re-raised."""
        if self._model is not None:
            return self._model

        try:
            logging.info("Loading Whisper tiny model (CPU)...")
            cache_root = os.environ.get('WHISPER_CACHE', '/tmp/whisper')
            self._model = whisper.load_model(
                "tiny",
                device="cpu",
                download_root=cache_root
            )
            logging.info("Whisper model loaded successfully")
        except Exception as e:
            logging.error(f"Failed to load Whisper model: {str(e)}", exc_info=True)
            raise
        return self._model

    def transcribe(self, audio_path):
        """Transcribe ``audio_path`` with the loaded model, with FP16 disabled."""
        whisper_model = self.load()
        return whisper_model.transcribe(audio_path, fp16=False)
|
| 95 |
+
|
| 96 |
+
# Initialize agents with unified model manager (CPU-friendly small models for HF Spaces)
|
| 97 |
+
try:
    # Agents that need no model.
    text_extractor_agent = TextExtractorAgent()
    phi_scrubber_agent = PHIScrubberAgent()

    # Summarizer: facebook/bart-base through the model manager, or the
    # fallback pipeline when that loader cannot be obtained.
    try:
        summ_loader = model_manager.get_model_loader("facebook/bart-base", "summarization")
        summarizer_agent = SummarizerAgent(summ_loader)
        logging.info("SummarizerAgent initialized with bart-base")
    except Exception as e:
        logging.warning(f"Summarizer fallback: {e}")
        from .utils.model_loader_gguf import create_fallback_pipeline

        class FallbackSummarizer:
            def generate(self, text, **kwargs):
                return create_fallback_pipeline().generate_full_summary(text)

        summarizer_agent = SummarizerAgent(FallbackSummarizer())

    # Medical data extractor: distilgpt2 text generation, or the fallback
    # pipeline when loading fails.
    try:
        med_loader = model_manager.get_model_loader("distilgpt2", "text-generation")
        med_generator = med_loader.load()
        medical_data_extractor_agent = MedicalDataExtractorAgent(med_generator)
        logging.info("MedicalDataExtractorAgent initialized with distilgpt2")
    except Exception as e:
        logging.warning(f"Medical extractor fallback: {e}")
        from .utils.model_loader_gguf import create_fallback_pipeline

        class FallbackExtractor:
            def generate(self, prompt, **kwargs):
                return create_fallback_pipeline().generate(prompt)

        medical_data_extractor_agent = MedicalDataExtractorAgent(FallbackExtractor())

    # Patient summarizer: distilbart first, bart-base as the second choice.
    try:
        patient_summarizer_agent = PatientSummarizerAgent(
            model_name="sshleifer/distilbart-cnn-6-6",
            model_type="summarization"
        )
        logging.info("PatientSummarizerAgent initialized with distilbart")
    except Exception as e:
        logging.warning(f"Patient summarizer fallback: {e}")
        patient_summarizer_agent = PatientSummarizerAgent(
            model_name="facebook/bart-base",
            model_type="summarization"
        )

    # Everything the route layer needs, keyed by agent name.
    agents = {
        "text_extractor": text_extractor_agent,
        "phi_scrubber": phi_scrubber_agent,
        "summarizer": summarizer_agent,
        "medical_data_extractor": medical_data_extractor_agent,
        "whisper_model": WhisperModelLoader.get_instance(),
        "patient_summarizer": patient_summarizer_agent,
        "model_manager": model_manager,
    }

    from .api.routes import register_routes
    register_routes(app, agents)

    logging.info("Application initialized successfully with CPU-friendly models")

except Exception as e:
    # Any failure above aborts start-up after being logged with a traceback.
    logging.error(f"Failed to initialize application: {str(e)}", exc_info=True)
    raise
|
| 165 |
+
|
| 166 |
+
@app.errorhandler(Exception)
def handle_error(error):
    """Turn an exception raised while handling a request into a JSON response.

    HTTP errors (for example 404 or 405) keep their own status code; any
    other exception is logged with its traceback and answered with a 500.
    """
    # A catch-all handler also receives HTTP errors. They are recognised by
    # an integer `code` plus a `get_response` method, which avoids adding a
    # new import here; reporting them as 500 would hide the real status.
    status = getattr(error, "code", None)
    if isinstance(status, int) and 400 <= status < 600 and hasattr(error, "get_response"):
        return jsonify({
            "error": str(error),
            "status": "error"
        }), status

    logging.error(f"Unhandled error: {str(error)}", exc_info=True)
    return jsonify({
        "error": str(error),
        "status": "error"
    }), 500
|
| 173 |
+
|
| 174 |
+
if __name__ == "__main__":
|
| 175 |
+
app.run(host="0.0.0.0", port=7860, debug=False)
|
ai_med_extract/gradio_app.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
from .app import agents
|
| 4 |
+
import tempfile
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
logging.basicConfig(level=logging.INFO)
|
| 8 |
+
|
| 9 |
+
def process_document(file, process_type):
    """Run one processing step over an uploaded document.

    Args:
        file: uploaded file object; its ``name`` supplies the extension and
            ``read()`` supplies the content.
        process_type: "text_extraction", "medical_data", "summarization" or
            "audio_transcription"; any other value produces an empty result.

    Returns:
        dict: the results for the selected step, or ``{"error": message}``
        when anything fails.
    """
    temp_path = None
    try:
        # Copy the upload to a temporary file the agents can open by path.
        suffix = os.path.splitext(file.name)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            # Remember the path before writing so a failed write is cleaned up too.
            temp_path = temp_file.name
            temp_file.write(file.read())

        results = {}

        if process_type == "text_extraction":
            results["extracted_text"] = agents["text_extractor"].extract_text(temp_path)
            results["phi_scrubbed"] = agents["phi_scrubber"].scrub_phi(results["extracted_text"])

        elif process_type == "medical_data":
            text = agents["text_extractor"].extract_text(temp_path)
            results["medical_data"] = agents["medical_data_extractor"].extract_medical_data(text)

        elif process_type == "summarization":
            text = agents["text_extractor"].extract_text(temp_path)
            results["summary"] = agents["summarizer"].summarize(text)

        elif process_type == "audio_transcription":
            results["transcription"] = agents["whisper_model"].transcribe(temp_path)

        return results

    except Exception as e:
        logging.error(f"Error processing document: {str(e)}", exc_info=True)
        return {"error": str(e)}

    finally:
        # Always remove the temporary copy, including when processing failed;
        # previously a failure left the uploaded document behind on disk.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                logging.warning(f"Could not remove temporary file '{temp_path}': {cleanup_error}")
|
| 41 |
+
|
| 42 |
+
# Create the Gradio interface
|
| 43 |
+
def create_interface():
    """Build the Gradio Blocks UI: upload and step selection on the left, JSON results on the right."""
    step_choices = ["text_extraction", "medical_data", "summarization", "audio_transcription"]

    with gr.Blocks(title="Medical Document Processor") as interface:
        gr.Markdown("# Medical Document Processor")
        gr.Markdown("Upload your medical document and select the processing type.")

        with gr.Row():
            # Left column: inputs and the trigger button.
            with gr.Column():
                file_input = gr.File(label="Upload Document")
                process_type = gr.Radio(choices=step_choices, label="Processing Type")
                process_btn = gr.Button("Process Document")

            # Right column: results rendered as JSON.
            with gr.Column():
                output = gr.JSON(label="Results")

        # Clicking the button runs process_document on the two inputs.
        process_btn.click(fn=process_document, inputs=[file_input, process_type], outputs=output)

    return interface
|
| 67 |
+
|
| 68 |
+
# Create and launch the interface
|
| 69 |
+
interface = create_interface()
|
| 70 |
+
interface.launch(server_name="0.0.0.0", server_port=7860)
|
ai_med_extract/utils/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# ai_med_extract/utils/__init__.py
|
ai_med_extract/utils/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (179 Bytes). View file
|
|
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/file_utils.cpython-311.pyc
RENAMED
|
Binary files a/services/ai-service/src/ai_med_extract/utils/__pycache__/file_utils.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/file_utils.cpython-311.pyc differ
|
|
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/json_slimmer.cpython-311.pyc
RENAMED
|
Binary files a/services/ai-service/src/ai_med_extract/utils/__pycache__/json_slimmer.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/json_slimmer.cpython-311.pyc differ
|
|
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/model_loader_gguf.cpython-311.pyc
RENAMED
|
Binary files a/services/ai-service/src/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc differ
|
|
|
ai_med_extract/utils/__pycache__/model_loader_spaces.cpython-311.pyc
ADDED
|
Binary file (2.16 kB). View file
|
|
|
{services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/model_manager.cpython-311.pyc
RENAMED
|
Binary files a/services/ai-service/src/ai_med_extract/utils/__pycache__/model_manager.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_manager.cpython-311.pyc differ
|
|
|