sachinchandrankallar committed on
Commit
aba0d25
·
1 Parent(s): 7d153bf

Revert "Merge branch 'FT-DEV-17/09/2025'"

Browse files

This reverts commit 715f2878b980fc505d7114dbecf45bcd9357f2ea, reversing
changes made to 11a4a59e6aa7a42c5e9997141b04dc042803ca0c.

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env +1 -7
  2. .env.example +0 -16
  3. .huggingface.yaml +0 -1
  4. .vscode/launch.json +0 -22
  5. .vscode/settings.json +1 -2
  6. 0.41.0' +0 -0
  7. DEPLOYMENT.md +10 -103
  8. DEVELOPMENT.md +0 -377
  9. Dockerfile +1 -1
  10. FINAL_PROGRESS.md +72 -0
  11. GGUF_TROUBLESHOOTING.md +178 -0
  12. PROGRESS_UPDATE.md +32 -0
  13. README.md +12 -56
  14. README_SPACES.md +46 -0
  15. REFACTORED_README.md +463 -0
  16. TODO.md +12 -10
  17. __pycache__/ai_med_extract.cpython-311.pyc +0 -0
  18. __pycache__/test_chunking.cpython-311.pyc +0 -0
  19. __pycache__/test_summary_consistency.cpython-311.pyc +0 -0
  20. ai_med_extract.py +0 -15
  21. ai_med_extract/__init__.py +1 -0
  22. ai_med_extract/__main__.py +5 -0
  23. ai_med_extract/__pycache__/__init__.cpython-311.pyc +0 -0
  24. ai_med_extract/__pycache__/app.cpython-311.pyc +0 -0
  25. ai_med_extract/agents/__init__.py +1 -0
  26. ai_med_extract/agents/__pycache__/__init__.cpython-311.pyc +0 -0
  27. {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/medical_data_extractor.cpython-311.pyc +0 -0
  28. {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/patient_summary_agent.cpython-311.pyc +0 -0
  29. {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/phi_scrubber.cpython-311.pyc +0 -0
  30. {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/summarizer.cpython-311.pyc +0 -0
  31. {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/text_extractor.cpython-311.pyc +0 -0
  32. {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/medical_data_extractor.py +1 -1
  33. {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/patient_summary_agent.py +34 -40
  34. {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/phi_scrubber.py +6 -7
  35. {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/summarizer.py +1 -1
  36. {services/ai-service/src/ai_med_extract → ai_med_extract}/agents/text_extractor.py +119 -9
  37. ai_med_extract/api/__init__.py +1 -0
  38. ai_med_extract/api/__pycache__/__init__.cpython-311.pyc +0 -0
  39. ai_med_extract/api/__pycache__/routes.cpython-311.pyc +0 -0
  40. ai_med_extract/api/model_management.py +397 -0
  41. services/ai-service/src/ai_med_extract/api/routes_fastapi.py → ai_med_extract/api/routes.py +0 -0
  42. ai_med_extract/app.py +175 -0
  43. ai_med_extract/gradio_app.py +70 -0
  44. ai_med_extract/utils/__init__.py +1 -0
  45. ai_med_extract/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  46. {services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/file_utils.cpython-311.pyc +0 -0
  47. {services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/json_slimmer.cpython-311.pyc +0 -0
  48. {services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/model_loader_gguf.cpython-311.pyc +0 -0
  49. ai_med_extract/utils/__pycache__/model_loader_spaces.cpython-311.pyc +0 -0
  50. {services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/model_manager.cpython-311.pyc +0 -0
.env CHANGED
@@ -3,10 +3,4 @@ HF_HOME=/tmp/huggingface
3
  XDG_CACHE_HOME=/tmp
4
  TORCH_HOME=/tmp/torch
5
  WHISPER_CACHE=/tmp/whisper
6
- UPLOAD_DIR=/tmp/uploads
7
-
8
- # Redis configuration (uncomment to enable scalable features)
9
- # REDIS_URL=redis://localhost:6379/0
10
-
11
- # Database configuration for audit logging (optional)
12
- # DATABASE_URL=postgresql://username:password@localhost:5432/audit_db
 
3
  XDG_CACHE_HOME=/tmp
4
  TORCH_HOME=/tmp/torch
5
  WHISPER_CACHE=/tmp/whisper
6
+ UPLOAD_DIR=/tmp/uploads
 
 
 
 
 
 
.env.example DELETED
@@ -1,16 +0,0 @@
1
- # .env.example - Environment configuration for local development
2
-
3
- # Database configuration
4
- DATABASE_URL=postgresql://user:password@localhost:5432/hnai_db
5
-
6
- # API Key for external services
7
- API_KEY=your-api-key-here
8
-
9
- # Secret key for cryptographic signing
10
- SECRET_KEY=your-secret-key-here
11
-
12
- # Debug mode
13
- DEBUG=True
14
-
15
- # Application port
16
- PORT=8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.huggingface.yaml CHANGED
@@ -1,7 +1,6 @@
1
  runtime: docker
2
  sdk: docker
3
  python_version: "3.10"
4
- app: services/ai-service/src/ai_med_extract/app.py
5
  build:
6
  system_packages:
7
  - tesseract-ocr
 
1
  runtime: docker
2
  sdk: docker
3
  python_version: "3.10"
 
4
  build:
5
  system_packages:
6
  - tesseract-ocr
.vscode/launch.json DELETED
@@ -1,22 +0,0 @@
1
- {
2
- "version": "0.2.0",
3
- "configurations": [
4
- {
5
- "name": "Python: FastAPI (Localhost)",
6
- "type": "debugpy",
7
- "request": "launch",
8
- "module": "uvicorn",
9
- "args": [
10
- "src.ai_med_extract.app:create_app",
11
- "--factory",
12
- "--host", "127.0.0.1",
13
- "--port", "8000",
14
- "--reload"
15
- ],
16
- "cwd": "${workspaceFolder}/services/ai-service",
17
- "env": {
18
- "PYTHONPATH": "${workspaceFolder}/services/ai-service"
19
- }
20
- }
21
- ]
22
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.vscode/settings.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "python.analysis.extraPaths": [
3
- "./ai_med_extract/utils",
4
- "./services/ai-service/src"
5
  ]
6
  }
 
1
  {
2
  "python.analysis.extraPaths": [
3
+ "./ai_med_extract/utils"
 
4
  ]
5
  }
0.41.0' ADDED
File without changes
DEPLOYMENT.md CHANGED
@@ -1,106 +1,13 @@
1
- # Deployment Instructions
2
 
3
- This document provides deployment instructions for the Medical AI Service in various environments.
 
 
 
 
4
 
5
- ## Local Development
 
 
6
 
7
- ### Prerequisites
8
- - Python 3.10+
9
- - Docker (optional, for containerized testing)
10
-
11
- ### Setup
12
- 1. Clone the repository
13
- 2. Install dependencies: `pip install -r requirements.txt`
14
- 3. Set environment variables (see Configuration section)
15
- 4. Run the application: `python -m uvicorn ai_med_extract.app:create_app --host 0.0.0.0 --port 7860`
16
-
17
- ### Testing
18
- - Health check: `curl http://localhost:7860/health/live`
19
- - API docs: `http://localhost:7860/docs` (FastAPI Swagger UI)
20
-
21
- ## Docker Deployment
22
-
23
- ### Build and Run
24
- ```bash
25
- docker build -t medical-ai-service .
26
- docker run -p 7860:7860 -e SECRET_KEY=your-secret -e DATABASE_URL=your-db medical-ai-service
27
- ```
28
-
29
- ### Configuration
30
- - Exposes port 7860
31
- - Runs FastAPI app with uvicorn
32
- - Includes model caching optimizations
33
-
34
- ## Kubernetes Deployment
35
-
36
- ### Prerequisites
37
- - Kubernetes cluster
38
- - kubectl configured
39
- - Secrets created for database, Redis, and JWT keys
40
-
41
- ### Deploy
42
- ```bash
43
- kubectl apply -f infra/k8s/secure_deployment.yaml
44
- ```
45
-
46
- ### Features
47
- - Horizontal Pod Autoscaler (2-10 replicas based on CPU/memory)
48
- - Resource limits: 1-4 CPU, 4-8Gi memory
49
- - Prometheus monitoring annotations
50
- - Security contexts and network policies
51
-
52
- ### Scaling
53
- The HPA automatically scales based on:
54
- - CPU utilization > 70%
55
- - Memory utilization > 80%
56
-
57
- ## Hugging Face Spaces Deployment
58
-
59
- ### Prerequisites
60
- - Hugging Face account
61
- - Space created with Docker runtime
62
-
63
- ### Configuration
64
- 1. Dockerfile exposes port 7860
65
- 2. FastAPI app listens on 0.0.0.0:7860
66
- 3. requirements.txt includes all dependencies
67
- 4. .huggingface.yaml with `runtime: docker`
68
- 5. .dockerignore and .gitignore present
69
-
70
- ### Deploy
71
- ```bash
72
- # Test locally
73
- docker build -t hntai-app .
74
- docker run -p 7860:7860 hntai-app
75
-
76
- # Push to HF Spaces
77
- # App available at your-space-name.hf.space
78
- ```
79
-
80
- ## Configuration
81
-
82
- ### Required Environment Variables
83
- - `SECRET_KEY`: Application secret key
84
- - `JWT_SECRET_KEY`: JWT signing key
85
- - `DATABASE_URL`: PostgreSQL connection string
86
- - `REDIS_URL`: Redis connection string
87
-
88
- ### Optional
89
- - `ENVIRONMENT`: prod/dev (default: prod)
90
- - `PORT`: Service port (default: 7860)
91
- - `CORS_ORIGINS`: Allowed CORS origins (default: *)
92
- - Model cache directories and other settings in config_settings.py
93
-
94
- ## Monitoring
95
-
96
- ### Health Checks
97
- - `/health/live`: Liveness probe
98
- - `/health/ready`: Readiness probe
99
-
100
- ### Metrics
101
- - `/metrics`: Prometheus metrics endpoint
102
- - Includes performance metrics, model loading status
103
-
104
- ### Logging
105
- - Structured JSON logs for production
106
- - Configurable log levels
 
1
+ # Hugging Face Spaces Docker deployment instructions
2
 
3
+ # 1. Make sure your Dockerfile exposes port 7860 and runs your app on 0.0.0.0:7860
4
+ # 2. Your Flask app should listen on host='0.0.0.0' and port=7860
5
+ # 3. requirements.txt should include all dependencies
6
+ # 4. .huggingface.yaml with 'runtime: docker' is present
7
+ # 5. .dockerignore and .gitignore are present
8
 
9
+ # To test locally:
10
+ # docker build -t hntai-app .
11
+ # docker run -p 7860:7860 hntai-app
12
 
13
+ # Your app will be available at http://localhost:7860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
DEVELOPMENT.md DELETED
@@ -1,377 +0,0 @@
1
- # HNTAI - Scalable Medical Data Extraction API - Development Guide
2
-
3
- ## Overview
4
-
5
- This FastAPI-based application provides scalable medical data extraction services, fully aligned with the "ChatGPT Version 3 - Scalable" architecture. It features async processing, Redis caching, PostgreSQL persistence, and enterprise-grade security.
6
-
7
- ## Architecture
8
-
9
- ### Core Components
10
-
11
- 1. **FastAPI Application** (`app.py`)
12
- - Main application factory with lifespan events
13
- - CORS middleware for cross-origin requests
14
- - Centralized agent initialization
15
- - Route registration from APIRouter
16
-
17
- 2. **Configuration** (`config_settings.py`)
18
- - Pydantic-based settings with validation
19
- - Environment variable loading
20
- - Database and Redis URL configuration
21
-
22
- 3. **Inference Service** (`inference_service.py`)
23
- - Async text summarization using thread pools
24
- - Model caching for performance
25
- - Chunking for long text processing
26
-
27
- 4. **PHI Scrubber Service** (`phi_scrubber_service.py`)
28
- - Regex-based PHI detection and redaction
29
- - Audit logging to PostgreSQL
30
- - Redis-based statistics tracking
31
-
32
- 5. **API Routes** (`api/routes_fastapi.py`)
33
- - FastAPI APIRouter with async endpoints
34
- - Health checks (/live, /ready)
35
- - Placeholder routes for full migration
36
-
37
- ### Data Flow
38
-
39
- ```
40
- Client Request → FastAPI → Route Handler → Agent/Service → Redis Cache → PostgreSQL → Response
41
- ```
42
-
43
- ## Development Setup
44
-
45
- ### Prerequisites
46
-
47
- - Python 3.10+
48
- - PostgreSQL 13+
49
- - Redis 6+
50
- - Docker (optional)
51
-
52
- ### Local Development
53
-
54
- 1. **Clone and Setup Virtual Environment**
55
- ```bash
56
- git clone <repository>
57
- cd hntai
58
- python -m venv venv
59
- source venv/bin/activate # On Windows: venv\Scripts\activate
60
- ```
61
-
62
- 2. **Install Dependencies**
63
- ```bash
64
- pip install -r requirements.txt
65
- ```
66
-
67
- 3. **Setup Database and Redis**
68
- ```bash
69
- # Start PostgreSQL (using Docker)
70
- docker run -d --name postgres -e POSTGRES_PASSWORD=password -p 5432:5432 postgres:13
71
-
72
- # Start Redis (using Docker)
73
- docker run -d --name redis -p 6379:6379 redis:6
74
-
75
- # Create database
76
- createdb medical_ai
77
- ```
78
-
79
- 4. **Environment Variables**
80
- Create `.env` file:
81
- ```bash
82
- DATABASE_URL=postgresql://postgres:password@localhost:5432/medical_ai
83
- REDIS_URL=redis://localhost:6379/0
84
- SECRET_KEY=your-secret-key-here
85
- JWT_SECRET_KEY=your-jwt-secret-key-here
86
- ```
87
-
88
- 5. **Run Database Migrations**
89
- ```bash
90
- # Apply schema
91
- psql -d medical_ai -f database/postgresql/001_schema.sql
92
- ```
93
-
94
- 6. **Run the Application**
95
- ```bash
96
- # Development mode
97
- python -m ai_med_extract.main
98
-
99
- # Or directly
100
- uvicorn ai_med_extract.app:create_app --reload --host 0.0.0.0 --port 7860
101
- ```
102
-
103
- 7. **Access the Application**
104
- - API: http://localhost:7860
105
- - Docs: http://localhost:7860/docs (FastAPI auto-generated)
106
- - Health: http://localhost:7860/live
107
-
108
- ### Debugging
109
-
110
- 1. **Enable Debug Logging**
111
- ```python
112
- import logging
113
- logging.basicConfig(level=logging.DEBUG)
114
- ```
115
-
116
- 2. **Use FastAPI Debug Mode**
117
- ```bash
118
- uvicorn ai_med_extract.app:create_app --reload --debug --host 0.0.0.0 --port 7860
119
- ```
120
-
121
- 3. **Test Endpoints**
122
- ```bash
123
- # Health check
124
- curl http://localhost:7860/live
125
-
126
- # API docs
127
- curl http://localhost:7860/openapi.json
128
- ```
129
-
130
- 4. **Database Debugging**
131
- ```bash
132
- # Connect to PostgreSQL
133
- psql -d medical_ai
134
-
135
- # Check PHI audit logs
136
- SELECT * FROM phi_audit_log LIMIT 10;
137
- ```
138
-
139
- 5. **Redis Debugging**
140
- ```bash
141
- # Connect to Redis CLI
142
- redis-cli
143
-
144
- # Check keys
145
- KEYS *
146
- ```
147
-
148
- ## Production Deployment
149
-
150
- ### Option 1: Docker Deployment
151
-
152
- 1. **Build Docker Image**
153
- ```bash
154
- docker build -t hntai-api .
155
- ```
156
-
157
- 2. **Run Container**
158
- ```bash
159
- docker run -d \
160
- --name hntai-api \
161
- -p 7860:7860 \
162
- -e DATABASE_URL=postgresql://... \
163
- -e REDIS_URL=redis://... \
164
- -e SECRET_KEY=... \
165
- -e JWT_SECRET_KEY=... \
166
- hntai-api
167
- ```
168
-
169
- ### Option 2: Kubernetes Deployment
170
-
171
- 1. **Prerequisites**
172
- - Kubernetes cluster
173
- - kubectl configured
174
- - PostgreSQL and Redis services running
175
-
176
- 2. **Create Secrets**
177
- ```bash
178
- kubectl create secret generic medical-ai-secrets \
179
- --from-literal=DATABASE_URL=postgresql://... \
180
- --from-literal=REDIS_URL=redis://... \
181
- --from-literal=SECRET_KEY=... \
182
- --from-literal=JWT_SECRET_KEY=...
183
- ```
184
-
185
- 3. **Deploy to Kubernetes**
186
- ```bash
187
- kubectl apply -f infra/k8s/secure_deployment.yaml
188
- ```
189
-
190
- 4. **Verify Deployment**
191
- ```bash
192
- kubectl get pods -n medical-ai
193
- kubectl logs -n medical-ai deployment/medical-ai-service
194
- ```
195
-
196
- ### Option 3: Hugging Face Spaces (Legacy)
197
-
198
- The application still supports HF Spaces deployment for lightweight use cases.
199
-
200
- 1. **Update app.py** for HF Spaces compatibility
201
- 2. **Deploy via HF Spaces** with Docker SDK
202
-
203
- ## Monitoring and Observability
204
-
205
- ### Prometheus Metrics
206
-
207
- The application exposes metrics at `/metrics` endpoint.
208
-
209
- 1. **Setup Prometheus**
210
- ```bash
211
- kubectl apply -f monitoring/prometheus.yml
212
- ```
213
-
214
- 2. **Access Metrics**
215
- ```bash
216
- curl http://ai-service.medical-ai.svc.cluster.local:80/metrics
217
- ```
218
-
219
- ### Health Checks
220
-
221
- - **Liveness** (`/live`): Basic health check
222
- - **Readiness** (`/ready`): Checks if agents are initialized
223
-
224
- ### Logging
225
-
226
- - Structured JSON logging
227
- - PHI operations logged to database
228
- - Error tracking with stack traces
229
-
230
- ## Security Features
231
-
232
- ### HIPAA Compliance
233
-
234
- - PHI scrubbing with audit trails
235
- - Non-root container execution
236
- - Secrets management via Kubernetes
237
- - Network policies restricting traffic
238
-
239
- ### Authentication
240
-
241
- - JWT-based authentication (framework ready)
242
- - API key support (configurable)
243
-
244
- ## API Usage
245
-
246
- ### Health Endpoints
247
-
248
- ```bash
249
- GET /live
250
- GET /ready
251
- ```
252
-
253
- ### PHI Scrubbing
254
-
255
- ```bash
256
- POST /phi/scrub
257
- Content-Type: application/json
258
-
259
- {
260
- "text": "Patient John Doe, SSN 123-45-6789, diagnosed with diabetes."
261
- }
262
- ```
263
-
264
- Response:
265
- ```json
266
- {
267
- "scrubbed_text": "Patient [REDACTED], SSN [REDACTED], diagnosed with diabetes.",
268
- "phi_found": ["NAME", "SSN"],
269
- "redaction_count": 2
270
- }
271
- ```
272
-
273
- ### Text Summarization
274
-
275
- ```bash
276
- POST /api/generate_summary
277
- Content-Type: application/json
278
-
279
- {
280
- "text": "Long medical text...",
281
- "max_length": 150,
282
- "min_length": 50
283
- }
284
- ```
285
-
286
- ### Generate Patient Summary
287
-
288
- The `generate_patient_summary` endpoint has been migrated from the original Flask implementation to FastAPI. It generates a comprehensive 4-section patient summary from EHR data, with support for streaming (SSE) to handle long-running tasks and prevent timeouts.
289
-
290
- **Endpoint**: `POST /generate_patient_summary`
291
-
292
- **Query Parameters**:
293
- - `stream` (optional, default: `false`): Set to `true` for Server-Sent Events (SSE) streaming updates.
294
-
295
- **Request Body** (JSON):
296
- ```json
297
- {
298
- "patientid": "12345",
299
- "token": "your-auth-token",
300
- "key": "your-api-key",
301
- "patient_summarizer_model_name": "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf",
302
- "patient_summarizer_model_type": "gguf",
303
- "generation_mode": "hq", // Options: "hq" (high-quality), "fast", "rule" (deterministic)
304
- "timeout_mode": "fast" // Options: "fast" (8s EHR timeout), "extended" (30s)
305
- }
306
- ```
307
-
308
- **Synchronous Response** (when `stream=false`):
309
- ```json
310
- {
311
- "summary": "## Clinical Assessment\n- Patient details...\n\n## Key Trends & Changes\n- Changes detected...\n\n## Plan & Suggested Actions\n- Recommendations...\n\n## Direct Guidance for Physician\n- Clinical insights...",
312
- "baseline": "Patient baseline data...",
313
- "delta": "Changes from previous visits...",
314
- "timing": {"ehr_api": 2.5, "generation": 15.3, "total": 17.8},
315
- "model_used": "microsoft/Phi-3-mini-4k-instruct (gguf)",
316
- "timeout_mode_used": "fast"
317
- }
318
- ```
319
-
320
- **Streaming Response** (when `stream=true`):
321
- - Returns a `text/event-stream` response with SSE events:
322
- - `type: progress` - Progress updates (e.g., 10%, 50%)
323
- - `type: complete` - Final result with full summary
324
- - `type: error` - Error details if failed
325
- - `type: heartbeat` - Keep-alive signals
326
-
327
- **Notes**:
328
- - The endpoint integrates with an external EHR API to fetch patient data.
329
- - Supports multiple model types: GGUF, text-generation, summarization, seq2seq.
330
- - Includes fallbacks for timeouts, API errors, and model failures.
331
- - PHI scrubbing is applied automatically.
332
- - Full implementation includes delta computation, baseline building, and 4-section markdown output.
333
-
334
- ### Other Endpoints (Migration in Progress)
335
- - `POST /upload` - File upload and text extraction
336
- - `POST /transcribe` - Audio transcription
337
- - `POST /extract_medical_data` - Structured medical data extraction
338
- - `POST /api/extract_medical_data_from_audio` - Audio-based medical extraction
339
-
340
- ## Troubleshooting
341
-
342
- ### Common Issues
343
-
344
- 1. **Model Loading Failures**
345
- - Check HF_HOME and cache directories
346
- - Ensure sufficient memory
347
- - Verify internet connectivity for model downloads
348
-
349
- 2. **Database Connection Errors**
350
- - Verify DATABASE_URL format
351
- - Check PostgreSQL service status
352
- - Ensure database exists and schema applied
353
-
354
- 3. **Redis Connection Issues**
355
- - Verify REDIS_URL format
356
- - Check Redis service availability
357
- - Monitor Redis memory usage
358
-
359
- 4. **PHI Scrubbing Not Working**
360
- - Check regex patterns in phi_scrubber_service.py
361
- - Verify Redis connection for stats
362
- - Check database audit logs
363
-
364
- ### Performance Tuning
365
-
366
- - Adjust thread pools in inference_service.py
367
- - Configure Redis connection pooling
368
- - Set appropriate resource limits in K8s
369
- - Monitor memory usage for model caching
370
-
371
- ## Contributing
372
-
373
- 1. Follow async/await patterns for new endpoints
374
- 2. Add proper error handling and logging
375
- 3. Update tests for new functionality
376
- 4. Ensure HIPAA compliance for PHI handling
377
- 5. Document API changes in this guide
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile CHANGED
@@ -219,4 +219,4 @@ ENTRYPOINT ["/entrypoint.sh"]
219
 
220
  EXPOSE 7860
221
 
222
- CMD ["uvicorn", "ai_med_extract.app:create_app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
 
219
 
220
  EXPOSE 7860
221
 
222
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "2", "--timeout", "0", "ai_med_extract.app:app"]
FINAL_PROGRESS.md ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GGUF Timeout Fix - Complete Implementation
2
+
3
+ ## ✅ All Steps Completed:
4
+
5
+ ### 1. Increased GGUF Timeout
6
+ - Changed from 120s to 300s for Hugging Face Spaces
7
+ - Maintained 120s for local development
8
+ - Made timeout configurable via `GGUF_GENERATION_TIMEOUT` environment variable
9
+
10
+ ### 2. Enhanced Error Handling
11
+ - Added comprehensive timeout handling in `routes.py`
12
+ - Implemented fallback mechanisms when GGUF model fails
13
+ - Added better logging for debugging timeout issues
14
+ - Created robust fallback pipeline for graceful degradation
15
+
16
+ ### 3. Optimized GGUF Model Parameters
17
+ - Added CPU-specific optimizations for Hugging Face Spaces:
18
+ - `use_mlock=False` for better container compatibility
19
+ - `vocab_only=False` for full model loading
20
+ - `n_threads_batch=n_threads` for consistent threading
21
+ - `mmap=True` for memory mapping optimizations
22
+ - Cache type optimizations for better performance
23
+
24
+ ### 4. Added Progress Logging
25
+ - Enhanced logging throughout the generation process
26
+ - Added detailed timing information for each generation loop
27
+ - Added validation checks for summary completeness
28
+ - Improved debugging capabilities
29
+
30
+ ## 🔧 Files Modified:
31
+
32
+ ### `ai_med_extract/utils/model_loader_gguf.py`
33
+ - Updated timeout handling with environment variable support
34
+ - Optimized model initialization parameters for Spaces
35
+ - Enhanced logging throughout the generation process
36
+ - Added detailed progress monitoring
37
+
38
+ ### `ai_med_extract/api/routes.py`
39
+ - Added comprehensive error handling for GGUF timeouts
40
+ - Implemented fallback mechanisms when GGUF fails
41
+ - Improved logging and error responses
42
+ - Added graceful degradation to template-based fallback
43
+
44
+ ## ⚙️ Configuration Options:
45
+
46
+ ### Environment Variables:
47
+ - `GGUF_GENERATION_TIMEOUT`: Custom timeout in seconds (default: 300 for Spaces, 120 for local)
48
+ - `GGUF_N_THREADS`: Number of CPU threads to use
49
+ - `GGUF_N_BATCH`: Batch size for processing
50
+
51
+ ### Performance Settings:
52
+ - **Hugging Face Spaces**: Ultra-conservative settings (1 thread, 16 batch, 512 context)
53
+ - **Local Development**: Normal settings (2 threads, 32 batch, 1024 context)
54
+
55
+ ## 🚀 Ready for Testing:
56
+
57
+ The implementation is now complete and ready for testing. The changes include:
58
+
59
+ 1. **Increased timeout** from 120s to 300s for Hugging Face Spaces
60
+ 2. **Configurable timeout** via environment variable
61
+ 3. **Better error handling** with fallback mechanisms
62
+ 4. **Optimized parameters** for CPU performance on Spaces
63
+ 5. **Enhanced logging** for debugging and monitoring
64
+
65
+ ## 📋 Testing Checklist:
66
+ - [ ] Test GGUF model with Phi-3 model on Spaces
67
+ - [ ] Verify timeout is sufficient for generation
68
+ - [ ] Test fallback mechanisms when GGUF fails
69
+ - [ ] Monitor memory usage and performance
70
+ - [ ] Verify logging provides useful debugging information
71
+
72
+ The implementation should now handle the GGUF timeout issues effectively while providing graceful degradation when the model fails.
GGUF_TROUBLESHOOTING.md ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GGUF Model Troubleshooting Guide for Hugging Face Spaces
2
+
3
+ ## Problem Description
4
+ Your Hugging Face Space is throwing 500 errors when calling the `generate_patient_summary` API with GGUF models, specifically with:
5
+ - `"patient_summarizer_model_name": "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf"`
6
+ - `"patient_summarizer_model_type": "gguf"`
7
+
8
+ ## Root Causes Identified
9
+
10
+ ### 1. **Memory Constraints**
11
+ - Phi-3-mini-4k-instruct model is ~2.4GB
12
+ - Hugging Face Spaces have limited memory (Basic: 16GB RAM, Pro: 32GB RAM)
13
+ - Model loading + inference may exceed available memory
14
+
15
+ ### 2. **Model Download Timeouts**
16
+ - Large model downloads can timeout in Spaces environment
17
+ - Network issues during model fetching
18
+ - Insufficient timeout handling
19
+
20
+ ### 3. **Missing Dependencies**
21
+ - `llama-cpp-python` requires specific system libraries
22
+ - CPU optimization flags may not be set correctly
23
+
24
+ ## Solutions Implemented
25
+
26
+ ### 1. **Enhanced Error Handling**
27
+ - Added comprehensive logging throughout the pipeline
28
+ - Implemented fallback mechanisms when GGUF fails
29
+ - Better error messages for debugging
30
+
31
+ ### 2. **Timeout Management**
32
+ - 5-minute timeout for model loading
33
+ - 2-minute timeout for text generation
34
+ - Threading-based timeout (more reliable than signals)
35
+
36
+ ### 3. **Memory Optimization**
37
+ - Reduced context window from 4096 to 4000 tokens
38
+ - Reduced batch size from 128 to 64
39
+ - CPU-only mode with optimized thread usage
40
+
41
+ ### 4. **Fallback Pipeline**
42
+ - Template-based response when GGUF fails
43
+ - Ensures API always returns a response
44
+ - Maintains API contract even during failures
45
+
46
+ ## Testing Your Fix
47
+
48
+ ### Run the Test Script
49
+ ```bash
50
+ cd HNTAI
51
+ python test_gguf.py
52
+ ```
53
+
54
+ This will test:
55
+ - Model loading
56
+ - Basic generation
57
+ - Full summary generation
58
+ - Fallback pipeline
59
+
60
+ ### Expected Output
61
+ ```
62
+ ✓ Model loaded successfully in X.XXs
63
+ ✓ Generation successful in X.XXs
64
+ ✓ Full summary generation successful in X.XXs
65
+ 🎉 All tests passed! GGUF model is working correctly.
66
+ ```
67
+
68
+ ## Deployment Steps
69
+
70
+ ### 1. **Update Your Space**
71
+ ```bash
72
+ git add .
73
+ git commit -m "Fix GGUF model 500 errors with enhanced error handling and fallbacks"
74
+ git push
75
+ ```
76
+
77
+ ### 2. **Monitor Logs**
78
+ Check your Hugging Face Space logs for:
79
+ - Model loading times
80
+ - Memory usage
81
+ - Error messages
82
+ - Fallback activations
83
+
84
+ ### 3. **Test the API**
85
+ ```bash
86
+ curl -X POST "https://your-space.hf.space/generate_patient_summary" \
87
+ -H "Content-Type: application/json" \
88
+ -d '{
89
+ "patientid": "test123",
90
+ "token": "your_token",
91
+ "key": "your_key",
92
+ "patient_summarizer_model_name": "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf",
93
+ "patient_summarizer_model_type": "gguf"
94
+ }'
95
+ ```
96
+
97
+ ## Environment Variables
98
+
99
+ Set these in your Hugging Face Space:
100
+
101
+ ```bash
102
+ # Memory optimization
103
+ GGUF_N_THREADS=2
104
+ GGUF_N_BATCH=64
105
+
106
+ # Cache directories
107
+ HF_HOME=/tmp/huggingface
108
+ XDG_CACHE_HOME=/tmp
109
+ TORCH_HOME=/tmp/torch
110
+ ```
111
+
112
+ ## Alternative Models
113
+
114
+ If Phi-3-mini-4k-instruct still fails, try smaller models:
115
+
116
+ ### Smaller GGUF Models
117
+ ```json
118
+ {
119
+ "patient_summarizer_model_name": "TheBloke/Phi-3-mini-4k-instruct-GGUF/phi-3-mini-4k-instruct-q2_k.gguf",
120
+ "patient_summarizer_model_type": "gguf"
121
+ }
122
+ ```
123
+
124
+ ### Fallback to HuggingFace Models
125
+ ```json
126
+ {
127
+ "patient_summarizer_model_name": "microsoft/Phi-3-mini-4k-instruct",
128
+ "patient_summarizer_model_type": "text-generation"
129
+ }
130
+ ```
131
+
132
+ ## Monitoring and Debugging
133
+
134
+ ### 1. **Check Space Logs**
135
+ - Look for "GGUF" prefixed log messages
136
+ - Monitor memory usage patterns
137
+ - Check for timeout errors
138
+
139
+ ### 2. **API Response Codes**
140
+ - `200`: Success
141
+ - `408`: Generation timeout
142
+ - `500`: Model loading failure (will use fallback)
143
+
144
+ ### 3. **Performance Metrics**
145
+ - Model loading time: Should be < 5 minutes
146
+ - Generation time: Should be < 2 minutes
147
+ - Memory usage: Should stay within Space limits
148
+
149
+ ## Common Issues and Solutions
150
+
151
+ ### Issue: "Model download failed"
152
+ **Solution**: Check network connectivity and model availability
153
+
154
+ ### Issue: "Failed to initialize GGUF model"
155
+ **Solution**: Verify llama-cpp-python installation and system dependencies
156
+
157
+ ### Issue: "Generation timed out"
158
+ **Solution**: Reduce max_tokens or use smaller model
159
+
160
+ ### Issue: "Out of memory"
161
+ **Solution**: Use smaller model variant (q2_k instead of q4)
162
+
163
+ ## Support
164
+
165
+ If issues persist:
166
+ 1. Run `test_gguf.py` and share output
167
+ 2. Check Hugging Face Space logs
168
+ 3. Verify model availability in the Hub
169
+ 4. Consider upgrading to Pro tier for more resources
170
+
171
+ ## Expected Behavior After Fix
172
+
173
+ ❌ **Before**: 500 errors after 5 minutes
174
+ ✅ **After**:
175
+ - Successful model loading with detailed logging
176
+ - Graceful fallback if model fails
177
+ - Proper timeout handling
178
+ - Always returns a response (either real or fallback)
PROGRESS_UPDATE.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GGUF Timeout Fix - Progress Update
2
+
3
+ ## ✅ Completed Steps:
4
+
5
+ 1. **Increased GGUF timeout**: Changed from 120s to 300s for Hugging Face Spaces
6
+ 2. **Configurable timeout**: Added GGUF_GENERATION_TIMEOUT environment variable support
7
+ 3. **Better error handling**: Enhanced timeout and fallback mechanisms in routes.py
8
+ 4. **Fallback pipeline**: Added robust fallback when GGUF model fails to load or times out
9
+
10
+ ## 🔧 Changes Made:
11
+
12
+ ### model_loader_gguf.py:
13
+ - Updated `_generate_with_timeout()` to use 300s default for Spaces, 120s for local
14
+ - Made timeout configurable via environment variable
15
+ - Updated `generate()` to use configurable timeout
16
+
17
+ ### routes.py:
18
+ - Added fallback pipeline usage when GGUF times out
19
+ - Added better logging for timeout errors
20
+ - Added fallback for GGUF model loading failures
21
+ - Improved error messages and response handling
22
+
23
+ ## 🚀 Next Steps:
24
+ - Test the changes with the GGUF model
25
+ - Verify timeout is sufficient for Phi-3 model
26
+ - Test fallback mechanisms
27
+ - Add progress logging for generation
28
+
29
+ ## ⚙️ Configuration:
30
+ - Default timeout: 300s (Spaces) / 120s (local)
31
+ - Environment variable: `GGUF_GENERATION_TIMEOUT`
32
+ - Fallback: Template-based summary when GGUF fails
README.md CHANGED
@@ -8,76 +8,32 @@ app_port: 7860
8
  pinned: false
9
  ---
10
 
11
- # HNTAI - Scalable Medical Data Extraction API
12
 
13
- This is a FastAPI-based scalable API for extracting and processing medical data from various document formats, aligned with "ChatGPT Version 3 - Scalable" architecture.
14
 
15
  ## Features
16
  - Document text extraction (PDF, DOCX, Images)
17
  - Audio transcription
18
  - Medical data extraction
19
- - PHI (Protected Health Information) scrubbing with audit logging
20
- - Text summarization with Redis caching
21
- - PostgreSQL database integration for persistence
22
- - Async processing for scalability
23
- - Health endpoints (/live, /ready)
24
- - Security features (non-root containers, secrets management, HIPAA compliance)
25
 
26
- ## Architecture Alignment
27
- Fully aligned with "ChatGPT Version 3 - Scalable":
28
- - FastAPI for async API handling
29
- - Redis for caching and PHI stats
30
- - PostgreSQL for audit logs and data persistence
31
- - Kubernetes deployment with security contexts
32
- - Network policies and HIPAA compliance
33
- - Prometheus monitoring
34
- - Proper resource limits and health probes
35
-
36
- ## Deployment Options
37
- - **Hugging Face Spaces**: Lightweight Docker deployment (legacy)
38
- - **Kubernetes**: Scalable production deployment with security features
39
 
40
  ## Environment Variables
41
- - `DATABASE_URL`: PostgreSQL connection string
42
- - `REDIS_URL`: Redis connection string
43
- - `SECRET_KEY`: Application secret key
44
- - `JWT_SECRET_KEY`: JWT signing key
45
 
46
  ## API Endpoints
47
- - GET /health/live - Liveness health check
48
- - GET /health/ready - Readiness health check
49
- - GET /metrics - Prometheus metrics
50
- - POST /generate_patient_summary - Generate comprehensive patient summaries (with streaming support)
51
  - POST /upload - Upload and process medical documents
52
- - GET /get_updated_medical_data - Retrieve processed medical data
53
- - PUT /update_medical_data - Update medical data fields
54
  - POST /transcribe - Transcribe audio files
55
  - POST /extract_medical_data - Extract structured medical data
56
  - POST /api/generate_summary - Generate text summaries
57
  - POST /api/extract_medical_data_from_audio - Process audio recordings
58
- - POST /api/patient_summary_openvino - Generate patient summaries using OpenVINO
59
-
60
- ## Development
61
-
62
- ### Code Quality
63
- This project uses the following tools for code quality:
64
- - **Black**: Code formatting
65
- - **isort**: Import sorting
66
- - **flake8**: Linting
67
- - **mypy**: Type checking
68
-
69
- Run quality checks:
70
- ```bash
71
- black .
72
- isort .
73
- flake8 .
74
- mypy .
75
- ```
76
-
77
- ### Testing
78
- Run tests with:
79
- ```bash
80
- python -m pytest
81
- ```
82
 
83
- For more details, check the API documentation at `/docs`, [DEVELOPMENT.md](DEVELOPMENT.md) for development guides, and [DEPLOYMENT.md](DEPLOYMENT.md) for deployment instructions.
 
8
  pinned: false
9
  ---
10
 
11
+ # HNTAI - Medical Data Extraction API
12
 
13
+ This is a Flask-based API for extracting and processing medical data from various document formats.
14
 
15
  ## Features
16
  - Document text extraction (PDF, DOCX, Images)
17
  - Audio transcription
18
  - Medical data extraction
19
+ - PHI (Protected Health Information) scrubbing
20
+ - Text summarization
 
 
 
 
21
 
22
+ ## Deployment on Hugging Face Spaces
23
+ - Uses Docker for deployment
24
+ - All models and data are cached in /tmp
25
+ - Optimized for memory usage
26
+ - Auto-retries for model loading
27
+ - Proper error handling
 
 
 
 
 
 
 
28
 
29
  ## Environment Variables
30
+ All necessary environment variables are pre-configured for Hugging Face Spaces deployment.
 
 
 
31
 
32
  ## API Endpoints
 
 
 
 
33
  - POST /upload - Upload and process medical documents
 
 
34
  - POST /transcribe - Transcribe audio files
35
  - POST /extract_medical_data - Extract structured medical data
36
  - POST /api/generate_summary - Generate text summaries
37
  - POST /api/extract_medical_data_from_audio - Process audio recordings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
+ For more details, check the API documentation.
README_SPACES.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Medical Document Processing Space
2
+
3
+ This Hugging Face Space provides an AI-powered medical document processing system that can:
4
+ - Extract text from various medical documents (PDFs, images)
5
+ - Transcribe medical audio recordings
6
+ - Scrub PHI (Protected Health Information)
7
+ - Extract medical data and generate summaries
8
+ - Process medical reports and clinical notes
9
+
10
+ ## Features
11
+
12
+ - **Document Processing**: Handles PDFs, images, and audio files
13
+ - **PHI Scrubbing**: Automatically removes sensitive health information
14
+ - **Medical Data Extraction**: Extracts key medical information using MedAlpaca
15
+ - **Summarization**: Generates concise summaries of medical documents
16
+ - **Audio Transcription**: Transcribes medical audio using Whisper
17
+
18
+ ## Usage
19
+
20
+ 1. Upload your medical document (PDF, image, or audio file)
21
+ 2. Select the processing options you need
22
+ 3. Get the processed results with extracted information and summaries
23
+
24
+ ## Technical Details
25
+
26
+ - Built with Flask and modern AI models
27
+ - Uses Hugging Face's infrastructure for model hosting
28
+ - Implements secure file handling and processing
29
+ - Optimized for medical document processing
30
+
31
+ ## Model Information
32
+
33
+ - Text Generation: MedAlpaca-13B
34
+ - Summarization: BART-large-CNN
35
+ - Speech-to-Text: Whisper Base
36
+ - Text Extraction: Custom OCR pipeline
37
+
38
+ ## Limitations
39
+
40
+ - Maximum file size: 16GB
41
+ - Processing time may vary based on document size and complexity
42
+ - Some features may require specific file formats
43
+
44
+ ## Privacy
45
+
46
+ All processing is done securely within the Hugging Face Space environment. No data is stored permanently.
REFACTORED_README.md ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HNTAI Medical Data Extraction - Refactored System
2
+
3
+ ## Overview
4
+
5
+ This project has been completely refactored to provide a unified, flexible model management system that supports **any model name and type**, including GGUF models for patient summary generation. The system now offers dynamic model loading, runtime model switching, and robust fallback mechanisms.
6
+
7
+ ## 🚀 Key Features
8
+
9
+ ### ✨ **Universal Model Support**
10
+ - **Any Model Name**: Use any Hugging Face model, local model, or custom model
11
+ - **Any Model Type**: Support for text-generation, summarization, NER, GGUF, OpenVINO, and more
12
+ - **Automatic Type Detection**: The system automatically detects model types from names
13
+ - **Dynamic Loading**: Load models at runtime without restarting the application
14
+
15
+ ### 🔄 **GGUF Model Integration**
16
+ - **Seamless GGUF Support**: Full integration with llama.cpp for GGUF models
17
+ - **Patient Summary Generation**: Optimized for medical text summarization
18
+ - **Memory Efficient**: Ultra-conservative settings for Hugging Face Spaces
19
+ - **Fallback Mechanisms**: Automatic fallback when GGUF models fail
20
+
21
+ ### 🧠 **Unified Model Manager**
22
+ - **Single Interface**: One manager handles all model types
23
+ - **Smart Caching**: Intelligent model caching with memory management
24
+ - **Fallback Chains**: Multiple fallback options for robustness
25
+ - **Performance Monitoring**: Built-in timing and memory tracking
26
+
27
+ ## 🏗️ Architecture
28
+
29
+ ### Core Components
30
+
31
+ 1. **`UnifiedModelManager`** - Central model management system
32
+ 2. **`BaseModelLoader`** - Abstract interface for all model loaders
33
+ 3. **`TransformersModelLoader`** - Hugging Face Transformers models
34
+ 4. **`GGUFModelLoader`** - GGUF models via llama.cpp
35
+ 5. **`OpenVINOModelLoader`** - OpenVINO optimized models
36
+ 6. **`PatientSummarizerAgent`** - Enhanced patient summary generation
37
+
38
+ ### Model Type Support
39
+
40
+ | Model Type | Description | Example Models |
41
+ |------------|-------------|----------------|
42
+ | `text-generation` | Causal language models | `facebook/bart-base`, `microsoft/DialoGPT-medium` |
43
+ | `summarization` | Text summarization models | `Falconsai/medical_summarization`, `facebook/bart-large-cnn` |
44
+ | `ner` | Named Entity Recognition | `dslim/bert-base-NER`, `Jean-Baptiste/roberta-large-ner-english` |
45
+ | `gguf` | GGUF format models | `microsoft/Phi-3-mini-4k-instruct-gguf` |
46
+ | `openvino` | OpenVINO optimized models | `microsoft/Phi-3-mini-4k-instruct` |
47
+
48
+ ## 🚀 Quick Start
49
+
50
+ ### 1. Basic Usage
51
+
52
+ ```python
53
+ from ai_med_extract.utils.model_manager import model_manager
54
+
55
+ # Load any model dynamically
56
+ loader = model_manager.get_model_loader(
57
+ model_name="microsoft/Phi-3-mini-4k-instruct-gguf",
58
+ model_type="gguf",
59
+ filename="Phi-3-mini-4k-instruct-q4.gguf"
60
+ )
61
+
62
+ # Generate text
63
+ result = loader.generate("Generate a medical summary for...")
64
+ ```
65
+
66
+ ### 2. Patient Summary Generation
67
+
68
+ ```python
69
+ from ai_med_extract.agents.patient_summary_agent import PatientSummarizerAgent
70
+
71
+ # Create agent with any model
72
+ agent = PatientSummarizerAgent(
73
+ model_name="microsoft/Phi-3-mini-4k-instruct-gguf",
74
+ model_type="gguf"
75
+ )
76
+
77
+ # Generate clinical summary
78
+ summary = agent.generate_clinical_summary(patient_data)
79
+ ```
80
+
81
+ ### 3. Runtime Model Switching
82
+
83
+ ```python
84
+ # Switch models at runtime
85
+ agent.update_model(
86
+ model_name="Falconsai/medical_summarization",
87
+ model_type="summarization"
88
+ )
89
+ ```
90
+
91
+ ## 📡 API Endpoints
92
+
93
+ ### Model Management API
94
+
95
+ #### Load Model
96
+ ```http
97
+ POST /api/models/load
98
+ Content-Type: application/json
99
+
100
+ {
101
+ "model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
102
+ "model_type": "gguf",
103
+ "filename": "Phi-3-mini-4k-instruct-q4.gguf",
104
+ "force_reload": false
105
+ }
106
+ ```
107
+
108
+ #### Generate Text
109
+ ```http
110
+ POST /api/models/generate
111
+ Content-Type: application/json
112
+
113
+ {
114
+ "model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
115
+ "model_type": "gguf",
116
+ "prompt": "Generate a medical summary for...",
117
+ "max_tokens": 512,
118
+ "temperature": 0.7
119
+ }
120
+ ```
121
+
122
+ #### Switch Agent Model
123
+ ```http
124
+ POST /api/models/switch
125
+ Content-Type: application/json
126
+
127
+ {
128
+ "agent_name": "patient_summarizer",
129
+ "model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
130
+ "model_type": "gguf"
131
+ }
132
+ ```
133
+
134
+ #### Get Model Information
135
+ ```http
136
+ GET /api/models/info?model_name=microsoft/Phi-3-mini-4k-instruct-gguf
137
+ ```
138
+
139
+ #### Health Check
140
+ ```http
141
+ GET /api/models/health
142
+ ```
143
+
144
+ ### Patient Summary API
145
+
146
+ #### Generate Patient Summary
147
+ ```http
148
+ POST /generate_patient_summary
149
+ Content-Type: application/json
150
+
151
+ {
152
+ "patientid": "12345",
153
+ "token": "your_token",
154
+ "key": "your_api_key",
155
+ "patient_summarizer_model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
156
+ "patient_summarizer_model_type": "gguf"
157
+ }
158
+ ```
159
+
160
+ ## 🔧 Configuration
161
+
162
+ ### Environment Variables
163
+
164
+ ```bash
165
+ # Cache directories
166
+ HF_HOME=/tmp/huggingface
167
+ XDG_CACHE_HOME=/tmp
168
+ TORCH_HOME=/tmp/torch
169
+ WHISPER_CACHE=/tmp/whisper
170
+
171
+ # GGUF optimization
172
+ GGUF_N_THREADS=2
173
+ GGUF_N_BATCH=64
174
+ ```
175
+
176
+ ### Model Configuration
177
+
178
+ The system automatically uses optimized models for different environments:
179
+
180
+ - **Local Development**: Full model capabilities
181
+ - **Hugging Face Spaces**: Memory-optimized models
182
+ - **Production**: Configurable based on resources
183
+
184
+ ## 🎯 Use Cases
185
+
186
+ ### 1. **Medical Document Processing**
187
+ ```python
188
+ # Extract medical data with any model
189
+ medical_data = model_manager.generate_text(
190
+ model_name="facebook/bart-base",
191
+ model_type="text-generation",
192
+ prompt="Extract medical entities from: " + document_text
193
+ )
194
+ ```
195
+
196
+ ### 2. **Patient Summary Generation**
197
+ ```python
198
+ # Use GGUF model for patient summaries
199
+ summary = model_manager.generate_text(
200
+ model_name="microsoft/Phi-3-mini-4k-instruct-gguf",
201
+ model_type="gguf",
202
+ prompt=patient_data_prompt,
203
+ max_tokens=512
204
+ )
205
+ ```
206
+
207
+ ### 3. **Dynamic Model Switching**
208
+ ```python
209
+ # Switch between models based on task requirements
210
+ if task == "summarization":
211
+ model_name = "Falconsai/medical_summarization"
212
+ model_type = "summarization"
213
+ elif task == "extraction":
214
+ model_name = "facebook/bart-base"
215
+ model_type = "text-generation"
216
+
217
+ loader = model_manager.get_model_loader(model_name, model_type)
218
+ ```
219
+
220
+ ## 🔒 Memory Management
221
+
222
+ ### Hugging Face Spaces Optimization
223
+
224
+ The system automatically detects Hugging Face Spaces and applies ultra-conservative memory settings:
225
+
226
+ - **GGUF Models**: 1 thread, 16 batch size, 512 context
227
+ - **Transformers**: Float32 precision, minimal memory usage
228
+ - **Automatic Fallbacks**: Graceful degradation when memory is limited
229
+
230
+ ### Memory Monitoring
231
+
232
+ ```python
233
+ # Check memory usage
234
+ health = requests.get("http://localhost:7860/api/models/health").json()
235
+ print(f"GPU Memory: {health['gpu_info']['memory_allocated']}")
236
+ print(f"Loaded Models: {health['loaded_models_count']}")
237
+ ```
238
+
239
+ ## 🧪 Testing
240
+
241
+ ### Test GGUF Models
242
+
243
+ ```bash
244
+ # Test GGUF model loading
245
+ python test_gguf.py
246
+
247
+ # Test specific model
248
+ python -c "
249
+ from ai_med_extract.utils.model_manager import model_manager
250
+ loader = model_manager.get_model_loader('microsoft/Phi-3-mini-4k-instruct-gguf', 'gguf')
251
+ result = loader.generate('Test prompt')
252
+ print(f'Success: {len(result)} characters generated')
253
+ "
254
+ ```
255
+
256
+ ### Model Validation
257
+
258
+ ```python
259
+ from ai_med_extract.utils.model_config import validate_model_config
260
+
261
+ # Validate model configuration
262
+ validation = validate_model_config(
263
+ model_name="microsoft/Phi-3-mini-4k-instruct-gguf",
264
+ model_type="gguf"
265
+ )
266
+
267
+ print(f"Valid: {validation['valid']}")
268
+ print(f"Warnings: {validation['warnings']}")
269
+ ```
270
+
271
+ ## 🚨 Error Handling
272
+
273
+ ### Fallback Mechanisms
274
+
275
+ 1. **Primary Model**: Attempts to load the specified model
276
+ 2. **Fallback Model**: Uses predefined fallback for the model type
277
+ 3. **Text Fallback**: Generates structured text responses
278
+ 4. **Graceful Degradation**: Continues operation with reduced functionality
279
+
280
+ ### Common Issues
281
+
282
+ #### GGUF Model Loading Fails
283
+ ```python
284
+ # Check model file
285
+ if not os.path.exists(model_path):
286
+ # Download from Hugging Face
287
+ from huggingface_hub import hf_hub_download
288
+ model_path = hf_hub_download(repo_id, filename)
289
+ ```
290
+
291
+ #### Memory Issues
292
+ ```python
293
+ # Clear cache and reload
294
+ model_manager.clear_cache()
295
+ torch.cuda.empty_cache()
296
+
297
+ # Use smaller model
298
+ loader = model_manager.get_model_loader(
299
+ model_name="facebook/bart-base", # Smaller model
300
+ model_type="text-generation"
301
+ )
302
+ ```
303
+
304
+ ## 📊 Performance
305
+
306
+ ### Benchmarking
307
+
308
+ ```python
309
+ import time
310
+
311
+ # Time model loading
312
+ start = time.time()
313
+ loader = model_manager.get_model_loader(model_name, model_type)
314
+ load_time = time.time() - start
315
+
316
+ # Time generation
317
+ start = time.time()
318
+ result = loader.generate(prompt)
319
+ gen_time = time.time() - start
320
+
321
+ print(f"Load: {load_time:.2f}s, Generate: {gen_time:.2f}s")
322
+ ```
323
+
324
+ ### Optimization Tips
325
+
326
+ 1. **Use Appropriate Model Size**: Smaller models for limited resources
327
+ 2. **Enable Caching**: Models are cached after first load
328
+ 3. **Batch Processing**: Process multiple requests together
329
+ 4. **Memory Monitoring**: Regular health checks
330
+
331
+ ## 🔮 Future Enhancements
332
+
333
+ ### Planned Features
334
+
335
+ - **Model Quantization**: Automatic model optimization
336
+ - **Distributed Loading**: Load models across multiple devices
337
+ - **Model Versioning**: Track and manage model versions
338
+ - **Performance Analytics**: Detailed performance metrics
339
+ - **Auto-scaling**: Automatic model scaling based on load
340
+
341
+ ### Extensibility
342
+
343
+ The system is designed for easy extension:
344
+
345
+ ```python
346
+ class CustomModelLoader(BaseModelLoader):
347
+ def __init__(self, model_name: str):
348
+ self.model_name = model_name
349
+
350
+ def load(self):
351
+ # Custom loading logic
352
+ pass
353
+
354
+ def generate(self, prompt: str, **kwargs):
355
+ # Custom generation logic
356
+ pass
357
+ ```
358
+
359
+ ## 📝 Migration Guide
360
+
361
+ ### From Old System
362
+
363
+ 1. **Replace Hardcoded Models**:
364
+ ```python
365
+ # Old
366
+ model = LazyModelLoader("facebook/bart-base", "text-generation")
367
+
368
+ # New
369
+ model = model_manager.get_model_loader("facebook/bart-base", "text-generation")
370
+ ```
371
+
372
+ 2. **Update Patient Summarizer**:
373
+ ```python
374
+ # Old
375
+ agent = PatientSummarizerAgent()
376
+
377
+ # New
378
+ agent = PatientSummarizerAgent(
379
+ model_name="microsoft/Phi-3-mini-4k-instruct-gguf",
380
+ model_type="gguf"
381
+ )
382
+ ```
383
+
384
+ 3. **Use Dynamic Model Selection**:
385
+ ```python
386
+ # Old: Fixed model types
387
+ # New: Dynamic model selection
388
+ model_type = request.form.get("model_type", "text-generation")
389
+ model_name = request.form.get("model_name", "facebook/bart-base")
390
+ ```
391
+
392
+ ## 🤝 Contributing
393
+
394
+ ### Development Setup
395
+
396
+ ```bash
397
+ # Clone repository
398
+ git clone <repository-url>
399
+ cd HNTAI
400
+
401
+ # Install dependencies
402
+ pip install -r requirements.txt
403
+
404
+ # Run tests
405
+ python -m pytest tests/
406
+
407
+ # Start development server
408
+ python -m ai_med_extract
409
+ ```
410
+
411
+ ### Adding New Model Types
412
+
413
+ 1. **Create Loader Class**:
414
+ ```python
415
+ class CustomModelLoader(BaseModelLoader):
416
+ # Implement required methods
417
+ pass
418
+ ```
419
+
420
+ 2. **Update Model Manager**:
421
+ ```python
422
+ if model_type == "custom":
423
+ loader = CustomModelLoader(model_name)
424
+ ```
425
+
426
+ 3. **Add Configuration**:
427
+ ```python
428
+ DEFAULT_MODELS["custom"] = {
429
+ "primary": "default/custom-model",
430
+ "fallback": "fallback/custom-model"
431
+ }
432
+ ```
433
+
434
+ ## 📄 License
435
+
436
+ This project is licensed under the MIT License - see the LICENSE file for details.
437
+
438
+ ## 🆘 Support
439
+
440
+ ### Getting Help
441
+
442
+ - **Documentation**: This README and inline code comments
443
+ - **Issues**: GitHub Issues for bug reports
444
+ - **Discussions**: GitHub Discussions for questions
445
+ - **Examples**: See `test_gguf.py` and other test files
446
+
447
+ ### Common Questions
448
+
449
+ **Q: Can I use my own GGUF model?**
450
+ A: Yes! Just provide the path to your .gguf file or upload it to Hugging Face.
451
+
452
+ **Q: How do I optimize for memory?**
453
+ A: Use smaller models, enable caching, and monitor memory usage via `/api/models/health`.
454
+
455
+ **Q: Can I switch models without restarting?**
456
+ A: Yes! Use the `/api/models/switch` endpoint to change models at runtime.
457
+
458
+ **Q: What if a model fails to load?**
459
+ A: The system automatically falls back to alternative models and provides detailed error information.
460
+
461
+ ---
462
+
463
+ **🎉 Congratulations!** You now have a powerful, flexible system that can work with any model name and type, including GGUF models for patient summary generation. The system is designed to be robust, efficient, and easy to use while maintaining backward compatibility.
TODO.md CHANGED
@@ -1,12 +1,14 @@
1
- # TODO: Integrate Sinkhorn-Normalized Quantization
2
 
3
- ## Steps to Complete
4
- - [x] Create quantization_utils.py with Sinkhorn-Normalized Quantization implementation
5
- - [x] Modify model_manager.py to support optional quantization during model loading
6
- - [x] Add configuration options for quantization in model_config.py
7
- - [x] Test quantization on a sample model without affecting existing workflows
8
- - [x] Verify that existing model loading and inference still work
9
- - [ ] Update documentation if needed
10
 
11
- ## Current Status
12
- Basic tests completed successfully. Quantization is disabled by default, so existing workflows are unaffected. API endpoints can be tested by running the FastAPI app.
 
 
 
 
 
 
 
 
 
1
+ # TODO: Fix None Type Handling and Stream Ending on Generation Failure
2
 
3
+ ## Tasks to Complete
 
 
 
 
 
 
4
 
5
+ - [x] Update `active_set` in `openvino_summarizer_utils.py` to handle None visits
6
+ - [x] Update `compute_deltas` in `openvino_summarizer_utils.py` to ensure old_visits and new_visits are lists
7
+ - [x] Update `visits_sorted` in `openvino_summarizer_utils.py` to handle None input
8
+ - [x] Update `latest_value` and related functions in `openvino_summarizer_utils.py` to handle None visits
9
+ - [x] Update `parse_ehr_chartsummarydtl` in `openvino_summarizer_utils.py` to return [] if chartsummarydtl is None
10
+ - [ ] Ensure `visits` is always a list in `routes.py` background_patient_summary
11
+ - [ ] Wrap `delta = compute_deltas([], visits)` in try-except in `routes.py`
12
+ - [ ] Ensure on generation failure in `background_patient_summary`, update job to 'error' and provide fallback
13
+ - [ ] Verify `sse_generator` ends stream properly on error
14
+ - [ ] Test the fixes to ensure no None type errors and proper stream ending
__pycache__/ai_med_extract.cpython-311.pyc DELETED
Binary file (898 Bytes)
 
__pycache__/test_chunking.cpython-311.pyc ADDED
Binary file (4.61 kB). View file
 
__pycache__/test_summary_consistency.cpython-311.pyc ADDED
Binary file (12.3 kB). View file
 
ai_med_extract.py DELETED
@@ -1,15 +0,0 @@
1
- """Compatibility shim for older imports that expect a top-level `ai_med_extract`.
2
-
3
- This module re-exports the package located at `services/ai-service/src/ai_med_extract`.
4
- It keeps older tests and imports working while the canonical package lives under services/ai-service/src.
5
- """
6
- import sys
7
- import os
8
-
9
- # Compute path to the migrated package
10
- ROOT = os.path.dirname(__file__)
11
- SERVICE_SRC = os.path.join(ROOT, 'services', 'ai-service', 'src')
12
- if SERVICE_SRC not in sys.path:
13
- sys.path.insert(0, SERVICE_SRC)
14
-
15
- from ai_med_extract import * # re-export everything
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ai_med_extract/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # ai_med_extract/__init__.py
ai_med_extract/__main__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
"""Module entrypoint: start the web app with ``python -m ai_med_extract``."""
import os

from .app import app

# Entrypoint for running the app as a module
if __name__ == "__main__":
    # The server binds to all interfaces, so debug mode must not be on by
    # default: the interactive debugger lets anyone who can reach the port
    # execute arbitrary code. Opt in explicitly (e.g. FLASK_DEBUG=1) for
    # local development only.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)
ai_med_extract/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (173 Bytes). View file
 
ai_med_extract/__pycache__/app.cpython-311.pyc ADDED
Binary file (9.15 kB). View file
 
ai_med_extract/agents/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # ai_med_extract/agents/__init__.py
ai_med_extract/agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (180 Bytes). View file
 
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/medical_data_extractor.cpython-311.pyc RENAMED
Binary files a/services/ai-service/src/ai_med_extract/agents/__pycache__/medical_data_extractor.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/medical_data_extractor.cpython-311.pyc differ
 
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/patient_summary_agent.cpython-311.pyc RENAMED
Binary files a/services/ai-service/src/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc differ
 
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/phi_scrubber.cpython-311.pyc RENAMED
Binary files a/services/ai-service/src/ai_med_extract/agents/__pycache__/phi_scrubber.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/phi_scrubber.cpython-311.pyc differ
 
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/summarizer.cpython-311.pyc RENAMED
Binary files a/services/ai-service/src/ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/summarizer.cpython-311.pyc differ
 
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/__pycache__/text_extractor.cpython-311.pyc RENAMED
Binary files a/services/ai-service/src/ai_med_extract/agents/__pycache__/text_extractor.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/text_extractor.cpython-311.pyc differ
 
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/medical_data_extractor.py RENAMED
@@ -3,7 +3,7 @@ import logging
3
  import json
4
  import torch
5
 
6
- from .phi_scrubber import MedicalTextUtils
7
 
8
  class MedicalDataExtractorAgent:
9
  def __init__(self, generator):
 
3
  import json
4
  import torch
5
 
6
+ from ai_med_extract.agents.phi_scrubber import MedicalTextUtils
7
 
8
  class MedicalDataExtractorAgent:
9
  def __init__(self, generator):
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/patient_summary_agent.py RENAMED
@@ -6,7 +6,6 @@ import json
6
  from typing import List, Dict, Union, Optional
7
  from textwrap import fill
8
  import concurrent.futures
9
- import logging
10
 
11
  # Suppress non-critical warnings
12
  warnings.filterwarnings("ignore", category=UserWarning)
@@ -28,12 +27,9 @@ class PatientSummarizerAgent:
28
 
29
  # Initialize model loader through unified model manager
30
  self.model_loader = None
31
- # Initialize model loader lazily to avoid heavy import-time work
32
- # Do not call _initialize_model_loader() here; initialize via initialize_agents()
33
- import logging
34
- logging.getLogger(__name__).info(
35
- f"PatientSummarizerAgent created for {model_name} ({model_type}) on {self.device} (loader deferred)"
36
- )
37
 
38
  def _initialize_model_loader(self):
39
  """Initialize the model loader using the unified model manager"""
@@ -72,11 +68,10 @@ class PatientSummarizerAgent:
72
  self.model_type
73
  )
74
 
75
- import logging
76
- logging.getLogger(__name__).info(f"Model loader initialized: {self.model_name} ({self.model_type})")
77
 
78
  except Exception as e:
79
- logging.getLogger(__name__).warning(f"Failed to initialize model loader: {e}")
80
  # Create a fallback loader
81
  self._create_fallback_loader()
82
 
@@ -102,18 +97,16 @@ class PatientSummarizerAgent:
102
  return self.generate(prompt, **kwargs)
103
 
104
  self.model_loader = FallbackLoader(self.model_name, self.model_type)
105
- import logging
106
- logging.getLogger(__name__).warning(f"Using fallback loader for {self.model_name}")
107
 
108
  def generate_clinical_summary(self, patient_data: Union[List[str], Dict]) -> str:
109
  """Generate a comprehensive clinical summary using the unified model manager"""
110
- import logging
111
- logging.getLogger(__name__).info(f"Generating clinical summary using model: {self.model_name} ({self.model_type})...")
112
 
113
  try:
114
  # Build the narrative prompt
115
  narrative_history = self.build_chronological_narrative(patient_data)
116
- logging.getLogger(__name__).debug(f"--- Prompt Sent to Model (truncated) --- {fill(narrative_history, width=80)[:1000]}...")
117
 
118
  # Generate summary using the model loader
119
  if hasattr(self.model_loader, 'generate_full_summary'):
@@ -132,7 +125,7 @@ class PatientSummarizerAgent:
132
  top_p=0.9
133
  )
134
 
135
- logging.getLogger(__name__).debug(f"--- Raw Model Output --- {fill(raw_summary_text, width=80)}")
136
 
137
  # Format the output
138
  formatted_report = self.format_clinical_output(raw_summary_text, patient_data)
@@ -152,7 +145,7 @@ class PatientSummarizerAgent:
152
  return final_output
153
 
154
  except Exception as e:
155
- logging.getLogger(__name__).exception(f"Error during summary generation: {e}")
156
  import traceback
157
  traceback.print_exc()
158
  return f"Error generating summary: {str(e)}"
@@ -174,17 +167,24 @@ class PatientSummarizerAgent:
174
  )
175
 
176
  def _generate_section_with_instance(self, prompt: str, section_name: str) -> tuple:
177
- """Generate a section using the shared model instance."""
178
  try:
179
- # Use the shared model loader instance
180
- if hasattr(self.model_loader, 'generate_full_summary'):
181
- result = self.model_loader.generate_full_summary(
 
 
 
 
 
 
 
182
  prompt,
183
  max_tokens=256, # Reduced for speed
184
  max_loops=1
185
  )
186
  else:
187
- result = self.model_loader.generate(
188
  prompt,
189
  max_new_tokens=256, # Reduced for speed
190
  temperature=0.7,
@@ -194,10 +194,8 @@ class PatientSummarizerAgent:
194
  except Exception as e:
195
  return section_name, f"Error generating section {section_name}: {e}"
196
 
197
- def generate_summary_sections(self, patient_data: Union[List[str], Dict], callback=None) -> Dict[str, str]:
198
  """Generate the 4 summary sections in parallel using dedicated model instances."""
199
- if self.model_loader is None:
200
- self._initialize_model_loader()
201
  narrative_history = self.build_chronological_narrative(patient_data)
202
 
203
  # Define prompts for each section with markdown format instruction
@@ -220,21 +218,17 @@ class PatientSummarizerAgent:
220
  try:
221
  section_name, result = future.result()
222
  results[section_name] = result
223
- if callback:
224
- callback(section_name, result)
225
  except Exception as exc:
226
  results[section] = f"Error generating section {section}: {exc}"
227
- if callback:
228
- callback(section, f"Error generating section {section}: {exc}")
229
 
230
  return results
231
 
232
- def generate_patient_summary(self, patient_data: Union[List[str], Dict], callback=None) -> str:
233
  """Generate the complete patient summary by stitching together 4 sections generated in parallel."""
234
- logging.getLogger(__name__).info(f"Generating patient summary in parallel sections using model: {self.model_name} ({self.model_type})...")
235
 
236
  try:
237
- sections = self.generate_summary_sections(patient_data, callback)
238
 
239
  # Stitch sections together
240
  final_summary = "\n\n".join(
@@ -259,7 +253,7 @@ class PatientSummarizerAgent:
259
  return final_output
260
 
261
  except Exception as e:
262
- logging.getLogger(__name__).exception(f"Error during parallel summary generation: {e}")
263
  import traceback
264
  traceback.print_exc()
265
  return f"Error generating patient summary: {str(e)}"
@@ -414,20 +408,20 @@ class PatientSummarizerAgent:
414
  has_afib = any("atrial fibrillation" in dx.lower() for dx in last_enc.get('diagnosis', []))
415
  on_anticoag = any("warfarin" in med.lower() or "apixaban" in med.lower() for med in last_enc.get('medications', []))
416
  if has_afib:
417
- evaluation += " - \u2705 Patient with Atrial Fibrillation is on anticoagulation.\n" if on_anticoag \
418
- else " - \u274c Atrial Fibrillation present but no anticoagulant prescribed.\n"
419
 
420
  has_mi = any("myocardial infarction" in hx.lower() for hx in result.get('past_medical_history', []))
421
  on_statin = any("atorvastatin" in med.lower() or "statin" in med.lower() for med in last_enc.get('medications', []))
422
  if has_mi:
423
- evaluation += " - \u2705 Patient with MI history is on statin therapy.\n" if on_statin \
424
- else " - \u274c Patient with MI history is not on statin therapy.\n"
425
 
426
  has_aki = any("acute kidney injury" in dx.lower() for dx in last_enc.get('diagnosis', []))
427
  acei_held = "hold" in last_enc.get('dr_notes', '').lower() and "lisinopril" in last_enc.get('dr_notes', '')
428
  if has_aki:
429
- evaluation += " - \u2705 AKI noted and ACE inhibitor was appropriately held.\n" if acei_held \
430
- else " - \u26a0\ufe0f AKI present but ACE inhibitor not documented as held.\n"
431
 
432
  evaluation += (
433
  "\nDisclaimer: This is a simulated evaluation and not a substitute for clinical judgment.\n"
@@ -439,7 +433,7 @@ class PatientSummarizerAgent:
439
  self.model_name = model_name
440
  self.model_type = model_type
441
  self._initialize_model_loader()
442
- logging.getLogger(__name__).info(f"Model updated to: {self.model_name} ({self.model_type})")
443
 
444
  def get_model_info(self) -> dict:
445
  """Get information about the current model"""
 
6
  from typing import List, Dict, Union, Optional
7
  from textwrap import fill
8
  import concurrent.futures
 
9
 
10
  # Suppress non-critical warnings
11
  warnings.filterwarnings("ignore", category=UserWarning)
 
27
 
28
  # Initialize model loader through unified model manager
29
  self.model_loader = None
30
+ self._initialize_model_loader()
31
+
32
+ print(f"✅ PatientSummarizerAgent initialized with {model_name} ({model_type}) on {self.device}")
 
 
 
33
 
34
  def _initialize_model_loader(self):
35
  """Initialize the model loader using the unified model manager"""
 
68
  self.model_type
69
  )
70
 
71
+ print(f"✅ Model loader initialized: {self.model_name} ({self.model_type})")
 
72
 
73
  except Exception as e:
74
+ print(f"Failed to initialize model loader: {e}")
75
  # Create a fallback loader
76
  self._create_fallback_loader()
77
 
 
97
  return self.generate(prompt, **kwargs)
98
 
99
  self.model_loader = FallbackLoader(self.model_name, self.model_type)
100
+ print(f"⚠️ Using fallback loader for {self.model_name}")
 
101
 
102
  def generate_clinical_summary(self, patient_data: Union[List[str], Dict]) -> str:
103
  """Generate a comprehensive clinical summary using the unified model manager"""
104
+ print(f"✨ Generating clinical summary using model: {self.model_name} ({self.model_type})...")
 
105
 
106
  try:
107
  # Build the narrative prompt
108
  narrative_history = self.build_chronological_narrative(patient_data)
109
+ print(f"\n--- Prompt Sent to Model (truncated) ---\n{fill(narrative_history, width=80)[:1000]}...")
110
 
111
  # Generate summary using the model loader
112
  if hasattr(self.model_loader, 'generate_full_summary'):
 
125
  top_p=0.9
126
  )
127
 
128
+ print(f"\n--- Raw Model Output ---\n{fill(raw_summary_text, width=80)}")
129
 
130
  # Format the output
131
  formatted_report = self.format_clinical_output(raw_summary_text, patient_data)
 
145
  return final_output
146
 
147
  except Exception as e:
148
+ print(f"Error during summary generation: {e}")
149
  import traceback
150
  traceback.print_exc()
151
  return f"Error generating summary: {str(e)}"
 
167
  )
168
 
169
  def _generate_section_with_instance(self, prompt: str, section_name: str) -> tuple:
170
+ """Generate a section using a dedicated model instance."""
171
  try:
172
+ # Create a dedicated model loader instance for this section
173
+ from ..utils.model_manager import model_manager
174
+ model_loader = model_manager.get_model_loader(
175
+ self.model_name,
176
+ self.model_type
177
+ )
178
+
179
+ # Generate the section
180
+ if hasattr(model_loader, 'generate_full_summary'):
181
+ result = model_loader.generate_full_summary(
182
  prompt,
183
  max_tokens=256, # Reduced for speed
184
  max_loops=1
185
  )
186
  else:
187
+ result = model_loader.generate(
188
  prompt,
189
  max_new_tokens=256, # Reduced for speed
190
  temperature=0.7,
 
194
  except Exception as e:
195
  return section_name, f"Error generating section {section_name}: {e}"
196
 
197
+ def generate_summary_sections(self, patient_data: Union[List[str], Dict]) -> Dict[str, str]:
198
  """Generate the 4 summary sections in parallel using dedicated model instances."""
 
 
199
  narrative_history = self.build_chronological_narrative(patient_data)
200
 
201
  # Define prompts for each section with markdown format instruction
 
218
  try:
219
  section_name, result = future.result()
220
  results[section_name] = result
 
 
221
  except Exception as exc:
222
  results[section] = f"Error generating section {section}: {exc}"
 
 
223
 
224
  return results
225
 
226
+ def generate_patient_summary(self, patient_data: Union[List[str], Dict]) -> str:
227
  """Generate the complete patient summary by stitching together 4 sections generated in parallel."""
228
+ print(f"Generating patient summary in parallel sections using model: {self.model_name} ({self.model_type})...")
229
 
230
  try:
231
+ sections = self.generate_summary_sections(patient_data)
232
 
233
  # Stitch sections together
234
  final_summary = "\n\n".join(
 
253
  return final_output
254
 
255
  except Exception as e:
256
+ print(f"Error during parallel summary generation: {e}")
257
  import traceback
258
  traceback.print_exc()
259
  return f"Error generating patient summary: {str(e)}"
 
408
  has_afib = any("atrial fibrillation" in dx.lower() for dx in last_enc.get('diagnosis', []))
409
  on_anticoag = any("warfarin" in med.lower() or "apixaban" in med.lower() for med in last_enc.get('medications', []))
410
  if has_afib:
411
+ evaluation += " - Patient with Atrial Fibrillation is on anticoagulation.\n" if on_anticoag \
412
+ else " - Atrial Fibrillation present but no anticoagulant prescribed.\n"
413
 
414
  has_mi = any("myocardial infarction" in hx.lower() for hx in result.get('past_medical_history', []))
415
  on_statin = any("atorvastatin" in med.lower() or "statin" in med.lower() for med in last_enc.get('medications', []))
416
  if has_mi:
417
+ evaluation += " - Patient with MI history is on statin therapy.\n" if on_statin \
418
+ else " - Patient with MI history is not on statin therapy.\n"
419
 
420
  has_aki = any("acute kidney injury" in dx.lower() for dx in last_enc.get('diagnosis', []))
421
  acei_held = "hold" in last_enc.get('dr_notes', '').lower() and "lisinopril" in last_enc.get('dr_notes', '')
422
  if has_aki:
423
+ evaluation += " - AKI noted and ACE inhibitor was appropriately held.\n" if acei_held \
424
+ else " - ⚠️ AKI present but ACE inhibitor not documented as held.\n"
425
 
426
  evaluation += (
427
  "\nDisclaimer: This is a simulated evaluation and not a substitute for clinical judgment.\n"
 
433
  self.model_name = model_name
434
  self.model_type = model_type
435
  self._initialize_model_loader()
436
+ print(f"Model updated to: {model_name} ({model_type})")
437
 
438
  def get_model_info(self) -> dict:
439
  """Get information about the current model"""
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/phi_scrubber.py RENAMED
@@ -7,14 +7,13 @@ from functools import wraps
7
 
8
  logger = logging.getLogger(__name__)
9
 
10
-
11
  def log_execution_time():
12
  def decorator(func):
13
  @wraps(func)
14
  def wrapper(*args, **kwargs):
15
  start = time.time()
16
  result = func(*args, **kwargs)
17
- logger.debug(f"\u23f1 {func.__name__} executed in {time.time() - start:.4f}s")
18
  return result
19
  return wrapper
20
  return decorator
@@ -92,7 +91,7 @@ class MedicalTextUtils:
92
  category_times[cat] = category_times.get(cat, 0) + elapsed
93
 
94
  for cat, details in grouped.items():
95
- logger.info(f"\ud83d\udcc2 Category '{cat}': {len(details)} items, time taken: {category_times[cat]:.4f}s")
96
 
97
  return [{"category": k, "detail": v} for k, v in grouped.items()]
98
 
@@ -141,7 +140,7 @@ class MedicalTextUtils:
141
  json_start = text.index('[')
142
  json_text = text[json_start:]
143
  except ValueError:
144
- logger.warning("\u26a0 '[' not found in output")
145
  return []
146
 
147
  try:
@@ -166,7 +165,7 @@ class MedicalTextUtils:
166
  obj = json.loads(obj_str)
167
  extracted.append(obj)
168
  except Exception as e:
169
- logger.error(f"\u274c Invalid JSON object: {e}")
170
  obj_start = None
171
  return extracted
172
 
@@ -200,7 +199,7 @@ class MedicalTextUtils:
200
  - Radiology
201
  - Doctor Note
202
 
203
- If it doesn\u2019t fit, create a new category.
204
 
205
  Text:
206
  {chunk}
@@ -215,7 +214,7 @@ class MedicalTextUtils:
215
  do_sample=True,
216
  temperature=0.3
217
  )[0]["generated_text"]
218
- logger.info(f"\ud83d\udce4 Output from chunk {idx}: {output}...")
219
  return idx, output
220
  except Exception as e:
221
  logger.error("Error processing chunk %d: %s", idx, e)
 
7
 
8
  logger = logging.getLogger(__name__)
9
 
 
10
  def log_execution_time():
11
  def decorator(func):
12
  @wraps(func)
13
  def wrapper(*args, **kwargs):
14
  start = time.time()
15
  result = func(*args, **kwargs)
16
+ logger.debug(f" {func.__name__} executed in {time.time() - start:.4f}s")
17
  return result
18
  return wrapper
19
  return decorator
 
91
  category_times[cat] = category_times.get(cat, 0) + elapsed
92
 
93
  for cat, details in grouped.items():
94
+ logger.info(f"📂 Category '{cat}': {len(details)} items, time taken: {category_times[cat]:.4f}s")
95
 
96
  return [{"category": k, "detail": v} for k, v in grouped.items()]
97
 
 
140
  json_start = text.index('[')
141
  json_text = text[json_start:]
142
  except ValueError:
143
+ logger.warning(" '[' not found in output")
144
  return []
145
 
146
  try:
 
165
  obj = json.loads(obj_str)
166
  extracted.append(obj)
167
  except Exception as e:
168
+ logger.error(f" Invalid JSON object: {e}")
169
  obj_start = None
170
  return extracted
171
 
 
199
  - Radiology
200
  - Doctor Note
201
 
202
+ If it doesn’t fit, create a new category.
203
 
204
  Text:
205
  {chunk}
 
214
  do_sample=True,
215
  temperature=0.3
216
  )[0]["generated_text"]
217
+ logger.info(f"📤 Output from chunk {idx}: {output}...")
218
  return idx, output
219
  except Exception as e:
220
  logger.error("Error processing chunk %d: %s", idx, e)
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/summarizer.py RENAMED
@@ -124,7 +124,7 @@ class SummarizerAgent:
124
  summary_text = summary_text.strip()
125
 
126
  full_summary += summary_text + "\n\n" # Concatenate summaries with spacing
127
-
128
  summary = full_summary.strip()
129
 
130
  # Ensure required fields are included
 
124
  summary_text = summary_text.strip()
125
 
126
  full_summary += summary_text + "\n\n" # Concatenate summaries with spacing
127
+
128
  summary = full_summary.strip()
129
 
130
  # Ensure required fields are included
{services/ai-service/src/ai_med_extract → ai_med_extract}/agents/text_extractor.py RENAMED
@@ -38,20 +38,13 @@ class TextExtractorAgent:
38
  @staticmethod
39
  def extract_text_from_image(filepath):
40
  image = cv2.imread(filepath)
41
- if image is None:
42
- return None
43
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
44
  _, processed = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
45
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
46
  processed_path = temp_file.name
47
  cv2.imwrite(processed_path, processed)
48
- try:
49
- text = pytesseract.image_to_string(Image.open(processed_path), lang='eng')
50
- finally:
51
- try:
52
- os.remove(processed_path)
53
- except Exception:
54
- pass
55
  return text.strip() or None
56
 
57
  @staticmethod
@@ -71,3 +64,120 @@ class TextExtractorAgent:
71
  for df in dfs.values()
72
  ])
73
  return text.strip() or None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  @staticmethod
39
  def extract_text_from_image(filepath):
40
  image = cv2.imread(filepath)
 
 
41
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
42
  _, processed = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
43
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
44
  processed_path = temp_file.name
45
  cv2.imwrite(processed_path, processed)
46
+ text = pytesseract.image_to_string(Image.open(processed_path), lang='eng')
47
+ os.remove(processed_path)
 
 
 
 
 
48
  return text.strip() or None
49
 
50
  @staticmethod
 
64
  for df in dfs.values()
65
  ])
66
  return text.strip() or None
67
+
68
+ # import pytesseract
69
+ # import cv2
70
+ # from PIL import Image
71
+ # from docx import Document
72
+ # from PyPDF2 import PdfReader
73
+ # from pdf2image import convert_from_path
74
+ # from concurrent.futures import ThreadPoolExecutor
75
+ # import tempfile
76
+ # import os
77
+ # import logging
78
+ # import numpy as np
79
+
80
+ # logger = logging.getLogger(__name__)
81
+
82
+ # class TextExtractorAgent:
83
+ # @staticmethod
84
+ # def extract_text(filepath, ext, password=None):
85
+ # try:
86
+ # ext = ext.lower()
87
+ # if ext == "pdf":
88
+ # return TextExtractorAgent.extract_text_from_pdf(filepath, password)
89
+ # elif ext in {"jpg", "jpeg", "png"}:
90
+ # return TextExtractorAgent.extract_text_from_image(filepath)
91
+ # elif ext == "docx":
92
+ # return TextExtractorAgent.extract_text_from_docx(filepath)
93
+ # return None
94
+ # except Exception as e:
95
+ # logger.error(f"Text extraction failed: {e}")
96
+ # return None
97
+
98
+ # @staticmethod
99
+ # def is_blurred(image_path, variance_threshold=150):
100
+ # try:
101
+ # image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
102
+ # if image is None:
103
+ # logger.error(f"Unable to read image: {image_path}")
104
+ # return True
105
+
106
+ # laplacian_var = cv2.Laplacian(image, cv2.CV_64F).var()
107
+ # edges = cv2.Canny(image, 50, 150)
108
+ # edge_density = np.mean(edges)
109
+
110
+ # logger.info(f"Laplacian: {laplacian_var:.2f}, Edge Density: {edge_density:.2f}")
111
+ # is_blurry = laplacian_var < variance_threshold and edge_density < 10
112
+
113
+ # if is_blurry:
114
+ # logger.warning(f"Image '{image_path}' flagged as blurry.")
115
+ # return is_blurry
116
+ # except Exception as e:
117
+ # logger.exception(f"Error checking blur for '{image_path}': {e}")
118
+ # return True
119
+
120
+ # @staticmethod
121
+ # def extract_text_from_image(filepath):
122
+ # try:
123
+ # if TextExtractorAgent.is_blurred(filepath):
124
+ # logger.warning(f"OCR skipped: '{filepath}' is too blurry.")
125
+ # return "Image is too blurry, OCR failed."
126
+
127
+ # image = cv2.imread(filepath)
128
+ # gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
129
+ # gray = cv2.GaussianBlur(gray, (5, 5), 0)
130
+ # gray = cv2.adaptiveThreshold(
131
+ # gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
132
+ # )
133
+ # gray = cv2.dilate(gray, np.ones((2, 2), np.uint8), iterations=1)
134
+
135
+ # with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
136
+ # processed_path = temp_file.name
137
+ # cv2.imwrite(processed_path, gray)
138
+
139
+ # text = pytesseract.image_to_string(Image.open(processed_path), lang="eng").strip()
140
+ # os.remove(processed_path)
141
+
142
+ # if len(text.split()) < 5:
143
+ # logger.warning(f"Too little OCR output from '{filepath}'.")
144
+ # return "OCR failed to extract meaningful text."
145
+
146
+ # return text
147
+ # except Exception as e:
148
+ # logger.exception(f"OCR failed for image '{filepath}': {e}")
149
+ # return "Failed to extract text"
150
+
151
+ # @staticmethod
152
+ # def extract_text_from_pdf(filepath, password=None):
153
+ # try:
154
+ # reader = PdfReader(filepath)
155
+ # if reader.is_encrypted:
156
+ # if not password:
157
+ # return {"error": "File is password-protected."}, 401
158
+ # if reader.decrypt(password) == 0:
159
+ # return {"error": "Invalid password."}, 403
160
+
161
+ # text = "\n".join([page.extract_text() or "" for page in reader.pages])
162
+ # if text.strip():
163
+ # return text.strip(), 200
164
+
165
+ # logger.info("Falling back to OCR for PDF.")
166
+ # images = convert_from_path(filepath)
167
+ # with ThreadPoolExecutor(max_workers=5) as pool:
168
+ # ocr_text = list(pool.map(lambda img: pytesseract.image_to_string(img, lang="eng"), images))
169
+ # full_text = "\n".join(ocr_text).strip()
170
+ # return (full_text, 200) if full_text else ("No text found", 415)
171
+ # except Exception as e:
172
+ # logger.exception(f"PDF processing error: {filepath}")
173
+ # return "Failed to extract text"
174
+
175
+ # @staticmethod
176
+ # def extract_text_from_docx(filepath):
177
+ # try:
178
+ # doc = Document(filepath)
179
+ # text = "\n".join([para.text for para in doc.paragraphs])
180
+ # return text.strip() or None
181
+ # except Exception as e:
182
+ # logger.exception(f"Failed to extract text from DOCX: {filepath}")
183
+ # return None
ai_med_extract/api/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # ai_med_extract/api/__init__.py
ai_med_extract/api/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (177 Bytes). View file
 
ai_med_extract/api/__pycache__/routes.cpython-311.pyc ADDED
Binary file (90 kB). View file
 
ai_med_extract/api/model_management.py ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Dynamic Model Management API
3
+ Allows runtime loading, switching, and management of different model types
4
+ """
5
+
6
+ from flask import Blueprint, request, jsonify
7
+ import logging
8
+ from typing import Dict, Any, Optional
9
+ import torch
10
+
11
+ from ..utils.model_manager import model_manager
12
+ from ..utils.model_config import (
13
+ get_default_model,
14
+ get_fallback_model,
15
+ detect_model_type,
16
+ validate_model_config,
17
+ get_model_info
18
+ )
19
+
20
+ # Configure logging
21
+ logging.basicConfig(level=logging.INFO)
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # Create Blueprint
25
+ model_management_bp = Blueprint('model_management', __name__, url_prefix='/api/models')
26
+
27
+ @model_management_bp.route('/load', methods=['POST'])
28
+ def load_model():
29
+ """
30
+ Load a new model with specified name and type
31
+
32
+ Request body:
33
+ {
34
+ "model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
35
+ "model_type": "gguf",
36
+ "filename": "Phi-3-mini-4k-instruct-q4.gguf", # Optional for GGUF
37
+ "force_reload": false # Optional, force reload even if cached
38
+ }
39
+ """
40
+ try:
41
+ data = request.get_json()
42
+ if not data:
43
+ return jsonify({"error": "No data provided"}), 400
44
+
45
+ model_name = data.get("model_name")
46
+ model_type = data.get("model_type")
47
+ filename = data.get("filename")
48
+ force_reload = data.get("force_reload", False)
49
+
50
+ if not model_name:
51
+ return jsonify({"error": "model_name is required"}), 400
52
+
53
+ # Auto-detect model type if not provided
54
+ if not model_type:
55
+ model_type = detect_model_type(model_name)
56
+ logger.info(f"Auto-detected model type: {model_type} for {model_name}")
57
+
58
+ # Validate model configuration
59
+ validation = validate_model_config(model_name, model_type)
60
+ if not validation["valid"]:
61
+ return jsonify({
62
+ "error": "Invalid model configuration",
63
+ "validation": validation
64
+ }), 400
65
+
66
+ # Load the model
67
+ start_time = torch.cuda.Event(enable_timing=True) if torch.cuda.is_available() else None
68
+ end_time = torch.cuda.Event(enable_timing=True) if torch.cuda.is_available() else None
69
+
70
+ if start_time:
71
+ start_time.record()
72
+
73
+ loader = model_manager.get_model_loader(model_name, model_type, filename, force_reload)
74
+
75
+ if end_time:
76
+ end_time.record()
77
+ torch.cuda.synchronize()
78
+ load_time = start_time.elapsed_time(end_time) / 1000.0 # Convert to seconds
79
+ else:
80
+ load_time = None
81
+
82
+ # Get model information
83
+ model_info = loader.get_model_info()
84
+ model_info["load_time_seconds"] = load_time
85
+
86
+ return jsonify({
87
+ "success": True,
88
+ "message": f"Model {model_name} ({model_type}) loaded successfully",
89
+ "model_info": model_info,
90
+ "validation": validation
91
+ }), 200
92
+
93
+ except Exception as e:
94
+ logger.error(f"Failed to load model: {str(e)}", exc_info=True)
95
+ return jsonify({
96
+ "success": False,
97
+ "error": f"Model loading failed: {str(e)}"
98
+ }), 500
99
+
100
+ @model_management_bp.route('/generate', methods=['POST'])
101
+ def generate_text():
102
+ """
103
+ Generate text using a specific model
104
+
105
+ Request body:
106
+ {
107
+ "model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
108
+ "model_type": "gguf",
109
+ "filename": "Phi-3-mini-4k-instruct-q4.gguf", # Optional for GGUF
110
+ "prompt": "Generate a medical summary for...",
111
+ "max_tokens": 512,
112
+ "temperature": 0.7,
113
+ "top_p": 0.95
114
+ }
115
+ """
116
+ try:
117
+ data = request.get_json()
118
+ if not data:
119
+ return jsonify({"error": "No data provided"}), 400
120
+
121
+ model_name = data.get("model_name")
122
+ model_type = data.get("model_type")
123
+ filename = data.get("filename")
124
+ prompt = data.get("prompt")
125
+
126
+ if not all([model_name, prompt]):
127
+ return jsonify({"error": "model_name and prompt are required"}), 400
128
+
129
+ # Auto-detect model type if not provided
130
+ if not model_type:
131
+ model_type = detect_model_type(model_name)
132
+
133
+ # Generate text
134
+ start_time = torch.cuda.Event(enable_timing=True) if torch.cuda.is_available() else None
135
+ end_time = torch.cuda.Event(enable_timing=True) if torch.cuda.is_available() else None
136
+
137
+ if start_time:
138
+ start_time.record()
139
+
140
+ generated_text = model_manager.generate_text(
141
+ model_name,
142
+ model_type,
143
+ prompt,
144
+ filename,
145
+ **{k: v for k, v in data.items() if k not in ["model_name", "model_type", "filename", "prompt"]}
146
+ )
147
+
148
+ if end_time:
149
+ end_time.record()
150
+ torch.cuda.synchronize()
151
+ generation_time = start_time.elapsed_time(end_time) / 1000.0
152
+ else:
153
+ generation_time = None
154
+
155
+ return jsonify({
156
+ "success": True,
157
+ "generated_text": generated_text,
158
+ "model_name": model_name,
159
+ "model_type": model_type,
160
+ "generation_time_seconds": generation_time,
161
+ "text_length": len(generated_text)
162
+ }), 200
163
+
164
+ except Exception as e:
165
+ logger.error(f"Text generation failed: {str(e)}", exc_info=True)
166
+ return jsonify({
167
+ "success": False,
168
+ "error": f"Text generation failed: {str(e)}"
169
+ }), 500
170
+
171
+ @model_management_bp.route('/info', methods=['GET'])
172
+ def get_model_information():
173
+ """
174
+ Get information about a specific model or all loaded models
175
+
176
+ Query parameters:
177
+ - model_name: Optional, specific model to get info for
178
+ - model_type: Optional, filter by model type
179
+ """
180
+ try:
181
+ model_name = request.args.get("model_name")
182
+ model_type = request.args.get("model_type")
183
+
184
+ if model_name:
185
+ # Get info for specific model
186
+ if not model_type:
187
+ model_type = detect_model_type(model_name)
188
+
189
+ validation = validate_model_config(model_name, model_type)
190
+ model_info = get_model_info(model_name, model_type)
191
+
192
+ return jsonify({
193
+ "success": True,
194
+ "model_info": model_info,
195
+ "validation": validation
196
+ }), 200
197
+ else:
198
+ # Get info for all loaded models
199
+ loaded_models = model_manager.list_loaded_models()
200
+
201
+ # Filter by type if specified
202
+ if model_type:
203
+ loaded_models = {
204
+ k: v for k, v in loaded_models.items()
205
+ if v.get("model_type") == model_type
206
+ }
207
+
208
+ return jsonify({
209
+ "success": True,
210
+ "loaded_models": loaded_models,
211
+ "total_models": len(loaded_models)
212
+ }), 200
213
+
214
+ except Exception as e:
215
+ logger.error(f"Failed to get model information: {str(e)}", exc_info=True)
216
+ return jsonify({
217
+ "success": False,
218
+ "error": f"Failed to get model information: {str(e)}"
219
+ }), 500
220
+
221
+ @model_management_bp.route('/defaults', methods=['GET'])
222
+ def get_default_models():
223
+ """
224
+ Get default models for different model types
225
+ """
226
+ try:
227
+ from ..utils.model_config import DEFAULT_MODELS, SPACES_OPTIMIZED_MODELS
228
+
229
+ return jsonify({
230
+ "success": True,
231
+ "default_models": DEFAULT_MODELS,
232
+ "spaces_optimized_models": SPACES_OPTIMIZED_MODELS
233
+ }), 200
234
+
235
+ except Exception as e:
236
+ logger.error(f"Failed to get default models: {str(e)}", exc_info=True)
237
+ return jsonify({
238
+ "success": False,
239
+ "error": f"Failed to get default models: {str(e)}"
240
+ }), 500
241
+
242
+ @model_management_bp.route('/clear_cache', methods=['POST'])
243
+ def clear_model_cache():
244
+ """
245
+ Clear the model cache and free memory
246
+ """
247
+ try:
248
+ # Get cache info before clearing
249
+ loaded_models = model_manager.list_loaded_models()
250
+ cache_size = len(loaded_models)
251
+
252
+ # Clear cache
253
+ model_manager.clear_cache()
254
+
255
+ return jsonify({
256
+ "success": True,
257
+ "message": f"Model cache cleared successfully",
258
+ "cleared_models": cache_size,
259
+ "memory_freed": "GPU and CPU memory cleared"
260
+ }), 200
261
+
262
+ except Exception as e:
263
+ logger.error(f"Failed to clear cache: {str(e)}", exc_info=True)
264
+ return jsonify({
265
+ "success": False,
266
+ "error": f"Failed to clear cache: {str(e)}"
267
+ }), 500
268
+
269
+ @model_management_bp.route('/switch', methods=['POST'])
270
+ def switch_model():
271
+ """
272
+ Switch the model used by a specific agent
273
+
274
+ Request body:
275
+ {
276
+ "agent_name": "patient_summarizer",
277
+ "model_name": "microsoft/Phi-3-mini-4k-instruct-gguf",
278
+ "model_type": "gguf",
279
+ "filename": "Phi-3-mini-4k-instruct-q4.gguf" # Optional for GGUF
280
+ }
281
+ """
282
+ try:
283
+ data = request.get_json()
284
+ if not data:
285
+ return jsonify({"error": "No data provided"}), 400
286
+
287
+ agent_name = data.get("agent_name")
288
+ model_name = data.get("model_name")
289
+ model_type = data.get("model_type")
290
+ filename = data.get("filename")
291
+
292
+ if not all([agent_name, model_name]):
293
+ return jsonify({"error": "agent_name and model_name are required"}), 400
294
+
295
+ # Auto-detect model type if not provided
296
+ if not model_type:
297
+ model_type = detect_model_type(model_name)
298
+
299
+ # Validate model configuration
300
+ validation = validate_model_config(model_name, model_type)
301
+ if not validation["valid"]:
302
+ return jsonify({
303
+ "error": "Invalid model configuration",
304
+ "validation": validation
305
+ }), 400
306
+
307
+ # Get the agent from the current app context
308
+ from flask import current_app
309
+ agents = getattr(current_app, 'agents', {})
310
+
311
+ if agent_name not in agents:
312
+ return jsonify({
313
+ "error": f"Agent '{agent_name}' not found",
314
+ "available_agents": list(agents.keys())
315
+ }), 404
316
+
317
+ agent = agents[agent_name]
318
+
319
+ # Update the agent's model if it supports it
320
+ if hasattr(agent, 'update_model'):
321
+ agent.update_model(model_name, model_type)
322
+ message = f"Agent '{agent_name}' model updated to {model_name} ({model_type})"
323
+ elif hasattr(agent, 'model_loader'):
324
+ # Try to update the model loader
325
+ try:
326
+ from ..utils.model_manager import model_manager
327
+ agent.model_loader = model_manager.get_model_loader(model_name, model_type, filename)
328
+ message = f"Agent '{agent_name}' model loader updated to {model_name} ({model_type})"
329
+ except Exception as e:
330
+ return jsonify({
331
+ "error": f"Failed to update agent model loader: {str(e)}"
332
+ }), 500
333
+ else:
334
+ return jsonify({
335
+ "error": f"Agent '{agent_name}' does not support model switching"
336
+ }), 400
337
+
338
+ return jsonify({
339
+ "success": True,
340
+ "message": message,
341
+ "agent_name": agent_name,
342
+ "model_name": model_name,
343
+ "model_type": model_type,
344
+ "validation": validation
345
+ }), 200
346
+
347
+ except Exception as e:
348
+ logger.error(f"Failed to switch model: {str(e)}", exc_info=True)
349
+ return jsonify({
350
+ "success": False,
351
+ "error": f"Failed to switch model: {str(e)}"
352
+ }), 500
353
+
354
+ @model_management_bp.route('/health', methods=['GET'])
355
+ def model_health_check():
356
+ """
357
+ Health check for the model management system
358
+ """
359
+ try:
360
+ # Check if model manager is accessible
361
+ loaded_models = model_manager.list_loaded_models()
362
+
363
+ # Check GPU memory if available
364
+ gpu_info = {}
365
+ if torch.cuda.is_available():
366
+ gpu_info = {
367
+ "available": True,
368
+ "device_count": torch.cuda.device_count(),
369
+ "current_device": torch.cuda.current_device(),
370
+ "memory_allocated": f"{torch.cuda.memory_allocated() / 1024**3:.2f} GB",
371
+ "memory_reserved": f"{torch.cuda.memory_reserved() / 1024**3:.2f} GB"
372
+ }
373
+ else:
374
+ gpu_info = {"available": False}
375
+
376
+ return jsonify({
377
+ "success": True,
378
+ "status": "healthy",
379
+ "model_manager": "operational",
380
+ "loaded_models_count": len(loaded_models),
381
+ "gpu_info": gpu_info,
382
+ "timestamp": torch.cuda.Event(enable_timing=True).elapsed_time(torch.cuda.Event(enable_timing=True)) if torch.cuda.is_available() else None
383
+ }), 200
384
+
385
+ except Exception as e:
386
+ logger.error(f"Health check failed: {str(e)}", exc_info=True)
387
+ return jsonify({
388
+ "success": False,
389
+ "status": "unhealthy",
390
+ "error": f"Health check failed: {str(e)}"
391
+ }), 500
392
+
393
+ # Register the blueprint
394
+ def register_model_management_routes(app):
395
+ """Register model management routes with the Flask app"""
396
+ app.register_blueprint(model_management_bp)
397
+ logger.info("Model management routes registered successfully")
services/ai-service/src/ai_med_extract/api/routes_fastapi.py → ai_med_extract/api/routes.py RENAMED
The diff for this file is too large to render. See raw diff
 
ai_med_extract/app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from flask import Flask, jsonify
4
+ from flask_cors import CORS
5
+ import whisper
6
+ from dotenv import load_dotenv
7
+ from .agents.text_extractor import TextExtractorAgent
8
+ from .agents.phi_scrubber import PHIScrubberAgent
9
+ from .agents.phi_scrubber import MedicalTextUtils
10
+ from .agents.summarizer import SummarizerAgent
11
+ from .agents.medical_data_extractor import MedicalDataExtractorAgent
12
+ from .agents.medical_data_extractor import MedicalDocDataExtractorAgent
13
+ from .agents.patient_summary_agent import PatientSummarizerAgent
14
+ from .utils.model_manager import model_manager
15
+ import torch
16
+ torch.set_num_threads(1) # CPU efficiency for HF Spaces
17
+
18
# Load environment variables (python-dotenv; reads a .env file when one is found)
load_dotenv()

# Configure logging: INFO and above, written both to the default stream
# handler and to /tmp/app.log.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('/tmp/app.log')
    ]
)

# Flask application object shared by the rest of this module.
app = Flask(__name__)
# CORS is enabled with flask_cors defaults.
# NOTE(review): no origin restriction is configured here — confirm that is intended.
CORS(app)
33
+
34
+ # Configure upload directory with safe fallbacks (avoid creating /data at import time)
35
+ def _resolve_upload_dir() -> str:
36
+ try:
37
+ # Prefer /data/uploads if it already exists and is writable
38
+ data_dir = '/data/uploads'
39
+ if os.path.isdir('/data') and (os.path.isdir(data_dir) or os.access('/data', os.W_OK)):
40
+ os.makedirs(data_dir, exist_ok=True)
41
+ return data_dir
42
+ except Exception:
43
+ pass
44
+ # Fallback to /tmp/uploads which is always writable on Spaces
45
+ tmp_dir = '/tmp/uploads'
46
+ os.makedirs(tmp_dir, exist_ok=True)
47
+ return tmp_dir
48
+
49
# Resolved once at import time; see _resolve_upload_dir for the selection rules.
app.config['UPLOAD_FOLDER'] = _resolve_upload_dir()
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB max file size

# Set cache directories
# Maps each cache-related environment variable to the /tmp path it is pointed at.
CACHE_DIRS = {
    'HF_HOME': '/tmp/huggingface',
    'XDG_CACHE_HOME': '/tmp',
    'TORCH_HOME': '/tmp/torch',
    'WHISPER_CACHE': '/tmp/whisper'
}

# Export every variable (overwriting any existing value) and make sure the
# directory exists.
# NOTE(review): this runs after the imports at the top of the module (whisper,
# torch, the agents); a library that reads these variables at import time may
# not see them — confirm.
for env_var, path in CACHE_DIRS.items():
    os.environ[env_var] = path
    os.makedirs(path, exist_ok=True)
63
+
64
+ # WhisperModelLoader for audio transcription (CPU-only)
65
class WhisperModelLoader:
    """Lazy holder for the Whisper "tiny" model on CPU.

    ``get_instance`` hands out one shared loader; the model itself is only
    loaded on the first ``load`` / ``transcribe`` call.
    """

    _instance = None  # shared loader, created by the first get_instance()

    def __init__(self):
        self._model = None  # populated by load()

    @staticmethod
    def get_instance():
        """Return the shared loader, creating it on first use."""
        shared = WhisperModelLoader._instance
        if shared is None:
            shared = WhisperModelLoader()
            WhisperModelLoader._instance = shared
        return shared

    def load(self):
        """Return the Whisper model, loading it on the first call.

        A failure while loading is logged with its traceback and re-raised.
        """
        if self._model is not None:
            return self._model
        try:
            logging.info("Loading Whisper tiny model (CPU)...")
            cache_root = os.environ.get('WHISPER_CACHE', '/tmp/whisper')
            self._model = whisper.load_model(
                "tiny",
                device="cpu",  # Explicit CPU for Spaces
                download_root=cache_root,
            )
            logging.info("Whisper model loaded successfully")
        except Exception as e:
            logging.error(f"Failed to load Whisper model: {str(e)}", exc_info=True)
            raise
        return self._model

    def transcribe(self, audio_path):
        """Transcribe ``audio_path`` with FP16 disabled and return the model's result."""
        return self.load().transcribe(audio_path, fp16=False)  # CPU, no FP16
95
+
96
+ # Initialize agents with unified model manager (CPU-friendly small models for HF Spaces)
97
# Build every agent at import time and hand them to the route layer.
# Any exception that escapes the inner fallbacks is logged and re-raised,
# so importing this module fails if initialization fails.
try:
    # model_manager is also imported at the top of the module; this repeats it.
    from .utils.model_manager import model_manager

    # Initialize basic agents that don't require specific models
    text_extractor_agent = TextExtractorAgent()
    phi_scrubber_agent = PHIScrubberAgent()

    # Initialize model-dependent agents with small CPU models
    try:
        # Small summarization model (~400MB)
        summ_loader = model_manager.get_model_loader("facebook/bart-base", "summarization")
        summarizer_agent = SummarizerAgent(summ_loader)
        logging.info("SummarizerAgent initialized with bart-base")
    except Exception as e:
        # Degrade instead of failing: wrap the fallback pipeline in an object
        # exposing generate().
        logging.warning(f"Summarizer fallback: {e}")
        from .utils.model_loader_gguf import create_fallback_pipeline
        class FallbackSummarizer:
            # A new fallback pipeline is created on every generate() call;
            # extra keyword arguments are accepted and ignored.
            def generate(self, text, **kwargs):
                return create_fallback_pipeline().generate_full_summary(text)
        summarizer_agent = SummarizerAgent(FallbackSummarizer())

    try:
        # Small text-generation model (~350MB)
        med_loader = model_manager.get_model_loader("distilgpt2", "text-generation")
        # Unlike the summarizer above, the loaded generator (not the loader)
        # is passed to the agent.
        med_generator = med_loader.load()
        medical_data_extractor_agent = MedicalDataExtractorAgent(med_generator)
        logging.info("MedicalDataExtractorAgent initialized with distilgpt2")
    except Exception as e:
        logging.warning(f"Medical extractor fallback: {e}")
        from .utils.model_loader_gguf import create_fallback_pipeline
        class FallbackExtractor:
            # A new fallback pipeline is created on every generate() call;
            # extra keyword arguments are accepted and ignored.
            def generate(self, prompt, **kwargs):
                return create_fallback_pipeline().generate(prompt)
        medical_data_extractor_agent = MedicalDataExtractorAgent(FallbackExtractor())

    # Initialize patient summarizer with small model
    try:
        patient_summarizer_agent = PatientSummarizerAgent(
            model_name="sshleifer/distilbart-cnn-6-6",  # Smaller medical-like summarizer (~1GB)
            model_type="summarization"
        )
        logging.info("PatientSummarizerAgent initialized with distilbart")
    except Exception as e:
        # Second attempt with a different model; if this also raises, the
        # outer handler below logs and re-raises.
        logging.warning(f"Patient summarizer fallback: {e}")
        patient_summarizer_agent = PatientSummarizerAgent(
            model_name="facebook/bart-base",
            model_type="summarization"
        )

    # Pass all agents and models to routes
    agents = {
        "text_extractor": text_extractor_agent,
        "phi_scrubber": phi_scrubber_agent,
        "summarizer": summarizer_agent,
        "medical_data_extractor": medical_data_extractor_agent,
        # The loader object is stored, not a loaded model.
        "whisper_model": WhisperModelLoader.get_instance(),
        "patient_summarizer": patient_summarizer_agent,
        "model_manager": model_manager,  # Add unified model manager
    }

    # Imported here rather than at the top of the module.
    # NOTE(review): presumably to avoid a circular import — confirm.
    from .api.routes import register_routes
    register_routes(app, agents)

    logging.info("Application initialized successfully with CPU-friendly models")

except Exception as e:
    logging.error(f"Failed to initialize application: {str(e)}", exc_info=True)
    raise
165
+
166
@app.errorhandler(Exception)
def handle_error(error):
    """Convert an exception that escaped a view into a JSON error response.

    HTTP errors (for example 404, 405, or 413 when an upload exceeds
    ``MAX_CONTENT_LENGTH``) keep their own 4xx/5xx status code instead of
    being reported as 500. Every other exception is logged with its
    traceback and answered with HTTP 500.

    Args:
        error: The exception instance passed in by Flask.

    Returns:
        A ``(response, status)`` tuple whose JSON body is
        ``{"error": <message>, "status": "error"}``.
    """
    status_code = getattr(error, "code", None)
    # Werkzeug's HTTP exceptions expose an integer ``code`` and a
    # ``get_response`` method; recognising them by shape avoids adding an
    # import this module does not already have.
    is_http_error = (
        isinstance(status_code, int)
        and 400 <= status_code < 600
        and callable(getattr(error, "get_response", None))
    )
    if is_http_error:
        # Client/HTTP-level problem: no traceback needed.
        logging.warning(f"HTTP error {status_code}: {str(error)}")
    else:
        status_code = 500
        logging.error(f"Unhandled error: {str(error)}", exc_info=True)
    return jsonify({
        "error": str(error),
        "status": "error"
    }), status_code
173
+
174
# Direct execution only: serve on all interfaces, port 7860, with debug off.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860, debug=False)
ai_med_extract/gradio_app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from .app import agents
4
+ import tempfile
5
+ import logging
6
+
7
+ logging.basicConfig(level=logging.INFO)
8
+
9
def process_document(file, process_type):
    """Run one processing pipeline on an uploaded file.

    The upload is copied to a temporary file (keeping its extension) and
    handed to the agent(s) selected by ``process_type``. The temporary file
    is removed afterwards, including when processing fails.

    Args:
        file: Uploaded file object exposing ``name`` and ``read()``.
        process_type: One of ``"text_extraction"``, ``"medical_data"``,
            ``"summarization"`` or ``"audio_transcription"``. Any other
            value produces an empty result.

    Returns:
        A dict of results keyed by output name, or ``{"error": message}``
        if anything raised.
    """
    temp_path = None
    try:
        # Create a temporary file to store the upload
        suffix = os.path.splitext(file.name)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            # Record the path before writing so a failed write is cleaned up too.
            temp_path = temp_file.name
            temp_file.write(file.read())

        results = {}

        if process_type == "text_extraction":
            results["extracted_text"] = agents["text_extractor"].extract_text(temp_path)
            results["phi_scrubbed"] = agents["phi_scrubber"].scrub_phi(results["extracted_text"])

        elif process_type == "medical_data":
            text = agents["text_extractor"].extract_text(temp_path)
            results["medical_data"] = agents["medical_data_extractor"].extract_medical_data(text)

        elif process_type == "summarization":
            text = agents["text_extractor"].extract_text(temp_path)
            results["summary"] = agents["summarizer"].summarize(text)

        elif process_type == "audio_transcription":
            results["transcription"] = agents["whisper_model"].transcribe(temp_path)

        return results

    except Exception as e:
        logging.error(f"Error processing document: {str(e)}", exc_info=True)
        return {"error": str(e)}

    finally:
        # Clean up temporary file on success and on failure alike; uploads may
        # contain patient data and must not pile up in the temp directory.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                logging.warning("Could not remove temporary file %s", temp_path, exc_info=True)
41
+
42
+ # Create the Gradio interface
43
def create_interface():
    """Build the Gradio Blocks UI and return it without launching it.

    Layout: a title and a short instruction line, then one row with two
    columns — inputs (file upload, processing-type radio, button) and a JSON
    results panel. Clicking the button calls ``process_document`` with the
    file and the selected type and shows its return value in the JSON panel.
    """
    with gr.Blocks(title="Medical Document Processor") as interface:
        gr.Markdown("# Medical Document Processor")
        gr.Markdown("Upload your medical document and select the processing type.")

        with gr.Row():
            with gr.Column():
                file_input = gr.File(label="Upload Document")
                # The choices are exactly the process_type strings that
                # process_document branches on.
                process_type = gr.Radio(
                    choices=["text_extraction", "medical_data", "summarization", "audio_transcription"],
                    label="Processing Type"
                )
                process_btn = gr.Button("Process Document")

            with gr.Column():
                output = gr.JSON(label="Results")

        # Wire the button to the processing function.
        process_btn.click(
            fn=process_document,
            inputs=[file_input, process_type],
            outputs=output
        )

    return interface
67
+
68
# Create and launch the interface
# NOTE(review): this runs at import time (there is no __main__ guard), so
# importing this module starts a server on 0.0.0.0:7860 — the same port
# app.run() uses in app.py. Confirm both are never started in one process.
interface = create_interface()
interface.launch(server_name="0.0.0.0", server_port=7860)
ai_med_extract/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # ai_med_extract/utils/__init__.py
ai_med_extract/utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (179 Bytes). View file
 
{services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/file_utils.cpython-311.pyc RENAMED
Binary files a/services/ai-service/src/ai_med_extract/utils/__pycache__/file_utils.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/file_utils.cpython-311.pyc differ
 
{services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/json_slimmer.cpython-311.pyc RENAMED
Binary files a/services/ai-service/src/ai_med_extract/utils/__pycache__/json_slimmer.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/json_slimmer.cpython-311.pyc differ
 
{services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/model_loader_gguf.cpython-311.pyc RENAMED
Binary files a/services/ai-service/src/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc differ
 
ai_med_extract/utils/__pycache__/model_loader_spaces.cpython-311.pyc ADDED
Binary file (2.16 kB). View file
 
{services/ai-service/src/ai_med_extract → ai_med_extract}/utils/__pycache__/model_manager.cpython-311.pyc RENAMED
Binary files a/services/ai-service/src/ai_med_extract/utils/__pycache__/model_manager.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_manager.cpython-311.pyc differ