"""Flask application entry point.

Configures environment, cache directories, logging and the upload folder,
defines a lazily-loaded Whisper wrapper, builds the processing agents and
registers the API routes on ``app``.
"""

import logging
import os

from dotenv import load_dotenv

# Load environment variables from .env before anything else so that every
# module imported below already sees them.
load_dotenv()

# Set cache directories.
# This must happen BEFORE the ML libraries are imported: libraries such as
# huggingface_hub resolve their cache locations (HF_HOME) when they are first
# imported, so exporting the variables afterwards has no effect on them.
CACHE_DIRS = {
    'HF_HOME': '/tmp/huggingface',
    'XDG_CACHE_HOME': '/tmp',
    'TORCH_HOME': '/tmp/torch',
    'WHISPER_CACHE': '/tmp/whisper',
}
for env_var, path in CACHE_DIRS.items():
    os.environ[env_var] = path
    os.makedirs(path, exist_ok=True)

# The imports below intentionally follow the cache setup above.
import torch  # noqa: E402
import whisper  # noqa: E402
from flask import Flask, jsonify  # noqa: E402
from flask_cors import CORS  # noqa: E402

from .agents.text_extractor import TextExtractorAgent  # noqa: E402
from .agents.phi_scrubber import PHIScrubberAgent  # noqa: E402
from .agents.phi_scrubber import MedicalTextUtils  # noqa: E402,F401
from .agents.summarizer import SummarizerAgent  # noqa: E402
from .agents.medical_data_extractor import MedicalDataExtractorAgent  # noqa: E402
from .agents.medical_data_extractor import MedicalDocDataExtractorAgent  # noqa: E402,F401
from .agents.patient_summary_agent import PatientSummarizerAgent  # noqa: E402
from .utils.model_manager import model_manager  # noqa: E402

torch.set_num_threads(1)  # CPU efficiency for HF Spaces

# Configure logging (console + file under /tmp)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('/tmp/app.log'),
    ],
)

app = Flask(__name__)
CORS(app)


# Configure upload directory with safe fallbacks (avoid creating /data at import time)
def _resolve_upload_dir() -> str:
    """Return the directory uploads should be written to.

    ``/data/uploads`` is preferred when ``/data`` exists and the upload
    directory is (or can be made) writable; ``/data`` itself is never
    created here. Otherwise ``/tmp/uploads`` is created and returned.

    Returns:
        Absolute path of an existing upload directory.

    Raises:
        OSError: If the ``/tmp/uploads`` fallback cannot be created.
    """
    data_root = '/data'
    data_dir = '/data/uploads'
    try:
        if os.path.isdir(data_root):
            if os.path.isdir(data_dir):
                # An existing but read-only directory would make every
                # upload fail later, so only accept it when writable.
                if os.access(data_dir, os.W_OK | os.X_OK):
                    return data_dir
            elif os.access(data_root, os.W_OK):
                os.makedirs(data_dir, exist_ok=True)
                return data_dir
    except OSError as exc:
        # Best effort only: report the problem and use the fallback below.
        logging.warning("Could not use %s for uploads: %s", data_dir, exc)

    # Fallback to /tmp/uploads which is always writable on Spaces
    tmp_dir = '/tmp/uploads'
    os.makedirs(tmp_dir, exist_ok=True)
    return tmp_dir


app.config['UPLOAD_FOLDER'] = _resolve_upload_dir()
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB max file size


# WhisperModelLoader for audio transcription (CPU-only)
class WhisperModelLoader:
    """Process-wide holder that loads the Whisper "tiny" model on first use."""

    # Shared instance handed out by get_instance(); created on first call.
    _instance = None

    def __init__(self):
        # The model is loaded on demand by load(), not at construction time.
        self._model = None

    @staticmethod
    def get_instance():
        """Return the shared ``WhisperModelLoader``, creating it if needed."""
        if WhisperModelLoader._instance is None:
            WhisperModelLoader._instance = WhisperModelLoader()
        return WhisperModelLoader._instance

    def load(self):
        """Load the Whisper model on first call and return it.

        Subsequent calls return the already-loaded model.

        Raises:
            Exception: Whatever ``whisper.load_model`` raised, after the
                failure has been logged.
        """
        if self._model is None:
            try:
                logging.info("Loading Whisper tiny model (CPU)...")
                self._model = whisper.load_model(
                    "tiny",
                    device="cpu",  # Explicit CPU for Spaces
                    download_root=os.environ.get('WHISPER_CACHE', '/tmp/whisper'),
                )
                logging.info("Whisper model loaded successfully")
            except Exception as e:
                logging.error("Failed to load Whisper model: %s", e, exc_info=True)
                raise
        return self._model

    def transcribe(self, audio_path):
        """Transcribe the audio file at ``audio_path``.

        Returns:
            The result of the Whisper model's ``transcribe`` call.
        """
        model = self.load()
        return model.transcribe(audio_path, fp16=False)  # CPU, no FP16


# Initialize agents with unified model manager (CPU-friendly small models for HF Spaces)
try:
    # Initialize basic agents that don't require specific models
    text_extractor_agent = TextExtractorAgent()
    phi_scrubber_agent = PHIScrubberAgent()

    # Initialize model-dependent agents with small CPU models
    try:
        # Small summarization model (~400MB)
        summ_loader = model_manager.get_model_loader("facebook/bart-base", "summarization")
        summarizer_agent = SummarizerAgent(summ_loader)
        logging.info("SummarizerAgent initialized with bart-base")
    except Exception as e:
        logging.warning("Summarizer fallback: %s", e)
        # Imported lazily: only needed when the primary model is unavailable.
        from .utils.model_loader_gguf import create_fallback_pipeline

        class FallbackSummarizer:
            """Stand-in loader that delegates to the fallback pipeline."""

            def generate(self, text, **kwargs):
                return create_fallback_pipeline().generate_full_summary(text)

        summarizer_agent = SummarizerAgent(FallbackSummarizer())

    try:
        # Small text-generation model (~350MB)
        med_loader = model_manager.get_model_loader("distilgpt2", "text-generation")
        med_generator = med_loader.load()
        medical_data_extractor_agent = MedicalDataExtractorAgent(med_generator)
        logging.info("MedicalDataExtractorAgent initialized with distilgpt2")
    except Exception as e:
        logging.warning("Medical extractor fallback: %s", e)
        # Imported lazily: only needed when the primary model is unavailable.
        from .utils.model_loader_gguf import create_fallback_pipeline

        class FallbackExtractor:
            """Stand-in generator that delegates to the fallback pipeline."""

            def generate(self, prompt, **kwargs):
                return create_fallback_pipeline().generate(prompt)

        medical_data_extractor_agent = MedicalDataExtractorAgent(FallbackExtractor())

    # Initialize patient summarizer with small model
    try:
        patient_summarizer_agent = PatientSummarizerAgent(
            model_name="sshleifer/distilbart-cnn-6-6",  # Smaller medical-like summarizer (~1GB)
            model_type="summarization",
        )
        logging.info("PatientSummarizerAgent initialized with distilbart")
    except Exception as e:
        logging.warning("Patient summarizer fallback: %s", e)
        patient_summarizer_agent = PatientSummarizerAgent(
            model_name="facebook/bart-base",
            model_type="summarization",
        )

    # Pass all agents and models to routes
    agents = {
        "text_extractor": text_extractor_agent,
        "phi_scrubber": phi_scrubber_agent,
        "summarizer": summarizer_agent,
        "medical_data_extractor": medical_data_extractor_agent,
        "whisper_model": WhisperModelLoader.get_instance(),
        "patient_summarizer": patient_summarizer_agent,
        "model_manager": model_manager,  # Add unified model manager
    }

    # Imported here so the routes module is only loaded once the agents exist.
    from .api.routes import register_routes

    register_routes(app, agents)
    logging.info("Application initialized successfully with CPU-friendly models")
except Exception as e:
    # Startup failures are fatal: log with traceback and abort the import.
    logging.error("Failed to initialize application: %s", e, exc_info=True)
    raise


@app.errorhandler(Exception)
def handle_error(error):
    """Convert any exception raised during a request into a JSON response.

    HTTP errors raised by the framework (e.g. 404, 405, or 413 when the
    upload exceeds ``MAX_CONTENT_LENGTH``) keep their own status code;
    every other exception is reported as 500.

    Returns:
        A ``(response, status_code)`` tuple with a JSON body containing
        ``error`` and ``status`` keys.
    """
    status_code = 500
    # Werkzeug HTTP errors expose an integer ``code`` and a ``get_response``
    # method; detect them by shape so no extra import is required.
    http_code = getattr(error, "code", None)
    if isinstance(http_code, int) and callable(getattr(error, "get_response", None)):
        status_code = http_code

    if status_code >= 500:
        logging.error("Unhandled error: %s", error, exc_info=True)
    else:
        # Client-side errors are expected traffic; no traceback needed.
        logging.warning("HTTP %s: %s", status_code, error)

    # NOTE(review): the raw exception text is returned to the client, as in
    # the original behaviour — confirm this cannot expose sensitive details.
    return jsonify({
        "error": str(error),
        "status": "error",
    }), status_code


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860, debug=False)