"""Flask application entry point.

Configures logging, the upload directory and model cache directories, defines
lazy loaders for the Hugging Face pipelines and the Whisper model, builds the
text-extraction, PHI-scrubbing, summarization and medical-data-extraction
agents, and registers the API routes on the application.
"""
import json
import logging
import os

from flask import Flask, jsonify
from flask_cors import CORS
import whisper
from dotenv import load_dotenv
from werkzeug.exceptions import HTTPException

from .agents.text_extractor import TextExtractorAgent
from .agents.phi_scrubber import PHIScrubberAgent
from .agents.phi_scrubber import MedicalTextUtils
from .agents.summarizer import SummarizerAgent
from .agents.medical_data_extractor import MedicalDataExtractorAgent
from .agents.medical_data_extractor import MedicalDocDataExtractorAgent
import torch

# Load environment variables
load_dotenv()

# Configure logging (console and a log file under /tmp)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('/tmp/app.log'),
    ],
)

app = Flask(__name__)
CORS(app)

# Configure upload directory
UPLOAD_DIR = '/data/uploads'
os.makedirs(UPLOAD_DIR, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_DIR
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB max file size

# Set cache directories
# NOTE(review): these variables are exported after the third-party imports
# above; a library that resolves its cache path at import time may not pick
# them up. The loaders below pass the cache location explicitly - confirm
# whether the agent modules rely on the environment instead.
CACHE_DIRS = {
    'HF_HOME': '/tmp/huggingface',
    'TRANSFORMERS_CACHE': '/tmp/huggingface',
    'XDG_CACHE_HOME': '/tmp',
    'TORCH_HOME': '/tmp/torch',
    'WHISPER_CACHE': '/tmp/whisper',
}
for env_var, path in CACHE_DIRS.items():
    os.environ[env_var] = path
    os.makedirs(path, exist_ok=True)


# Model loaders
class LazyModelLoader:
    """Build a Hugging Face ``pipeline`` on first use and cache it.

    Args:
        model_name: Checkpoint to load first.
        model_type: Pipeline task name. ``"text-generation"`` selects
            ``AutoModelForCausalLM``; any other value selects
            ``AutoModelForSeq2SeqLM``.
        fallback_model: Optional checkpoint tried once if ``model_name``
            fails to load.
    """

    def __init__(self, model_name, model_type, fallback_model=None):
        self.model_name = model_name
        self.model_type = model_type
        self.fallback_model = fallback_model
        self._model = None
        self._tokenizer = None
        self._pipeline = None

    def load(self):
        """Return the cached pipeline, building it on the first call.

        The primary checkpoint is tried first. If it fails and a different
        fallback checkpoint is configured, the fallback is tried exactly once
        and ``model_name`` is updated to it. A second failure propagates
        instead of retrying, so a broken fallback cannot recurse forever.

        Returns:
            The ``transformers`` pipeline for ``model_type``.

        Raises:
            Exception: Whatever the last failed load attempt raised.
        """
        if self._pipeline is not None:
            return self._pipeline

        try:
            self._pipeline = self._build_pipeline(self.model_name)
        except Exception as primary_error:
            fallback = self.fallback_model
            if not fallback or fallback == self.model_name:
                logging.error(
                    "Failed to load %s: %s",
                    self.model_name, primary_error, exc_info=True,
                )
                raise
            logging.warning(
                "Failed to load %s, falling back to %s",
                self.model_name, fallback,
            )
            # From here on the fallback is the active checkpoint; a later
            # call to load() will see fallback == model_name and not retry.
            self.model_name = fallback
            try:
                self._pipeline = self._build_pipeline(fallback)
            except Exception as fallback_error:
                logging.error(
                    "Failed to load %s: %s",
                    fallback, fallback_error, exc_info=True,
                )
                raise
        return self._pipeline

    def _build_pipeline(self, checkpoint):
        """Load tokenizer and model from one checkpoint and wrap them in a pipeline.

        Both components always come from the same ``checkpoint`` so the
        tokenizer vocabulary matches the model.
        """
        # Imported here so that transformers is only loaded when a model is
        # actually requested.
        from transformers import (
            AutoModelForCausalLM,
            AutoModelForSeq2SeqLM,
            AutoTokenizer,
            pipeline,
        )

        logging.info("Loading %s...", checkpoint)

        # Clear GPU memory
        torch.cuda.empty_cache()

        cache_dir = os.environ.get('TRANSFORMERS_CACHE', '/tmp/huggingface')

        # NOTE: trust_remote_code=True allows code shipped in the model
        # repository to run locally; only use with trusted checkpoints.
        tokenizer = AutoTokenizer.from_pretrained(
            checkpoint,
            trust_remote_code=True,
            cache_dir=cache_dir,
        )

        if self.model_type == "text-generation":
            model_cls = AutoModelForCausalLM
        else:
            model_cls = AutoModelForSeq2SeqLM

        # Load model with memory optimizations
        model = model_cls.from_pretrained(
            checkpoint,
            trust_remote_code=True,
            device_map="auto",
            low_cpu_mem_usage=True,
            torch_dtype=torch.float16,
            cache_dir=cache_dir,
        )

        # Create pipeline after model is loaded
        built = pipeline(
            task=self.model_type,
            model=model,
            tokenizer=tokenizer,
            device_map="auto",
        )

        # Publish the components only once everything succeeded, so a failed
        # attempt never leaves a half-initialized loader behind.
        self._tokenizer = tokenizer
        self._model = model
        logging.info("Successfully loaded %s", checkpoint)
        return built


class WhisperModelLoader:
    """Process-wide holder that loads the Whisper model on first use."""

    _instance = None

    def __init__(self):
        self._model = None

    @staticmethod
    def get_instance():
        """Return the shared loader, creating it on the first call."""
        if WhisperModelLoader._instance is None:
            WhisperModelLoader._instance = WhisperModelLoader()
        return WhisperModelLoader._instance

    def load(self):
        """Return the Whisper model, loading it on the first call.

        Raises:
            Exception: Re-raised from ``whisper.load_model`` after logging.
        """
        if self._model is None:
            try:
                logging.info("Loading Whisper model...")
                self._model = whisper.load_model(
                    "tiny",  # Using tiny model for better memory usage
                    download_root=os.environ.get('WHISPER_CACHE', '/tmp/whisper'),
                )
                logging.info("Whisper model loaded successfully")
            except Exception as e:
                logging.error(
                    "Failed to load Whisper model: %s", e, exc_info=True
                )
                raise
        return self._model

    def transcribe(self, audio_path):
        """Transcribe the audio file at ``audio_path`` with the loaded model."""
        model = self.load()
        return model.transcribe(audio_path)


# Initialize agents
try:
    # Use smaller models for Hugging Face Spaces
    medalpaca_model_loader = LazyModelLoader(
        "facebook/bart-base",  # Start with a smaller model
        "text-generation",
        fallback_model="facebook/bart-large-cnn",
    )
    summarization_model_loader = LazyModelLoader(
        "facebook/bart-base",
        "summarization",
        fallback_model="facebook/bart-large-cnn",
    )

    # Initialize agents with lazy loading
    text_extractor_agent = TextExtractorAgent()
    phi_scrubber_agent = PHIScrubberAgent()
    medical_data_extractor_agent = MedicalDataExtractorAgent(medalpaca_model_loader)
    summarizer_agent = SummarizerAgent(summarization_model_loader)

    # Pass all agents and models to routes
    agents = {
        "text_extractor": text_extractor_agent,
        "phi_scrubber": phi_scrubber_agent,
        "summarizer": summarizer_agent,
        "medical_data_extractor": medical_data_extractor_agent,
        "whisper_model": WhisperModelLoader.get_instance(),
    }

    # NOTE(review): imported here rather than at the top of the file,
    # presumably to avoid a circular import with the routes module - confirm.
    from .api.routes import register_routes
    register_routes(app, agents)

except Exception as e:
    logging.error("Failed to initialize application: %s", e, exc_info=True)
    raise


@app.errorhandler(Exception)
def handle_error(error):
    """Convert any exception raised while handling a request into JSON.

    HTTP exceptions (404, 405, 413, redirects, ...) keep their own status
    code and headers; only the body is replaced with the JSON error shape.
    Every other exception is logged with its traceback and reported as 500.
    """
    if isinstance(error, HTTPException):
        # Reuse the exception's own response so status and headers such as
        # Location or Allow are preserved.
        response = error.get_response()
        response.data = json.dumps({
            "error": error.description,
            "status": "error",
        })
        response.content_type = "application/json"
        return response

    logging.error("Unhandled error: %s", error, exc_info=True)
    return jsonify({
        "error": str(error),
        "status": "error",
    }), 500


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860, debug=False)