Spaces:

salvinjose
/

HNTAI

Paused

HNTAI / preload_models.py

Update .gitignore to include additional files and directories for macOS, Linux, and application-specific configurations. Modify .huggingface.yaml to enhance Docker build settings and hardware requirements. Refactor app.py to remove legacy code and improve error handling. Remove deprecated files related to comprehensive streaming fixes, deployment scripts, and optimized Docker configurations. Update Dockerfile.prod to extend Gunicorn timeout for better performance. Enhance health endpoints and model management with improved logging and error handling. Consolidate routes and simplify architecture for better maintainability.

af75202 8 months ago

Raw

History Blame

10.2 kB

	#!/usr/bin/env python3
	"""
	Pre-download and cache models for Hugging Face Spaces deployment.
	Run this during Docker build to avoid runtime downloads.

	PRE-CACHED MODELS (downloaded during build):
	- facebook/bart-large-cnn (Summarization)
	- patrickvonplaten/longformer2roberta-cnn_dailymail-fp16 (Seq2Seq)
	- google/flan-t5-large (Summarization)
	- microsoft/Phi-3-mini-4k-instruct (Causal OpenVINO)
	- OpenVINO/Phi-3-mini-4k-instruct-fp16-ov (Causal OpenVINO)
	- microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf (GGUF - PRIMARY)

	RUNTIME BEHAVIOR:
	- If you request a pre-cached model: Loads from the local cache with no download (about 30-60 sec)
	- If you request a different model: Downloads and uses at runtime automatically
	- System supports both pre-cached and on-demand model loading

	PRIMARY MODEL for patient summaries:
	- microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf (is_active: true)
	"""
	import os
	import sys
	import logging
	from pathlib import Path

	# Logging setup: timestamped, INFO-level records
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	# Cache locations, each overridable through an environment variable of the
	# same name. Their contents are meant to be baked into the Docker image.
	MODEL_CACHE_DIR = os.getenv('MODEL_CACHE_DIR', '/app/models')
	HF_HOME = os.getenv('HF_HOME', '/app/.cache/huggingface')
	TORCH_HOME = os.getenv('TORCH_HOME', '/app/.cache/torch')
	WHISPER_CACHE = os.getenv('WHISPER_CACHE', '/app/.cache/whisper')

	# Make sure every cache location exists before any download starts
	for cache_dir in (MODEL_CACHE_DIR, HF_HOME, TORCH_HOME, WHISPER_CACHE):
	    os.makedirs(cache_dir, exist_ok=True)
	    logger.info(f"Created cache directory: {cache_dir}")

	def preload_transformers_models():
	    """Pre-download Hugging Face Hub models into the ``HF_HOME`` cache.

	    Each entry of the internal model list is handled according to its
	    ``type``:

	    - ``"seq2seq"``: tokenizer plus ``AutoModelForSeq2SeqLM`` weights.
	    - ``"causal"``: tokenizer plus ``AutoModelForCausalLM`` weights.
	    - ``"openvino"``: the repository files are fetched as-is with
	      ``snapshot_download`` and never instantiated.

	    A failure is logged for the affected model only; the remaining models
	    are still attempted and nothing is raised to the caller.

	    Returns:
	        None
	    """
	    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
	    from huggingface_hub import snapshot_download

	    # Models for patient summary generation - as specified by user
	    models = [
	        # Summarization models
	        {
	            "name": "facebook/bart-large-cnn",
	            "type": "seq2seq",
	            "description": "BART Large CNN - Summarization",
	            "is_active": False,  # Available but not primary
	        },
	        {
	            "name": "patrickvonplaten/longformer2roberta-cnn_dailymail-fp16",
	            "type": "seq2seq",
	            "description": "Longformer2Roberta - Seq2Seq Summarization",
	            "is_active": False,
	        },
	        {
	            "name": "google/flan-t5-large",
	            "type": "seq2seq",
	            "description": "FLAN-T5 Large - Summarization",
	            "is_active": False,
	        },
	        # Phi-3 models for patient summaries
	        {
	            "name": "microsoft/Phi-3-mini-4k-instruct",
	            "type": "causal",
	            "description": "Phi-3 Mini - Causal OpenVINO (base model)",
	            "is_active": False,
	        },
	        {
	            "name": "OpenVINO/Phi-3-mini-4k-instruct-fp16-ov",
	            # NOTE(review): this repo is expected to ship OpenVINO IR files
	            # rather than PyTorch weights, which the transformers Auto
	            # classes cannot load - confirm against the repo contents.
	            "type": "openvino",
	            "description": "Phi-3 Mini - FP16 OpenVINO optimized",
	            "is_active": False,
	        },
	    ]

	    for model_info in models:
	        model_name = model_info["name"]
	        model_type = model_info["type"]
	        description = model_info["description"]

	        try:
	            logger.info(f"📥 Downloading {description}: {model_name}")

	            if model_type == "openvino":
	                # Fetch the raw repository files into the same cache layout
	                # the transformers loaders use, without instantiating a model.
	                logger.info(" ↳ Downloading repository snapshot...")
	                snapshot_download(repo_id=model_name, cache_dir=HF_HOME)
	            else:
	                # Download tokenizer
	                logger.info(" ↳ Downloading tokenizer...")
	                tokenizer = AutoTokenizer.from_pretrained(
	                    model_name,
	                    cache_dir=HF_HOME,
	                    trust_remote_code=False,
	                )

	                # Download model weights with the loader matching the type
	                logger.info(" ↳ Downloading model weights...")
	                if model_type == "seq2seq":
	                    model_cls = AutoModelForSeq2SeqLM
	                else:
	                    model_cls = AutoModelForCausalLM
	                model = model_cls.from_pretrained(
	                    model_name,
	                    cache_dir=HF_HOME,
	                    trust_remote_code=False,
	                )

	                # Only the on-disk cache matters here; drop the in-memory
	                # objects before the next (possibly large) model is loaded.
	                del model
	                del tokenizer

	            logger.info(f" ✅ Successfully cached {model_name}")

	        except Exception as e:
	            # Don't fail the entire script if one model fails
	            logger.error(f" ❌ Failed to download {model_name}: {e}")

	def preload_gguf_models():
	    """Fetch the GGUF model file(s) used for patient summaries.

	    Every listed file is requested through ``hf_hub_download`` with
	    ``HF_HOME`` as the hub cache and ``MODEL_CACHE_DIR`` as the local
	    target directory. A failed download is logged and the loop moves on to
	    the next entry; nothing is raised or returned.
	    """
	    from huggingface_hub import hf_hub_download

	    # PRIMARY model for patient summaries (is_active: true)
	    primary_gguf = {
	        "repo_id": "microsoft/Phi-3-mini-4k-instruct-gguf",
	        "filename": "Phi-3-mini-4k-instruct-q4.gguf",
	        "description": "Phi-3 Mini GGUF (Q4 quantized) - PRIMARY for patient summaries",
	        "is_active": True,  # This is the active model for patient summaries
	    }
	    gguf_models = [primary_gguf]

	    for entry in gguf_models:
	        try:
	            logger.info(f"📥 Downloading GGUF: {entry['description']}")

	            download_kwargs = dict(
	                repo_id=entry["repo_id"],
	                filename=entry["filename"],
	                cache_dir=HF_HOME,
	                local_dir=MODEL_CACHE_DIR,
	                local_dir_use_symlinks=False,  # Copy files instead of symlinks
	            )
	            saved_to = hf_hub_download(**download_kwargs)

	            logger.info(f" ✅ Successfully cached GGUF model at: {saved_to}")

	        except Exception as err:
	            logger.error(f" ❌ Failed to download GGUF model: {err}")

	def preload_whisper_models():
	    """Load the Whisper "tiny" model once with ``WHISPER_CACHE`` as download root.

	    The model is loaded on CPU and discarded right after the success
	    message. Any error, including a missing ``whisper`` package, is logged
	    rather than raised.
	    """
	    try:
	        logger.info("📥 Downloading Whisper tiny model...")

	        from whisper import load_model

	        whisper_model = load_model("tiny", device="cpu", download_root=WHISPER_CACHE)

	        logger.info(" ✅ Successfully cached Whisper tiny model")
	        del whisper_model

	    except Exception as err:
	        logger.error(f" ❌ Failed to download Whisper model: {err}")

	def preload_spacy_models():
	    """Load spaCy's ``en_core_web_sm`` pipeline once and log the outcome.

	    The loaded pipeline is not kept. A load failure, or a missing ``spacy``
	    package, is logged as an error instead of being raised.
	    """
	    try:
	        logger.info("📥 Loading spaCy en_core_web_sm model...")

	        from spacy import load as load_pipeline

	        load_pipeline("en_core_web_sm")

	        logger.info(" ✅ Successfully loaded spaCy model")

	    except Exception as err:
	        logger.error(f" ❌ Failed to load spaCy model: {err}")

	def preload_nltk_data():
	    """Pre-download common NLTK packages into ``<HF_HOME>/nltk_data``.

	    Packages are fetched one by one so a single failure does not prevent
	    the others from being downloaded. A package counts as downloaded only
	    when ``nltk.download`` returns a truthy value; a falsy return or a
	    raised exception is logged as a warning. Errors outside the
	    per-package loop (for example a missing ``nltk`` package) are logged
	    as an error. Nothing is raised to the caller.

	    Returns:
	        None
	    """
	    try:
	        logger.info("📥 Downloading NLTK data...")

	        import nltk

	        # NOTE(review): this file does not add the directory to NLTK's
	        # search path; presumably the runtime sets NLTK_DATA - confirm.
	        nltk_data_dir = os.path.join(HF_HOME, 'nltk_data')
	        Path(nltk_data_dir).mkdir(parents=True, exist_ok=True)

	        # Download common NLTK datasets
	        for package in ['punkt', 'stopwords', 'wordnet', 'averaged_perceptron_tagger']:
	            try:
	                # nltk.download() reports failures through its return value
	                # instead of raising, so the result must be checked.
	                downloaded = nltk.download(package, download_dir=nltk_data_dir, quiet=True)
	            except Exception as e:
	                logger.warning(f" ⚠️ Failed to download NLTK package: {package} ({e})")
	                continue

	            if downloaded:
	                logger.info(f" ✅ Downloaded NLTK package: {package}")
	            else:
	                logger.warning(f" ⚠️ Failed to download NLTK package: {package}")

	    except Exception as e:
	        logger.error(f" ❌ Failed to download NLTK data: {e}")

	def print_cache_summary():
	    """Log the file count and total size of each existing cache directory.

	    Walks ``MODEL_CACHE_DIR``, ``HF_HOME``, ``TORCH_HOME`` and
	    ``WHISPER_CACHE`` and logs, for each one that exists, the number of
	    regular files and their combined size in MB and GB.

	    Symbolic links are not counted. The Hugging Face hub cache exposes each
	    stored blob a second time through a symlink under ``snapshots/``, so
	    following links would count the same bytes twice. Files that cannot be
	    stat'ed (for example removed while walking) are skipped.

	    Returns:
	        None
	    """
	    logger.info("\n" + "="*80)
	    logger.info("CACHE SUMMARY")
	    logger.info("="*80)

	    for cache_dir in [MODEL_CACHE_DIR, HF_HOME, TORCH_HOME, WHISPER_CACHE]:
	        if not os.path.exists(cache_dir):
	            continue

	        # Calculate directory size
	        total_size = 0
	        file_count = 0
	        for dirpath, _dirnames, filenames in os.walk(cache_dir):
	            for f in filenames:
	                fp = os.path.join(dirpath, f)
	                if os.path.islink(fp):
	                    # The link target is counted where it really lives
	                    continue
	                try:
	                    total_size += os.path.getsize(fp)
	                except OSError:
	                    # File vanished or is unreadable; leave it out
	                    continue
	                file_count += 1

	        size_mb = total_size / (1024 * 1024)
	        size_gb = size_mb / 1024

	        logger.info(f"\n📁 {cache_dir}")
	        logger.info(f" Files: {file_count}")
	        logger.info(f" Size: {size_mb:.2f} MB ({size_gb:.2f} GB)")

	    logger.info("\n" + "="*80)

	def main():
	    """Run every preload step in order, then log a cache summary.

	    The run is best-effort: an exception escaping a step is logged with its
	    traceback, the step name is remembered, and the remaining steps still
	    run. The closing message says "completed" only when no step raised;
	    otherwise it lists the failed steps. Failures a step handles
	    internally are logged by that step and are not visible here.

	    Returns:
	        None
	    """
	    logger.info("🚀 Starting model pre-download process...")
	    logger.info(f" HF_HOME: {HF_HOME}")
	    logger.info(f" MODEL_CACHE_DIR: {MODEL_CACHE_DIR}")
	    logger.info(f" TORCH_HOME: {TORCH_HOME}")
	    logger.info(f" WHISPER_CACHE: {WHISPER_CACHE}")
	    logger.info("")

	    # Import torch early and log PyTorch/CUDA diagnostics; a missing or
	    # broken torch install only produces a warning.
	    try:
	        import torch
	        logger.info(f"🔧 PyTorch version: {torch.__version__}")
	        logger.info(f"🔧 CUDA available: {torch.cuda.is_available()}")
	        if torch.cuda.is_available():
	            logger.info(f"🔧 CUDA version: {torch.version.cuda}")
	            logger.info(f"🔧 GPU: {torch.cuda.get_device_name(0)}")
	    except Exception as e:
	        logger.warning(f"⚠️ Could not detect PyTorch/CUDA info: {e}")

	    logger.info("")

	    # Preload all models
	    steps = [
	        ("Transformers Models", preload_transformers_models),
	        ("GGUF Models", preload_gguf_models),
	        ("Whisper Models", preload_whisper_models),
	        ("spaCy Models", preload_spacy_models),
	        ("NLTK Data", preload_nltk_data),
	    ]

	    failed_steps = []
	    for step_name, step_func in steps:
	        logger.info(f"\n{'='*80}")
	        logger.info(f"STEP: {step_name}")
	        logger.info(f"{'='*80}\n")

	        try:
	            step_func()
	        except Exception as e:
	            failed_steps.append(step_name)
	            # logger.exception appends the traceback to the log record
	            logger.exception(f"❌ Failed during {step_name}: {e}")

	    # Print summary
	    print_cache_summary()

	    if failed_steps:
	        # Do not claim success when a step raised
	        logger.warning(
	            f"\n⚠️ Model pre-download completed with failed steps: {', '.join(failed_steps)}"
	        )
	    else:
	        logger.info("\n✅ Model pre-download completed!")


	# Script entry point: only run the preload when executed directly
	if __name__ == "__main__":
	    main()