HNTAI / models_config.json
sachinchandrankallar's picture
Revert "feat: Establish AI medical extraction service with performance optimizations, unified model management, and detailed Hugging Face Spaces deployment guides."
cdea66b
Raw
History Blame
3.32 kB
{
"patient_summary_models": [
{
"name": "facebook/bart-large-cnn",
"type": "summarization",
"is_active": false,
"cached": true,
"deprecated": true,
"description": "BART Large CNN for summarization",
"use_case": "General text summarization",
"notes": "⚠️ NOT RECOMMENDED FOR MEDICAL TEXT. This model was trained on news articles (CNN/DailyMail), not medical data. May produce suboptimal results for clinical text. Use Phi-3-mini-4k-instruct-q4.gguf for better medical text summarization.",
"warning": "Limited medical domain knowledge - trained on news articles"
},
{
"name": "patrickvonplaten/longformer2roberta-cnn_dailymail-fp16",
"type": "seq2seq",
"is_active": false,
"cached": true,
"deprecated": true,
"description": "Longformer2Roberta for long document summarization",
"use_case": "Long document processing",
"notes": "⚠️ DEPRECATED - NOT RECOMMENDED FOR MEDICAL TEXT. This model was trained on news articles (CNN/DailyMail), not medical data, and produces irrelevant summaries for clinical text. The model fundamentally mismatches medical terminology, structured visit data, and clinical narrative style. Use Phi-3-mini-4k-instruct-q4.gguf instead.",
"warning": "DO NOT USE - Trained on news articles, produces irrelevant medical summaries"
},
{
"name": "microsoft/Phi-3-mini-4k-instruct",
"type": "causal-openvino",
"is_active": false,
"cached": true,
"description": "Phi-3 Mini base model for OpenVINO",
"use_case": "Patient summary generation with OpenVINO optimization"
},
{
"name": "OpenVINO/Phi-3-mini-4k-instruct-fp16-ov",
"type": "causal-openvino",
"is_active": false,
"cached": true,
"description": "Phi-3 Mini FP16 optimized for OpenVINO",
"use_case": "Patient summary generation with FP16 optimization"
},
{
"name": "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf",
"type": "gguf",
"is_active": true,
"cached": true,
"description": "Phi-3 Mini GGUF Q4 quantized - PRIMARY MODEL",
"use_case": "Fast patient summary generation with CPU/GPU",
"repo_id": "microsoft/Phi-3-mini-4k-instruct-gguf",
"filename": "Phi-3-mini-4k-instruct-q4.gguf"
},
{
"name": "google/flan-t5-large",
"type": "summarization",
"is_active": false,
"cached": true,
"description": "FLAN-T5 Large for summarization",
"use_case": "Alternative summarization model"
}
],
"runtime_behavior": {
"allow_runtime_downloads": true,
"cache_runtime_downloads": true,
"fallback_to_cached": true,
"description": "System will download any requested model at runtime if not cached"
},
"cache_directories": {
"HF_HOME": "/app/.cache/huggingface",
"MODEL_CACHE_DIR": "/app/models",
"TORCH_HOME": "/app/.cache/torch",
"WHISPER_CACHE": "/app/.cache/whisper"
},
"notes": [
"Models with 'cached: true' are pre-downloaded during Docker build",
"Models with 'is_active: true' are the primary/default models",
"Other models can be requested at runtime and will be downloaded automatically",
"Runtime downloads are cached for subsequent uses"
]
}