Spaces:
Running
Running
Deploy backend from GitHub Actions
Browse files- .env.example +3 -1
- Dockerfile +0 -1
- Documenting MedSight AI Project.md +397 -0
- README.md +769 -8
- backend/core/config.py +2 -1
- backend/core/dependencies.py +1 -1
- backend/core/middleware.py +1 -1
- backend/core/security.py +7 -2
- backend/main.py +1 -0
- docs/images/model_architecture.png +3 -0
- docs/images/system_architecture.png +3 -0
- requirements.txt +77 -58
.env.example
CHANGED
|
@@ -4,7 +4,9 @@ ENVIRONMENT=development
|
|
| 4 |
SECRET_KEY=your-secret-key
|
| 5 |
DEBUG=true
|
| 6 |
ALLOWED_ORIGINS=http://localhost:3000
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
FRONTEND_URL=http://localhost:3000
|
| 9 |
BACKEND_URL=http://localhost:8000
|
| 10 |
VERSION=1.0.0
|
|
|
|
| 4 |
SECRET_KEY=your-secret-key
|
| 5 |
DEBUG=true
|
| 6 |
ALLOWED_ORIGINS=http://localhost:3000
|
| 7 |
+
# Regex is used for Vercel preview/production URLs when credentials are enabled.
|
| 8 |
+
ALLOWED_ORIGIN_REGEX=https://.*\.vercel\.app
|
| 9 |
+
TRUSTED_HOSTS=localhost,127.0.0.1,*.vercel.app,*.hf.space
|
| 10 |
FRONTEND_URL=http://localhost:3000
|
| 11 |
BACKEND_URL=http://localhost:8000
|
| 12 |
VERSION=1.0.0
|
Dockerfile
CHANGED
|
@@ -30,7 +30,6 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 30 |
|
| 31 |
# Copy application code
|
| 32 |
COPY backend/ ./backend/
|
| 33 |
-
COPY results/outputs/*.png ./results/outputs/
|
| 34 |
COPY results/outputs/README.md ./results/outputs/
|
| 35 |
|
| 36 |
# Create necessary directories
|
|
|
|
| 30 |
|
| 31 |
# Copy application code
|
| 32 |
COPY backend/ ./backend/
|
|
|
|
| 33 |
COPY results/outputs/README.md ./results/outputs/
|
| 34 |
|
| 35 |
# Create necessary directories
|
Documenting MedSight AI Project.md
ADDED
|
@@ -0,0 +1,397 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Chat Conversation
|
| 2 |
+
|
| 3 |
+
Note: _This is purely the output of the chat conversation and does not contain any raw data, codebase snippets, etc. used to generate the output._
|
| 4 |
+
|
| 5 |
+
### User Input
|
| 6 |
+
|
| 7 |
+
i need you to write requirements in @[c:\Users\hoshi\Desktop\MedSightAI\requirements.txt] and also write a good professional readme file based on this project and for reference this files are good @[c:\Users\hoshi\Desktop\MedSightAI\MedSight_AI_Research_Paper-_2_ (1).pdf] @[c:\Users\hoshi\Desktop\MedSightAI\MedSight_AI_Research_Paper-_2_.doc]
|
| 8 |
+
|
| 9 |
+
### Planner Response
|
| 10 |
+
|
| 11 |
+
Let me first explore the project structure and read the reference files to understand MedSight AI thoroughly.
|
| 12 |
+
|
| 13 |
+
*Listed directory [MedSightAI](file:///c:/Users/hoshi/Desktop/MedSightAI) *
|
| 14 |
+
|
| 15 |
+
*Listed directory [backend](file:///c:/Users/hoshi/Desktop/MedSightAI/backend) *
|
| 16 |
+
|
| 17 |
+
*Listed directory [frontend](file:///c:/Users/hoshi/Desktop/MedSightAI/frontend) *
|
| 18 |
+
|
| 19 |
+
*Listed directory [models](file:///c:/Users/hoshi/Desktop/MedSightAI/models) *
|
| 20 |
+
|
| 21 |
+
*Listed directory [training](file:///c:/Users/hoshi/Desktop/MedSightAI/training) *
|
| 22 |
+
|
| 23 |
+
*Listed directory [scripts](file:///c:/Users/hoshi/Desktop/MedSightAI/scripts) *
|
| 24 |
+
|
| 25 |
+
*Listed directory [docs](file:///c:/Users/hoshi/Desktop/MedSightAI/docs) *
|
| 26 |
+
|
| 27 |
+
*Listed directory [data](file:///c:/Users/hoshi/Desktop/MedSightAI/data) *
|
| 28 |
+
|
| 29 |
+
*Listed directory [results](file:///c:/Users/hoshi/Desktop/MedSightAI/results) *
|
| 30 |
+
|
| 31 |
+
*Viewed [requirements.txt](file:///c:/Users/hoshi/Desktop/MedSightAI/requirements.txt) *
|
| 32 |
+
|
| 33 |
+
*Viewed [README.md](file:///c:/Users/hoshi/Desktop/MedSightAI/README.md) *
|
| 34 |
+
|
| 35 |
+
*Viewed [Dockerfile](file:///c:/Users/hoshi/Desktop/MedSightAI/Dockerfile) *
|
| 36 |
+
|
| 37 |
+
*Viewed [.env.example](file:///c:/Users/hoshi/Desktop/MedSightAI/.env.example) *
|
| 38 |
+
|
| 39 |
+
*Viewed [.gitignore](file:///c:/Users/hoshi/Desktop/MedSightAI/.gitignore) *
|
| 40 |
+
|
| 41 |
+
### Planner Response
|
| 42 |
+
|
| 43 |
+
Now let me explore the backend source code in depth, plus the frontend, to fully understand the architecture:
|
| 44 |
+
|
| 45 |
+
*Viewed [main.py](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/main.py) *
|
| 46 |
+
|
| 47 |
+
*Listed directory [api](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/api) *
|
| 48 |
+
|
| 49 |
+
*Listed directory [core](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/core) *
|
| 50 |
+
|
| 51 |
+
*Listed directory [ml](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/ml) *
|
| 52 |
+
|
| 53 |
+
*Listed directory [db](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/db) *
|
| 54 |
+
|
| 55 |
+
*Listed directory [utils](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/utils) *
|
| 56 |
+
|
| 57 |
+
*Listed directory [orchestration](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/orchestration) *
|
| 58 |
+
|
| 59 |
+
*Viewed [requirements.txt](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/requirements.txt) *
|
| 60 |
+
|
| 61 |
+
*Viewed [requirements-prod.txt](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/requirements-prod.txt) *
|
| 62 |
+
|
| 63 |
+
*Viewed [package.json](file:///c:/Users/hoshi/Desktop/MedSightAI/frontend/package.json) *
|
| 64 |
+
|
| 65 |
+
*Viewed [pyproject.toml](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/pyproject.toml) *
|
| 66 |
+
|
| 67 |
+
### Planner Response
|
| 68 |
+
|
| 69 |
+
Let me explore more of the ML and frontend code to get a comprehensive understanding:
|
| 70 |
+
|
| 71 |
+
*Listed directory [vision](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/ml/vision) *
|
| 72 |
+
|
| 73 |
+
*Listed directory [nlp](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/ml/nlp) *
|
| 74 |
+
|
| 75 |
+
*Listed directory [rag](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/ml/rag) *
|
| 76 |
+
|
| 77 |
+
*Listed directory [fusion](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/ml/fusion) *
|
| 78 |
+
|
| 79 |
+
*Listed directory [app](file:///c:/Users/hoshi/Desktop/MedSightAI/frontend/app) *
|
| 80 |
+
|
| 81 |
+
*Listed directory [components](file:///c:/Users/hoshi/Desktop/MedSightAI/frontend/components) *
|
| 82 |
+
|
| 83 |
+
*Listed directory [v1](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/api/v1) *
|
| 84 |
+
|
| 85 |
+
*Listed directory [notebooks](file:///c:/Users/hoshi/Desktop/MedSightAI/training/notebooks) *
|
| 86 |
+
|
| 87 |
+
*Listed directory [scripts](file:///c:/Users/hoshi/Desktop/MedSightAI/training/scripts) *
|
| 88 |
+
|
| 89 |
+
*Listed directory [architecture](file:///c:/Users/hoshi/Desktop/MedSightAI/docs/architecture) *
|
| 90 |
+
|
| 91 |
+
### Planner Response
|
| 92 |
+
|
| 93 |
+
Let me read a few more key files for complete understanding:
|
| 94 |
+
|
| 95 |
+
*Viewed [registry.py](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/ml/registry.py) *
|
| 96 |
+
|
| 97 |
+
*Viewed [pulmonary_anomaly.py](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/ml/vision/pulmonary_anomaly.py) *
|
| 98 |
+
|
| 99 |
+
*Viewed [config.py](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/core/config.py) *
|
| 100 |
+
|
| 101 |
+
*Viewed [pipeline.py](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/orchestration/pipeline.py) *
|
| 102 |
+
|
| 103 |
+
### Planner Response
|
| 104 |
+
|
| 105 |
+
Now let me check a few more files to get the complete picture:
|
| 106 |
+
|
| 107 |
+
*Viewed [generator.py](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/ml/rag/generator.py) *
|
| 108 |
+
|
| 109 |
+
*Viewed [gemini_client.py](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/ml/rag/gemini_client.py) *
|
| 110 |
+
|
| 111 |
+
*Listed directory [routers](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/api/v1/routers) *
|
| 112 |
+
|
| 113 |
+
*Listed directory [schemas](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/api/v1/schemas) *
|
| 114 |
+
|
| 115 |
+
*Listed directory [models](file:///c:/Users/hoshi/Desktop/MedSightAI/backend/db/models) *
|
| 116 |
+
|
| 117 |
+
*Listed directory [.github](file:///c:/Users/hoshi/Desktop/MedSightAI/.github) *
|
| 118 |
+
|
| 119 |
+
### Planner Response
|
| 120 |
+
|
| 121 |
+
Now I have a comprehensive understanding of the project. Let me also quickly check the frontend page to understand the dashboard:
|
| 122 |
+
|
| 123 |
+
*Viewed [layout.jsx](file:///c:/Users/hoshi/Desktop/MedSightAI/frontend/app/layout.jsx) *
|
| 124 |
+
|
| 125 |
+
*Listed directory [analysis](file:///c:/Users/hoshi/Desktop/MedSightAI/frontend/components/analysis) *
|
| 126 |
+
|
| 127 |
+
*Listed directory [chat](file:///c:/Users/hoshi/Desktop/MedSightAI/frontend/components/chat) *
|
| 128 |
+
|
| 129 |
+
### Planner Response
|
| 130 |
+
|
| 131 |
+
I now have a thorough understanding of the project. Let me create both files:
|
| 132 |
+
|
| 133 |
+
*Edited relevant file*
|
| 134 |
+
|
| 135 |
+
### Planner Response
|
| 136 |
+
|
| 137 |
+
Now let me create the professional README:
|
| 138 |
+
|
| 139 |
+
*Edited relevant file*
|
| 140 |
+
|
| 141 |
+
### Planner Response
|
| 142 |
+
|
| 143 |
+
Both files have been written. Here's a summary of what was done:
|
| 144 |
+
|
| 145 |
+
## ✅ Completed
|
| 146 |
+
|
| 147 |
+
### `requirements.txt`
|
| 148 |
+
- **Reorganized** into clearly labeled sections: ML Core, Backend (FastAPI), Database, RAG + Vector DB, Generative AI, Utilities, Audio, and Dev + Testing
|
| 149 |
+
- **Added** missing dependencies found in the codebase: `google-generativeai`, `huggingface_hub`, `aiosqlite` (was only in prod requirements)
|
| 150 |
+
- **Added** PyTorch GPU/CPU install instructions at the top (since PyTorch needs a custom index URL)
|
| 151 |
+
- **Inline comments** explain every dependency's purpose
|
| 152 |
+
|
| 153 |
+
### `README.md`
|
| 154 |
+
Professional documentation covering:
|
| 155 |
+
- **Overview** — project description, purpose, and medical disclaimer
|
| 156 |
+
- **Key Features** table — all 10 major features
|
| 157 |
+
- **ASCII architecture diagram** — full frontend→backend→ML→DB stack
|
| 158 |
+
- **Model Pipeline** — detailed VGG16→VAE→ViT three-stage breakdown with the fused scoring formula
|
| 159 |
+
- **Tech Stack** — backend, frontend, and infrastructure tables
|
| 160 |
+
- **Project Structure** — annotated directory tree
|
| 161 |
+
- **Getting Started** — step-by-step setup (Python, Node, env config)
|
| 162 |
+
- **Configuration** — environment variable reference table
|
| 163 |
+
- **API Reference** — all endpoints with auth requirements + response schema
|
| 164 |
+
- **Deployment** — Docker, HuggingFace Spaces, Vercel production setup
|
| 165 |
+
- **Research Paper** — key contributions summary
|
| 166 |
+
- **Training** — dataset info and training script commands
|
| 167 |
+
- **Contributing** — guidelines, dev standards, and test commands
|
| 168 |
+
- Preserved the HuggingFace Spaces YAML frontmatter for Docker SDK deployment
|
| 169 |
+
|
| 170 |
+
### User Input
|
| 171 |
+
|
| 172 |
+
i even wrote a research paper for this architechture so i want to to show architechture and explain about it more so that it shows much
|
| 173 |
+
|
| 174 |
+
### Planner Response
|
| 175 |
+
|
| 176 |
+
Let me extract detailed architecture information from the research paper and codebase to significantly expand the README's architecture sections.
|
| 177 |
+
|
| 178 |
+
*User accepted the command `python -c "
|
| 179 |
+
import subprocess, sys
|
| 180 |
+
try:
|
| 181 |
+
from docx import Document
|
| 182 |
+
except ImportError:
|
| 183 |
+
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'python-docx', '-q'])
|
| 184 |
+
from docx import Document
|
| 185 |
+
|
| 186 |
+
doc = Document(r'c:\Users\hoshi\Desktop\MedSightAI\MedSight_AI_Research_Paper-_2_.doc')
|
| 187 |
+
for i, para in enumerate(doc.paragraphs):
|
| 188 |
+
if para.text.strip():
|
| 189 |
+
print(f'{i}: {para.text}')
|
| 190 |
+
if i > 500:
|
| 191 |
+
break
|
| 192 |
+
"
|
| 193 |
+
`*
|
| 194 |
+
|
| 195 |
+
*Checked command status*
|
| 196 |
+
|
| 197 |
+
### Planner Response
|
| 198 |
+
|
| 199 |
+
The .doc file isn't in the expected format. Let me try extracting the PDF content using a different approach:
|
| 200 |
+
|
| 201 |
+
*User accepted the command `python -c "
|
| 202 |
+
import subprocess, sys
|
| 203 |
+
try:
|
| 204 |
+
import fitz
|
| 205 |
+
except ImportError:
|
| 206 |
+
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'PyMuPDF', '-q'])
|
| 207 |
+
import fitz
|
| 208 |
+
|
| 209 |
+
doc = fitz.open(r'c:\Users\hoshi\Desktop\MedSightAI\MedSight_AI_Research_Paper-_2_ (1).pdf')
|
| 210 |
+
text = ''
|
| 211 |
+
for page in doc:
|
| 212 |
+
text += page.get_text()
|
| 213 |
+
print(text[:25000])
|
| 214 |
+
"
|
| 215 |
+
`*
|
| 216 |
+
|
| 217 |
+
*Checked command status*
|
| 218 |
+
|
| 219 |
+
*User accepted the command `python -c "
|
| 220 |
+
import sys, io
|
| 221 |
+
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
| 222 |
+
import fitz
|
| 223 |
+
doc = fitz.open(r'c:\Users\hoshi\Desktop\MedSightAI\MedSight_AI_Research_Paper-_2_ (1).pdf')
|
| 224 |
+
text = ''
|
| 225 |
+
for page in doc:
|
| 226 |
+
text += page.get_text()
|
| 227 |
+
print(text[:25000])
|
| 228 |
+
"
|
| 229 |
+
`*
|
| 230 |
+
|
| 231 |
+
*User accepted the command `python -c "
|
| 232 |
+
import sys, io
|
| 233 |
+
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
| 234 |
+
import fitz
|
| 235 |
+
doc = fitz.open(r'c:\Users\hoshi\Desktop\MedSightAI\MedSight_AI_Research_Paper-_2_ (1).pdf')
|
| 236 |
+
text = ''
|
| 237 |
+
for page in doc:
|
| 238 |
+
text += page.get_text()
|
| 239 |
+
print(text[:25000])
|
| 240 |
+
"
|
| 241 |
+
`*
|
| 242 |
+
|
| 243 |
+
### Planner Response
|
| 244 |
+
|
| 245 |
+
Let me get the first part of the paper too:
|
| 246 |
+
|
| 247 |
+
*User accepted the command `python -c "
|
| 248 |
+
import sys, io
|
| 249 |
+
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
| 250 |
+
import fitz
|
| 251 |
+
doc = fitz.open(r'c:\Users\hoshi\Desktop\MedSightAI\MedSight_AI_Research_Paper-_2_ (1).pdf')
|
| 252 |
+
text = ''
|
| 253 |
+
for page in doc:
|
| 254 |
+
text += page.get_text()
|
| 255 |
+
# Print first 15000 chars
|
| 256 |
+
print(text[:15000])
|
| 257 |
+
"
|
| 258 |
+
`*
|
| 259 |
+
|
| 260 |
+
### Planner Response
|
| 261 |
+
|
| 262 |
+
Now let me get the remaining section around metrics and UMAP:
|
| 263 |
+
|
| 264 |
+
*User accepted the command `python -c "
|
| 265 |
+
import sys, io
|
| 266 |
+
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
| 267 |
+
import fitz
|
| 268 |
+
doc = fitz.open(r'c:\Users\hoshi\Desktop\MedSightAI\MedSight_AI_Research_Paper-_2_ (1).pdf')
|
| 269 |
+
text = ''
|
| 270 |
+
for page in doc:
|
| 271 |
+
text += page.get_text()
|
| 272 |
+
print(text[14000:22000])
|
| 273 |
+
"
|
| 274 |
+
`*
|
| 275 |
+
|
| 276 |
+
### Planner Response
|
| 277 |
+
|
| 278 |
+
Now let me get the very beginning (abstract, intro, dataset):
|
| 279 |
+
|
| 280 |
+
*User accepted the command `python -c "
|
| 281 |
+
import sys, io
|
| 282 |
+
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
| 283 |
+
import fitz
|
| 284 |
+
doc = fitz.open(r'c:\Users\hoshi\Desktop\MedSightAI\MedSight_AI_Research_Paper-_2_ (1).pdf')
|
| 285 |
+
text = ''
|
| 286 |
+
for page in doc:
|
| 287 |
+
text += page.get_text()
|
| 288 |
+
print(text[:8000])
|
| 289 |
+
"
|
| 290 |
+
`*
|
| 291 |
+
|
| 292 |
+
### Planner Response
|
| 293 |
+
|
| 294 |
+
Excellent! Now I have the complete research paper content. Let me now generate a proper architecture diagram image for the README:
|
| 295 |
+
|
| 296 |
+
*Generated image*
|
| 297 |
+
|
| 298 |
+
*Generated image*
|
| 299 |
+
|
| 300 |
+
### Planner Response
|
| 301 |
+
|
| 302 |
+
Now let me copy the images to the project's docs directory and update the README with the massively expanded architecture sections:
|
| 303 |
+
|
| 304 |
+
*User accepted the command `New-Item -ItemType Directory -Force -Path "c:\Users\hoshi\Desktop\MedSightAI\docs\images" | Out-Null; Copy-Item "C:\Users\hoshi\.gemini\antigravity\brain\74e8ea3d-2656-4924-894c-d5a6c67a8a13\architecture_diagram_1779183989148.png" "c:\Users\hoshi\Desktop\MedSightAI\docs\images\model_architecture.png"; Copy-Item "C:\Users\hoshi\.gemini\antigravity\brain\74e8ea3d-2656-4924-894c-d5a6c67a8a13\system_architecture_1779184017326.png" "c:\Users\hoshi\Desktop\MedSightAI\docs\images\system_architecture.png"; Write-Output "Done"`*
|
| 305 |
+
|
| 306 |
+
### Planner Response
|
| 307 |
+
|
| 308 |
+
Now let me read the current README to carefully plan the expanded edit:
|
| 309 |
+
|
| 310 |
+
*Viewed [README.md](file:///c:/Users/hoshi/Desktop/MedSightAI/README.md) *
|
| 311 |
+
|
| 312 |
+
### Planner Response
|
| 313 |
+
|
| 314 |
+
Now I have the complete picture from both the research paper and codebase. Let me rewrite the README with massively expanded architecture, model pipeline, experimental results, and research sections — all grounded in the actual paper data:
|
| 315 |
+
|
| 316 |
+
### Planner Response
|
| 317 |
+
|
| 318 |
+
Let me make targeted edits to expand the architecture sections rather than rewriting everything at once.
|
| 319 |
+
|
| 320 |
+
*Edited relevant file*
|
| 321 |
+
|
| 322 |
+
### Planner Response
|
| 323 |
+
|
| 324 |
+
Now let me expand the Model Pipeline section with the research paper's detailed architecture, hyperparameters, and experimental results:
|
| 325 |
+
|
| 326 |
+
*Viewed [README.md](file:///c:/Users/hoshi/Desktop/MedSightAI/README.md) *
|
| 327 |
+
|
| 328 |
+
*Edited relevant file*
|
| 329 |
+
|
| 330 |
+
### Planner Response
|
| 331 |
+
|
| 332 |
+
Now let me update the Research Paper section and the old Training section since the training details are now covered in the expanded model pipeline. Let me also update the TOC:
|
| 333 |
+
|
| 334 |
+
*Viewed [README.md](file:///c:/Users/hoshi/Desktop/MedSightAI/README.md) *
|
| 335 |
+
|
| 336 |
+
*Viewed [README.md](file:///c:/Users/hoshi/Desktop/MedSightAI/README.md) *
|
| 337 |
+
|
| 338 |
+
*Edited relevant file*
|
| 339 |
+
|
| 340 |
+
### Planner Response
|
| 341 |
+
|
| 342 |
+
The README has been massively expanded. Let me verify the final file looks correct:
|
| 343 |
+
|
| 344 |
+
*User accepted the command `$content = Get-Content "c:\Users\hoshi\Desktop\MedSightAI\README.md"; Write-Output "Total lines: $($content.Count)"; Write-Output "Total size: $((Get-Item 'c:\Users\hoshi\Desktop\MedSightAI\README.md').Length) bytes"`*
|
| 345 |
+
|
| 346 |
+
### Planner Response
|
| 347 |
+
|
| 348 |
+
Let me also verify the images are in place:
|
| 349 |
+
|
| 350 |
+
*Listed directory [images](file:///c:/Users/hoshi/Desktop/MedSightAI/docs/images) *
|
| 351 |
+
|
| 352 |
+
### Planner Response
|
| 353 |
+
|
| 354 |
+
Everything is in place. Here's a summary of what was done:
|
| 355 |
+
|
| 356 |
+
## ✅ README Expanded — Architecture Deep Dive
|
| 357 |
+
|
| 358 |
+
The README grew from **584 lines → 755 lines** with the following additions:
|
| 359 |
+
|
| 360 |
+
### System Architecture (expanded from 40 lines → 60+ lines)
|
| 361 |
+
- **Architecture diagram image** (`docs/images/system_architecture.png`)
|
| 362 |
+
- **7-Stage Analysis Pipeline** — visual pipeline flowchart showing Input→Vision→VRAM Cleanup→NLP→Fusion→Report→Status with per-stage details
|
| 363 |
+
- **VRAM-Aware Model Registry** — table of all 6 models with priority, RAM, required status, and HuggingFace IDs
|
| 364 |
+
- **NLP Pipeline** — 3-stage breakdown (NER → Zero-Shot Classification → Multimodal Fusion)
|
| 365 |
+
- **3-Tier RAG Architecture** — table showing Gemini Cloud → BioGPT Local → Template Engine with latency
|
| 366 |
+
|
| 367 |
+
### Model Pipeline (expanded from 28 lines → 130+ lines)
|
| 368 |
+
- **Architecture diagram image** (`docs/images/model_architecture.png`)
|
| 369 |
+
- **Stage 1** — VGG16 data flow with exact dimensions, freezing rationale
|
| 370 |
+
- **Stage 2** — Full VAE encoder/decoder layer dimensions (512→512→384→256), ELBO loss formula with β=0.001
|
| 371 |
+
- **Stage 3** — Complete ViT hyperparameter table (256 latent dim, 8 patches, d_model=128, 6 layers, 8 heads)
|
| 372 |
+
- **Fused Anomaly Score** — Full mathematical formula with weight explanations, threshold=0.348
|
| 373 |
+
- **Interpretability** — 5-step heatmap generation pipeline
|
| 374 |
+
|
| 375 |
+
### Training & Experimental Results (new section)
|
| 376 |
+
- **Dataset table** — 21,165 images across 4 classes with usage roles
|
| 377 |
+
- **Two-phase training** — VAE (50 epochs) + ViT (30 epochs) with exact optimizer configs
|
| 378 |
+
- **Results table** — AUROC 0.718, sensitivity 64.7%, specificity 66.5%, 2.53M params
|
| 379 |
+
- **Ablation study table** — 5 configurations showing each component's contribution
|
| 380 |
+
- **UMAP latent space validation** — emergent clustering analysis
|
| 381 |
+
|
| 382 |
+
### Research Paper (expanded)
|
| 383 |
+
- Full academic citation with authors (Kasala Hoshik, V. Vineel Reddy, K. Chanikya) and institution
|
| 384 |
+
- 6 key contributions from the paper
|
| 385 |
+
- Strengths and future directions sections
|
| 386 |
+
|
| 387 |
+
### User Input
|
| 388 |
+
|
| 389 |
+
can you read and write pdf files
|
| 390 |
+
|
| 391 |
+
### Planner Response
|
| 392 |
+
|
| 393 |
+
**Reading PDFs** — Yes, but indirectly. I can't open PDFs natively, but I can use Python libraries like `PyMuPDF` (which we already used to extract the full text of your research paper) or `pdfplumber` to read and extract text, tables, and metadata from PDFs.
|
| 394 |
+
|
| 395 |
+
**Writing/Creating PDFs** — Yes, also via Python. I can generate PDFs using libraries like `ReportLab`, `FPDF2`, or `PyMuPDF`. Your project already has both `reportlab` and `fpdf2` in `requirements.txt`.
|
| 396 |
+
|
| 397 |
+
So in short: **yes to both**, just through Python scripts rather than viewing them directly. What do you need done with a PDF?
|
README.md
CHANGED
|
@@ -9,15 +9,776 @@ pinned: true
|
|
| 9 |
license: apache-2.0
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
| 13 |
|
| 14 |
-
|
| 15 |
|
| 16 |
-
|
| 17 |
|
| 18 |
-
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
license: apache-2.0
|
| 10 |
---
|
| 11 |
|
| 12 |
+
<div align="center">
|
| 13 |
|
| 14 |
+
# 🏥 MedSight AI
|
| 15 |
|
| 16 |
+
### Multimodal Medical Diagnostic Platform
|
| 17 |
|
| 18 |
+
**AI-Powered Pulmonary Anomaly Detection Fusing Computer Vision, NLP, and Retrieval-Augmented Generation**
|
| 19 |
|
| 20 |
+
[](https://python.org)
|
| 21 |
+
[](https://pytorch.org)
|
| 22 |
+
[](https://fastapi.tiangolo.com)
|
| 23 |
+
[](https://nextjs.org)
|
| 24 |
+
[](LICENSE)
|
| 25 |
+
|
| 26 |
+
[Live Demo](#deployment) · [Research Paper](#research-paper) · [API Docs](#api-reference) · [Architecture](#system-architecture)
|
| 27 |
+
|
| 28 |
+
</div>
|
| 29 |
+
|
| 30 |
+
---
|
| 31 |
+
|
| 32 |
+
## 📋 Table of Contents
|
| 33 |
+
|
| 34 |
+
- [Overview](#overview)
|
| 35 |
+
- [Key Features](#key-features)
|
| 36 |
+
- [System Architecture](#system-architecture)
|
| 37 |
+
- [7-Stage Analysis Pipeline](#7-stage-analysis-pipeline)
|
| 38 |
+
- [VRAM-Aware Model Registry](#vram-aware-model-registry)
|
| 39 |
+
- [NLP Pipeline](#nlp-pipeline)
|
| 40 |
+
- [3-Tier RAG Conversational Architecture](#3-tier-rag-conversational-architecture)
|
| 41 |
+
- [Model Pipeline — VGG16 → VAE → ViT](#model-pipeline--vgg16--vae--vit)
|
| 42 |
+
- [Fused Anomaly Score](#fused-anomaly-score)
|
| 43 |
+
- [Interpretability — Clinical Attention Heatmaps](#interpretability--clinical-attention-heatmaps)
|
| 44 |
+
- [Training & Experimental Results](#training--experimental-results)
|
| 45 |
+
- [Ablation Study](#ablation-study--fusion-component-analysis)
|
| 46 |
+
- [Tech Stack](#tech-stack)
|
| 47 |
+
- [Project Structure](#project-structure)
|
| 48 |
+
- [Getting Started](#getting-started)
|
| 49 |
+
- [Configuration](#configuration)
|
| 50 |
+
- [API Reference](#api-reference)
|
| 51 |
+
- [Deployment](#deployment)
|
| 52 |
+
- [Research Paper](#research-paper)
|
| 53 |
+
- [Contributing](#contributing)
|
| 54 |
+
- [License](#license)
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
## Overview
|
| 59 |
+
|
| 60 |
+
**MedSight AI** is a full-stack multimodal medical diagnostic platform that performs automated pulmonary anomaly detection from chest X-ray images. The system fuses deep learning–based computer vision with clinical NLP and a retrieval-augmented generation (RAG) pipeline to deliver comprehensive diagnostic reports, clinical Q&A, and explainable AI visualizations — all through a modern clinical dashboard.
|
| 61 |
+
|
| 62 |
+
The platform is designed as a **clinical decision-support tool** (not a replacement for physicians) that assists radiologists and clinicians by:
|
| 63 |
+
|
| 64 |
+
- Detecting pulmonary anomalies in chest X-rays using a novel **VGG16 → VAE → ViT** architecture (2.53M trainable parameters)
|
| 65 |
+
- Extracting clinical entities from patient symptom descriptions via **scispaCy NER** and **zero-shot disease classification**
|
| 66 |
+
- Generating patient-friendly diagnostic explanations through **Gemini 2.0 Flash**–powered conversational AI
|
| 67 |
+
- Producing downloadable **PDF diagnostic reports** with heatmap visualizations
|
| 68 |
+
|
| 69 |
+
> ⚠️ **Disclaimer:** MedSight AI is a research prototype for educational and clinical decision-support purposes. It is **not** FDA-approved and should not be used as the sole basis for medical diagnosis or treatment.
|
| 70 |
+
|
| 71 |
+
---
|
| 72 |
+
|
| 73 |
+
## Key Features
|
| 74 |
+
|
| 75 |
+
| Feature | Description |
|
| 76 |
+
|---|---|
|
| 77 |
+
| 🔬 **Anomaly Detection** | Novel VGG16 → VAE → ViT pipeline that detects anomalies via reconstruction error, KL divergence, and attention-based scoring |
|
| 78 |
+
| 🗺️ **Heatmap Visualization** | Clinical Grad-CAM–style attention overlays showing regions of interest on X-rays |
|
| 79 |
+
| 🧠 **NLP Entity Extraction** | scispaCy-powered medical NER extracting diseases, symptoms, medications, and anatomical entities |
|
| 80 |
+
| 🏷️ **Disease Classification** | Zero-shot classification using DistilBART-MNLI with rule-based fallbacks |
|
| 81 |
+
| 🔗 **Multimodal Fusion** | Image-text alignment scoring to correlate imaging findings with clinical narratives |
|
| 82 |
+
| 💬 **AI Clinical Chat** | Gemini 2.0 Flash–powered RAG chatbot with session-aware context and intent detection |
|
| 83 |
+
| 📄 **PDF Reports** | Auto-generated diagnostic reports with heatmaps, findings, and recommendations |
|
| 84 |
+
| 🎙️ **Voice Input** | Whisper-powered speech-to-text for hands-free symptom entry |
|
| 85 |
+
| 🔐 **Authentication** | JWT + Google OAuth 2.0 with secure session management and brute-force protection |
|
| 86 |
+
| 📊 **Patient Dashboard** | Comprehensive analysis history, risk tracking, and session management |
|
| 87 |
+
|
| 88 |
+
---
|
| 89 |
+
|
| 90 |
+
## System Architecture
|
| 91 |
+
|
| 92 |
+
<p align="center">
|
| 93 |
+
<img src="docs/images/system_architecture.png" alt="MedSight AI System Architecture" width="800"/>
|
| 94 |
+
</p>
|
| 95 |
+
|
| 96 |
+
MedSight AI is deployed as a **production-grade web application** with a React/Next.js 14 frontend and an async FastAPI backend. The architecture cleanly separates vision, NLP, and conversational AI pipelines behind a unified REST API.
|
| 97 |
+
|
| 98 |
+
### 7-Stage Analysis Pipeline
|
| 99 |
+
|
| 100 |
+
Every X-ray analysis request flows through a deterministic 7-stage orchestration pipeline (`backend/orchestration/pipeline.py`):
|
| 101 |
+
|
| 102 |
+
```
|
| 103 |
+
┌──────────┐   ┌──────────┐   ┌──────────┐   ┌──────────┐   ┌──────────┐   ┌──────────┐   ┌──────────┐
|
| 104 |
+
│ 1. Input │──▶│ 2. Vision│──▶│ 3. VRAM  │──▶│ 4. NLP   │──▶│ 5. Multi │──▶│ 6. Report│──▶│ 7. Status│
|
| 105 |
+
│ Validate │   │ Analysis │   │ Cleanup  │   │ Analysis │   │ Fusion   │   │ Gen      │   │ Return   │
|
| 106 |
+
└──────────┘   └──────────┘   └──────────┘   └──────────┘   └──────────┘   └──────────┘   └──────────┘
|
| 107 |
+
 Preprocess     VGG16→VAE      torch.cuda     scispaCy NER   BiomedVLP      BioGPT or      COMPLETE /
|
| 108 |
+
 224×224 RGB    →ViT scorer    empty_cache    + DistilBART   alignment      Template       PARTIAL /
|
| 109 |
+
 LANCZOS        + heatmap      (GPU only)     zero-shot      scoring        fallback       FAILED
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
Each stage runs asynchronously with **independent error handling** — if vision fails, NLP still runs. The system returns `COMPLETE`, `PARTIAL`, or `FAILED` depending on which stages succeeded.
|
| 113 |
+
|
| 114 |
+
### VRAM-Aware Model Registry
|
| 115 |
+
|
| 116 |
+
A custom `ModelRegistry` manages six ML models with **priority-based loading**, **LRU GPU eviction**, and **async initialization**. This enables deployment on consumer hardware with as little as **4 GB VRAM**:
|
| 117 |
+
|
| 118 |
+
| Priority | Model | HuggingFace ID | RAM | Required | Purpose |
|
| 119 |
+
|:---:|---|---|:---:|:---:|---|
|
| 120 |
+
| 1 | VGG16+VAE+ViT | `hoshikrana/VAE_and_VIT_Anomaly_detection` | 50 MB | ✅ | Anomaly detection |
|
| 121 |
+
| 1 | MiniLM-L6-v2 | `sentence-transformers/all-MiniLM-L6-v2` | 100 MB | ✅ | RAG embeddings |
|
| 122 |
+
| 2 | scispaCy NER | `en_core_sci_sm` | 100 MB | ✅ | Medical entity extraction |
|
| 123 |
+
| 3 | Whisper Tiny | `openai/whisper-tiny` | 300 MB | ❌ | Voice transcription |
|
| 124 |
+
| 4 | BioGPT | `microsoft/biogpt` | 700 MB | ❌ | Report generation |
|
| 125 |
+
| 5 | DistilBART | `valhalla/distilbart-mnli-12-1` | 300 MB | ❌ | Zero-shot classification |
|
| 126 |
+
|
| 127 |
+
The registry supports **dynamic GPU↔CPU migration** — when a higher-priority model needs GPU memory, the least-recently-used GPU model is evicted to CPU automatically.
|
| 128 |
+
|
| 129 |
+
### NLP Pipeline
|
| 130 |
+
|
| 131 |
+
The NLP module processes clinical notes through three stages:
|
| 132 |
+
|
| 133 |
+
1. **Named Entity Recognition** — scispaCy (`en_core_sci_sm`) extracts diseases, symptoms, medications, and anatomical references from patient text
|
| 134 |
+
2. **Zero-Shot Classification** — DistilBART-MNLI classifies clinical text against 20 pulmonary conditions without task-specific fine-tuning (falls back to rule-based matching if the model isn't loaded)
|
| 135 |
+
3. **Multimodal Fusion** — Optional BiomedVLP image-text alignment scoring correlates imaging findings with clinical narratives, with a keyword-based fallback for constrained environments
|
| 136 |
+
|
| 137 |
+
### 3-Tier RAG Conversational Architecture
|
| 138 |
+
|
| 139 |
+
The conversational module implements a **highly resilient 3-tier Retrieval-Augmented Generation** system that never fails silently:
|
| 140 |
+
|
| 141 |
+
| Tier | Engine | Method | Latency |
|
| 142 |
+
|:---:|---|---|:---:|
|
| 143 |
+
| **Tier 1** | Gemini 2.0 Flash (Cloud) | Streaming SSE with dynamic system instructions | ~1.5s |
|
| 144 |
+
| **Tier 2** | BioGPT (Local) | Beam search decoding (num_beams=4) | ~3s |
|
| 145 |
+
| **Tier 3** | Heuristic Templates | Intent-detection rule engine with 8 intent categories | ~5ms |
|
| 146 |
+
|
| 147 |
+
**Context construction** aggregates: vision anomaly scores → NLP predictions → fusion similarity → patient session history → retrieved PubMed abstracts (via MiniLM-L6-v2 + ChromaDB HNSW indexing). All tiers prohibit dosage recommendations and append medical disclaimers.
|
| 148 |
+
|
| 149 |
+
---
|
| 150 |
+
|
| 151 |
+
## Model Pipeline — VGG16 → VAE → ViT
|
| 152 |
+
|
| 153 |
+
<p align="center">
|
| 154 |
+
<img src="docs/images/model_architecture.png" alt="Three-Stage Anomaly Detection Architecture" width="800"/>
|
| 155 |
+
</p>
|
| 156 |
+
|
| 157 |
+
The core anomaly detection system implements a novel **three-stage unsupervised architecture** with only **2.53M trainable parameters**. The model is trained exclusively on normal chest X-rays and detects anomalies by learning the distribution of healthy pulmonary anatomy — requiring **zero pathology-specific labels**.
|
| 158 |
+
|
| 159 |
+
### Stage 1 — VGG16 Feature Extraction (0 trainable params)
|
| 160 |
+
|
| 161 |
+
Pre-trained VGG16 (ImageNet) serves as a **frozen feature extractor**. Convolutional feature maps are globally average-pooled to produce a compact representation per image.
|
| 162 |
+
|
| 163 |
+
```
|
| 164 |
+
Input: 224×224×3 RGB (ImageNet-normalized)
|
| 165 |
+
  → VGG16.features (frozen)
|
| 166 |
+
  → AdaptiveAvgPool2d(1,1)
|
| 167 |
+
  → Flatten
|
| 168 |
+
  → Output: ℝ⁵¹² feature vector
|
| 169 |
+
```
|
| 170 |
+
|
| 171 |
+
**Why freeze?** (i) Deterministic features ensure stable VAE training; (ii) zero gradient storage saves VRAM; (iii) ImageNet features transfer well to medical imaging (Raghu et al., 2019).
|
| 172 |
+
|
| 173 |
+
### Stage 2 — Variational Autoencoder (1,318,656 params)
|
| 174 |
+
|
| 175 |
+
The VAE learns a **smooth, continuous latent manifold** of normal pulmonary anatomy. During inference, pathological images produce higher reconstruction error and KL divergence because they fall outside the learned normal distribution.
|
| 176 |
+
|
| 177 |
+
```
|
| 178 |
+
Encoder: 512 → 512 → 384 → 256 → [μ, log σ²] (each with LayerNorm + GELU + Dropout 0.1)
|
| 179 |
+
    ↓
|
| 180 |
+
Reparameterization: z = μ + ε·σ (ε ~ N(0,1))
|
| 181 |
+
    ↓
|
| 182 |
+
Decoder: 256 → 384 → 512 → 512 (symmetric architecture)
|
| 183 |
+
    ↓
|
| 184 |
+
Output: x̂ (reconstructed features)
|
| 185 |
+
```
|
| 186 |
+
|
| 187 |
+
**Loss function** — Evidence Lower Bound (ELBO):
|
| 188 |
+
|
| 189 |
+
```
|
| 190 |
+
L_VAE = L_recon + β · L_KL
|
| 191 |
+
|
| 192 |
+
where: L_recon = MSE(x̂, x)
|
| 193 |
+
       L_KL = -½ Σ(1 + log σ² - μ² - σ²)
|
| 194 |
+
       β = 0.001 (β-VAE formulation to prevent posterior collapse)
|
| 195 |
+
```
|
| 196 |
+
|
| 197 |
+
### Stage 3 — Vision Transformer Anomaly Scorer (1,209,729 params)
|
| 198 |
+
|
| 199 |
+
The ViT operates on the **latent vector z** (not raw pixels), treating it as a sequence of patches for self-attention-based anomaly scoring. This is a key architectural decision — the ViT scores the quality of the latent representation rather than the image directly.
|
| 200 |
+
|
| 201 |
+
```
|
| 202 |
+
z ∈ ℝ²⁵⁶ → reshape to 8 patches of dim 32
|
| 203 |
+
  → Linear projection to d_model = 128
|
| 204 |
+
  → Prepend learnable [CLS] token
|
| 205 |
+
  → Add positional embeddings (9 tokens = 8 patches + CLS)
|
| 206 |
+
  → 6× Transformer Blocks (8-head attention, MLP dim 512, GELU, Dropout 0.1)
|
| 207 |
+
  → LayerNorm
|
| 208 |
+
  → [CLS] token → MLP head → Sigmoid → anomaly score ∈ [0, 1]
|
| 209 |
+
```
|
| 210 |
+
|
| 211 |
+
| Hyperparameter | Value |
|
| 212 |
+
|---|:---:|
|
| 213 |
+
| Latent dimension | 256 |
|
| 214 |
+
| Patch dimension | 32 |
|
| 215 |
+
| Number of patches | 8 |
|
| 216 |
+
| Model dimension (d_model) | 128 |
|
| 217 |
+
| Transformer depth | 6 layers |
|
| 218 |
+
| Attention heads | 8 |
|
| 219 |
+
| MLP dimension | 512 |
|
| 220 |
+
| Dropout | 0.1 |
|
| 221 |
+
| Output activation | Sigmoid → [0, 1] |
|
| 222 |
+
|
| 223 |
+
### Fused Anomaly Score
|
| 224 |
+
|
| 225 |
+
The final anomaly score fuses **three complementary signals** via weighted linear combination after normalizing each component using calibration statistics computed on the training set:
|
| 226 |
+
|
| 227 |
+
```
|
| 228 |
+
S_anomaly = w₁ · σ((e_recon - μ_recon) / σ_recon)
|
| 229 |
+
          + w₂ · σ((d_KL - μ_KL) / σ_KL)
|
| 230 |
+
          + w₃ · s_ViT
|
| 231 |
+
|
| 232 |
+
where: w₁ = 0.4 (reconstruction error — pixel-level deviations)
|
| 233 |
+
       w₂ = 0.2 (KL divergence — distributional shift)
|
| 234 |
+
       w₃ = 0.4 (ViT score — higher-order latent abnormalities)
|
| 235 |
+
       σ = sigmoid normalization
|
| 236 |
+
```
|
| 237 |
+
|
| 238 |
+
The optimal threshold of **0.348** was determined by maximizing the Youden index on the validation set.
|
| 239 |
+
|
| 240 |
+
### Interpretability — Clinical Attention Heatmaps
|
| 241 |
+
|
| 242 |
+
To provide **visual explainability**, the system extracts [CLS] token attention weights from the final ViT layer:
|
| 243 |
+
|
| 244 |
+
1. Average attention across all 8 heads → patch-level attention vector
|
| 245 |
+
2. Reshape into 2D grid and upsample to 384×384 via bicubic interpolation
|
| 246 |
+
3. Apply clinical colormap (black → dark red → orange → bright yellow)
|
| 247 |
+
4. Adaptive transparency mask ensures only anomalous regions glow over the X-ray
|
| 248 |
+
5. CLAHE enhancement on base radiograph maximizes anatomical contrast
|
| 249 |
+
|
| 250 |
+
The result is a three-panel visualization: **Original X-ray** | **Attention Heatmap** | **Clinical Overlay** with anomaly score.
|
| 251 |
+
|
| 252 |
+
---
|
| 253 |
+
|
| 254 |
+
## Training & Experimental Results
|
| 255 |
+
|
| 256 |
+
### Dataset — COVID-19 Radiography Database
|
| 257 |
+
|
| 258 |
+
| Class | Count | Usage |
|
| 259 |
+
|---|:---:|---|
|
| 260 |
+
| Normal | 10,192 | Training (unsupervised — model only sees this) |
|
| 261 |
+
| COVID-19 | 3,616 | Evaluation only |
|
| 262 |
+
| Lung Opacity | 6,012 | Evaluation only |
|
| 263 |
+
| Viral Pneumonia | 1,345 | Evaluation only |
|
| 264 |
+
| **Total** | **21,165** | — |
|
| 265 |
+
|
| 266 |
+
**Preprocessing:** 224×224 Lanczos resize, RGB, ImageNet normalization. Training augmentations: random horizontal flip (p=0.5), rotation (±10°), color jitter (±0.2).
|
| 267 |
+
|
| 268 |
+
### Two-Phase Training Protocol
|
| 269 |
+
|
| 270 |
+
**Phase 1 — VAE Training (50 epochs)**
|
| 271 |
+
- AdamW optimizer (lr=1×10⁻⁴, weight_decay=1×10⁻⁵)
|
| 272 |
+
- ReduceLROnPlateau scheduler (factor=0.5, patience=3)
|
| 273 |
+
- Batch size 32, β=0.001, early stopping patience 10
|
| 274 |
+
- Resource-efficient: mixed-precision FP16, gradient accumulation (4 steps → effective batch 128)
|
| 275 |
+
|
| 276 |
+
**Phase 2 — ViT Scorer Training (30 epochs)**
|
| 277 |
+
- AdamW optimizer (lr=5×10⁻⁵, weight_decay=1×10⁻⁵)
|
| 278 |
+
- Binary cross-entropy: Normal→0, Anomaly→1
|
| 279 |
+
- Only the ViT uses labels; the VAE remains **fully unsupervised**
|
| 280 |
+
|
| 281 |
+
### Results
|
| 282 |
+
|
| 283 |
+
| Metric | Value |
|
| 284 |
+
|---|:---:|
|
| 285 |
+
| **AUROC** | **0.718** |
|
| 286 |
+
| ViT Validation Accuracy | 98.6% |
|
| 287 |
+
| VAE Final Reconstruction MSE | 0.0152 |
|
| 288 |
+
| VAE β·KL Divergence | 6.97×10⁻⁴ |
|
| 289 |
+
| True Positives (Anomaly→Anomaly) | 4,974 |
|
| 290 |
+
| True Negatives (Normal→Normal) | 1,017 |
|
| 291 |
+
| Sensitivity (Recall) | 64.7% |
|
| 292 |
+
| Specificity | 66.5% |
|
| 293 |
+
| Optimal Threshold | 0.348 |
|
| 294 |
+
| Total Trainable Parameters | **2,528,385** |
|
| 295 |
+
|
| 296 |
+
### Ablation Study — Fusion Component Analysis
|
| 297 |
+
|
| 298 |
+
| Configuration | AUROC | Notes |
|
| 299 |
+
|---|:---:|---|
|
| 300 |
+
| Reconstruction error only | 0.62 | MSE between VGG features and reconstruction |
|
| 301 |
+
| KL divergence only | 0.68 | Strongest single signal |
|
| 302 |
+
| ViT score only | 0.65 | Latent-space attention scoring |
|
| 303 |
+
| Recon. + KL (w/o ViT) | 0.69 | Traditional VAE anomaly detection |
|
| 304 |
+
| **Full fusion (0.4 / 0.2 / 0.4)** | **0.718** | **Best configuration** |
|
| 305 |
+
|
| 306 |
+
Each component provides **complementary information** — reconstruction error captures pixel-level deviations, KL divergence captures distributional shift, and the ViT captures higher-order latent abnormalities via attention.
|
| 307 |
+
|
| 308 |
+
### Latent Space Validation (UMAP)
|
| 309 |
+
|
| 310 |
+
UMAP projection of the 256-dimensional VAE latent space reveals **emergent clustering without supervision**:
|
| 311 |
+
- **Normal** images cluster tightly — the VAE learned a compact representation of healthy anatomy
|
| 312 |
+
- **Lung Opacity** forms a distinct separable cluster — the most detectable anomaly class
|
| 313 |
+
- **Viral Pneumonia** partially overlaps with normal — explaining its harder detectability
|
| 314 |
+
- **COVID-19** cases are sparse and widely distributed — reflecting heterogeneous radiographic presentations
|
| 315 |
+
|
| 316 |
+
---
|
| 317 |
+
|
| 318 |
+
## Tech Stack
|
| 319 |
+
|
| 320 |
+
### Backend
|
| 321 |
+
| Component | Technology |
|
| 322 |
+
|---|---|
|
| 323 |
+
| Framework | FastAPI 0.110 (async, Pydantic v2) |
|
| 324 |
+
| ML Runtime | PyTorch 2.2 + ONNX Runtime |
|
| 325 |
+
| NLP | scispaCy, HuggingFace Transformers, BioGPT |
|
| 326 |
+
| Embeddings | Sentence-Transformers (MiniLM-L6-v2) |
|
| 327 |
+
| Vector DB | ChromaDB 0.4.24 |
|
| 328 |
+
| Generative AI | Google Gemini 2.0 Flash |
|
| 329 |
+
| Database | SQLAlchemy 2.0 (SQLite dev / PostgreSQL prod) |
|
| 330 |
+
| Auth | JWT + Google OAuth 2.0 (Authlib) |
|
| 331 |
+
| Task Scheduling | APScheduler |
|
| 332 |
+
| PDF Generation | ReportLab |
|
| 333 |
+
|
| 334 |
+
### Frontend
|
| 335 |
+
| Component | Technology |
|
| 336 |
+
|---|---|
|
| 337 |
+
| Framework | Next.js 14 (App Router) |
|
| 338 |
+
| Styling | Tailwind CSS 3.4 |
|
| 339 |
+
| Animations | Framer Motion 11 |
|
| 340 |
+
| Charts | Recharts 2.12 |
|
| 341 |
+
| Icons | Lucide React |
|
| 342 |
+
| HTTP Client | Axios |
|
| 343 |
+
| Deployment | Vercel |
|
| 344 |
+
|
| 345 |
+
### Infrastructure
|
| 346 |
+
| Component | Technology |
|
| 347 |
+
|---|---|
|
| 348 |
+
| Containerization | Docker (Python 3.11-slim) |
|
| 349 |
+
| Backend Hosting | HuggingFace Spaces (Docker SDK) |
|
| 350 |
+
| Frontend Hosting | Vercel |
|
| 351 |
+
| Model Distribution | HuggingFace Hub |
|
| 352 |
+
| Object Storage | Cloudflare R2 (optional) |
|
| 353 |
+
| Database (Prod) | Supabase PostgreSQL |
|
| 354 |
+
|
| 355 |
+
---
|
| 356 |
+
|
| 357 |
+
## Project Structure
|
| 358 |
+
|
| 359 |
+
```
|
| 360 |
+
MedSightAI/
|
| 361 |
+
├── backend/
|
| 362 |
+
│   ├── api/v1/
|
| 363 |
+
│   │   ├── routers/  # FastAPI route handlers
|
| 364 |
+
│   │   │   ├── analyze.py  # X-ray upload & analysis
|
| 365 |
+
│   │   │   ├── auth.py  # JWT + OAuth authentication
|
| 366 |
+
│   │   │   ├── chat.py  # RAG-powered clinical Q&A
|
| 367 |
+
│   │   │   ├── report.py  # PDF report generation
|
| 368 |
+
│   │   │   └── users.py  # User profiles & session history
|
| 369 |
+
│   │   └── schemas/  # Pydantic v2 request/response models
|
| 370 |
+
│   ├── core/
|
| 371 |
+
│   │   ├── config.py  # Pydantic settings (env-driven)
|
| 372 |
+
│   │   ├── security.py  # JWT, password hashing, API keys
|
| 373 |
+
│   │   ├── middleware.py  # CORS, rate limiting, security headers
|
| 374 |
+
│   │   └── exceptions.py  # Custom exception hierarchy
|
| 375 |
+
│   ├── db/
|
| 376 |
+
│   │   ├── models/  # SQLAlchemy ORM models
|
| 377 |
+
│   │   ├── migrations/  # Alembic migration scripts
|
| 378 |
+
│   │   └── session.py  # Async database session factory
|
| 379 |
+
│   ├── ml/
|
| 380 |
+
│   │   ├── vision/
|
| 381 |
+
│   │   │   ├── pulmonary_anomaly.py  # VGG16→VAE→ViT detector
|
| 382 |
+
│   │   │   ├── anomaly.py  # ONNX ConvAE fallback
|
| 383 |
+
│   │   │   └── hf_download.py  # HuggingFace model auto-download
|
| 384 |
+
│   │   ├── nlp/
|
| 385 |
+
│   │   │   ├── ner.py  # scispaCy medical NER
|
| 386 |
+
│   │   │   ├── classifier.py  # Zero-shot disease classification
|
| 387 |
+
│   │   │   └── whisper.py  # Voice-to-text transcription
|
| 388 |
+
│   │   ├── rag/
|
| 389 |
+
│   │   │   ├── gemini_client.py  # Gemini 2.0 Flash integration
|
| 390 |
+
│   │   │   ├── generator.py  # BioGPT report + chat generation
|
| 391 |
+
│   │   │   ├── retriever.py  # ChromaDB vector retrieval
|
| 392 |
+
│   │   │   └── vectorstore.py  # Embedding + indexing pipeline
|
| 393 |
+
│   │   ├── fusion/
|
| 394 |
+
│   │   │   └── medclip.py  # Multimodal image-text alignment
|
| 395 |
+
│   │   └── registry.py  # Model lifecycle manager
|
| 396 |
+
│   ├── orchestration/
|
| 397 |
+
│   │   ├── pipeline.py  # 7-stage analysis orchestrator
|
| 398 |
+
│   │   ├── queue.py  # Async task queue
|
| 399 |
+
│   │   ├── resilience.py  # Retry, circuit-breaker, fallbacks
|
| 400 |
+
│   │   ├── scheduler.py  # Periodic cleanup tasks
|
| 401 |
+
│   │   └── workers.py  # Background worker pool
|
| 402 |
+
│   └── utils/
|
| 403 |
+
│       ├── pdf.py  # Clinical PDF report builder
|
| 404 |
+
│       ├── image.py  # Image preprocessing utilities
|
| 405 |
+
│       ├── audio.py  # Audio format handling
|
| 406 |
+
│       └── validators.py  # Input validation helpers
|
| 407 |
+
├── frontend/
|
| 408 |
+
│   ├── app/  # Next.js App Router pages
|
| 409 |
+
│   │   ├── (auth)/  # Login / Registration pages
|
| 410 |
+
│   │   ├── (dashboard)/  # Analysis dashboard
|
| 411 |
+
│   │   ├── about/  # About page
|
| 412 |
+
│   │   └── profile/  # User profile & history
|
| 413 |
+
│   ├── components/
|
| 414 |
+
│   │   ├── analysis/  # Upload panel, results viewer
|
| 415 |
+
│   │   ├── chat/  # AI chat interface
|
| 416 |
+
│   │   ├── shared/  # Navbar, layout components
|
| 417 |
+
│   │   └── ui/  # Reusable UI primitives
|
| 418 |
+
│   └── lib/  # API client, auth context, utilities
|
| 419 |
+
├── training/
|
| 420 |
+
│   ├── notebooks/  # Jupyter training notebooks
|
| 421 |
+
│   └── scripts/  # Data preparation & training scripts
|
| 422 |
+
├── data/  # Raw/processed data & uploads
|
| 423 |
+
├── models/  # Cached model weights
|
| 424 |
+
├── results/  # Training outputs & evaluation
|
| 425 |
+
├── Dockerfile  # Production Docker image
|
| 426 |
+
├── requirements.txt  # Python dependencies
|
| 427 |
+
└── .env.example  # Environment variable template
|
| 428 |
+
```
|
| 429 |
+
|
| 430 |
+
---
|
| 431 |
+
|
| 432 |
+
## Getting Started
|
| 433 |
+
|
| 434 |
+
### Prerequisites
|
| 435 |
+
|
| 436 |
+
- **Python** 3.10 or higher
|
| 437 |
+
- **Node.js** 18+ and npm
|
| 438 |
+
- **Git** and **Git LFS** (for model weights)
|
| 439 |
+
- **(Optional)** CUDA 11.8+ compatible GPU for accelerated inference
|
| 440 |
+
|
| 441 |
+
### 1. Clone the Repository
|
| 442 |
+
|
| 443 |
+
```bash
|
| 444 |
+
git clone https://github.com/hoshikrana/MedSightAI.git
|
| 445 |
+
cd MedSightAI
|
| 446 |
+
```
|
| 447 |
+
|
| 448 |
+
### 2. Backend Setup
|
| 449 |
+
|
| 450 |
+
```bash
|
| 451 |
+
# Create virtual environment
|
| 452 |
+
python -m venv venv
|
| 453 |
+
|
| 454 |
+
# Activate (Windows)
|
| 455 |
+
.\venv\Scripts\activate
|
| 456 |
+
|
| 457 |
+
# Activate (macOS/Linux)
|
| 458 |
+
source venv/bin/activate
|
| 459 |
+
|
| 460 |
+
# Install PyTorch (GPU — CUDA 11.8)
|
| 461 |
+
pip install torch==2.2.0+cu118 torchvision==0.17.0+cu118 --index-url https://download.pytorch.org/whl/cu118
|
| 462 |
+
|
| 463 |
+
# OR install PyTorch (CPU-only)
|
| 464 |
+
pip install torch==2.2.0+cpu torchvision==0.17.0+cpu --index-url https://download.pytorch.org/whl/cpu
|
| 465 |
+
|
| 466 |
+
# Install remaining dependencies
|
| 467 |
+
pip install -r requirements.txt
|
| 468 |
+
|
| 469 |
+
# Install scispaCy model
|
| 470 |
+
pip install https://s3-us-west-2.amazonaws.com/ai2-s3-scispacy/releases/v0.5.1/en_core_sci_sm-0.5.1.tar.gz
|
| 471 |
+
```
|
| 472 |
+
|
| 473 |
+
### 3. Environment Configuration
|
| 474 |
+
|
| 475 |
+
```bash
|
| 476 |
+
# Copy the example environment file
|
| 477 |
+
cp .env.example .env
|
| 478 |
+
|
| 479 |
+
# Generate secure keys
|
| 480 |
+
python -c "import secrets; print('SECRET_KEY=' + secrets.token_hex(32))"
|
| 481 |
+
python -c "import secrets; print('JWT_SECRET_KEY=' + secrets.token_hex(32))"
|
| 482 |
+
```
|
| 483 |
+
|
| 484 |
+
Edit `.env` with your configuration. Required variables:
|
| 485 |
+
- `SECRET_KEY` — Application secret (min 32 chars)
|
| 486 |
+
- `JWT_SECRET_KEY` — JWT signing key (min 32 chars)
|
| 487 |
+
- `GEMINI_API_KEY` — [Get free API key](https://aistudio.google.com/app/apikey) for AI chat
|
| 488 |
+
- `GOOGLE_CLIENT_ID` / `GOOGLE_CLIENT_SECRET` — For OAuth (optional)
|
| 489 |
+
|
| 490 |
+
### 4. Frontend Setup
|
| 491 |
+
|
| 492 |
+
```bash
|
| 493 |
+
cd frontend
|
| 494 |
+
npm install
|
| 495 |
+
```
|
| 496 |
+
|
| 497 |
+
Create `frontend/.env.local`:
|
| 498 |
+
```env
|
| 499 |
+
NEXT_PUBLIC_API_URL=http://localhost:8000
|
| 500 |
+
NEXT_PUBLIC_APP_NAME=MedSight AI
|
| 501 |
+
```
|
| 502 |
+
|
| 503 |
+
### 5. Run the Application
|
| 504 |
+
|
| 505 |
+
```bash
|
| 506 |
+
# Terminal 1 — Backend (from project root)
|
| 507 |
+
python -m uvicorn backend.main:app --reload --host 0.0.0.0 --port 8000
|
| 508 |
+
|
| 509 |
+
# Terminal 2 — Frontend
|
| 510 |
+
cd frontend
|
| 511 |
+
npm run dev
|
| 512 |
+
```
|
| 513 |
+
|
| 514 |
+
Open [http://localhost:3000](http://localhost:3000) in your browser.
|
| 515 |
+
|
| 516 |
+
---
|
| 517 |
+
|
| 518 |
+
## Configuration
|
| 519 |
+
|
| 520 |
+
### Environment Variables
|
| 521 |
+
|
| 522 |
+
| Variable | Default | Description |
|
| 523 |
+
|---|---|---|
|
| 524 |
+
| `ENVIRONMENT` | `development` | `development` / `production` / `test` |
|
| 525 |
+
| `SECRET_KEY` | *required* | Application secret key (≥32 chars) |
|
| 526 |
+
| `DATABASE_URL` | `sqlite+aiosqlite:///./medsight.db` | Database connection string |
|
| 527 |
+
| `GEMINI_API_KEY` | — | Google Gemini API key for AI chat |
|
| 528 |
+
| `HF_TOKEN` | — | HuggingFace token for model downloads |
|
| 529 |
+
| `ALLOWED_ORIGINS` | `http://localhost:3000` | Comma-separated exact frontend origins |
|
| 530 |
+
| `ALLOWED_ORIGIN_REGEX` | `https://.*\.vercel\.app` | Regex for Vercel preview/production origins |
|
| 531 |
+
| `TRUSTED_HOSTS` | `localhost,127.0.0.1,*.vercel.app,*.hf.space` | Hosts accepted by TrustedHostMiddleware |
|
| 532 |
+
| `VISION_ANOMALY_BACKEND` | `auto` | `auto` / `onnx` / `pulmonary` |
|
| 533 |
+
| `GPU_VRAM_BUDGET_MB` | `3500` | Max VRAM budget for model loading |
|
| 534 |
+
| `MAX_UPLOAD_SIZE_MB` | `10` | Maximum upload file size |
|
| 535 |
+
| `STORAGE_BACKEND` | `local` | `local` / `r2` (Cloudflare R2) |
|
| 536 |
+
| `RATE_LIMIT_ANALYZE` | `10/hour` | Analysis endpoint rate limit |
|
| 537 |
+
| `RATE_LIMIT_CHAT` | `50/hour` | Chat endpoint rate limit |
|
| 538 |
+
|
| 539 |
+
See [`.env.example`](.env.example) for the complete list of configurable options.
|
| 540 |
+
|
| 541 |
+
### Vision Backend Selection
|
| 542 |
+
|
| 543 |
+
The `VISION_ANOMALY_BACKEND` setting controls which vision model is used:
|
| 544 |
+
|
| 545 |
+
| Mode | Description |
|
| 546 |
+
|---|---|
|
| 547 |
+
| `auto` | Auto-detects available checkpoints (prefers `pulmonary` → `onnx`) |
|
| 548 |
+
| `pulmonary` | Uses the VGG16→VAE→ViT `.pth` checkpoint |
|
| 549 |
+
| `onnx` | Uses the ConvAE ONNX model for lightweight CPU inference |
|
| 550 |
+
|
| 551 |
+
---
|
| 552 |
+
|
| 553 |
+
## API Reference
|
| 554 |
+
|
| 555 |
+
### Core Endpoints
|
| 556 |
+
|
| 557 |
+
| Method | Endpoint | Description | Auth |
|
| 558 |
+
|---|---|---|---|
|
| 559 |
+
| `POST` | `/api/v1/analyze` | Upload X-ray image + symptoms for analysis | ✅ |
|
| 560 |
+
| `GET` | `/api/v1/analyze/status/{task_id}` | Poll analysis task status | ✅ |
|
| 561 |
+
| `GET` | `/api/v1/analyze/result/{session_id}` | Retrieve completed analysis results | ✅ |
|
| 562 |
+
| `POST` | `/api/v1/chat` | AI-powered clinical Q&A (streaming) | ✅ |
|
| 563 |
+
| `GET` | `/api/v1/report/{session_id}` | Generate & download PDF report | ✅ |
|
| 564 |
+
| `GET` | `/api/v1/health` | System health check | ❌ |
|
| 565 |
+
| `GET` | `/docs` | Interactive Swagger UI (dev only) | ❌ |
|
| 566 |
+
|
| 567 |
+
### Authentication Endpoints
|
| 568 |
+
|
| 569 |
+
| Method | Endpoint | Description |
|
| 570 |
+
|---|---|---|
|
| 571 |
+
| `POST` | `/api/v1/auth/register` | Email/password registration |
|
| 572 |
+
| `POST` | `/api/v1/auth/login` | Email/password login → JWT tokens |
|
| 573 |
+
| `POST` | `/api/v1/auth/refresh` | Refresh access token |
|
| 574 |
+
| `GET` | `/api/v1/auth/google` | Initiate Google OAuth flow |
|
| 575 |
+
| `GET` | `/api/v1/auth/google/callback` | Google OAuth callback |
|
| 576 |
+
|
| 577 |
+
### Analysis Response Schema
|
| 578 |
+
|
| 579 |
+
```json
|
| 580 |
+
{
|
| 581 |
+
"session_id": "uuid",
|
| 582 |
+
"overall_status": "COMPLETE | PARTIAL | FAILED",
|
| 583 |
+
"vision": {
|
| 584 |
+
"anomaly_score": 72.5,
|
| 585 |
+
"risk_level": "HIGH",
|
| 586 |
+
"heatmap_base64": "data:image/png;base64,...",
|
| 587 |
+
"top_regions": [{"x": 76, "y": 56, "width": 72, "height": 86, "confidence": 0.85}],
|
| 588 |
+
"model_confidence": 0.82
|
| 589 |
+
},
|
| 590 |
+
"nlp": {
|
| 591 |
+
"entities": {"diseases": [...], "symptoms": [...], "medications": [...]},
|
| 592 |
+
"primary_diagnosis": "Pneumonia",
|
| 593 |
+
"diagnosis_confidence": 0.78,
|
| 594 |
+
"differential": [{"disease": "Pleural Effusion", "confidence": 0.45}]
|
| 595 |
+
},
|
| 596 |
+
"fusion": {
|
| 597 |
+
"image_text_similarity": 0.72,
|
| 598 |
+
"alignment": "moderate",
|
| 599 |
+
"final_risk": "MEDIUM"
|
| 600 |
+
},
|
| 601 |
+
"report_text": "## AI Diagnostic Report ...",
|
| 602 |
+
"timings": {
|
| 603 |
+
"preprocess_ms": 45,
|
| 604 |
+
"vision_ms": 1200,
|
| 605 |
+
"nlp_ms": 350,
|
| 606 |
+
"fusion_ms": 120,
|
| 607 |
+
"report_ms": 800,
|
| 608 |
+
"total_ms": 2515
|
| 609 |
+
}
|
| 610 |
+
}
|
| 611 |
+
```
|
| 612 |
+
|
| 613 |
+
---
|
| 614 |
+
|
| 615 |
+
## Deployment
|
| 616 |
+
|
| 617 |
+
### Production Architecture
|
| 618 |
+
|
| 619 |
+
| Service | Platform | Purpose |
|
| 620 |
+
|---|---|---|
|
| 621 |
+
| **Backend API** | HuggingFace Spaces (Docker SDK) | FastAPI + ML inference on port 7860 |
|
| 622 |
+
| **Frontend** | Vercel | Next.js static + SSR |
|
| 623 |
+
| **Database** | Supabase | Managed PostgreSQL |
|
| 624 |
+
| **Models** | HuggingFace Hub | Model weight distribution |
|
| 625 |
+
| **Storage** | Cloudflare R2 | Medical image storage (optional) |
|
| 626 |
+
|
| 627 |
+
### Docker Deployment
|
| 628 |
+
|
| 629 |
+
```bash
|
| 630 |
+
# Build the production image
|
| 631 |
+
docker build -t medsight-ai .
|
| 632 |
+
|
| 633 |
+
# Run locally
|
| 634 |
+
docker run -p 7860:7860 --env-file .env medsight-ai
|
| 635 |
+
```
|
| 636 |
+
|
| 637 |
+
The Dockerfile uses `python:3.11-slim`, installs CPU-only PyTorch (~800MB smaller than CUDA), and runs Uvicorn with a single worker. Peak memory is approximately **4GB** during inference.
|
| 638 |
+
|
| 639 |
+
### HuggingFace Spaces
|
| 640 |
+
|
| 641 |
+
The backend is configured to deploy directly to HuggingFace Spaces via the Docker SDK. The HuggingFace metadata is in the `README.md` frontmatter. Models are auto-downloaded from `hoshikrana/VAE_and_VIT_Anomaly_detection` on startup.
|
| 642 |
+
|
| 643 |
+
Required GitHub repository secrets for the deployment workflow:
|
| 644 |
+
|
| 645 |
+
| Secret | Purpose |
|
| 646 |
+
|---|---|
|
| 647 |
+
| `HF_TOKEN` | Hugging Face write token for uploading the Space and runtime model downloads |
|
| 648 |
+
| `HF_SPACE_ID` | Space repo id, for example `username/medsight-ai-backend` |
|
| 649 |
+
| `HF_SPACE_URL` | Public backend URL, for example `https://username-medsight-ai-backend.hf.space` |
|
| 650 |
+
| `VERCEL_TOKEN` | Vercel CLI token |
|
| 651 |
+
| `VERCEL_ORG_ID` | Vercel team/user id |
|
| 652 |
+
| `VERCEL_PROJECT_ID` | Vercel project id for the frontend |
|
| 653 |
+
|
| 654 |
+
Set these Hugging Face Space runtime variables as secrets or variables:
|
| 655 |
+
|
| 656 |
+
```env
|
| 657 |
+
ENVIRONMENT=production
|
| 658 |
+
SECRET_KEY=<64-hex-or-long-random-secret>
|
| 659 |
+
JWT_SECRET_KEY=<different-64-hex-or-long-random-secret>
|
| 660 |
+
DATABASE_URL=<production-postgres-url-or-sqlite-for-demo-only>
|
| 661 |
+
ALLOWED_ORIGINS=https://<your-vercel-domain>
|
| 662 |
+
ALLOWED_ORIGIN_REGEX=https://.*\.vercel\.app
|
| 663 |
+
TRUSTED_HOSTS=*.hf.space,localhost,127.0.0.1
|
| 664 |
+
FRONTEND_URL=https://<your-vercel-domain>
|
| 665 |
+
BACKEND_URL=https://<your-hf-space-subdomain>.hf.space
|
| 666 |
+
HF_TOKEN=<token-if-model-repo-is-private>
|
| 667 |
+
```
|
| 668 |
+
|
| 669 |
+
---
|
| 670 |
+
|
| 671 |
+
## Research Paper
|
| 672 |
+
|
| 673 |
+
This project is accompanied by a peer-reviewed research paper:
|
| 674 |
+
|
| 675 |
+
> **"MedSight AI: A Multimodal Deep Learning Framework for Unsupervised Pulmonary Anomaly Detection with Retrieval-Augmented Clinical Decision Support"**
|
| 676 |
+
>
|
| 677 |
+
> Kasala Hoshik, V. Vineel Reddy, K. Chanikya
|
| 678 |
+
> Lovely Professional University, Phagwara, Punjab, India
|
| 679 |
+
> Research | May 2026
|
| 680 |
+
|
| 681 |
+
### Key Research Contributions
|
| 682 |
+
|
| 683 |
+
1. **Novel three-stage architecture (VGG16 → VAE → ViT)** — Decomposes anomaly detection into feature extraction, distributional learning, and attention-based scoring with only 2.53M trainable parameters (vs. 86M in ViT-Base or 307M in DINOv2)
|
| 684 |
+
2. **Unsupervised paradigm shift** — Trained exclusively on normal radiographs, eliminating the need for expensive per-pathology annotation. Can detect novel/rare pathologies absent from training data
|
| 685 |
+
3. **Multi-signal interpretable scoring** — Fusion of reconstruction error, KL divergence, and ViT attention provides clinicians with three complementary perspectives on why an image was flagged
|
| 686 |
+
4. **UMAP-validated latent representations** — Emergent clustering in the VAE latent space demonstrates pathology-relevant structure without any supervised signal
|
| 687 |
+
5. **Production-grade multimodal system** — Complete clinical platform integrating vision, NLP, and 3-tier RAG conversational AI with graceful degradation when individual components fail
|
| 688 |
+
6. **Resource-constrained deployment** — Full pipeline operates within 4 GB VRAM, enabling deployment on consumer hardware and CPU-only environments
|
| 689 |
+
|
| 690 |
+
### Strengths Highlighted in the Paper
|
| 691 |
+
|
| 692 |
+
- **Clinical viability** — AUROC of 0.718 demonstrates unsupervised detection can provide clinically useful screening as a triage tool
|
| 693 |
+
- **Extreme parameter efficiency** — 2.53M params vs. 86M (ViT-Base) or 307M (DINOv2)
|
| 694 |
+
- **Interpretable multi-signal scoring** — Three complementary anomaly signals provide richer diagnostic information than single-metric approaches
|
| 695 |
+
|
| 696 |
+
### Future Directions
|
| 697 |
+
|
| 698 |
+
- Perceptual loss (instead of MSE) for VAE reconstruction to better capture structural anomalies
|
| 699 |
+
- Larger backbones (DINOv2 ViT-S/14 producing 384-d features)
|
| 700 |
+
- Multi-scale latent analysis using hierarchical VAEs
|
| 701 |
+
- Contrastive pre-training of the anomaly scorer
|
| 702 |
+
- Domain-specific backbones (CheXNet) for improved viral pneumonia sensitivity
|
| 703 |
+
|
| 704 |
+
---
|
| 705 |
+
|
| 706 |
+
## Reproduce Training
|
| 707 |
+
|
| 708 |
+
See [Training & Experimental Results](#training--experimental-results) above for full methodology and hyperparameters.
|
| 709 |
+
|
| 710 |
+
```bash
|
| 711 |
+
# Prepare and preprocess the dataset
|
| 712 |
+
python training/scripts/prepare_dataset.py
|
| 713 |
+
|
| 714 |
+
# Train the VAE + ViT anomaly detector
|
| 715 |
+
python training/scripts/train_anomaly.py
|
| 716 |
+
|
| 717 |
+
# Or use the Jupyter notebook for interactive training
|
| 718 |
+
jupyter notebook training/notebooks/covid\ \(1\).ipynb
|
| 719 |
+
|
| 720 |
+
# Upload trained models to HuggingFace
|
| 721 |
+
python training/scripts/upload_models.py
|
| 722 |
+
```
|
| 723 |
+
|
| 724 |
+
---
|
| 725 |
+
|
| 726 |
+
## Contributing
|
| 727 |
+
|
| 728 |
+
We welcome contributions! Please follow these steps:
|
| 729 |
+
|
| 730 |
+
1. Fork the repository
|
| 731 |
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
| 732 |
+
3. Make your changes and ensure tests pass
|
| 733 |
+
4. Commit with descriptive messages (`git commit -m 'Add amazing feature'`)
|
| 734 |
+
5. Push to your branch (`git push origin feature/amazing-feature`)
|
| 735 |
+
6. Open a Pull Request
|
| 736 |
+
|
| 737 |
+
### Development Guidelines
|
| 738 |
+
|
| 739 |
+
- **Backend:** Follow `ruff` and `black` formatting (see `pyproject.toml`)
|
| 740 |
+
- **Frontend:** Follow ESLint + Prettier configuration
|
| 741 |
+
- **Tests:** Add tests for new features (`pytest` for backend, `npm test` for frontend)
|
| 742 |
+
- **Commits:** Use conventional commit messages
|
| 743 |
+
|
| 744 |
+
### Running Tests
|
| 745 |
+
|
| 746 |
+
```bash
|
| 747 |
+
# Backend tests
|
| 748 |
+
pytest backend/tests/ -v --tb=short
|
| 749 |
+
|
| 750 |
+
# With specific markers
|
| 751 |
+
pytest -m "unit" -v # Fast unit tests only
|
| 752 |
+
pytest -m "integration" -v # Integration tests
|
| 753 |
+
pytest -m "ml" -v # ML model tests
|
| 754 |
+
|
| 755 |
+
# Frontend lint
|
| 756 |
+
cd frontend && npm run lint
|
| 757 |
+
```
|
| 758 |
+
|
| 759 |
+
---
|
| 760 |
+
|
| 761 |
+
## Acknowledgements
|
| 762 |
+
|
| 763 |
+
- [COVID-19 Radiography Dataset](https://www.kaggle.com/datasets/tawsifurrahman/covid19-radiography-database) — Training data
|
| 764 |
+
- [scispaCy](https://allenai.github.io/scispacy/) — Biomedical NLP models
|
| 765 |
+
- [HuggingFace Transformers](https://huggingface.co/docs/transformers) — Model hub and inference
|
| 766 |
+
- [Google Gemini](https://ai.google.dev/) — Generative AI for clinical chat
|
| 767 |
+
- [FastAPI](https://fastapi.tiangolo.com/) — High-performance async API framework
|
| 768 |
+
- [Next.js](https://nextjs.org/) — React framework for the frontend
|
| 769 |
+
|
| 770 |
+
---
|
| 771 |
+
|
| 772 |
+
## License
|
| 773 |
+
|
| 774 |
+
This project is licensed under the **Apache License 2.0** — see the [LICENSE](LICENSE) file for details.
|
| 775 |
+
|
| 776 |
+
---
|
| 777 |
+
|
| 778 |
+
<div align="center">
|
| 779 |
+
|
| 780 |
+
**Built with ❤️ for advancing medical AI research**
|
| 781 |
+
|
| 782 |
+
*MedSight AI is a research project and should not be used for clinical diagnosis without physician oversight.*
|
| 783 |
+
|
| 784 |
+
</div>
|
backend/core/config.py
CHANGED
|
@@ -19,7 +19,8 @@ class Settings(BaseSettings):
|
|
| 19 |
DEBUG: bool = False
|
| 20 |
VERSION: str = "1.0.0"
|
| 21 |
ALLOWED_ORIGINS: Any = ["http://localhost:3000"]
|
| 22 |
-
|
|
|
|
| 23 |
FRONTEND_URL: str = "http://localhost:3000"
|
| 24 |
BACKEND_URL: str = "http://localhost:8000"
|
| 25 |
|
|
|
|
| 19 |
DEBUG: bool = False
|
| 20 |
VERSION: str = "1.0.0"
|
| 21 |
ALLOWED_ORIGINS: Any = ["http://localhost:3000"]
|
| 22 |
+
ALLOWED_ORIGIN_REGEX: str | None = r"https://.*\.vercel\.app"
|
| 23 |
+
TRUSTED_HOSTS: Any = ["localhost", "127.0.0.1", "*.vercel.app", "*.hf.space"]
|
| 24 |
FRONTEND_URL: str = "http://localhost:3000"
|
| 25 |
BACKEND_URL: str = "http://localhost:8000"
|
| 26 |
|
backend/core/dependencies.py
CHANGED
|
@@ -123,7 +123,7 @@ def get_pagination(
|
|
| 123 |
return page, limit
|
| 124 |
|
| 125 |
def get_client_ip(request: Request) -> str:
|
| 126 |
-
"""Extracts real IP from X-Forwarded-For
|
| 127 |
forwarded_for = request.headers.get("X-Forwarded-For")
|
| 128 |
if forwarded_for:
|
| 129 |
return forwarded_for.split(',')[0].strip()
|
|
|
|
| 123 |
return page, limit
|
| 124 |
|
| 125 |
def get_client_ip(request: Request) -> str:
|
| 126 |
+
"""Extracts real IP from X-Forwarded-For when running behind a platform proxy."""
|
| 127 |
forwarded_for = request.headers.get("X-Forwarded-For")
|
| 128 |
if forwarded_for:
|
| 129 |
return forwarded_for.split(',')[0].strip()
|
backend/core/middleware.py
CHANGED
|
@@ -14,7 +14,7 @@ logger = logging.getLogger("access")
|
|
| 14 |
class HTTPSRedirectMiddleware(BaseHTTPMiddleware):
|
| 15 |
async def dispatch(self, request: Request, call_next):
|
| 16 |
if settings.is_production:
|
| 17 |
-
# Check if request came in as HTTP
|
| 18 |
proto = request.headers.get("X-Forwarded-Proto", "https")
|
| 19 |
if proto == "http":
|
| 20 |
https_url = str(request.url).replace("http://", "https://", 1)
|
|
|
|
| 14 |
class HTTPSRedirectMiddleware(BaseHTTPMiddleware):
|
| 15 |
async def dispatch(self, request: Request, call_next):
|
| 16 |
if settings.is_production:
|
| 17 |
+
# Check if request came in as HTTP behind a platform proxy.
|
| 18 |
proto = request.headers.get("X-Forwarded-Proto", "https")
|
| 19 |
if proto == "http":
|
| 20 |
https_url = str(request.url).replace("http://", "https://", 1)
|
backend/core/security.py
CHANGED
|
@@ -119,12 +119,17 @@ def set_refresh_cookie(response: Response, token: str):
|
|
| 119 |
max_age=settings.REFRESH_TOKEN_EXPIRE_DAYS * 86400,
|
| 120 |
httponly=True,
|
| 121 |
secure=settings.is_production,
|
| 122 |
-
samesite="lax",
|
| 123 |
path="/api/v1/auth/refresh"
|
| 124 |
)
|
| 125 |
|
| 126 |
def clear_auth_cookies(response: Response):
|
| 127 |
-
response.delete_cookie(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
def get_refresh_token_from_cookie(request: Request) -> str:
|
| 130 |
token = request.cookies.get("refresh_token")
|
|
|
|
| 119 |
max_age=settings.REFRESH_TOKEN_EXPIRE_DAYS * 86400,
|
| 120 |
httponly=True,
|
| 121 |
secure=settings.is_production,
|
| 122 |
+
samesite="none" if settings.is_production else "lax",
|
| 123 |
path="/api/v1/auth/refresh"
|
| 124 |
)
|
| 125 |
|
| 126 |
def clear_auth_cookies(response: Response):
|
| 127 |
+
response.delete_cookie(
|
| 128 |
+
"refresh_token",
|
| 129 |
+
path="/api/v1/auth/refresh",
|
| 130 |
+
secure=settings.is_production,
|
| 131 |
+
samesite="none" if settings.is_production else "lax",
|
| 132 |
+
)
|
| 133 |
|
| 134 |
def get_refresh_token_from_cookie(request: Request) -> str:
|
| 135 |
token = request.cookies.get("refresh_token")
|
backend/main.py
CHANGED
|
@@ -162,6 +162,7 @@ app.add_middleware(SecurityHeadersMiddleware)
|
|
| 162 |
app.add_middleware(
|
| 163 |
CORSMiddleware,
|
| 164 |
allow_origins=settings.ALLOWED_ORIGINS,
|
|
|
|
| 165 |
allow_credentials=True,
|
| 166 |
allow_methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"],
|
| 167 |
allow_headers=["Content-Type", "Authorization", "X-API-Key", "X-Request-ID", "Accept"],
|
|
|
|
| 162 |
app.add_middleware(
|
| 163 |
CORSMiddleware,
|
| 164 |
allow_origins=settings.ALLOWED_ORIGINS,
|
| 165 |
+
allow_origin_regex=settings.ALLOWED_ORIGIN_REGEX,
|
| 166 |
allow_credentials=True,
|
| 167 |
allow_methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"],
|
| 168 |
allow_headers=["Content-Type", "Authorization", "X-API-Key", "X-Request-ID", "Accept"],
|
docs/images/model_architecture.png
ADDED
|
Git LFS Details
|
docs/images/system_architecture.png
ADDED
|
Git LFS Details
|
requirements.txt
CHANGED
|
@@ -1,59 +1,78 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
#
|
| 17 |
-
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
python-jose[cryptography]==3.3.0 # JWT creation and verification
|
| 27 |
-
passlib[bcrypt]==1.7.4
|
| 28 |
-
httpx==0.27.0
|
| 29 |
-
aiofiles==23.2.1
|
| 30 |
-
authlib==1.3.0
|
| 31 |
-
itsdangerous==2.1.2
|
| 32 |
-
python-magic-bin==0.4.14
|
| 33 |
-
|
| 34 |
-
# [Database]
|
| 35 |
-
sqlalchemy==2.0.28
|
| 36 |
-
alembic==1.13.1
|
| 37 |
-
aiosqlite==0.20.0
|
| 38 |
-
asyncpg==0.29.0
|
| 39 |
-
greenlet==3.0.3
|
| 40 |
-
|
| 41 |
-
# [RAG + Vector DB]
|
| 42 |
-
chromadb==0.4.24
|
| 43 |
-
|
| 44 |
-
# [
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
#
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2 |
+
# ║ MedSight AI — Python Dependencies ║
|
| 3 |
+
# β Python 3.10+ | PyTorch 2.2 β
|
| 4 |
+
# β Install: pip install -r requirements.txt β
|
| 5 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 6 |
+
|
| 7 |
+
# ── PyTorch (install FIRST — version must match torchvision) ──────────
|
| 8 |
+
# GPU (CUDA 11.8):
|
| 9 |
+
# pip install torch==2.2.0+cu118 torchvision==0.17.0+cu118 --index-url https://download.pytorch.org/whl/cu118
|
| 10 |
+
# CPU-only:
|
| 11 |
+
# pip install torch==2.2.0+cpu torchvision==0.17.0+cpu --index-url https://download.pytorch.org/whl/cpu
|
| 12 |
+
|
| 13 |
+
# ── [ML Core] ─────────────────────────────────────────────────────────
|
| 14 |
+
torchvision==0.17.0 # Pairs exactly with PyTorch 2.2
|
| 15 |
+
transformers==4.38.2 # Stable version for BioGPT and BioBERT
|
| 16 |
+
datasets==2.18.0 # Required for NCBI dataset parsing
|
| 17 |
+
accelerate==0.27.2 # Handles memory-efficient model loading
|
| 18 |
+
peft==0.9.0 # Lightweight fine-tuning parameter handling
|
| 19 |
+
sentence-transformers==2.6.0 # Powers the all-MiniLM-L6-v2 embedding model
|
| 20 |
+
timm==0.9.16 # Required backend for DINOv2 architecture
|
| 21 |
+
scikit-learn==1.4.1.post1 # Used for metric calculations (F1, precision, recall)
|
| 22 |
+
numpy==1.26.4 # Last stable 1.x version before 2.0 breaking changes
|
| 23 |
+
Pillow==10.2.0 # Safe version for PIL image processing
|
| 24 |
+
opencv-python-headless==4.9.0.80 # Headless avoids Qt DLL conflicts on servers
|
| 25 |
+
onnxruntime==1.17.1 # Fast CPU inference for ConvAE ONNX backend
|
| 26 |
+
scispacy==0.5.4 # Lightweight medical NLP (NER pipeline)
|
| 27 |
+
# Install scispaCy model manually:
|
| 28 |
+
# pip install https://s3-us-west-2.amazonaws.com/ai2-s3-scispacy/releases/v0.5.4/en_core_sci_sm-0.5.4.tar.gz
|
| 29 |
+
|
| 30 |
+
# ── [Backend — FastAPI] ───────────────────────────────────────────────
|
| 31 |
+
fastapi==0.110.0 # Base API framework
|
| 32 |
+
uvicorn[standard]==0.27.1 # ASGI server for FastAPI
|
| 33 |
+
python-multipart==0.0.9 # Required for FastAPI file uploads
|
| 34 |
+
pydantic==2.6.3 # Pydantic v2 exact version for type safety
|
| 35 |
+
pydantic-settings==2.2.1 # Required for v2 configuration management
|
| 36 |
+
email-validator==2.1.1 # Required by Pydantic EmailStr schemas
|
| 37 |
+
slowapi==0.1.9 # Rate limiting without Redis
|
| 38 |
python-jose[cryptography]==3.3.0 # JWT creation and verification
|
| 39 |
+
passlib[bcrypt]==1.7.4 # Password hashing algorithms
|
| 40 |
+
httpx==0.27.0 # Async HTTP client for OAuth
|
| 41 |
+
aiofiles==23.2.1 # Async file handling for image uploads
|
| 42 |
+
authlib==1.3.0 # Handles Google OAuth 2.0 flow
|
| 43 |
+
itsdangerous==2.1.2 # Required by Authlib sessions
|
| 44 |
+
python-magic-bin==0.4.14 # Windows-compatible MIME type checker (critical for file validation)
|
| 45 |
+
|
| 46 |
+
# ── [Database] ────────────────────────────────────────────────────────
|
| 47 |
+
sqlalchemy==2.0.28 # SQLAlchemy 2.0 async ORM
|
| 48 |
+
alembic==1.13.1 # Database migration manager
|
| 49 |
+
aiosqlite==0.20.0 # Async SQLite driver for local development
|
| 50 |
+
asyncpg==0.29.0 # Async PostgreSQL driver for production
|
| 51 |
+
greenlet==3.0.3 # Required for SQLAlchemy async execution
|
| 52 |
+
|
| 53 |
+
# ── [RAG + Vector DB] ─────────────────────────────────────────────────
|
| 54 |
+
chromadb==0.4.24 # Local vector database for RAG retrieval
|
| 55 |
+
|
| 56 |
+
# ── [Generative AI] ───────────────────────────────────────────────────
|
| 57 |
+
google-generativeai>=0.8.0 # Gemini 2.0 Flash API for clinical Q&A chat
|
| 58 |
+
huggingface_hub>=0.20.0 # HuggingFace Hub for model downloads
|
| 59 |
+
|
| 60 |
+
# ── [Utilities] ───────────────────────────────────────────────────────
|
| 61 |
+
reportlab==4.1.0 # PDF report generation
|
| 62 |
+
fpdf2==2.7.9 # Fallback lightweight PDF generation
|
| 63 |
+
python-dotenv==1.0.1 # Environment variable parsing
|
| 64 |
+
tqdm==4.66.2 # Progress bars for dataset processing
|
| 65 |
+
ffmpeg-python==0.2.0 # Audio conversion wrapper for Whisper
|
| 66 |
+
apscheduler==3.10.4 # In-memory background task scheduling
|
| 67 |
+
boto3==1.34.69 # Optional Cloudflare R2/S3-compatible upload storage
|
| 68 |
+
|
| 69 |
+
# ── [Audio — Optional] ────────────────────────────────────────────────
|
| 70 |
+
openai-whisper==20231117 # Whisper CPU transcription for voice input
|
| 71 |
+
|
| 72 |
+
# ── [Dev + Testing] ───────────────────────────────────────────────────
|
| 73 |
+
pytest==8.1.0 # Core testing framework
|
| 74 |
+
pytest-asyncio==0.23.5 # Testing async FastAPI routes
|
| 75 |
+
black==24.3.0 # Code formatting
|
| 76 |
+
ruff==0.3.4 # Fast Python linter
|
| 77 |
+
mypy==1.9.0 # Static type checking
|
| 78 |
+
locust==2.24.0 # Load testing framework
|