hoshikrana committed on
Commit
5b56e7c
·
1 Parent(s): c1b7fff

feat: setup secrets management, logging infrastructure, and code quality tools

Browse files
.pre-commit-config.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pre-commit configuration: the hooks below run against staged files on every commit.
+ repos:
2
# Ruff hooks, pinned to a fixed release via `rev`.
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.3.4
4
+ hooks:
5
# Linter; `--fix` lets it apply automatic fixes.
+ - id: ruff
6
+ args: [ --fix ]
7
# Formatter; listed after the linter so fixes are formatted too.
+ - id: ruff-format
8
# Generic repository hygiene hooks.
+ - repo: https://github.com/pre-commit/pre-commit-hooks
9
+ rev: v4.5.0
10
+ hooks:
11
# NOTE(review): --maxkb=50000 allows files up to roughly 50 MB — confirm this is intentional.
+ - id: check-added-large-files
12
+ args: ['--maxkb=50000']
13
+ - id: check-merge-conflict
14
+ - id: detect-private-key
15
+ - id: check-json
16
+ - id: check-yaml
17
+ - id: end-of-file-fixer
18
+ - id: trailing-whitespace
Makefile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
.PHONY: lint format test test-all clean verify run-dev

# Static checks only; nothing is modified.
lint:
	ruff check backend/
	black --check backend/
	mypy backend/core/

# Apply auto-fixes and reformat in place.
format:
	ruff check --fix backend/
	black backend/

# NOTE(review): the pytest options (markers, asyncio_mode, testpaths) live in
# backend/pyproject.toml — confirm pytest discovers that file when invoked from here.
test:
	pytest -m "unit" --tb=short

test-all:
	pytest --tb=short

# Remove cache directories recursively. Done through Python so the target works
# on both Windows and POSIX shells (the previous `del /s /q` was cmd.exe-only and
# did not remove the directories themselves).
clean:
	python -c "import pathlib, shutil; [shutil.rmtree(p, ignore_errors=True) for n in ('__pycache__', '.pytest_cache', '.ruff_cache') for p in list(pathlib.Path('.').rglob(n))]"

verify:
	python verify_env.py

run-dev:
	uvicorn backend.main:app --reload --port 8000
backend/core/config.py CHANGED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Literal
4
+ from pydantic import field_validator, computed_field
5
+ from pydantic_settings import BaseSettings, SettingsConfigDict
6
+
7
class Settings(BaseSettings):
    """Core application settings populated from environment variables.

    Values come from the process environment and from a UTF-8 ``.env`` file.
    Variable names are matched case-insensitively and unknown variables are
    ignored.  ``SECRET_KEY`` and ``JWT_SECRET_KEY`` have no default, so
    instantiation fails validation when either is missing or shorter than
    32 characters.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # === Application ===
    ENVIRONMENT: Literal["development", "production", "test"] = "development"
    SECRET_KEY: str
    DEBUG: bool = False
    VERSION: str = "1.0.0"
    ALLOWED_ORIGINS: list[str] = ["http://localhost:3000"]
    FRONTEND_URL: str = "http://localhost:3000"
    BACKEND_URL: str = "http://localhost:8000"

    # === Database ===
    DATABASE_URL: str = "sqlite+aiosqlite:///./medsight.db"

    # === JWT ===
    JWT_SECRET_KEY: str
    JWT_SECRET_KEY_OLD: str | None = None  # Used for key rotation
    JWT_ALGORITHM: str = "HS256"
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 30
    REFRESH_TOKEN_EXPIRE_DAYS: int = 7

    # === Google OAuth ===
    GOOGLE_CLIENT_ID: str = ""
    GOOGLE_CLIENT_SECRET: str = ""
    GOOGLE_REDIRECT_URI: str = ""

    # === HuggingFace ===
    HF_TOKEN: str = ""

    # === ML Config ===
    # NOTE(review): Windows-style default; override MODEL_CACHE_DIR on other platforms.
    MODEL_CACHE_DIR: Path = Path("C:/hf_cache")
    TEMP_DIR: Path = Path("./backend/temp")
    MAX_UPLOAD_SIZE_MB: int = 10
    GPU_VRAM_BUDGET_MB: int = 3500

    # === Rate Limiting ===
    RATE_LIMIT_ANALYZE: str = "10/hour"
    RATE_LIMIT_CHAT: str = "50/hour"
    RATE_LIMIT_AUTH: str = "5/minute"

    # === Logging ===
    LOG_LEVEL: str = "DEBUG"
    LOG_DIR: Path = Path("./backend/logs")

    @field_validator("SECRET_KEY")
    @classmethod
    def secret_key_must_be_strong(cls, v: str) -> str:
        """Reject a ``SECRET_KEY`` shorter than 32 characters."""
        if len(v) < 32:
            raise ValueError("SECRET_KEY must be at least 32 characters")
        return v

    @field_validator("JWT_SECRET_KEY")
    @classmethod
    def jwt_key_must_be_strong(cls, v: str) -> str:
        """Reject a ``JWT_SECRET_KEY`` shorter than 32 characters."""
        if len(v) < 32:
            raise ValueError("JWT_SECRET_KEY must be at least 32 characters")
        return v

    @computed_field
    @property
    def is_production(self) -> bool:
        """Whether ``ENVIRONMENT`` is ``"production"``."""
        return self.ENVIRONMENT == "production"

    @computed_field
    @property
    def max_upload_bytes(self) -> int:
        """``MAX_UPLOAD_SIZE_MB`` converted to bytes."""
        return self.MAX_UPLOAD_SIZE_MB * 1024 * 1024

    def __repr__(self) -> str:
        # NEVER show secrets in repr to prevent accidental logging
        return f"Settings(environment={self.ENVIRONMENT}, debug={self.DEBUG}, version={self.VERSION})"

    def __str__(self) -> str:
        # pydantic's BaseModel defines __str__ separately from __repr__ and
        # renders every field, so overriding __repr__ alone would still leak
        # secrets through str(), f-strings and "%s" log formatting.
        return repr(self)
85
+
86
# Instantiated at import time: SECRET_KEY and JWT_SECRET_KEY have no defaults,
# so importing this module fails validation when they are missing or too short.
+ # Singleton — import this everywhere
87
+ settings = Settings()
88
+
89
def startup_validation() -> None:
    """Validate critical infrastructure at startup.

    Creates the temp, log and model-cache directories when missing and checks
    configuration invariants.  Every problem is collected before failing so
    that all of them are reported in one go.

    Raises:
        RuntimeError: if any check fails; the message lists each problem on
            its own line.
    """
    errors: list[str] = []

    required_dirs = {
        "temp": settings.TEMP_DIR,
        "logs": settings.LOG_DIR,
        "cache": settings.MODEL_CACHE_DIR,
    }
    # Each directory is attempted independently so one failure does not hide
    # problems with the others.
    for label, directory in required_dirs.items():
        try:
            directory.mkdir(parents=True, exist_ok=True)
        except PermissionError:
            errors.append(f"Lack permissions to create required {label} directory: {directory}")
        except OSError as exc:
            # e.g. the path exists as a regular file, or a parent is not a directory.
            errors.append(f"Cannot create required {label} directory {directory}: {exc}")

    if not settings.DATABASE_URL:
        errors.append("DATABASE_URL is not set.")

    if settings.is_production:
        if not settings.GOOGLE_CLIENT_ID:
            errors.append("GOOGLE_CLIENT_ID is required in production.")
        if settings.DEBUG:
            errors.append("DEBUG mode must be False in production.")

    if errors:
        raise RuntimeError("Startup Validation Failed:\n" + "\n".join(errors))
backend/core/logging_config.py CHANGED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from logging.handlers import RotatingFileHandler
4
+ from datetime import datetime, UTC
5
+ from contextvars import ContextVar
6
+ from uuid import uuid4
7
+
8
+ from backend.core.config import settings
9
+
10
+ # Context var for async-safe request ID tracking
11
# Defaults to "" so readers get an empty string when no request ID has been set.
+ _request_id_var: ContextVar[str] = ContextVar("request_id", default="")
12
+
13
# Return the request ID stored in the current context ("" when unset).
+ def get_request_id() -> str:
14
+ return _request_id_var.get()
15
+
16
class MaskingFilter(logging.Filter):
    """Logging filter that redacts sensitive values attached to a record.

    ``logger.info(msg, extra={...})`` copies each ``extra`` key directly onto
    the ``LogRecord`` as an attribute; there is no ``record.extra`` dict.  The
    filter therefore inspects every non-standard attribute of the record:

    * an attribute whose name contains one of ``SENSITIVE_FIELDS``
      (case-insensitive substring match) is replaced by ``MASK``;
    * a dict-valued attribute (including an explicitly set ``record.extra``)
      is replaced by a copy in which sensitive keys are masked, recursively.

    The filter never drops a record.
    """

    SENSITIVE_FIELDS = {"password", "token", "secret", "key", "authorization", "cookie", "api_key"}
    MASK = "***MASKED***"

    # Attribute names carried by every LogRecord; these are never user data.
    # "message" and "asctime" are only added once a formatter has run.
    _STANDARD_ATTRS = frozenset(
        vars(logging.LogRecord("", logging.NOTSET, "", 0, "", (), None))
    ) | {"message", "asctime"}

    @classmethod
    def _is_sensitive(cls, name: object) -> bool:
        """Return True when *name* is a string containing a sensitive fragment."""
        if not isinstance(name, str):
            return False
        lowered = name.lower()
        return any(fragment in lowered for fragment in cls.SENSITIVE_FIELDS)

    @classmethod
    def _mask(cls, value: object) -> object:
        """Return *value* with sensitive keys of (nested) dicts masked.

        Dicts are copied rather than mutated so the caller's data is untouched.
        """
        if isinstance(value, dict):
            return {
                key: cls.MASK if cls._is_sensitive(key) else cls._mask(item)
                for key, item in value.items()
            }
        return value

    def filter(self, record: logging.LogRecord) -> bool:
        """Mask sensitive extra fields on *record* in place; always returns True."""
        # Snapshot the items: values are reassigned while scanning.
        for name, value in list(record.__dict__.items()):
            if name in self._STANDARD_ATTRS:
                continue
            if self._is_sensitive(name):
                record.__dict__[name] = self.MASK
            elif isinstance(value, dict):
                record.__dict__[name] = self._mask(value)
        return True
26
+
27
class JSONFormatter(logging.Formatter):
    """Render each log record as a single-line JSON object.

    The object carries a fixed set of core fields (timestamp in UTC, level,
    logger, message, request id, source location, environment), an optional
    ``exception`` field truncated to 500 characters, and every non-standard
    attribute found on the record (i.e. whatever was passed via ``extra=``).
    """

    # Standard LogRecord attributes that must not be re-emitted as extra fields.
    # Hoisted to a class-level frozenset so it is built once, not per attribute.
    _STANDARD_ATTRS = frozenset({
        "args", "asctime", "created", "exc_info", "exc_text", "filename",
        "funcName", "levelname", "levelno", "lineno", "module", "msecs",
        "message", "msg", "name", "pathname", "process", "processName",
        "relativeCreated", "stack_info", "thread", "threadName",
    })

    def format(self, record: logging.LogRecord) -> str:
        """Return *record* serialized as a JSON string."""
        log_dict = {
            "timestamp": datetime.fromtimestamp(record.created, UTC).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
            "request_id": get_request_id(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno,
            "environment": settings.ENVIRONMENT,
        }
        if record.exc_info:
            log_dict["exception"] = self.formatException(record.exc_info)[:500]

        # Merge extra fields
        for key, value in record.__dict__.items():
            if key not in self._STANDARD_ATTRS:
                log_dict[key] = value

        # default=str: a log call must never fail because an extra value
        # (Path, datetime, exception, ...) is not JSON-serializable.
        return json.dumps(log_dict, default=str)
49
+
50
class ColoredConsoleFormatter(logging.Formatter):
    """Human-readable console formatter with an ANSI-colored level tag.

    Output shape: ``HH:MM:SS [LEVEL] module:line [reqid] — message``, where the
    request id (first 8 characters) is shown only when one is set.  Exception
    tracebacks and stack info attached to the record are appended on the
    following lines.
    """

    COLORS = {
        'DEBUG': '\033[94m', 'INFO': '\033[92m', 'WARNING': '\033[93m',
        'ERROR': '\033[91m', 'CRITICAL': '\033[95m',
    }
    RESET = '\033[0m'

    def format(self, record: logging.LogRecord) -> str:
        """Return the colored single-line rendering of *record* plus any traceback."""
        color = self.COLORS.get(record.levelname, self.RESET)
        time_str = datetime.fromtimestamp(record.created).strftime('%H:%M:%S')
        req_id = get_request_id()
        req_str = f" [{req_id[:8]}]" if req_id else ""
        line = f"{time_str} {color}[{record.levelname}]{self.RESET} {record.module}:{record.lineno}{req_str} — {record.getMessage()}"

        # Overriding format() bypasses the base class's traceback handling, so
        # append it explicitly; otherwise logger.exception() output would lose
        # the traceback on the console.
        if record.exc_info:
            line = f"{line}\n{self.formatException(record.exc_info)}"
        if record.stack_info:
            line = f"{line}\n{self.formatStack(record.stack_info)}"
        return line
63
+
64
def _detach_handlers(logger: logging.Logger) -> None:
    """Remove every handler from *logger*, closing file handlers to free their descriptors."""
    for handler in list(logger.handlers):
        logger.removeHandler(handler)
        if isinstance(handler, logging.FileHandler):
            handler.close()


def _json_file_handler(filename: str, *filters, level: int = logging.NOTSET) -> RotatingFileHandler:
    """Build a rotating (10 MiB x 5 backups) JSON file handler under ``settings.LOG_DIR``."""
    handler = RotatingFileHandler(
        settings.LOG_DIR / filename, maxBytes=10 * 1024 * 1024, backupCount=5
    )
    handler.setLevel(level)
    handler.setFormatter(JSONFormatter())
    for log_filter in filters:
        handler.addFilter(log_filter)
    return handler


def setup_logging() -> None:
    """Configure application logging; safe to call more than once.

    * root logger: console (colored outside production, JSON in production),
      ``app.log`` and ``error.log`` (ERROR and above);
    * ``ml`` logger: additionally ``ml.log``;
    * ``access`` logger: additionally ``access.log``.

    Every handler carries the masking filter.  Handlers on the ``ml`` and
    ``access`` loggers run before the root handlers, so without the filter
    they would write values the root handlers only mask afterwards.
    """
    settings.LOG_DIR.mkdir(parents=True, exist_ok=True)
    root_logger = logging.getLogger()
    root_logger.setLevel(settings.LOG_LEVEL)

    ml_log = logging.getLogger("ml")
    access_log = logging.getLogger("access")

    # Clear existing handlers on every logger configured here, so a repeated
    # call does not stack duplicate handlers (duplicate lines, leaked files).
    for logger in (root_logger, ml_log, access_log):
        _detach_handlers(logger)

    # Filters
    masking_filter = MaskingFilter()

    # Handlers
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(
        JSONFormatter() if settings.is_production else ColoredConsoleFormatter()
    )
    console_handler.addFilter(masking_filter)

    app_file = _json_file_handler("app.log", masking_filter)
    err_file = _json_file_handler("error.log", masking_filter, level=logging.ERROR)
    ml_file = _json_file_handler("ml.log", lambda r: "ml" in r.name, masking_filter)
    access_file = _json_file_handler("access.log", lambda r: "access" in r.name, masking_filter)

    root_logger.addHandler(console_handler)
    root_logger.addHandler(app_file)
    root_logger.addHandler(err_file)
    ml_log.addHandler(ml_file)
    access_log.addHandler(access_file)

    # Suppress noise
    logging.getLogger("uvicorn.access").propagate = False
    logging.getLogger("httpx").setLevel(logging.WARNING)
    logging.getLogger("chromadb").setLevel(logging.WARNING)
107
+
108
+ class MLLogger:
109
+ def __init__(self):
110
+ self.logger = logging.getLogger("ml")
111
+
112
+ def log_model_load(self, name: str, device: str, load_time_ms: int, vram_delta_mb: int = None):
113
+ self.logger.info("model_loaded", extra={
114
+ "model_name": name, "device": device, "load_time_ms": load_time_ms, "vram_delta_mb": vram_delta_mb
115
+ })
116
+
117
+ def log_inference(self, name: str, inference_time_ms: int, input_summary: dict, output_summary: dict):
118
+ self.logger.info("inference_complete", extra={
119
+ "model_name": name, "inference_time_ms": inference_time_ms, "input": input_summary, "output": output_summary
120
+ })
121
+
122
+ def log_checkpoint(self, epoch: int, loss: float, val_loss: float, path: str):
123
+ self.logger.info("checkpoint_saved", extra={"epoch": epoch, "loss": loss, "val_loss": val_loss, "path": path})
124
+
125
+ def log_oom(self, model_name: str, batch_size: int, vram_available_mb: int):
126
+ self.logger.error("cuda_oom", extra={"model_name": model_name, "batch_size": batch_size, "vram_available_mb": vram_available_mb})
127
+
128
+ def log_pipeline_step(self, step_name: str, status: str, duration_ms: int, session_id: str):
129
+ self.logger.info("pipeline_step", extra={"step": step_name, "status": status, "duration_ms": duration_ms, "session_id": session_id})
130
+
131
+ ml_logger = MLLogger()
backend/pyproject.toml CHANGED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[tool.ruff]
line-length = 100
# backend/core/logging_config.py imports datetime.UTC, which exists only on Python 3.11+.
target-version = "py311"

# Lint settings live under [tool.ruff.lint]; the top-level select/ignore keys are deprecated.
[tool.ruff.lint]
# Select critical rule sets: Errors, Warnings, Pyflakes, isort, PEP8-naming, pyupgrade, flake8-bugbear, flake8-builtins, flake8-comprehensions, pytest-style, Ruff-specific
select = ["E", "W", "F", "I", "N", "UP", "B", "A", "C4", "PT", "RUF"]

ignore = [
    "B008",  # Do not perform function calls in default args (Required for FastAPI Depends)
    "A003",  # Shadowing Python built-in (id is fine for DB models)
    "N818",  # Exception names should end in Error (We use custom hierarchy names)
]

[tool.ruff.lint.per-file-ignores]
# NOTE(review): the "S" (flake8-bandit) rules are not in `select`, so this ignore is currently a no-op.
"tests/*" = ["S101"]  # Assert is fine in tests
"db/migrations/*" = ["E501"]  # Migration auto-generations can be long

[tool.black]
line-length = 100
target-version = ["py311"]
include = '\.pyi?$'
exclude = "migrations"

[tool.mypy]
python_version = "3.11"
strict = false
warn_return_any = true
# `warn_unused_imports` is not a mypy option (unused imports are covered by ruff F401);
# the closest real mypy setting is `warn_unused_ignores`.
warn_unused_ignores = true
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = ["torch.*", "transformers.*", "chromadb.*", "cv2.*", "PIL.*"]
ignore_missing_imports = true

[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
markers = [
    "unit: fast isolated tests",
    "integration: tests using real DB or filesystem",
    "ml: tests that load ML models",
    "slow: tests taking more than 5 seconds",
    "e2e: full end-to-end tests",
]
filterwarnings = ["ignore::DeprecationWarning", "ignore::PendingDeprecationWarning"]
log_cli = true
log_cli_level = "INFO"
frontend/.eslintrc.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "extends": "next/core-web-vitals",
3
+ "rules": {
4
+ "no-unused-vars": "error",
5
+ "no-console": "warn",
6
+ "react-hooks/exhaustive-deps": "warn"
7
+ }
8
+ }
frontend/.prettierrc ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "tabWidth": 2,
3
+ "singleQuote": true,
4
+ "semi": true,
5
+ "trailingComma": "es5",
6
+ "printWidth": 100
7
+ }