"""Backend-wide constants. Tunable here without touching analysis code."""

from __future__ import annotations

# --- analysis -----------------------------------------------------------------

ANALYSIS_SR = 22050           # librosa load sample rate (mono signals)
CLAP_SR = 48000               # CLAP processor expects 48 kHz
FRAME_SIZE = 2048
HOP_SIZE = 512
EPS = 1e-7
WAVEFORM_BINS = 180           # the frontend Waveform component expects exactly 180

# --- live-upload caps ---------------------------------------------------------

CLIP_CAP_S = 90               # truncate uploads to 90 s before CLAP encode
MAX_UPLOAD_BYTES = 50 * 1024 * 1024
ALLOWED_EXTENSIONS = {".mp3", ".wav", ".flac", ".ogg", ".m4a"}
ALLOWED_MIME_PREFIX = "audio/"

# --- audio encoder (ADR-0002: swap LAION-CLAP → MuQ-MuLan) -------------------
#
# MuQ-MuLan is a CLIP-style music-text joint embedder (~700M params) from
# Tencent AI Lab, January 2025 SOTA on MagnaTagATune zero-shot tagging.
# We use it for both similarity (audio path) and zero-shot genre tagging
# (text path), so a single model load powers both jobs.
#
# Sample rate: 24 kHz (LAION-CLAP was 48 kHz). The encoder resamples inputs
# internally so callers can pass any sr — but the runtime cost favors sending
# audio at the target rate when possible.

AUDIO_ENCODER_MODEL_ID = "OpenMuQ/MuQ-MuLan-large"
AUDIO_ENCODER_SAMPLE_RATE = 24000
# Joint embedding dim. MuQ-MuLan paper §3.3 specifies 512-d shared space —
# same as LAION-CLAP, so the catalog matrix shape is unchanged across the swap.
AUDIO_ENCODER_EMBED_DIM = 512

# Backward-compat aliases — older code references CLAP_*; keep them mapped to
# the new constants until a follow-up rename PR migrates references.
CLAP_MODEL_ID = AUDIO_ENCODER_MODEL_ID
CLAP_EMBED_DIM = AUDIO_ENCODER_EMBED_DIM
CLAP_SR = AUDIO_ENCODER_SAMPLE_RATE
CLAP_GENRE_TOP_K = 3
CLAP_GENRE_TEMPERATURE = 10.0

# --- windowed encoding (Phase 1 + 2) -----------------------------------------

# 10 s windows of audio fed independently to CLAP, then mean-pooled and L2-
# normalized to produce a single track-level embedding. Matches PROJECT_PLAN
# Phase 1 + 2 acceptance criteria and LOCKED_DECISIONS track-length protocol.
CLAP_WINDOW_SECONDS = 10
# Soft cap on the per-window count; matches existing CLIP_CAP_S=90 above so a
# 90 s query produces at most 9 windows. Catalog inputs (30 s previews) produce 1–3.
CLAP_QUERY_MAX_SECONDS = CLIP_CAP_S
CLAP_POOLING = "l2_normalized_mean"

# Provisional "Completely unique" cutoff carried over from prior project — NO
# published CLAP-512 threshold data exists. Recalibrate from negatives
# distribution after the golden set is built. See PRESEARCH Q1.
SIMILARITY_THRESHOLD_DEFAULT = 0.70

GENRE_LABELS: list[str] = [
    "Synthwave",
    "Lo-fi Hip-Hop",
    "Ambient",
    "Trap",
    "Indie Folk",
    "House",
    "Drum & Bass",
    "Cinematic",
    "Industrial",
    "Jazz Fusion",
    "Phonk",
    "Orchestral",
    "Hyperpop",
    "Dream Pop",
    "Techno",
]
GENRE_PROMPT_TEMPLATE = "a {label} music track"