"""
The spec-parser brick: messy human text -> the advisor's form fields.

Serves cn0303/fitcheck-spec-parser (Qwen3-1.7B + LoRA, trained in this repo —
see scripts/train_spec_lora.py and the model card for the honest eval). Same
serving pattern as the narrator: lazy load inside @spaces.GPU, loud errors,
no fake fallbacks. Missing info comes back null — the model is specifically
gated against inventing specs.
"""

import json
import re
import sys

from model_brick import _should_load, EAGER_LOAD

# Hub repo ids: the LoRA adapter (its repo also supplies the tokenizer, see
# _load) and the base model the adapter is applied to.
ADAPTER_ID = "cn0303/fitcheck-spec-parser"
BASE_ID = "unsloth/Qwen3-1.7B"
# Pinned revisions: the deployed Space must load exactly the reviewed commits,
# not whatever a mutable Hub repo points at later. No trust_remote_code is used.
# Both values are passed as `revision=` to every from_pretrained and
# snapshot_download call in this module.
ADAPTER_REVISION = "19c626f50f70b761d6d9a8f73a9d6bc69b656eae"
BASE_REVISION = "6262b50d6c1f8ee5e4ac750d710c33603bfc2a0c"

# MUST stay in sync with scripts/build_spec_dataset.py (the training prompt).
# Sent verbatim as the system message of every request (see _generate), so any
# edit here changes what the model sees at inference time.
SYSTEM_PROMPT = """\
You turn a person's description of their computer into JSON for a hardware checker.
Output ONLY a JSON object with exactly these fields:
{"computer": "Windows laptop"|"Windows desktop"|"Mac"|"Linux PC"|"Mini PC / Raspberry Pi"|null,
 "ram_gb": number|null, "provider": "nvidia"|"amd"|"apple"|"intel"|"none"|null,
 "gpu": string|null, "vram_gb": number|null}
Rules:
- Extract ONLY what the text states or directly implies. Anything not stated is null. Never guess or invent a spec.
- "provider": "none" ONLY when the text says there is no separate graphics card (e.g. "no GPU", "integrated only"). Graphics simply not mentioned or unknown -> null.
- "gpu" must be a specific model (e.g. "RTX 3060"). A brand or series alone ("geforce", "gtx", "radeon") is NOT a gpu -> set provider, leave gpu null.
- If the text describes two or more different machines or a choice between them, every field is null."""

# The five keys of the output object, in the order the prompt lists them.
# NOTE(review): not referenced anywhere else in this module — presumably
# imported by callers; confirm before removing.
FIELDS = ("computer", "ram_gb", "provider", "gpu", "vram_gb")

# The serving guard: the model's raw output is untrusted text. Anything outside
# the schema becomes null (unknown) rather than flowing into the form as-is — an
# out-of-enum "computer" or a negative RAM is not a real spec, and null is the
# honest "not stated", consistent with the don't-invent rule. Bounds mirror the
# Pydantic limits in app.py's AdviseIn so the parser can't propose a value the
# /api/advise endpoint would itself reject.
# The two sets below repeat the enum values spelled out in SYSTEM_PROMPT; keep
# them in step with the prompt when either changes.
_COMPUTERS = {"Windows laptop", "Windows desktop", "Mac", "Linux PC",
              "Mini PC / Raspberry Pi"}
_PROVIDERS = {"nvidia", "amd", "apple", "intel", "none"}
_RAM_MAX = 4096.0    # inclusive upper bound applied to "ram_gb" in _validate
_VRAM_MAX = 1024.0   # inclusive upper bound applied to "vram_gb" in _validate


def _num(v, lo, hi):
    """A finite number strictly within (0, hi] -> float, else None."""
    if isinstance(v, bool):   # bool is an int subclass; reject it explicitly
        return None
    try:
        f = float(v)
    except (TypeError, ValueError):
        return None
    if f != f or f in (float("inf"), float("-inf")):   # NaN / inf
        return None
    if f <= 0 or f > hi:
        return None
    return round(f, 1)


def _validate(obj: dict) -> dict:
    """Coerce raw model output to the schema; invalid -> null, never invented.

    Args:
        obj: The decoded JSON object produced by the model. Its values are
            untrusted and may be of any JSON type.

    Returns:
        A dict with exactly the keys ``computer``, ``provider``, ``gpu``,
        ``ram_gb`` and ``vram_gb``. Each value is either a schema-valid value
        or ``None``.
    """
    out = {}

    # Enum fields: only strings can match. Checking the type first also keeps
    # unhashable values (a list or dict emitted by the model) away from the
    # set-membership test, which would otherwise raise TypeError.
    computer = obj.get("computer")
    computer = computer.strip() if isinstance(computer, str) else None
    out["computer"] = computer if computer in _COMPUTERS else None

    provider = obj.get("provider")
    provider = provider.strip().lower() if isinstance(provider, str) else None
    out["provider"] = provider if provider in _PROVIDERS else None

    # Free-text field: any non-blank string is kept, trimmed.
    gpu = obj.get("gpu")
    gpu = gpu.strip() if isinstance(gpu, str) else ""
    out["gpu"] = gpu or None

    # Numeric fields: positive, finite, and within the configured maximum.
    out["ram_gb"] = _num(obj.get("ram_gb"), 0, _RAM_MAX)
    out["vram_gb"] = _num(obj.get("vram_gb"), 0, _VRAM_MAX)
    return out


# The generation callable used by parse_specs; stays None when the model stack
# is not set up (either _should_load() is false or the setup below raised).
_GENERATE = None
# Tokenizer and model, filled in by _load() and reused by _generate().
_state = {"tok": None, "model": None}

if _should_load():
    try:
        import spaces
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        def _load(local_files_only: bool = False) -> None:
            """Load tokenizer, base model and LoRA adapter into ``_state``.

            The tokenizer and adapter come from ADAPTER_ID, the base weights
            from BASE_ID, each at its pinned revision. The combined model is
            moved to CUDA and put in eval mode.
            """
            # local_files_only=True (eager boot path) forbids downloads, so a
            # cold cache raises here and we fall back to lazy loading instead of
            # stalling the Space boot.
            from peft import PeftModel
            lfo = {"local_files_only": local_files_only}
            tok = AutoTokenizer.from_pretrained(ADAPTER_ID, revision=ADAPTER_REVISION, **lfo)
            model = AutoModelForCausalLM.from_pretrained(
                BASE_ID, revision=BASE_REVISION, dtype=torch.bfloat16, **lfo)
            model = PeftModel.from_pretrained(model, ADAPTER_ID, revision=ADAPTER_REVISION, **lfo)
            _state["tok"] = tok
            _state["model"] = model.to("cuda").eval()

        @spaces.GPU(duration=120)   # warm path: load from pre-cached weights + generate (120s is within the ZeroGPU cap)
        def _generate(text: str) -> str:
            """Run one request through the model and return its decoded reply.

            Loads the model first if ``_state`` is still empty. The reply is
            the newly generated text only (prompt tokens are sliced off),
            with special tokens removed and surrounding whitespace stripped.
            """
            if _state["model"] is None:
                _load()
            tok, model = _state["tok"], _state["model"]
            msgs = [{"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": text}]
            kw = dict(add_generation_prompt=True, return_tensors="pt", return_dict=True)
            # Ask the chat template to disable thinking; if the call rejects
            # that keyword with a TypeError, retry without it.
            try:
                inputs = tok.apply_chat_template(msgs, enable_thinking=False, **kw)
            except TypeError:
                inputs = tok.apply_chat_template(msgs, **kw)
            inputs = inputs.to("cuda")
            # Prompt length in tokens, used below to drop the prompt from the output.
            n = inputs["input_ids"].shape[1]
            with torch.no_grad():
                # No sampling, and at most 96 new tokens per request.
                out = model.generate(**inputs, max_new_tokens=96, do_sample=False,
                                     pad_token_id=tok.eos_token_id)
            return tok.decode(out[0][n:], skip_special_tokens=True).strip()

        # Published before the pre-cache / eager steps below; each of those has
        # its own try/except, so their failure does not reset it.
        _GENERATE = _generate

        # Pre-cache the weights at BOOT, on CPU, with no time limit, so the first
        # /parse call loads from a warm cache and finishes inside its GPU window.
        # Without this a cold cache (e.g. after a redeploy) makes the first call
        # try to pull ~3.4GB during the GPU slot, which overruns it and hangs
        # every request. snapshot_download is a no-op once the files are cached.
        try:
            from huggingface_hub import snapshot_download
            snapshot_download(BASE_ID, revision=BASE_REVISION)
            snapshot_download(ADAPTER_ID, revision=ADAPTER_REVISION)
            print("[FitCheck] spec parser weights pre-cached at boot",
                  file=sys.stderr, flush=True)
        except Exception as pe:  # noqa: BLE001
            # Best effort: a failed pre-download is logged and boot continues.
            print(f"[FitCheck] spec parser pre-download skipped "
                  f"({type(pe).__name__}: {pe})", file=sys.stderr, flush=True)

        # Eager boot path (flagged): resident-load now only if cached; never
        # downloads at boot (local_files_only=True), so a cold cache is harmless.
        if EAGER_LOAD:
            try:
                _load(local_files_only=True)
                print("[FitCheck] spec parser: resident at import (cached)",
                      file=sys.stderr, flush=True)
            except Exception as ee:  # noqa: BLE001
                # Only the exception type is logged here; _generate will call
                # _load() itself on the first request.
                print(f"[FitCheck] spec parser eager load skipped "
                      f"({type(ee).__name__}); will load lazily",
                      file=sys.stderr, flush=True)
    except Exception as e:  # noqa: BLE001
        # Import or definition failure: logged to stderr. _GENERATE keeps
        # whatever value it had when the exception was raised.
        print(f"[FitCheck] spec parser unavailable: {e!r}", file=sys.stderr, flush=True)


def parse_specs(text: str) -> dict:
    """Parse a free-text machine description into the advisor's form fields.

    Args:
        text: The user's description. ``None`` or blank input is rejected, and
            anything beyond 4000 characters is truncated before generation.

    Returns:
        The validated field dict from ``_validate``, or ``{"error": message}``
        when the input is empty, the model is not loaded, generation raises,
        or the reply does not contain a decodable JSON object. Content is
        never invented.
    """
    text = (text or "").strip()
    if not text:
        return {"error": "Nothing to parse — paste or type a description first."}
    text = text[:4000]
    if _GENERATE is None:
        return {"error": "The spec parser model isn't loaded in this environment."}
    try:
        raw = _GENERATE(text)
    except Exception as e:  # noqa: BLE001
        return {"error": f"Spec parser failed: {e}"}
    # A reply with no "{...}" span at all is reported as "not JSON".
    m = re.search(r"\{.*\}", raw, re.DOTALL)
    if not m:
        return {"error": f"The parser didn't return JSON. Raw output: {raw[:200]}"}
    try:
        # Decode the first JSON value starting at the first "{" and ignore any
        # text after it. Parsing the whole greedy span instead would reject a
        # valid object merely because trailing chatter contains a "}".
        obj, _ = json.JSONDecoder().raw_decode(raw[m.start():])
    except (ValueError, RecursionError):
        # ValueError covers JSONDecodeError plus the plain ValueError raised
        # for over-long integer literals; RecursionError covers pathological
        # nesting.
        return {"error": f"The parser returned malformed JSON: {m.group(0)[:200]}"}
    if not isinstance(obj, dict):
        return {"error": f"The parser returned a non-object JSON value: {raw[:200]}"}
    return _validate(obj)