""" The spec-parser brick: messy human text -> the advisor's form fields. Serves cn0303/fitcheck-spec-parser (Qwen3-1.7B + LoRA, trained in this repo — see scripts/train_spec_lora.py and the model card for the honest eval). Same serving pattern as the narrator: lazy load inside @spaces.GPU, loud errors, no fake fallbacks. Missing info comes back null — the model is specifically gated against inventing specs. """ import json import re import sys from model_brick import _should_load, EAGER_LOAD ADAPTER_ID = "cn0303/fitcheck-spec-parser" BASE_ID = "unsloth/Qwen3-1.7B" # Pinned revisions: the deployed Space must load exactly the reviewed commits, # not whatever a mutable Hub repo points at later. No trust_remote_code is used. ADAPTER_REVISION = "19c626f50f70b761d6d9a8f73a9d6bc69b656eae" BASE_REVISION = "6262b50d6c1f8ee5e4ac750d710c33603bfc2a0c" # MUST stay in sync with scripts/build_spec_dataset.py (the training prompt). SYSTEM_PROMPT = """\ You turn a person's description of their computer into JSON for a hardware checker. Output ONLY a JSON object with exactly these fields: {"computer": "Windows laptop"|"Windows desktop"|"Mac"|"Linux PC"|"Mini PC / Raspberry Pi"|null, "ram_gb": number|null, "provider": "nvidia"|"amd"|"apple"|"intel"|"none"|null, "gpu": string|null, "vram_gb": number|null} Rules: - Extract ONLY what the text states or directly implies. Anything not stated is null. Never guess or invent a spec. - "provider": "none" ONLY when the text says there is no separate graphics card (e.g. "no GPU", "integrated only"). Graphics simply not mentioned or unknown -> null. - "gpu" must be a specific model (e.g. "RTX 3060"). A brand or series alone ("geforce", "gtx", "radeon") is NOT a gpu -> set provider, leave gpu null. - If the text describes two or more different machines or a choice between them, every field is null.""" FIELDS = ("computer", "ram_gb", "provider", "gpu", "vram_gb") # The serving guard: the model's raw output is untrusted text. 
# Anything outside the schema becomes null (unknown) rather than flowing into
# the form as-is — an out-of-enum "computer" or a negative RAM is not a real
# spec, and null is the honest "not stated", consistent with the don't-invent
# rule. Bounds mirror the Pydantic limits in app.py's AdviseIn so the parser
# can't propose a value the /api/advise endpoint would itself reject.
_COMPUTERS = {"Windows laptop", "Windows desktop", "Mac", "Linux PC",
              "Mini PC / Raspberry Pi"}
_PROVIDERS = {"nvidia", "amd", "apple", "intel", "none"}
_RAM_MAX = 4096.0
_VRAM_MAX = 1024.0


def _num(v, lo, hi):
    """Coerce ``v`` to a float in the half-open range (lo, hi], else None.

    Args:
        v: Untrusted value taken from the model's JSON (number, string, ...).
        lo: Exclusive lower bound.
        hi: Inclusive upper bound.

    Returns:
        The value rounded to one decimal place, or None when ``v`` is a bool,
        cannot be converted by ``float()``, is NaN/infinite, or falls outside
        (lo, hi] either before or after rounding.
    """
    if isinstance(v, bool):  # bool is an int subclass; reject it explicitly
        return None
    try:
        f = float(v)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an integer too large for a float (JSON allows
        # arbitrarily long integer literals) must become null, not a crash.
        return None
    # Phrased as a positive range test so that NaN (every comparison is False)
    # and +/-inf are rejected by the same check as out-of-range values.
    if not (lo < f <= hi):
        return None
    rounded = round(f, 1)
    # A tiny positive value (e.g. 0.04) rounds to 0.0, which would break the
    # "strictly greater than lo" guarantee, so re-check after rounding.
    return rounded if rounded > lo else None


def _validate(obj: dict) -> dict:
    """Coerce raw model output to the schema; invalid -> null, never invented.

    Args:
        obj: The JSON object decoded from the model's output.

    Returns:
        A dict with exactly the keys "computer", "provider", "gpu", "ram_gb"
        and "vram_gb". Each value is either a validated value or None.
    """
    out = {}
    c = obj.get("computer")
    # isinstance guard first: a list/dict value is unhashable, and testing it
    # for membership in a set would raise TypeError instead of yielding null.
    out["computer"] = c if isinstance(c, str) and c in _COMPUTERS else None
    p = obj.get("provider")
    out["provider"] = (
        p.lower() if isinstance(p, str) and p.lower() in _PROVIDERS else None
    )
    g = obj.get("gpu")
    out["gpu"] = g.strip() if isinstance(g, str) and g.strip() else None
    out["ram_gb"] = _num(obj.get("ram_gb"), 0, _RAM_MAX)
    out["vram_gb"] = _num(obj.get("vram_gb"), 0, _VRAM_MAX)
    return out


# _GENERATE stays None unless the guarded setup below succeeds; parse_specs
# reports that state as an explicit error instead of faking a result.
_GENERATE = None
# Loaded tokenizer/model, filled in by _load().
_state = {"tok": None, "model": None}

if _should_load():
    try:
        import spaces
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        def _load(local_files_only: bool = False):
            """Load tokenizer, base model and adapter at the pinned revisions.

            The result is stored in ``_state``; the model is moved to "cuda"
            and put in eval mode. ``local_files_only`` is forwarded to every
            ``from_pretrained`` call.
            """
            # local_files_only=True (eager boot path) forbids downloads, so a
            # cold cache raises here and we fall back to lazy loading instead of
            # stalling the Space boot.
            from peft import PeftModel
            lfo = {"local_files_only": local_files_only}
            tok = AutoTokenizer.from_pretrained(
                ADAPTER_ID, revision=ADAPTER_REVISION, **lfo)
            model = AutoModelForCausalLM.from_pretrained(
                BASE_ID, revision=BASE_REVISION, dtype=torch.bfloat16, **lfo)
            model = PeftModel.from_pretrained(
                model, ADAPTER_ID, revision=ADAPTER_REVISION, **lfo)
            _state["tok"] = tok
            _state["model"] = model.to("cuda").eval()

        # warm path: load from pre-cached weights + generate (120s is within
        # the ZeroGPU cap)
        @spaces.GPU(duration=120)
        def _generate(text: str) -> str:
            """Return the model's raw completion for ``text``.

            Loads the model on first use, builds the chat prompt from
            SYSTEM_PROMPT plus the user text, generates greedily (at most 96
            new tokens) and decodes only the tokens after the prompt.
            """
            if _state["model"] is None:
                _load()
            tok, model = _state["tok"], _state["model"]
            msgs = [{"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": text}]
            kw = dict(add_generation_prompt=True, return_tensors="pt",
                      return_dict=True)
            try:
                inputs = tok.apply_chat_template(msgs, enable_thinking=False, **kw)
            except TypeError:
                # Retry without enable_thinking when the call rejects it.
                inputs = tok.apply_chat_template(msgs, **kw)
            inputs = inputs.to("cuda")
            n = inputs["input_ids"].shape[1]  # prompt length, sliced off below
            with torch.no_grad():
                out = model.generate(**inputs, max_new_tokens=96, do_sample=False,
                                     pad_token_id=tok.eos_token_id)
            return tok.decode(out[0][n:], skip_special_tokens=True).strip()

        _GENERATE = _generate

        # Pre-cache the weights at BOOT, on CPU, with no time limit, so the first
        # /parse call loads from a warm cache and finishes inside its GPU window.
        # Without this a cold cache (e.g. after a redeploy) makes the first call
        # try to pull ~3.4GB during the GPU slot, which overruns it and hangs
        # every request. snapshot_download is a no-op once the files are cached.
        try:
            from huggingface_hub import snapshot_download
            snapshot_download(BASE_ID, revision=BASE_REVISION)
            snapshot_download(ADAPTER_ID, revision=ADAPTER_REVISION)
            print("[FitCheck] spec parser weights pre-cached at boot",
                  file=sys.stderr, flush=True)
        except Exception as pe:  # noqa: BLE001
            print(f"[FitCheck] spec parser pre-download skipped "
                  f"({type(pe).__name__}: {pe})", file=sys.stderr, flush=True)

        # Eager boot path (flagged): resident-load now only if cached; never
        # downloads at boot (local_files_only=True), so a cold cache is harmless.
        if EAGER_LOAD:
            try:
                _load(local_files_only=True)
                print("[FitCheck] spec parser: resident at import (cached)",
                      file=sys.stderr, flush=True)
            except Exception as ee:  # noqa: BLE001
                print(f"[FitCheck] spec parser eager load skipped "
                      f"({type(ee).__name__}); will load lazily",
                      file=sys.stderr, flush=True)
    except Exception as e:  # noqa: BLE001
        print(f"[FitCheck] spec parser unavailable: {e!r}",
              file=sys.stderr, flush=True)


def parse_specs(text: str) -> dict:
    """Returns the parsed fields, or {error} — never invented content.

    Args:
        text: Free-form description of a computer. Surrounding whitespace is
            stripped and the text is truncated to 4000 characters before it
            is sent to the model.

    Returns:
        The validated field dict from ``_validate`` on success, otherwise a
        dict with a single "error" key describing why nothing was parsed
        (empty input, model not loaded, generation failure, or output that
        is not a JSON object).
    """
    text = (text or "").strip()
    if not text:
        return {"error": "Nothing to parse — paste or type a description first."}
    if len(text) > 4000:
        text = text[:4000]
    if _GENERATE is None:
        return {"error": "The spec parser model isn't loaded in this environment."}
    try:
        raw = _GENERATE(text)
    except Exception as e:  # noqa: BLE001
        return {"error": f"Spec parser failed: {e}"}
    # Take the span from the first "{" to the last "}" so text around the
    # JSON object is ignored.
    m = re.search(r"\{.*\}", raw, re.DOTALL)
    if not m:
        return {"error": f"The parser didn't return JSON. Raw output: {raw[:200]}"}
    try:
        obj = json.loads(m.group(0))
    except json.JSONDecodeError:
        return {"error": f"The parser returned malformed JSON: {m.group(0)[:200]}"}
    if not isinstance(obj, dict):
        return {"error": f"The parser returned a non-object JSON value: {raw[:200]}"}
    return _validate(obj)