"""LLM provider abstraction with a CPU-safe fallback.""" from __future__ import annotations from pathlib import Path from typing import Protocol from .config import Settings class LLMProvider(Protocol): name: str def generate(self, *, system_prompt: str, user_prompt: str, has_context: bool) -> str: """Generate an answer.""" class TemplateLLM: """Deterministic fallback used when no local model is enabled.""" name = "template" def generate(self, *, system_prompt: str, user_prompt: str, has_context: bool) -> str: prefix = "" if not has_context: prefix = ( "No approved source context was found for this query, so this is an ungrounded design pass.\n\n" ) return ( prefix + "Direct recommendation: use the request as a design brief, then turn it into a small set of " "testable mechanics instead of a broad idea dump.\n\n" + "Concrete design details: define the player decision, the resource or constraint that makes it " "interesting, the feedback the player receives, and one failure state that teaches rather than stalls. " "For level or encounter work, sketch the intended rhythm as setup, pressure, twist, and release.\n\n" + "Risks and tradeoffs: watch for dominant strategies, hidden math, unclear affordances, and content " "that depends on GM improvisation when it should be supported by tools or tables.\n\n" + "Playtest checks: run one fast paper test, ask players what they thought the objective was, record " "where they hesitated, and tune only one variable at a time.\n\n" + "Model status: set GMC_ENABLE_LOCAL_LLM=true and provide llama-cpp-python support to use the " "configured local Ministral GGUF model for richer generation." ) class LlamaCppLLM: """llama.cpp-backed local GGUF provider.""" name = "llama_cpp" def __init__(self, settings: Settings) -> None: self.settings = settings self._model = None def _resolve_model_path(self) -> str: if self.settings.llm_model_path: candidate = Path(self.settings.llm_model_path) if candidate.exists(): return str(candidate) raise FileNotFoundError(f"GMC_LLM_MODEL_PATH does not exist: {candidate}") from huggingface_hub import hf_hub_download, list_repo_files repo_id = self.settings.llm_model_repo requested = self.settings.llm_model_file try: return hf_hub_download(repo_id=repo_id, filename=requested) except Exception: files = list_repo_files(repo_id) ggufs = [name for name in files if name.lower().endswith(".gguf")] preferred = [name for name in ggufs if "q4_k_m" in name.lower()] fallback = preferred or [name for name in ggufs if "q4" in name.lower()] or ggufs if not fallback: raise FileNotFoundError(f"no GGUF files found in {repo_id}") return hf_hub_download(repo_id=repo_id, filename=fallback[0]) def _load(self): if self._model is None: from llama_cpp import Llama self._model = Llama( model_path=self._resolve_model_path(), n_ctx=self.settings.llm_context_window, n_threads=None, verbose=False, ) return self._model def generate(self, *, system_prompt: str, user_prompt: str, has_context: bool) -> str: model = self._load() result = model.create_chat_completion( messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ], temperature=0.75, max_tokens=self.settings.llm_max_tokens, ) content = result["choices"][0]["message"]["content"].strip() if not has_context and "ungrounded" not in content.lower(): content = "No approved source context was found for this query, so this is an ungrounded design pass.\n\n" + content return content def create_llm_provider(settings: Settings) -> LLMProvider: provider = settings.llm_provider.strip().lower() if provider == "template": return TemplateLLM() if provider == "llama_cpp": return LlamaCppLLM(settings) if provider == "auto": if settings.enable_local_llm or settings.llm_model_path: return LlamaCppLLM(settings) return TemplateLLM() raise ValueError(f"unknown LLM provider: {settings.llm_provider}")