Spaces:
Running on Zero
Running on Zero
File size: 4,962 Bytes
"""Optional small-model assistance through Hugging Face Inference Providers."""
from __future__ import annotations
import json
import os
from dataclasses import dataclass
from typing import Any, Protocol
from schemas import AnalysisResult
# Hub repository ids of the two small models offered for assisted analysis.
PRIMARY_MODEL_ID = "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"
QUICK_MODEL_ID = "Qwen/Qwen3.5-9B"

# Analysis engines keyed by engine name. Each entry carries a human-readable
# label and the model id to call; a ``None`` model id marks an engine that
# has no model behind it.
MODEL_CHOICES = {
    "deterministic": {
        "label": "Deterministic field notes",
        "model_id": None,
    },
    "nemotron": {
        "label": "Small-model assist: NVIDIA Nemotron 3 Nano 30B-A3B",
        "model_id": PRIMARY_MODEL_ID,
    },
    "qwen": {
        "label": "Quick small-model assist: Qwen3.5 9B",
        "model_id": QUICK_MODEL_ID,
    },
}
class ChatClient(Protocol):
    """Structural type for any object that exposes ``chat_completion``.

    ``run_model_assist`` accepts such an object through its ``client``
    parameter instead of constructing a ``huggingface_hub.InferenceClient``.
    """

    def chat_completion(self, *args: Any, **kwargs: Any) -> Any:
        """Send a chat-completion request and return the provider response."""
        ...
@dataclass(slots=True)
class ModelAssistResult:
model_id: str
memo: dict[str, Any]
note: str
def model_id_for_engine(engine: str) -> str | None:
    """Return the model id configured for *engine*.

    ``None`` is returned when the engine is unknown or when its entry in
    ``MODEL_CHOICES`` has no (or an empty) ``model_id``.
    """
    entry = MODEL_CHOICES.get(engine)
    configured = entry["model_id"] if entry else None
    return str(configured) if configured else None
def run_model_assist(
    *,
    engine: str,
    result: AnalysisResult,
    narrative_text: str,
    token: str | None = None,
    client: ChatClient | None = None,
) -> ModelAssistResult:
    """Ask the selected small model for a concise memo grounded in visible text.

    Args:
        engine: Analysis engine key, resolved to a model id through
            ``model_id_for_engine``.
        result: Deterministic analysis that is serialized into the prompt.
        narrative_text: Visible narrative text that is excerpted into the prompt.
        token: Hugging Face token. When falsy, the ``HF_TOKEN`` environment
            variable and then ``huggingface_hub.get_token()`` are tried.
            Ignored when ``client`` is supplied.
        client: Pre-built chat client. When given, no ``InferenceClient`` is
            constructed and no token or environment lookup happens.

    Returns:
        A ``ModelAssistResult`` holding the model id, the parsed memo and a
        completion note.

    Raises:
        ValueError: If no model is configured for ``engine``, no token can be
            resolved, ``TRACE_FIELD_NOTES_MODEL_TIMEOUT`` is not a number, or
            the model response cannot be extracted or parsed.
    """
    model_id = model_id_for_engine(engine)
    if not model_id:
        raise ValueError(f"No model is configured for analysis engine {engine!r}.")

    prompt = build_model_prompt(result, narrative_text)

    if client is None:
        # Imported lazily so the module stays importable without huggingface_hub
        # when a client is injected.
        from huggingface_hub import InferenceClient, get_token

        resolved_token = token or os.getenv("HF_TOKEN") or get_token()
        if not resolved_token:
            raise ValueError(
                "Sign in with Hugging Face to enable small-model assist through "
                "the inference-api OAuth scope."
            )

        # Treat a blank variable like an unset one (mirroring the provider
        # lookup below) and report non-numeric values by name instead of
        # leaking a bare float() conversion error.
        raw_timeout = (os.getenv("TRACE_FIELD_NOTES_MODEL_TIMEOUT") or "").strip() or "45"
        try:
            timeout_seconds = float(raw_timeout)
        except ValueError as exc:
            raise ValueError(
                "TRACE_FIELD_NOTES_MODEL_TIMEOUT must be a number of seconds, "
                f"got {raw_timeout!r}."
            ) from exc

        inference_client = InferenceClient(
            model=model_id,
            provider=os.getenv("TRACE_FIELD_NOTES_INFERENCE_PROVIDER") or None,
            token=resolved_token,
            timeout=timeout_seconds,
        )
    else:
        inference_client = client

    response = inference_client.chat_completion(
        messages=[
            {
                "role": "system",
                "content": (
                    "You analyze visible coding-agent narrative messages. "
                    "Do not infer hidden reasoning. Return JSON only."
                ),
            },
            {"role": "user", "content": prompt},
        ],
        model=model_id,
        max_tokens=900,
        temperature=0.2,
        response_format={"type": "json_object"},
    )

    content = extract_chat_content(response)
    memo = parse_model_json(content)
    return ModelAssistResult(
        model_id=model_id,
        memo=memo,
        note=f"Small-model assist completed with {model_id}.",
    )
def build_model_prompt(result: AnalysisResult, narrative_text: str) -> str:
    """Assemble the user prompt sent to the small model.

    The prompt lists the required JSON keys and the ground rules, then embeds
    the deterministic analysis (``result.to_dict()`` as indented JSON) and at
    most the first 12000 characters of ``narrative_text``.
    """
    analysis_json = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
    excerpt = narrative_text[:12000]
    prompt_lines = (
        "Use the deterministic codebook analysis and redacted visible narrative below.",
        "Return JSON with exactly these keys:",
        "- executive_memo: 4-6 sentences for a developer",
        "- detour_memo: 2-4 sentences about productive detours vs wandering",
        "- outcome_audit_memo: 2-4 sentences about completion claims and caveats",
        "- caveats: array of short strings",
        "Rules:",
        "- Analyze only visible narrative messages.",
        "- Do not claim to know hidden reasoning.",
        "- Cite episode IDs where useful.",
        "- Do not include raw secrets, tool outputs, or long quotes.",
        "Deterministic analysis:",
        analysis_json,
        "Redacted narrative excerpt:",
        excerpt,
        "",  # keeps the trailing newline after the excerpt
    )
    return "\n".join(prompt_lines)
def extract_chat_content(response: Any) -> str:
    """Return the text of the first choice in a chat-completion response.

    Raises:
        ValueError: If ``response.choices[0].message.content`` cannot be
            reached, or if the content is not a non-blank string.
    """
    try:
        first_choice = response.choices[0]
        text = first_choice.message.content
    except (AttributeError, IndexError, TypeError) as exc:
        raise ValueError("Model response did not contain chat completion content.") from exc

    if isinstance(text, str) and text.strip():
        return text
    raise ValueError("Model response content was empty.")
def parse_model_json(content: str) -> dict[str, Any]:
    """Parse and validate the JSON memo returned by the model.

    Args:
        content: Raw text of the model response.

    Returns:
        The decoded JSON object. ``caveats`` is reduced to at most six
        entries, each coerced to ``str``; any other keys pass through as-is.

    Raises:
        ValueError: If ``content`` is not valid JSON, is not a JSON object,
            or lacks a required key of the expected type.
    """
    try:
        parsed = json.loads(content)
    except json.JSONDecodeError as exc:
        raise ValueError("Model response was not valid JSON.") from exc

    # Valid JSON can still be a scalar, null or an array; without this check
    # the key lookups below would raise TypeError instead of ValueError.
    if not isinstance(parsed, dict):
        raise ValueError("Model response JSON was not an object.")

    required = {
        "executive_memo": str,
        "detour_memo": str,
        "outcome_audit_memo": str,
        "caveats": list,
    }
    for key, expected_type in required.items():
        # A missing key yields None, which matches neither str nor list.
        if not isinstance(parsed.get(key), expected_type):
            raise ValueError(f"Model response missing {key!r} as {expected_type.__name__}.")

    parsed["caveats"] = [str(item) for item in parsed["caveats"][:6]]
    return parsed
|