File size: 4,962 Bytes
c8055f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4e9a2f
c8055f7
 
 
 
 
 
 
 
 
 
f4e9a2f
 
 
 
 
 
 
 
c8055f7
 
 
 
f4e9a2f
c8055f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""Optional small-model assistance through Hugging Face Inference Providers."""

from __future__ import annotations

import json
import os
from dataclasses import dataclass
from typing import Any, Protocol

from schemas import AnalysisResult


# Model ids handed to the inference client for the two model-assisted engines.
PRIMARY_MODEL_ID = "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"
QUICK_MODEL_ID = "Qwen/Qwen3.5-9B"

# Analysis engines keyed by the engine name that model_id_for_engine() accepts.
# Each entry holds a display "label" (presumably shown in a UI selector -- the
# consumer is outside this module) and the "model_id" to query. A model_id of
# None means the engine has no model behind it, so run_model_assist() rejects it.
MODEL_CHOICES = {
    "deterministic": {
        "label": "Deterministic field notes",
        "model_id": None,
    },
    "nemotron": {
        "label": "Small-model assist: NVIDIA Nemotron 3 Nano 30B-A3B",
        "model_id": PRIMARY_MODEL_ID,
    },
    "qwen": {
        "label": "Quick small-model assist: Qwen3.5 9B",
        "model_id": QUICK_MODEL_ID,
    },
}


class ChatClient(Protocol):
    """Structural type for any object that exposes ``chat_completion``.

    ``run_model_assist`` accepts such an object through its ``client``
    argument instead of constructing a ``huggingface_hub.InferenceClient``.
    """

    def chat_completion(self, *args: Any, **kwargs: Any) -> Any:
        """Run one chat completion request.

        ``run_model_assist`` calls this with the keyword arguments
        ``messages``, ``model``, ``max_tokens``, ``temperature`` and
        ``response_format``, and reads the reply text from
        ``response.choices[0].message.content``.
        """
        ...


@dataclass(slots=True)
class ModelAssistResult:
    """Value returned by ``run_model_assist`` after a successful model call."""

    # Id of the model that produced the memo.
    model_id: str
    # JSON object returned by the model, as validated by parse_model_json().
    memo: dict[str, Any]
    # Short status message naming the model that completed the request.
    note: str


def model_id_for_engine(engine: str) -> str | None:
    """Look up the model id configured for *engine* in ``MODEL_CHOICES``.

    Returns:
        The model id as a string, or ``None`` when the engine is unknown or
        its entry carries no model id.
    """
    entry = MODEL_CHOICES.get(engine)
    configured = entry["model_id"] if entry else None
    if not configured:
        return None
    return str(configured)


def run_model_assist(
    *,
    engine: str,
    result: AnalysisResult,
    narrative_text: str,
    token: str | None = None,
    client: ChatClient | None = None,
) -> ModelAssistResult:
    """Ask the selected small model for a concise memo grounded in visible text.

    Args:
        engine: Key into ``MODEL_CHOICES``; it must map to a non-empty model id.
        result: Deterministic analysis that is embedded in the prompt.
        narrative_text: Narrative text; ``build_model_prompt`` includes only
            an excerpt of it.
        token: Hugging Face token. When omitted, ``HF_TOKEN`` and then
            ``huggingface_hub.get_token()`` are consulted. Unused when
            ``client`` is supplied.
        client: Pre-built chat client. When given, ``huggingface_hub`` is not
            imported and no token is required.

    Returns:
        A ``ModelAssistResult`` holding the model id, the validated memo and
        a short completion note.

    Raises:
        ValueError: If no model is configured for ``engine``, no token can be
            resolved, ``TRACE_FIELD_NOTES_MODEL_TIMEOUT`` is not a number, or
            the model reply is empty, not JSON, or missing required keys.
    """

    model_id = model_id_for_engine(engine)
    if not model_id:
        raise ValueError(f"No model is configured for analysis engine {engine!r}.")

    prompt = build_model_prompt(result, narrative_text)
    if client is None:
        inference_client = _build_inference_client(model_id, token)
    else:
        inference_client = client

    response = inference_client.chat_completion(
        messages=[
            {
                "role": "system",
                "content": (
                    "You analyze visible coding-agent narrative messages. "
                    "Do not infer hidden reasoning. Return JSON only."
                ),
            },
            {"role": "user", "content": prompt},
        ],
        # Passed per call as well: an injected client is not necessarily
        # bound to a model the way the locally built one is.
        model=model_id,
        max_tokens=900,
        temperature=0.2,
        response_format={"type": "json_object"},
    )
    content = extract_chat_content(response)
    memo = parse_model_json(content)
    return ModelAssistResult(
        model_id=model_id,
        memo=memo,
        note=f"Small-model assist completed with {model_id}.",
    )


def _build_inference_client(model_id: str, token: str | None) -> ChatClient:
    """Create a ``huggingface_hub.InferenceClient`` bound to *model_id*."""
    # Imported lazily so the dependency is only needed when no client is injected.
    from huggingface_hub import InferenceClient, get_token

    resolved_token = token or os.getenv("HF_TOKEN") or get_token()
    if not resolved_token:
        raise ValueError(
            "Sign in with Hugging Face to enable small-model assist through "
            "the inference-api OAuth scope."
        )

    return InferenceClient(
        model=model_id,
        # An empty variable is treated the same as an unset one.
        provider=os.getenv("TRACE_FIELD_NOTES_INFERENCE_PROVIDER") or None,
        token=resolved_token,
        timeout=_model_timeout_seconds(),
    )


def _model_timeout_seconds(default: float = 45.0) -> float:
    """Read the request timeout from ``TRACE_FIELD_NOTES_MODEL_TIMEOUT``."""
    raw = os.getenv("TRACE_FIELD_NOTES_MODEL_TIMEOUT", "").strip()
    if not raw:
        # Set-but-empty falls back to the default, matching how the provider
        # variable is handled, instead of failing inside float("").
        return default
    try:
        return float(raw)
    except ValueError as exc:
        raise ValueError(
            f"TRACE_FIELD_NOTES_MODEL_TIMEOUT must be a number of seconds, got {raw!r}."
        ) from exc


def build_model_prompt(result: AnalysisResult, narrative_text: str) -> str:
    """Assemble the user prompt sent to the small model.

    The prompt states the required JSON keys and the grounding rules, then
    appends ``result.to_dict()`` as pretty-printed JSON followed by the first
    12,000 characters of *narrative_text*.
    """
    analysis_block = json.dumps(result.to_dict(), indent=2, ensure_ascii=False)
    prompt_lines = (
        "Use the deterministic codebook analysis and redacted visible narrative below.",
        "",
        "Return JSON with exactly these keys:",
        "- executive_memo: 4-6 sentences for a developer",
        "- detour_memo: 2-4 sentences about productive detours vs wandering",
        "- outcome_audit_memo: 2-4 sentences about completion claims and caveats",
        "- caveats: array of short strings",
        "",
        "Rules:",
        "- Analyze only visible narrative messages.",
        "- Do not claim to know hidden reasoning.",
        "- Cite episode IDs where useful.",
        "- Do not include raw secrets, tool outputs, or long quotes.",
        "",
        "Deterministic analysis:",
        analysis_block,
        "",
        "Redacted narrative excerpt:",
        narrative_text[:12000],
        "",  # final empty item yields the prompt's trailing newline
    )
    return "\n".join(prompt_lines)


def extract_chat_content(response: Any) -> str:
    """Return the text of the first choice in a chat completion response.

    Raises:
        ValueError: If ``response.choices[0].message.content`` cannot be
            reached, or if it is not a non-blank string.
    """
    try:
        first_choice = response.choices[0]
        text = first_choice.message.content
    except (AttributeError, IndexError, TypeError) as exc:
        raise ValueError("Model response did not contain chat completion content.") from exc

    if isinstance(text, str) and text.strip():
        return text
    raise ValueError("Model response content was empty.")


def parse_model_json(content: str) -> dict[str, Any]:
    """Parse and validate the memo JSON returned by the model.

    Args:
        content: Raw reply text expected to hold a single JSON object.

    Returns:
        The parsed object. ``executive_memo``, ``detour_memo`` and
        ``outcome_audit_memo`` are strings; ``caveats`` is truncated to its
        first six items, each converted to ``str``. Any additional keys the
        model supplied are left in place.

    Raises:
        ValueError: If the text is not valid JSON, is valid JSON but not an
            object, or lacks a required key of the expected type.
    """
    try:
        parsed = json.loads(content)
    except json.JSONDecodeError as exc:
        raise ValueError("Model response was not valid JSON.") from exc

    # Valid JSON can also be a bare list, string, number, bool or null. The
    # key checks below would raise TypeError on those, so reject them up front
    # with the same exception type callers already handle.
    if not isinstance(parsed, dict):
        raise ValueError("Model response was not a JSON object.")

    required = {
        "executive_memo": str,
        "detour_memo": str,
        "outcome_audit_memo": str,
        "caveats": list,
    }
    for key, expected_type in required.items():
        # A missing key yields None, which matches neither str nor list.
        if not isinstance(parsed.get(key), expected_type):
            raise ValueError(f"Model response missing {key!r} as {expected_type.__name__}.")

    parsed["caveats"] = [str(item) for item in parsed["caveats"][:6]]
    return parsed