from __future__ import annotations
import json
import types
import unittest
from pathlib import Path
from unittest.mock import patch
from analyzer import analyze_trace_file
from model_runtime import MODEL_CHOICES, PRIMARY_MODEL_ID, parse_model_json, run_model_assist
# Canned memo payload used as a fixture throughout this module: it is what the
# fake generator replies with (JSON-encoded) and what the parser tests expect
# to get back.
MEMO_JSON = {
    "executive_memo": "The trace shows a visible upload-boundary correction.",
    "detour_memo": "E01 narrows scope instead of changing the parser.",
    "outcome_audit_memo": "The agent keeps a deployment caveat visible.",
    "caveats": [
        "Model memo is based only on redacted narrative.",
    ],
}
class RecordingGenerator:
    """Fake replacement for the local GPU generator.

    Every invocation is logged in ``calls`` as a dict with the keys
    ``messages``, ``model_id`` and ``max_new_tokens``; the reply is always
    the JSON-encoded ``MEMO_JSON`` fixture.
    """

    def __init__(self) -> None:
        self.calls: list[dict] = []

    def __call__(self, messages, *, model_id, max_new_tokens) -> str:
        # Capture the arguments first so tests can inspect what was requested.
        record = {
            "messages": messages,
            "model_id": model_id,
            "max_new_tokens": max_new_tokens,
        }
        self.calls.append(record)
        # The answer does not depend on the inputs: it is the canned memo.
        reply = json.dumps(MEMO_JSON)
        return reply
class ModelRuntimeTests(unittest.TestCase):
    """Tests for the ``model_runtime`` helpers and for how ``analyzer`` reports model use."""

    # Relative path: the tests must be started from the directory that
    # contains ``examples/``.
    SAMPLE_TRACE = Path("examples/sample_trace_redacted.jsonl")

    def test_nemotron_label_does_not_call_it_small(self) -> None:
        """The nemotron label carries the full model name and never says "small"."""
        label = str(MODEL_CHOICES["nemotron"]["label"])
        self.assertIn("NVIDIA Nemotron 3 Nano 30B-A3B", label)
        self.assertNotIn("small", label.lower())

    def test_parse_model_json_validates_required_shape(self) -> None:
        """A plain JSON memo round-trips through ``parse_model_json``."""
        memo = parse_model_json(json.dumps(MEMO_JSON))
        self.assertEqual(memo["executive_memo"], MEMO_JSON["executive_memo"])
        self.assertEqual(memo["caveats"], MEMO_JSON["caveats"])

    def test_parse_model_json_recovers_from_code_fence(self) -> None:
        """A memo wrapped in a Markdown ``json`` code fence is still parsed."""
        memo = parse_model_json("```json\n" + json.dumps(MEMO_JSON) + "\n```")
        self.assertEqual(memo["detour_memo"], MEMO_JSON["detour_memo"])

    def test_parse_model_json_extracts_object_from_prose(self) -> None:
        """A memo surrounded by free-form prose is still parsed."""
        raw = "Here is the analysis:\n" + json.dumps(MEMO_JSON) + "\nHope this helps."
        memo = parse_model_json(raw)
        self.assertEqual(memo["outcome_audit_memo"], MEMO_JSON["outcome_audit_memo"])

    def test_run_model_assist_uses_selected_model(self) -> None:
        """``run_model_assist`` hands the selected engine's model id to the generator."""
        result, narrative = analyze_trace_file(self.SAMPLE_TRACE)
        generate = RecordingGenerator()
        assist = run_model_assist(
            engine="nemotron",
            result=result,
            narrative_text=narrative,
            generate=generate,
        )
        self.assertEqual(assist.model_id, PRIMARY_MODEL_ID)
        self.assertIn("upload-boundary", assist.memo["executive_memo"])
        # Report an uncalled generator as a test failure, not an IndexError.
        self.assertTrue(generate.calls, "run_model_assist never invoked the generator")
        self.assertEqual(generate.calls[0]["model_id"], PRIMARY_MODEL_ID)

    def test_analyzer_records_unknown_engine_note(self) -> None:
        """An unrecognised ``analysis_engine`` is reported in the first model note."""
        result, _ = analyze_trace_file(
            self.SAMPLE_TRACE,
            analysis_engine="missing-engine",
        )
        self.assertTrue(result.model_notes)
        self.assertIn("Unknown analysis engine", result.model_notes[0])

    def test_analyzer_model_error_note_avoids_double_period(self) -> None:
        """A model error ending in "." is noted without producing ".." in the note."""
        with patch("analyzer.run_model_assist", side_effect=ValueError("model unavailable.")):
            result, _ = analyze_trace_file(
                self.SAMPLE_TRACE,
                analysis_engine="qwen",
            )
        self.assertTrue(result.model_notes)
        self.assertNotIn("..", result.model_notes[0])
        self.assertIn("ValueError: model unavailable.", result.model_notes[0])

    def test_analyzer_records_model_engine_on_success(self) -> None:
        """On a successful (patched) assist the engine names the model and no ``token`` kwarg is passed."""
        # Named ``mock_assist`` so the imported ``run_model_assist`` is not shadowed.
        with patch("analyzer.run_model_assist") as mock_assist:
            mock_assist.return_value = types.SimpleNamespace(
                model_id=PRIMARY_MODEL_ID,
                memo=dict(MEMO_JSON),
                note="ok",
            )
            result, _ = analyze_trace_file(
                self.SAMPLE_TRACE,
                analysis_engine="nemotron",
            )
        self.assertIn(PRIMARY_MODEL_ID, result.engine)
        # ``call_args`` is None when the mock was never called; assert the call
        # first so that case is a clear failure instead of an AttributeError.
        mock_assist.assert_called()
        self.assertNotIn("token", mock_assist.call_args.kwargs)
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()