Spaces:

build-small-hackathon
/

trace-field-notes

Running on Zero

File size: 4,141 Bytes

c8055f7
 
 
 
 
 
f4e9a2f
c8055f7
 
7c8120d
c8055f7
 
bd351d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8055f7
bd351d2
c8055f7
 
 
7c8120d
 
 
 
 
 
c8055f7
bd351d2
 
 
 
 
 
 
 
 
 
 
 
 
c8055f7
bd351d2
c8055f7
 
 
bd351d2
c8055f7
 
 
 
 
bd351d2
c8055f7
 
 
 
bd351d2
c8055f7
 
 
 
 
 
 
 
 
 
8652b1a
bd351d2
8652b1a
 
 
 
 
 
 
bd351d2
8652b1a
bd351d2
f4e9a2f
 
 
bd351d2
f4e9a2f
 
 
 
 
 
 
 
bd351d2
f4e9a2f
c8055f7

from __future__ import annotations

import json
import types
import unittest
from pathlib import Path
from unittest.mock import patch

from analyzer import analyze_trace_file
from model_runtime import MODEL_CHOICES, PRIMARY_MODEL_ID, parse_model_json, run_model_assist


# Canned memo payload shared by the tests in this module: RecordingGenerator
# returns it serialized as JSON, and the parse_model_json tests compare the
# parsed fields against it.
MEMO_JSON = {
    "executive_memo": "The trace shows a visible upload-boundary correction.",
    "detour_memo": "E01 narrows scope instead of changing the parser.",
    "outcome_audit_memo": "The agent keeps a deployment caveat visible.",
    "caveats": ["Model memo is based only on redacted narrative."],
}


class RecordingGenerator:
    """Callable test double that logs each invocation and replies with canned JSON.

    Every call is stored in ``calls`` as a dict with the keys ``messages``,
    ``model_id`` and ``max_new_tokens``; the reply is always ``MEMO_JSON``
    serialized with ``json.dumps``.
    """

    def __init__(self) -> None:
        # One entry per invocation, in call order.
        self.calls: list[dict] = []

    def __call__(self, messages, *, model_id, max_new_tokens) -> str:
        """Record the arguments of this call and return the canned memo as JSON text."""
        invocation = {
            "messages": messages,
            "model_id": model_id,
            "max_new_tokens": max_new_tokens,
        }
        self.calls.append(invocation)

        canned_reply = json.dumps(MEMO_JSON)
        return canned_reply


class ModelRuntimeTests(unittest.TestCase):
    """Tests for ``model_runtime`` helpers and the analyzer's model-assist hooks.

    Where a test needs model output it either injects a ``RecordingGenerator``
    or patches ``analyzer.run_model_assist``.
    """

    # Sample trace used by every analyzer-level test. The path is relative, so
    # it is resolved against the current working directory at run time.
    SAMPLE_TRACE = Path("examples/sample_trace_redacted.jsonl")

    def test_nemotron_label_does_not_call_it_small(self) -> None:
        """The nemotron label names the model and never contains "small"."""
        label = str(MODEL_CHOICES["nemotron"]["label"])

        self.assertIn("NVIDIA Nemotron 3 Nano 30B-A3B", label)
        self.assertNotIn("small", label.lower())

    def test_parse_model_json_validates_required_shape(self) -> None:
        """Plain JSON text round-trips through ``parse_model_json``."""
        memo = parse_model_json(json.dumps(MEMO_JSON))

        self.assertEqual(memo["executive_memo"], MEMO_JSON["executive_memo"])
        self.assertEqual(memo["caveats"], MEMO_JSON["caveats"])

    def test_parse_model_json_recovers_from_code_fence(self) -> None:
        """JSON wrapped in a Markdown ``json`` code fence is still parsed."""
        fenced = "```json\n" + json.dumps(MEMO_JSON) + "\n```"
        memo = parse_model_json(fenced)

        self.assertEqual(memo["detour_memo"], MEMO_JSON["detour_memo"])

    def test_parse_model_json_extracts_object_from_prose(self) -> None:
        """A JSON object surrounded by free-form text is still parsed."""
        raw = "Here is the analysis:\n" + json.dumps(MEMO_JSON) + "\nHope this helps."
        memo = parse_model_json(raw)

        self.assertEqual(memo["outcome_audit_memo"], MEMO_JSON["outcome_audit_memo"])

    def test_run_model_assist_uses_selected_model(self) -> None:
        """The ``nemotron`` engine passes ``PRIMARY_MODEL_ID`` to the generator."""
        result, narrative = analyze_trace_file(self.SAMPLE_TRACE)
        generate = RecordingGenerator()

        assist = run_model_assist(
            engine="nemotron",
            result=result,
            narrative_text=narrative,
            generate=generate,
        )

        self.assertEqual(assist.model_id, PRIMARY_MODEL_ID)
        self.assertIn("upload-boundary", assist.memo["executive_memo"])
        # Guard the index below so a missing call reports as a test failure
        # instead of an IndexError.
        self.assertTrue(generate.calls, "generator was never called")
        self.assertEqual(generate.calls[0]["model_id"], PRIMARY_MODEL_ID)

    def test_analyzer_records_unknown_engine_note(self) -> None:
        """An unrecognized engine name produces an explanatory model note."""
        result, _ = analyze_trace_file(
            self.SAMPLE_TRACE,
            analysis_engine="missing-engine",
        )

        self.assertTrue(result.model_notes)
        self.assertIn("Unknown analysis engine", result.model_notes[0])

    def test_analyzer_model_error_note_avoids_double_period(self) -> None:
        """An error message ending in "." is reported without a doubled period."""
        with patch("analyzer.run_model_assist", side_effect=ValueError("model unavailable.")):
            result, _ = analyze_trace_file(
                self.SAMPLE_TRACE,
                analysis_engine="qwen",
            )

        self.assertTrue(result.model_notes)
        self.assertNotIn("..", result.model_notes[0])
        self.assertIn("ValueError: model unavailable.", result.model_notes[0])

    def test_analyzer_records_model_engine_on_success(self) -> None:
        """A successful assist puts the model id into ``result.engine``."""
        stub_assist = types.SimpleNamespace(
            model_id=PRIMARY_MODEL_ID,
            memo=dict(MEMO_JSON),
            note="ok",
        )
        # The mock gets its own name so it does not shadow the imported
        # ``run_model_assist`` function inside this test.
        with patch("analyzer.run_model_assist", return_value=stub_assist) as mock_assist:
            result, _ = analyze_trace_file(
                self.SAMPLE_TRACE,
                analysis_engine="nemotron",
            )

        self.assertIn(PRIMARY_MODEL_ID, result.engine)
        # Without this guard an uncalled mock has ``call_args`` of None and the
        # next line would raise AttributeError rather than fail the test.
        mock_assist.assert_called()
        # The analyzer must not pass a ``token`` keyword to run_model_assist.
        self.assertNotIn("token", mock_assist.call_args.kwargs)


# Run the test suite through unittest's runner when this file is executed directly.
if __name__ == "__main__":
    unittest.main()