JacobLinCool Codex committed on
Commit
7c8120d
·
verified ·
1 Parent(s): 6ac8ef6

feat: improve qwen zerogpu ux

Browse files

Co-authored-by: Codex <noreply@openai.com>

Files changed (6) hide show
  1. README.md +9 -11
  2. analyzer.py +1 -1
  3. app.py +111 -65
  4. model_runtime.py +5 -5
  5. report_renderer.py +1 -1
  6. tests/test_model_runtime.py +7 -1
README.md CHANGED
@@ -22,11 +22,11 @@ telemetry by default and analyzes only the agent's visible narrative messages:
22
  what it planned, where it got stuck, how it detoured, how it recovered, and how
23
  it claimed completion.
24
 
25
- Built for the Build Small Hackathon as a Gradio app. The default engine uses a
26
- verified deterministic codebook analyzer so the Space can always start and
27
- produce a report. The app also exposes explicit small-model assist modes for
28
- `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16` and `Qwen/Qwen3.5-9B` through
29
- Hugging Face Inference Providers when the user signs in with Hugging Face OAuth.
30
 
31
  ## Run Locally
32
 
@@ -45,12 +45,10 @@ python3.11 -m unittest discover -s tests
45
 
46
  ## Analysis Engines
47
 
48
- - `Deterministic field notes`: default, local, no model dependency.
49
- - `Small-model assist: NVIDIA Nemotron 3 Nano 30B-A3B`: uses the hackathon-sized
50
- 30B total-parameter Nemotron model through the signed-in user's
51
- `inference-api` OAuth scope.
52
- - `Quick small-model assist: Qwen3.5 9B`: optional lower-latency model-assisted
53
- memo.
54
 
55
  If a selected model is unavailable or the user is not signed in, the report
56
  records the reason in model notes and returns the deterministic analysis instead
 
22
  what it planned, where it got stuck, how it detoured, how it recovered, and how
23
  it claimed completion.
24
 
25
+ Built for the Build Small Hackathon as a Gradio app. The default engine is the
26
+ quick Qwen3.5 9B model-assisted path on ZeroGPU, with a verified deterministic
27
+ codebook analyzer as the always-available recovery path. The app also exposes
28
+ `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16` through Hugging Face Inference
29
+ Providers when the user signs in with Hugging Face OAuth.
30
 
31
  ## Run Locally
32
 
 
45
 
46
  ## Analysis Engines
47
 
48
+ - `Quick small-model assist: Qwen3.5 9B`: default model-assisted memo.
49
+ - `NVIDIA Nemotron 3 Nano 30B-A3B assist`: uses Nemotron through the signed-in
50
+ user's `inference-api` OAuth scope.
51
+ - `Deterministic field notes`: local, no model dependency.
 
 
52
 
53
  If a selected model is unavailable or the user is not signed in, the report
54
  records the reason in model notes and returns the deterministic analysis instead
analyzer.py CHANGED
@@ -197,7 +197,7 @@ def analyze_trace_file(
197
  )
198
  except Exception as exc:
199
  result.model_notes.append(
200
- "Small-model assist was requested but unavailable: "
201
  f"{type(exc).__name__}: {exc}. Deterministic analysis was returned."
202
  )
203
  else:
 
197
  )
198
  except Exception as exc:
199
  result.model_notes.append(
200
+ "Model assist was requested but unavailable: "
201
  f"{type(exc).__name__}: {exc}. Deterministic analysis was returned."
202
  )
203
  else:
app.py CHANGED
@@ -17,6 +17,8 @@ from report_renderer import render_report
17
 
18
 
19
  SPACE_URL = "https://huggingface.co/spaces/build-small-hackathon/trace-field-notes"
 
 
20
 
21
  PRIVACY_WARNING = (
22
  "Agent traces can contain prompts, tool inputs, command outputs, local file paths, "
@@ -25,34 +27,22 @@ PRIVACY_WARNING = (
25
  )
26
 
27
  HERO_MD = f"""
28
- # Trace Field Notes
29
-
30
- See how your coding agent got stuck, detoured, recovered, and claimed success.
31
-
32
- Upload a Codex, Claude Code, or Pi Agent session log. The app extracts visible narrative messages, classifies difficulty episodes, and turns the session into a qualitative field report.
33
-
34
- > {PRIVACY_WARNING}
35
  """
36
 
37
  SESSION_PATHS_MD = """
38
- ## Find Your Session Log
39
 
40
  | Agent | Local session directory |
41
  |---|---|
42
  | Codex | `~/.codex/sessions` |
43
  | Claude Code | `~/.claude/projects` |
44
  | Pi Agent | `~/.pi/agent/sessions` |
45
-
46
- ```bash
47
- # Codex
48
- ls ~/.codex/sessions
49
-
50
- # Claude Code
51
- ls ~/.claude/projects
52
-
53
- # Pi Agent
54
- ls ~/.pi/agent/sessions
55
- ```
56
  """
57
 
58
  AGENT_PROMPT = f"""Use this Space as a tool.
@@ -69,26 +59,81 @@ AGENT_PROMPT = f"""Use this Space as a tool.
69
 
70
  CUSTOM_CSS = """
71
  :root {
72
- --field-border: #d7d0c2;
73
- --field-ink: #202124;
74
- --field-muted: #605b52;
75
- --field-paper: #fbfaf7;
76
- --field-accent: #326b59;
 
 
77
  }
78
  .gradio-container {
79
- max-width: 1180px !important;
80
  color: var(--field-ink);
81
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  .trace-panel {
83
  border: 1px solid var(--field-border);
84
  border-radius: 8px;
85
- padding: 14px;
86
- background: var(--field-paper);
 
 
 
 
 
 
 
 
 
 
 
 
87
  }
88
  button.primary {
89
  background: var(--field-accent) !important;
90
  border-color: var(--field-accent) !important;
91
  }
 
 
 
 
 
 
 
 
 
92
  textarea, input {
93
  border-radius: 6px !important;
94
  }
@@ -101,7 +146,7 @@ def _analyze_trace_impl(
101
  redact_secrets: bool = True,
102
  ignore_tool_calls: bool = True,
103
  report_style: str = "field_notes",
104
- analysis_engine: str = "deterministic",
105
  oauth_token: Optional[gr.OAuthToken] = None,
106
  ) -> tuple[str, dict[str, Any], str, str, str]:
107
  """Gradio-callable analysis endpoint."""
@@ -144,7 +189,7 @@ def analyze_trace(
144
  redact_secrets: bool = True,
145
  ignore_tool_calls: bool = True,
146
  report_style: str = "field_notes",
147
- analysis_engine: str = "deterministic",
148
  oauth_token: Optional[gr.OAuthToken] = None,
149
  ) -> tuple[str, dict[str, Any], str, str, str]:
150
  """ZeroGPU-visible Gradio endpoint."""
@@ -184,6 +229,10 @@ def write_temp_artifact(prefix: str, suffix: str, content: str) -> str:
184
  return handle.name
185
 
186
 
 
 
 
 
187
  with gr.Blocks(
188
  title="Trace Field Notes",
189
  css=CUSTOM_CSS,
@@ -198,23 +247,24 @@ with gr.Blocks(
198
 
199
  with gr.Row(equal_height=False):
200
  with gr.Column(scale=3, elem_classes=["trace-panel"]):
 
201
  trace_input = gr.File(
202
- label="Upload Agent Session Log",
203
  file_types=[".jsonl", ".json", ".txt", ".log"],
204
  type="filepath",
205
  )
206
  with gr.Row():
207
  include_user_context = gr.Checkbox(
208
  value=True,
209
- label="Include user prompts as context",
210
  )
211
  redact_secrets = gr.Checkbox(
212
  value=True,
213
- label="Redact likely secrets before analysis",
214
  )
215
  ignore_tool_calls = gr.Checkbox(
216
  value=True,
217
- label="Ignore tool call contents",
218
  interactive=False,
219
  )
220
  report_style = gr.Radio(
@@ -222,13 +272,14 @@ with gr.Blocks(
222
  value="field_notes",
223
  label="Report style",
224
  interactive=False,
 
225
  )
226
  analysis_engine = gr.Radio(
227
  choices=[
228
  (str(choice["label"]), key)
229
  for key, choice in MODEL_CHOICES.items()
230
  ],
231
- value="deterministic",
232
  label="Analysis engine",
233
  )
234
  with gr.Row():
@@ -241,31 +292,24 @@ with gr.Blocks(
241
  "Model-assisted modes use your signed-in Hugging Face OAuth token with the `inference-api` scope. "
242
  "The deterministic engine does not require sign-in."
243
  )
244
- analyze_button = gr.Button("Analyze My Trace", variant="primary")
245
- with gr.Column(scale=2):
 
 
246
  gr.Markdown(SESSION_PATHS_MD)
 
 
 
 
 
 
 
 
247
 
248
- with gr.Accordion("Agent-callable prompt", open=False):
249
- gr.Textbox(
250
- value=AGENT_PROMPT,
251
- label="Prompt for Codex or Claude Code",
252
- lines=9,
253
- interactive=False,
254
- show_copy_button=True,
255
- )
256
-
257
- gr.Examples(
258
- examples=[
259
- [
260
- "examples/sample_trace_redacted.jsonl",
261
- True,
262
- True,
263
- True,
264
- "field_notes",
265
- "deterministic",
266
- ]
267
- ],
268
- inputs=[
269
  trace_input,
270
  include_user_context,
271
  redact_secrets,
@@ -273,16 +317,18 @@ with gr.Blocks(
273
  report_style,
274
  analysis_engine,
275
  ],
276
- label="Try a redacted sample trace",
277
  )
278
 
279
- report_output = gr.Markdown(label="Field Report")
280
- with gr.Row():
281
- episode_json = gr.JSON(label="Structured Episode JSON")
282
- with gr.Row():
283
- redacted_download = gr.File(label="Download Redacted Narrative")
284
- report_download = gr.File(label="Download Markdown Report")
285
- json_download = gr.File(label="Download Structured JSON")
 
 
 
286
 
287
  analyze_button.click(
288
  analyze_trace,
 
17
 
18
 
19
  SPACE_URL = "https://huggingface.co/spaces/build-small-hackathon/trace-field-notes"
20
+ DEFAULT_ANALYSIS_ENGINE = "qwen"
21
+ SAMPLE_TRACE_PATH = "examples/sample_trace_redacted.jsonl"
22
 
23
  PRIVACY_WARNING = (
24
  "Agent traces can contain prompts, tool inputs, command outputs, local file paths, "
 
27
  )
28
 
29
  HERO_MD = f"""
30
+ <div class="hero">
31
+ <div class="hero-kicker">ZeroGPU field report</div>
32
+ <h1>Trace Field Notes</h1>
33
+ <p>Map where a coding agent got stuck, changed route, recovered, and claimed success.</p>
34
+ </div>
35
+ <div class="privacy-callout">{PRIVACY_WARNING}</div>
 
36
  """
37
 
38
  SESSION_PATHS_MD = """
39
+ ### Session Logs
40
 
41
  | Agent | Local session directory |
42
  |---|---|
43
  | Codex | `~/.codex/sessions` |
44
  | Claude Code | `~/.claude/projects` |
45
  | Pi Agent | `~/.pi/agent/sessions` |
 
 
 
 
 
 
 
 
 
 
 
46
  """
47
 
48
  AGENT_PROMPT = f"""Use this Space as a tool.
 
59
 
60
  CUSTOM_CSS = """
61
  :root {
62
+ --field-border: rgba(148, 163, 184, 0.28);
63
+ --field-ink: #f8fafc;
64
+ --field-muted: #94a3b8;
65
+ --field-panel: rgba(15, 23, 42, 0.74);
66
+ --field-panel-strong: rgba(15, 23, 42, 0.92);
67
+ --field-accent: #2f8a69;
68
+ --field-accent-strong: #23785d;
69
  }
70
  .gradio-container {
71
+ max-width: 1220px !important;
72
  color: var(--field-ink);
73
  }
74
+ .hero {
75
+ border: 1px solid var(--field-border);
76
+ border-radius: 8px;
77
+ padding: 18px 20px;
78
+ background: linear-gradient(135deg, rgba(47, 138, 105, 0.18), rgba(15, 23, 42, 0.3));
79
+ }
80
+ .hero h1 {
81
+ margin: 0;
82
+ font-size: 34px;
83
+ line-height: 1.08;
84
+ }
85
+ .hero p {
86
+ max-width: 760px;
87
+ margin: 10px 0 0;
88
+ color: var(--field-muted);
89
+ font-size: 15px;
90
+ }
91
+ .hero-kicker {
92
+ margin-bottom: 8px;
93
+ color: #7dd3fc;
94
+ font: 700 12px/1.2 ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
95
+ text-transform: uppercase;
96
+ letter-spacing: 0;
97
+ }
98
+ .privacy-callout {
99
+ margin: 12px 0 16px;
100
+ border-left: 3px solid #f59e0b;
101
+ padding: 10px 12px;
102
+ color: #dbe4ef;
103
+ background: rgba(245, 158, 11, 0.08);
104
+ border-radius: 0 6px 6px 0;
105
+ }
106
  .trace-panel {
107
  border: 1px solid var(--field-border);
108
  border-radius: 8px;
109
+ padding: 16px;
110
+ background: var(--field-panel);
111
+ }
112
+ .guide-panel {
113
+ border: 1px solid var(--field-border);
114
+ border-radius: 8px;
115
+ padding: 16px;
116
+ background: var(--field-panel);
117
+ }
118
+ .guide-panel table {
119
+ width: 100%;
120
+ }
121
+ .action-row button {
122
+ min-height: 42px;
123
  }
124
  button.primary {
125
  background: var(--field-accent) !important;
126
  border-color: var(--field-accent) !important;
127
  }
128
+ button.primary:hover {
129
+ background: var(--field-accent-strong) !important;
130
+ }
131
+ .download-row {
132
+ align-items: stretch;
133
+ }
134
+ .result-tabs {
135
+ margin-top: 14px;
136
+ }
137
  textarea, input {
138
  border-radius: 6px !important;
139
  }
 
146
  redact_secrets: bool = True,
147
  ignore_tool_calls: bool = True,
148
  report_style: str = "field_notes",
149
+ analysis_engine: str = DEFAULT_ANALYSIS_ENGINE,
150
  oauth_token: Optional[gr.OAuthToken] = None,
151
  ) -> tuple[str, dict[str, Any], str, str, str]:
152
  """Gradio-callable analysis endpoint."""
 
189
  redact_secrets: bool = True,
190
  ignore_tool_calls: bool = True,
191
  report_style: str = "field_notes",
192
+ analysis_engine: str = DEFAULT_ANALYSIS_ENGINE,
193
  oauth_token: Optional[gr.OAuthToken] = None,
194
  ) -> tuple[str, dict[str, Any], str, str, str]:
195
  """ZeroGPU-visible Gradio endpoint."""
 
229
  return handle.name
230
 
231
 
232
+ def load_sample_trace() -> tuple[str, bool, bool, bool, str, str]:
233
+ return SAMPLE_TRACE_PATH, True, True, True, "field_notes", DEFAULT_ANALYSIS_ENGINE
234
+
235
+
236
  with gr.Blocks(
237
  title="Trace Field Notes",
238
  css=CUSTOM_CSS,
 
247
 
248
  with gr.Row(equal_height=False):
249
  with gr.Column(scale=3, elem_classes=["trace-panel"]):
250
+ gr.Markdown("### Trace Input")
251
  trace_input = gr.File(
252
+ label="Agent session log",
253
  file_types=[".jsonl", ".json", ".txt", ".log"],
254
  type="filepath",
255
  )
256
  with gr.Row():
257
  include_user_context = gr.Checkbox(
258
  value=True,
259
+ label="Include user context",
260
  )
261
  redact_secrets = gr.Checkbox(
262
  value=True,
263
+ label="Redact likely secrets",
264
  )
265
  ignore_tool_calls = gr.Checkbox(
266
  value=True,
267
+ label="Ignore tool contents",
268
  interactive=False,
269
  )
270
  report_style = gr.Radio(
 
272
  value="field_notes",
273
  label="Report style",
274
  interactive=False,
275
+ visible=False,
276
  )
277
  analysis_engine = gr.Radio(
278
  choices=[
279
  (str(choice["label"]), key)
280
  for key, choice in MODEL_CHOICES.items()
281
  ],
282
+ value=DEFAULT_ANALYSIS_ENGINE,
283
  label="Analysis engine",
284
  )
285
  with gr.Row():
 
292
  "Model-assisted modes use your signed-in Hugging Face OAuth token with the `inference-api` scope. "
293
  "The deterministic engine does not require sign-in."
294
  )
295
+ with gr.Row(elem_classes=["action-row"]):
296
+ analyze_button = gr.Button("Analyze My Trace", variant="primary")
297
+ sample_button = gr.Button("Use Sample Trace", variant="secondary")
298
+ with gr.Column(scale=2, elem_classes=["guide-panel"]):
299
  gr.Markdown(SESSION_PATHS_MD)
300
+ with gr.Accordion("Agent-callable prompt", open=False):
301
+ gr.Textbox(
302
+ value=AGENT_PROMPT,
303
+ label="Prompt for Codex or Claude Code",
304
+ lines=9,
305
+ interactive=False,
306
+ show_copy_button=True,
307
+ )
308
 
309
+ sample_button.click(
310
+ load_sample_trace,
311
+ inputs=None,
312
+ outputs=[
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  trace_input,
314
  include_user_context,
315
  redact_secrets,
 
317
  report_style,
318
  analysis_engine,
319
  ],
 
320
  )
321
 
322
+ with gr.Tabs(elem_classes=["result-tabs"]):
323
+ with gr.Tab("Field Report"):
324
+ report_output = gr.Markdown(label="Field Report")
325
+ with gr.Tab("Episodes JSON"):
326
+ episode_json = gr.JSON(label="Structured Episode JSON")
327
+ with gr.Tab("Downloads"):
328
+ with gr.Row(elem_classes=["download-row"]):
329
+ redacted_download = gr.File(label="Redacted Narrative")
330
+ report_download = gr.File(label="Markdown Report")
331
+ json_download = gr.File(label="Structured JSON")
332
 
333
  analyze_button.click(
334
  analyze_trace,
model_runtime.py CHANGED
@@ -1,4 +1,4 @@
1
- """Optional small-model assistance through Hugging Face Inference Providers."""
2
 
3
  from __future__ import annotations
4
 
@@ -19,7 +19,7 @@ MODEL_CHOICES = {
19
  "model_id": None,
20
  },
21
  "nemotron": {
22
- "label": "Small-model assist: NVIDIA Nemotron 3 Nano 30B-A3B",
23
  "model_id": PRIMARY_MODEL_ID,
24
  },
25
  "qwen": {
@@ -57,7 +57,7 @@ def run_model_assist(
57
  token: str | None = None,
58
  client: ChatClient | None = None,
59
  ) -> ModelAssistResult:
60
- """Ask the selected small model for a concise memo grounded in visible text."""
61
 
62
  model_id = model_id_for_engine(engine)
63
  if not model_id:
@@ -70,7 +70,7 @@ def run_model_assist(
70
  resolved_token = token or os.getenv("HF_TOKEN") or get_token()
71
  if not resolved_token:
72
  raise ValueError(
73
- "Sign in with Hugging Face to enable small-model assist through "
74
  "the inference-api OAuth scope."
75
  )
76
 
@@ -103,7 +103,7 @@ def run_model_assist(
103
  return ModelAssistResult(
104
  model_id=model_id,
105
  memo=memo,
106
- note=f"Small-model assist completed with {model_id}.",
107
  )
108
 
109
 
 
1
+ """Optional model assistance through Hugging Face Inference Providers."""
2
 
3
  from __future__ import annotations
4
 
 
19
  "model_id": None,
20
  },
21
  "nemotron": {
22
+ "label": "NVIDIA Nemotron 3 Nano 30B-A3B assist",
23
  "model_id": PRIMARY_MODEL_ID,
24
  },
25
  "qwen": {
 
57
  token: str | None = None,
58
  client: ChatClient | None = None,
59
  ) -> ModelAssistResult:
60
+ """Ask the selected model for a concise memo grounded in visible text."""
61
 
62
  model_id = model_id_for_engine(engine)
63
  if not model_id:
 
70
  resolved_token = token or os.getenv("HF_TOKEN") or get_token()
71
  if not resolved_token:
72
  raise ValueError(
73
+ "Sign in with Hugging Face to enable model assist through "
74
  "the inference-api OAuth scope."
75
  )
76
 
 
103
  return ModelAssistResult(
104
  model_id=model_id,
105
  memo=memo,
106
+ note=f"Model assist completed with {model_id}.",
107
  )
108
 
109
 
report_renderer.py CHANGED
@@ -76,7 +76,7 @@ def render_model_memo(result: AnalysisResult) -> str:
76
  if not result.model_memo and not result.model_notes:
77
  return ""
78
 
79
- lines = ["## Small-Model Memo"]
80
  if result.model_memo:
81
  lines.append(result.model_memo.get("executive_memo", ""))
82
  lines.append(f"**Detours:** {result.model_memo.get('detour_memo', '')}")
 
76
  if not result.model_memo and not result.model_notes:
77
  return ""
78
 
79
+ lines = ["## Model Memo"]
80
  if result.model_memo:
81
  lines.append(result.model_memo.get("executive_memo", ""))
82
  lines.append(f"**Detours:** {result.model_memo.get('detour_memo', '')}")
tests/test_model_runtime.py CHANGED
@@ -7,7 +7,7 @@ from pathlib import Path
7
  from unittest.mock import patch
8
 
9
  from analyzer import analyze_trace_file
10
- from model_runtime import PRIMARY_MODEL_ID, parse_model_json, run_model_assist
11
 
12
 
13
  class FakeChatClient:
@@ -31,6 +31,12 @@ class FakeChatClient:
31
 
32
 
33
  class ModelRuntimeTests(unittest.TestCase):
 
 
 
 
 
 
34
  def test_parse_model_json_validates_required_shape(self) -> None:
35
  memo = parse_model_json(
36
  json.dumps(
 
7
  from unittest.mock import patch
8
 
9
  from analyzer import analyze_trace_file
10
+ from model_runtime import MODEL_CHOICES, PRIMARY_MODEL_ID, parse_model_json, run_model_assist
11
 
12
 
13
  class FakeChatClient:
 
31
 
32
 
33
  class ModelRuntimeTests(unittest.TestCase):
34
+ def test_nemotron_label_does_not_call_it_small(self) -> None:
35
+ label = str(MODEL_CHOICES["nemotron"]["label"])
36
+
37
+ self.assertIn("NVIDIA Nemotron 3 Nano 30B-A3B", label)
38
+ self.assertNotIn("small", label.lower())
39
+
40
  def test_parse_model_json_validates_required_shape(self) -> None:
41
  memo = parse_model_json(
42
  json.dumps(