JacobLinCool Codex committed on
Commit
f4e9a2f
·
verified ·
1 Parent(s): c8055f7

feat: enable oauth-backed model assist

Browse files

Co-authored-by: Codex <noreply@openai.com>

Files changed (6) hide show
  1. README.md +10 -5
  2. analyzer.py +2 -0
  3. app.py +13 -1
  4. model_runtime.py +10 -2
  5. requirements.txt +1 -1
  6. tests/test_model_runtime.py +22 -0
README.md CHANGED
@@ -7,6 +7,10 @@ sdk_version: 5.50.0
7
  app_file: app.py
8
  pinned: false
9
  license: mit
 
 
 
 
10
  ---
11
 
12
  # Trace Field Notes
@@ -22,7 +26,7 @@ Built for the Build Small Hackathon as a Gradio app. The default engine uses a
22
  verified deterministic codebook analyzer so the Space can always start and
23
  produce a report. The app also exposes explicit small-model assist modes for
24
  `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16` and `Qwen/Qwen3.5-9B` through
25
- Hugging Face Inference Providers when the runtime has provider access.
26
 
27
  ## Run Locally
28
 
@@ -43,13 +47,14 @@ python3.11 -m unittest discover -s tests
43
 
44
  - `Deterministic field notes`: default, local, no model dependency.
45
  - `Small-model assist: NVIDIA Nemotron 3 Nano 30B-A3B`: uses the hackathon-sized
46
- 30B total-parameter Nemotron model when Hugging Face Inference Providers can
47
- serve it.
48
  - `Quick small-model assist: Qwen3.5 9B`: optional lower-latency model-assisted
49
  memo.
50
 
51
- If a selected model is unavailable, the report records the error in model notes
52
- and returns the deterministic analysis instead of failing the whole Space.
 
53
 
54
  ## Agent Session Locations
55
 
 
7
  app_file: app.py
8
  pinned: false
9
  license: mit
10
+ hf_oauth: true
11
+ hf_oauth_scopes:
12
+ - inference-api
13
+ hf_oauth_expiration_minutes: 480
14
  ---
15
 
16
  # Trace Field Notes
 
26
  verified deterministic codebook analyzer so the Space can always start and
27
  produce a report. The app also exposes explicit small-model assist modes for
28
  `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16` and `Qwen/Qwen3.5-9B` through
29
+ Hugging Face Inference Providers when the user signs in with Hugging Face OAuth.
30
 
31
  ## Run Locally
32
 
 
47
 
48
  - `Deterministic field notes`: default, local, no model dependency.
49
  - `Small-model assist: NVIDIA Nemotron 3 Nano 30B-A3B`: uses the hackathon-sized
50
+ 30B total-parameter Nemotron model through the signed-in user's
51
+ `inference-api` OAuth scope.
52
  - `Quick small-model assist: Qwen3.5 9B`: optional lower-latency model-assisted
53
  memo.
54
 
55
+ If a selected model is unavailable or the user is not signed in, the report
56
+ records the reason in model notes and returns the deterministic analysis instead
57
+ of failing the whole Space.
58
 
59
  ## Agent Session Locations
60
 
analyzer.py CHANGED
@@ -119,6 +119,7 @@ def analyze_trace_file(
119
  ignore_tool_calls: bool = True,
120
  report_style: str = "field_notes",
121
  analysis_engine: str = "deterministic",
 
122
  ) -> tuple[AnalysisResult, str]:
123
  """Parse, optionally redact, and analyze an uploaded trace file."""
124
 
@@ -192,6 +193,7 @@ def analyze_trace_file(
192
  engine=analysis_engine,
193
  result=result,
194
  narrative_text=narrative_text,
 
195
  )
196
  except Exception as exc:
197
  result.model_notes.append(
 
119
  ignore_tool_calls: bool = True,
120
  report_style: str = "field_notes",
121
  analysis_engine: str = "deterministic",
122
+ hf_token: str | None = None,
123
  ) -> tuple[AnalysisResult, str]:
124
  """Parse, optionally redact, and analyze an uploaded trace file."""
125
 
 
193
  engine=analysis_engine,
194
  result=result,
195
  narrative_text=narrative_text,
196
+ token=hf_token,
197
  )
198
  except Exception as exc:
199
  result.model_notes.append(
app.py CHANGED
@@ -5,7 +5,7 @@ from __future__ import annotations
5
  import json
6
  import tempfile
7
  from pathlib import Path
8
- from typing import Any
9
 
10
  import gradio as gr
11
 
@@ -101,6 +101,7 @@ def analyze_trace(
101
  ignore_tool_calls: bool = True,
102
  report_style: str = "field_notes",
103
  analysis_engine: str = "deterministic",
 
104
  ) -> tuple[str, dict[str, Any], str, str, str]:
105
  """Gradio-callable analysis endpoint."""
106
 
@@ -116,6 +117,7 @@ def analyze_trace(
116
  ignore_tool_calls=ignore_tool_calls,
117
  report_style=report_style,
118
  analysis_engine=analysis_engine,
 
119
  )
120
  except TraceParseError as exc:
121
  raise gr.Error(str(exc)) from exc
@@ -205,6 +207,16 @@ with gr.Blocks(
205
  value="deterministic",
206
  label="Analysis engine",
207
  )
 
 
 
 
 
 
 
 
 
 
208
  analyze_button = gr.Button("Analyze My Trace", variant="primary")
209
  with gr.Column(scale=2):
210
  gr.Markdown(SESSION_PATHS_MD)
 
5
  import json
6
  import tempfile
7
  from pathlib import Path
8
+ from typing import Any, Optional
9
 
10
  import gradio as gr
11
 
 
101
  ignore_tool_calls: bool = True,
102
  report_style: str = "field_notes",
103
  analysis_engine: str = "deterministic",
104
+ oauth_token: Optional[gr.OAuthToken] = None,
105
  ) -> tuple[str, dict[str, Any], str, str, str]:
106
  """Gradio-callable analysis endpoint."""
107
 
 
117
  ignore_tool_calls=ignore_tool_calls,
118
  report_style=report_style,
119
  analysis_engine=analysis_engine,
120
+ hf_token=oauth_token.token if oauth_token else None,
121
  )
122
  except TraceParseError as exc:
123
  raise gr.Error(str(exc)) from exc
 
207
  value="deterministic",
208
  label="Analysis engine",
209
  )
210
+ with gr.Row():
211
+ gr.LoginButton(
212
+ value="Sign in for model assist",
213
+ logout_value="Signed in as {}",
214
+ size="sm",
215
+ )
216
+ gr.Markdown(
217
+ "Model-assisted modes use your signed-in Hugging Face OAuth token with the `inference-api` scope. "
218
+ "The deterministic engine does not require sign-in."
219
+ )
220
  analyze_button = gr.Button("Analyze My Trace", variant="primary")
221
  with gr.Column(scale=2):
222
  gr.Markdown(SESSION_PATHS_MD)
model_runtime.py CHANGED
@@ -54,6 +54,7 @@ def run_model_assist(
54
  engine: str,
55
  result: AnalysisResult,
56
  narrative_text: str,
 
57
  client: ChatClient | None = None,
58
  ) -> ModelAssistResult:
59
  """Ask the selected small model for a concise memo grounded in visible text."""
@@ -64,12 +65,19 @@ def run_model_assist(
64
 
65
  prompt = build_model_prompt(result, narrative_text)
66
  if client is None:
67
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
68
 
69
  inference_client = InferenceClient(
70
  model=model_id,
71
  provider=os.getenv("TRACE_FIELD_NOTES_INFERENCE_PROVIDER") or None,
72
- token=os.getenv("HF_TOKEN") or None,
73
  timeout=float(os.getenv("TRACE_FIELD_NOTES_MODEL_TIMEOUT", "45")),
74
  )
75
  else:
 
54
  engine: str,
55
  result: AnalysisResult,
56
  narrative_text: str,
57
+ token: str | None = None,
58
  client: ChatClient | None = None,
59
  ) -> ModelAssistResult:
60
  """Ask the selected small model for a concise memo grounded in visible text."""
 
65
 
66
  prompt = build_model_prompt(result, narrative_text)
67
  if client is None:
68
+ from huggingface_hub import InferenceClient, get_token
69
+
70
+ resolved_token = token or os.getenv("HF_TOKEN") or get_token()
71
+ if not resolved_token:
72
+ raise ValueError(
73
+ "Sign in with Hugging Face to enable small-model assist through "
74
+ "the inference-api OAuth scope."
75
+ )
76
 
77
  inference_client = InferenceClient(
78
  model=model_id,
79
  provider=os.getenv("TRACE_FIELD_NOTES_INFERENCE_PROVIDER") or None,
80
+ token=resolved_token,
81
  timeout=float(os.getenv("TRACE_FIELD_NOTES_MODEL_TIMEOUT", "45")),
82
  )
83
  else:
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- gradio>=5.50,<6.0
2
  huggingface_hub>=0.30
 
1
+ gradio[oauth]>=5.50,<6.0
2
  huggingface_hub>=0.30
tests/test_model_runtime.py CHANGED
@@ -4,6 +4,7 @@ import json
4
  import types
5
  import unittest
6
  from pathlib import Path
 
7
 
8
  from analyzer import analyze_trace_file
9
  from model_runtime import PRIMARY_MODEL_ID, parse_model_json, run_model_assist
@@ -69,6 +70,27 @@ class ModelRuntimeTests(unittest.TestCase):
69
  self.assertTrue(result.model_notes)
70
  self.assertIn("Unknown analysis engine", result.model_notes[0])
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  if __name__ == "__main__":
74
  unittest.main()
 
4
  import types
5
  import unittest
6
  from pathlib import Path
7
+ from unittest.mock import patch
8
 
9
  from analyzer import analyze_trace_file
10
  from model_runtime import PRIMARY_MODEL_ID, parse_model_json, run_model_assist
 
70
  self.assertTrue(result.model_notes)
71
  self.assertIn("Unknown analysis engine", result.model_notes[0])
72
 
73
+ def test_analyzer_passes_hf_token_to_model_assist(self) -> None:
74
+ with patch("analyzer.run_model_assist") as run_model_assist:
75
+ run_model_assist.return_value = types.SimpleNamespace(
76
+ model_id=PRIMARY_MODEL_ID,
77
+ memo={
78
+ "executive_memo": "memo",
79
+ "detour_memo": "detour",
80
+ "outcome_audit_memo": "audit",
81
+ "caveats": [],
82
+ },
83
+ note="ok",
84
+ )
85
+ result, _ = analyze_trace_file(
86
+ Path("examples/sample_trace_redacted.jsonl"),
87
+ analysis_engine="nemotron",
88
+ hf_token="hf_test_token",
89
+ )
90
+
91
+ self.assertIn(PRIMARY_MODEL_ID, result.engine)
92
+ self.assertEqual(run_model_assist.call_args.kwargs["token"], "hf_test_token")
93
+
94
 
95
  if __name__ == "__main__":
96
  unittest.main()