Spaces:
Running on Zero
Running on Zero
feat: enable oauth-backed model assist
Browse files
Co-authored-by: Codex <noreply@openai.com>
- README.md +10 -5
- analyzer.py +2 -0
- app.py +13 -1
- model_runtime.py +10 -2
- requirements.txt +1 -1
- tests/test_model_runtime.py +22 -0
README.md
CHANGED
|
@@ -7,6 +7,10 @@ sdk_version: 5.50.0
|
|
| 7 |
app_file: app.py
|
| 8 |
pinned: false
|
| 9 |
license: mit
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
# Trace Field Notes
|
|
@@ -22,7 +26,7 @@ Built for the Build Small Hackathon as a Gradio app. The default engine uses a
|
|
| 22 |
verified deterministic codebook analyzer so the Space can always start and
|
| 23 |
produce a report. The app also exposes explicit small-model assist modes for
|
| 24 |
`nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16` and `Qwen/Qwen3.5-9B` through
|
| 25 |
-
Hugging Face Inference Providers when the
|
| 26 |
|
| 27 |
## Run Locally
|
| 28 |
|
|
@@ -43,13 +47,14 @@ python3.11 -m unittest discover -s tests
|
|
| 43 |
|
| 44 |
- `Deterministic field notes`: default, local, no model dependency.
|
| 45 |
- `Small-model assist: NVIDIA Nemotron 3 Nano 30B-A3B`: uses the hackathon-sized
|
| 46 |
-
30B total-parameter Nemotron model
|
| 47 |
-
|
| 48 |
- `Quick small-model assist: Qwen3.5 9B`: optional lower-latency model-assisted
|
| 49 |
memo.
|
| 50 |
|
| 51 |
-
If a selected model is unavailable
|
| 52 |
-
|
|
|
|
| 53 |
|
| 54 |
## Agent Session Locations
|
| 55 |
|
|
|
|
| 7 |
app_file: app.py
|
| 8 |
pinned: false
|
| 9 |
license: mit
|
| 10 |
+
hf_oauth: true
|
| 11 |
+
hf_oauth_scopes:
|
| 12 |
+
- inference-api
|
| 13 |
+
hf_oauth_expiration_minutes: 480
|
| 14 |
---
|
| 15 |
|
| 16 |
# Trace Field Notes
|
|
|
|
| 26 |
verified deterministic codebook analyzer so the Space can always start and
|
| 27 |
produce a report. The app also exposes explicit small-model assist modes for
|
| 28 |
`nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16` and `Qwen/Qwen3.5-9B` through
|
| 29 |
+
Hugging Face Inference Providers when the user signs in with Hugging Face OAuth.
|
| 30 |
|
| 31 |
## Run Locally
|
| 32 |
|
|
|
|
| 47 |
|
| 48 |
- `Deterministic field notes`: default, local, no model dependency.
|
| 49 |
- `Small-model assist: NVIDIA Nemotron 3 Nano 30B-A3B`: uses the hackathon-sized
|
| 50 |
+
30B total-parameter Nemotron model through the signed-in user's
|
| 51 |
+
`inference-api` OAuth scope.
|
| 52 |
- `Quick small-model assist: Qwen3.5 9B`: optional lower-latency model-assisted
|
| 53 |
memo.
|
| 54 |
|
| 55 |
+
If a selected model is unavailable or the user is not signed in, the report
|
| 56 |
+
records the reason in model notes and returns the deterministic analysis instead
|
| 57 |
+
of failing the whole Space.
|
| 58 |
|
| 59 |
## Agent Session Locations
|
| 60 |
|
analyzer.py
CHANGED
|
@@ -119,6 +119,7 @@ def analyze_trace_file(
|
|
| 119 |
ignore_tool_calls: bool = True,
|
| 120 |
report_style: str = "field_notes",
|
| 121 |
analysis_engine: str = "deterministic",
|
|
|
|
| 122 |
) -> tuple[AnalysisResult, str]:
|
| 123 |
"""Parse, optionally redact, and analyze an uploaded trace file."""
|
| 124 |
|
|
@@ -192,6 +193,7 @@ def analyze_trace_file(
|
|
| 192 |
engine=analysis_engine,
|
| 193 |
result=result,
|
| 194 |
narrative_text=narrative_text,
|
|
|
|
| 195 |
)
|
| 196 |
except Exception as exc:
|
| 197 |
result.model_notes.append(
|
|
|
|
| 119 |
ignore_tool_calls: bool = True,
|
| 120 |
report_style: str = "field_notes",
|
| 121 |
analysis_engine: str = "deterministic",
|
| 122 |
+
hf_token: str | None = None,
|
| 123 |
) -> tuple[AnalysisResult, str]:
|
| 124 |
"""Parse, optionally redact, and analyze an uploaded trace file."""
|
| 125 |
|
|
|
|
| 193 |
engine=analysis_engine,
|
| 194 |
result=result,
|
| 195 |
narrative_text=narrative_text,
|
| 196 |
+
token=hf_token,
|
| 197 |
)
|
| 198 |
except Exception as exc:
|
| 199 |
result.model_notes.append(
|
app.py
CHANGED
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
| 5 |
import json
|
| 6 |
import tempfile
|
| 7 |
from pathlib import Path
|
| 8 |
-
from typing import Any
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
|
|
@@ -101,6 +101,7 @@ def analyze_trace(
|
|
| 101 |
ignore_tool_calls: bool = True,
|
| 102 |
report_style: str = "field_notes",
|
| 103 |
analysis_engine: str = "deterministic",
|
|
|
|
| 104 |
) -> tuple[str, dict[str, Any], str, str, str]:
|
| 105 |
"""Gradio-callable analysis endpoint."""
|
| 106 |
|
|
@@ -116,6 +117,7 @@ def analyze_trace(
|
|
| 116 |
ignore_tool_calls=ignore_tool_calls,
|
| 117 |
report_style=report_style,
|
| 118 |
analysis_engine=analysis_engine,
|
|
|
|
| 119 |
)
|
| 120 |
except TraceParseError as exc:
|
| 121 |
raise gr.Error(str(exc)) from exc
|
|
@@ -205,6 +207,16 @@ with gr.Blocks(
|
|
| 205 |
value="deterministic",
|
| 206 |
label="Analysis engine",
|
| 207 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
analyze_button = gr.Button("Analyze My Trace", variant="primary")
|
| 209 |
with gr.Column(scale=2):
|
| 210 |
gr.Markdown(SESSION_PATHS_MD)
|
|
|
|
| 5 |
import json
|
| 6 |
import tempfile
|
| 7 |
from pathlib import Path
|
| 8 |
+
from typing import Any, Optional
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
|
|
|
|
| 101 |
ignore_tool_calls: bool = True,
|
| 102 |
report_style: str = "field_notes",
|
| 103 |
analysis_engine: str = "deterministic",
|
| 104 |
+
oauth_token: Optional[gr.OAuthToken] = None,
|
| 105 |
) -> tuple[str, dict[str, Any], str, str, str]:
|
| 106 |
"""Gradio-callable analysis endpoint."""
|
| 107 |
|
|
|
|
| 117 |
ignore_tool_calls=ignore_tool_calls,
|
| 118 |
report_style=report_style,
|
| 119 |
analysis_engine=analysis_engine,
|
| 120 |
+
hf_token=oauth_token.token if oauth_token else None,
|
| 121 |
)
|
| 122 |
except TraceParseError as exc:
|
| 123 |
raise gr.Error(str(exc)) from exc
|
|
|
|
| 207 |
value="deterministic",
|
| 208 |
label="Analysis engine",
|
| 209 |
)
|
| 210 |
+
with gr.Row():
|
| 211 |
+
gr.LoginButton(
|
| 212 |
+
value="Sign in for model assist",
|
| 213 |
+
logout_value="Signed in as {}",
|
| 214 |
+
size="sm",
|
| 215 |
+
)
|
| 216 |
+
gr.Markdown(
|
| 217 |
+
"Model-assisted modes use your signed-in Hugging Face OAuth token with the `inference-api` scope. "
|
| 218 |
+
"The deterministic engine does not require sign-in."
|
| 219 |
+
)
|
| 220 |
analyze_button = gr.Button("Analyze My Trace", variant="primary")
|
| 221 |
with gr.Column(scale=2):
|
| 222 |
gr.Markdown(SESSION_PATHS_MD)
|
model_runtime.py
CHANGED
|
@@ -54,6 +54,7 @@ def run_model_assist(
|
|
| 54 |
engine: str,
|
| 55 |
result: AnalysisResult,
|
| 56 |
narrative_text: str,
|
|
|
|
| 57 |
client: ChatClient | None = None,
|
| 58 |
) -> ModelAssistResult:
|
| 59 |
"""Ask the selected small model for a concise memo grounded in visible text."""
|
|
@@ -64,12 +65,19 @@ def run_model_assist(
|
|
| 64 |
|
| 65 |
prompt = build_model_prompt(result, narrative_text)
|
| 66 |
if client is None:
|
| 67 |
-
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
inference_client = InferenceClient(
|
| 70 |
model=model_id,
|
| 71 |
provider=os.getenv("TRACE_FIELD_NOTES_INFERENCE_PROVIDER") or None,
|
| 72 |
-
token=
|
| 73 |
timeout=float(os.getenv("TRACE_FIELD_NOTES_MODEL_TIMEOUT", "45")),
|
| 74 |
)
|
| 75 |
else:
|
|
|
|
| 54 |
engine: str,
|
| 55 |
result: AnalysisResult,
|
| 56 |
narrative_text: str,
|
| 57 |
+
token: str | None = None,
|
| 58 |
client: ChatClient | None = None,
|
| 59 |
) -> ModelAssistResult:
|
| 60 |
"""Ask the selected small model for a concise memo grounded in visible text."""
|
|
|
|
| 65 |
|
| 66 |
prompt = build_model_prompt(result, narrative_text)
|
| 67 |
if client is None:
|
| 68 |
+
from huggingface_hub import InferenceClient, get_token
|
| 69 |
+
|
| 70 |
+
resolved_token = token or os.getenv("HF_TOKEN") or get_token()
|
| 71 |
+
if not resolved_token:
|
| 72 |
+
raise ValueError(
|
| 73 |
+
"Sign in with Hugging Face to enable small-model assist through "
|
| 74 |
+
"the inference-api OAuth scope."
|
| 75 |
+
)
|
| 76 |
|
| 77 |
inference_client = InferenceClient(
|
| 78 |
model=model_id,
|
| 79 |
provider=os.getenv("TRACE_FIELD_NOTES_INFERENCE_PROVIDER") or None,
|
| 80 |
+
token=resolved_token,
|
| 81 |
timeout=float(os.getenv("TRACE_FIELD_NOTES_MODEL_TIMEOUT", "45")),
|
| 82 |
)
|
| 83 |
else:
|
requirements.txt
CHANGED
|
@@ -1,2 +1,2 @@
|
|
| 1 |
-
gradio>=5.50,<6.0
|
| 2 |
huggingface_hub>=0.30
|
|
|
|
| 1 |
+
gradio[oauth]>=5.50,<6.0
|
| 2 |
huggingface_hub>=0.30
|
tests/test_model_runtime.py
CHANGED
|
@@ -4,6 +4,7 @@ import json
|
|
| 4 |
import types
|
| 5 |
import unittest
|
| 6 |
from pathlib import Path
|
|
|
|
| 7 |
|
| 8 |
from analyzer import analyze_trace_file
|
| 9 |
from model_runtime import PRIMARY_MODEL_ID, parse_model_json, run_model_assist
|
|
@@ -69,6 +70,27 @@ class ModelRuntimeTests(unittest.TestCase):
|
|
| 69 |
self.assertTrue(result.model_notes)
|
| 70 |
self.assertIn("Unknown analysis engine", result.model_notes[0])
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
if __name__ == "__main__":
|
| 74 |
unittest.main()
|
|
|
|
| 4 |
import types
|
| 5 |
import unittest
|
| 6 |
from pathlib import Path
|
| 7 |
+
from unittest.mock import patch
|
| 8 |
|
| 9 |
from analyzer import analyze_trace_file
|
| 10 |
from model_runtime import PRIMARY_MODEL_ID, parse_model_json, run_model_assist
|
|
|
|
| 70 |
self.assertTrue(result.model_notes)
|
| 71 |
self.assertIn("Unknown analysis engine", result.model_notes[0])
|
| 72 |
|
| 73 |
+
def test_analyzer_passes_hf_token_to_model_assist(self) -> None:
|
| 74 |
+
with patch("analyzer.run_model_assist") as run_model_assist:
|
| 75 |
+
run_model_assist.return_value = types.SimpleNamespace(
|
| 76 |
+
model_id=PRIMARY_MODEL_ID,
|
| 77 |
+
memo={
|
| 78 |
+
"executive_memo": "memo",
|
| 79 |
+
"detour_memo": "detour",
|
| 80 |
+
"outcome_audit_memo": "audit",
|
| 81 |
+
"caveats": [],
|
| 82 |
+
},
|
| 83 |
+
note="ok",
|
| 84 |
+
)
|
| 85 |
+
result, _ = analyze_trace_file(
|
| 86 |
+
Path("examples/sample_trace_redacted.jsonl"),
|
| 87 |
+
analysis_engine="nemotron",
|
| 88 |
+
hf_token="hf_test_token",
|
| 89 |
+
)
|
| 90 |
+
|
| 91 |
+
self.assertIn(PRIMARY_MODEL_ID, result.engine)
|
| 92 |
+
self.assertEqual(run_model_assist.call_args.kwargs["token"], "hf_test_token")
|
| 93 |
+
|
| 94 |
|
| 95 |
if __name__ == "__main__":
|
| 96 |
unittest.main()
|