Spaces:

build-small-hackathon
/

trace-field-notes

Running on Zero

App Files Files Community

JacobLinCool Codex committed 26 days ago

Commit

f4e9a2f

verified ·

1 Parent(s): c8055f7

feat: enable oauth-backed model assist

Browse files

Co-authored-by: Codex <noreply@openai.com>

Files changed (6) hide show

README.md +10 -5
analyzer.py +2 -0
app.py +13 -1
model_runtime.py +10 -2
requirements.txt +1 -1
tests/test_model_runtime.py +22 -0

README.md CHANGED Viewed

@@ -7,6 +7,10 @@ sdk_version: 5.50.0
 app_file: app.py
 pinned: false
 license: mit
 ---
 # Trace Field Notes
@@ -22,7 +26,7 @@ Built for the Build Small Hackathon as a Gradio app. The default engine uses a
 verified deterministic codebook analyzer so the Space can always start and
 produce a report. The app also exposes explicit small-model assist modes for
 `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16` and `Qwen/Qwen3.5-9B` through
-Hugging Face Inference Providers when the runtime has provider access.
 ## Run Locally
@@ -43,13 +47,14 @@ python3.11 -m unittest discover -s tests
 - `Deterministic field notes`: default, local, no model dependency.
 - `Small-model assist: NVIDIA Nemotron 3 Nano 30B-A3B`: uses the hackathon-sized
-  30B total-parameter Nemotron model when Hugging Face Inference Providers can
-  serve it.
 - `Quick small-model assist: Qwen3.5 9B`: optional lower-latency model-assisted
   memo.
-If a selected model is unavailable, the report records the error in model notes
-and returns the deterministic analysis instead of failing the whole Space.
 ## Agent Session Locations

 app_file: app.py
 pinned: false
 license: mit
+hf_oauth: true
+hf_oauth_scopes:
+  - inference-api
+hf_oauth_expiration_minutes: 480
 ---
 # Trace Field Notes
 verified deterministic codebook analyzer so the Space can always start and
 produce a report. The app also exposes explicit small-model assist modes for
 `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16` and `Qwen/Qwen3.5-9B` through
+Hugging Face Inference Providers when the user signs in with Hugging Face OAuth.
 ## Run Locally
 - `Deterministic field notes`: default, local, no model dependency.
 - `Small-model assist: NVIDIA Nemotron 3 Nano 30B-A3B`: uses the hackathon-sized
+  30B total-parameter Nemotron model through the signed-in user's
+  `inference-api` OAuth scope.
 - `Quick small-model assist: Qwen3.5 9B`: optional lower-latency model-assisted
   memo.
+If a selected model is unavailable or the user is not signed in, the report
+records the reason in model notes and returns the deterministic analysis instead
+of failing the whole Space.
 ## Agent Session Locations

analyzer.py CHANGED Viewed

@@ -119,6 +119,7 @@ def analyze_trace_file(
     ignore_tool_calls: bool = True,
     report_style: str = "field_notes",
     analysis_engine: str = "deterministic",
 ) -> tuple[AnalysisResult, str]:
     """Parse, optionally redact, and analyze an uploaded trace file."""
@@ -192,6 +193,7 @@ def analyze_trace_file(
                     engine=analysis_engine,
                     result=result,
                     narrative_text=narrative_text,
                 )
             except Exception as exc:
                 result.model_notes.append(

     ignore_tool_calls: bool = True,
     report_style: str = "field_notes",
     analysis_engine: str = "deterministic",
+    hf_token: str | None = None,
 ) -> tuple[AnalysisResult, str]:
     """Parse, optionally redact, and analyze an uploaded trace file."""
                     engine=analysis_engine,
                     result=result,
                     narrative_text=narrative_text,
+                    token=hf_token,
                 )
             except Exception as exc:
                 result.model_notes.append(

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ from __future__ import annotations
 import json
 import tempfile
 from pathlib import Path
-from typing import Any
 import gradio as gr
@@ -101,6 +101,7 @@ def analyze_trace(
     ignore_tool_calls: bool = True,
     report_style: str = "field_notes",
     analysis_engine: str = "deterministic",
 ) -> tuple[str, dict[str, Any], str, str, str]:
     """Gradio-callable analysis endpoint."""
@@ -116,6 +117,7 @@ def analyze_trace(
             ignore_tool_calls=ignore_tool_calls,
             report_style=report_style,
             analysis_engine=analysis_engine,
         )
     except TraceParseError as exc:
         raise gr.Error(str(exc)) from exc
@@ -205,6 +207,16 @@ with gr.Blocks(
                 value="deterministic",
                 label="Analysis engine",
             )
             analyze_button = gr.Button("Analyze My Trace", variant="primary")
         with gr.Column(scale=2):
             gr.Markdown(SESSION_PATHS_MD)

 import json
 import tempfile
 from pathlib import Path
+from typing import Any, Optional
 import gradio as gr
     ignore_tool_calls: bool = True,
     report_style: str = "field_notes",
     analysis_engine: str = "deterministic",
+    oauth_token: Optional[gr.OAuthToken] = None,
 ) -> tuple[str, dict[str, Any], str, str, str]:
     """Gradio-callable analysis endpoint."""
             ignore_tool_calls=ignore_tool_calls,
             report_style=report_style,
             analysis_engine=analysis_engine,
+            hf_token=oauth_token.token if oauth_token else None,
         )
     except TraceParseError as exc:
         raise gr.Error(str(exc)) from exc
                 value="deterministic",
                 label="Analysis engine",
             )
+            with gr.Row():
+                gr.LoginButton(
+                    value="Sign in for model assist",
+                    logout_value="Signed in as {}",
+                    size="sm",
+                )
+            gr.Markdown(
+                "Model-assisted modes use your signed-in Hugging Face OAuth token with the `inference-api` scope. "
+                "The deterministic engine does not require sign-in."
+            )
             analyze_button = gr.Button("Analyze My Trace", variant="primary")
         with gr.Column(scale=2):
             gr.Markdown(SESSION_PATHS_MD)

model_runtime.py CHANGED Viewed

@@ -54,6 +54,7 @@ def run_model_assist(
     engine: str,
     result: AnalysisResult,
     narrative_text: str,
     client: ChatClient | None = None,
 ) -> ModelAssistResult:
     """Ask the selected small model for a concise memo grounded in visible text."""
@@ -64,12 +65,19 @@ def run_model_assist(
     prompt = build_model_prompt(result, narrative_text)
     if client is None:
-        from huggingface_hub import InferenceClient
         inference_client = InferenceClient(
             model=model_id,
             provider=os.getenv("TRACE_FIELD_NOTES_INFERENCE_PROVIDER") or None,
-            token=os.getenv("HF_TOKEN") or None,
             timeout=float(os.getenv("TRACE_FIELD_NOTES_MODEL_TIMEOUT", "45")),
         )
     else:

     engine: str,
     result: AnalysisResult,
     narrative_text: str,
+    token: str | None = None,
     client: ChatClient | None = None,
 ) -> ModelAssistResult:
     """Ask the selected small model for a concise memo grounded in visible text."""
     prompt = build_model_prompt(result, narrative_text)
     if client is None:
+        from huggingface_hub import InferenceClient, get_token
+        resolved_token = token or os.getenv("HF_TOKEN") or get_token()
+        if not resolved_token:
+            raise ValueError(
+                "Sign in with Hugging Face to enable small-model assist through "
+                "the inference-api OAuth scope."
+            )
         inference_client = InferenceClient(
             model=model_id,
             provider=os.getenv("TRACE_FIELD_NOTES_INFERENCE_PROVIDER") or None,
+            token=resolved_token,
             timeout=float(os.getenv("TRACE_FIELD_NOTES_MODEL_TIMEOUT", "45")),
         )
     else:

requirements.txt CHANGED Viewed

	@@ -1,2 +1,2 @@
1	- gradio>=5.50,<6.0
2	huggingface_hub>=0.30


1	+ gradio[oauth]>=5.50,<6.0
2	huggingface_hub>=0.30

tests/test_model_runtime.py CHANGED Viewed

@@ -4,6 +4,7 @@ import json
 import types
 import unittest
 from pathlib import Path
 from analyzer import analyze_trace_file
 from model_runtime import PRIMARY_MODEL_ID, parse_model_json, run_model_assist
@@ -69,6 +70,27 @@ class ModelRuntimeTests(unittest.TestCase):
         self.assertTrue(result.model_notes)
         self.assertIn("Unknown analysis engine", result.model_notes[0])
 if __name__ == "__main__":
     unittest.main()

 import types
 import unittest
 from pathlib import Path
+from unittest.mock import patch
 from analyzer import analyze_trace_file
 from model_runtime import PRIMARY_MODEL_ID, parse_model_json, run_model_assist
         self.assertTrue(result.model_notes)
         self.assertIn("Unknown analysis engine", result.model_notes[0])
+    def test_analyzer_passes_hf_token_to_model_assist(self) -> None:
+        with patch("analyzer.run_model_assist") as run_model_assist:
+            run_model_assist.return_value = types.SimpleNamespace(
+                model_id=PRIMARY_MODEL_ID,
+                memo={
+                    "executive_memo": "memo",
+                    "detour_memo": "detour",
+                    "outcome_audit_memo": "audit",
+                    "caveats": [],
+                },
+                note="ok",
+            )
+            result, _ = analyze_trace_file(
+                Path("examples/sample_trace_redacted.jsonl"),
+                analysis_engine="nemotron",
+                hf_token="hf_test_token",
+            )
+        self.assertIn(PRIMARY_MODEL_ID, result.engine)
+        self.assertEqual(run_model_assist.call_args.kwargs["token"], "hf_test_token")
 if __name__ == "__main__":
     unittest.main()