Spaces:
Running on Zero
Running on Zero
Update hackathon submission docs and app files
Browse files
- .env.example +9 -0
- .gitignore +4 -0
- README.md +137 -91
- docs/article.md +116 -0
- docs/social-post.md +27 -0
- docs/submission-notes.md +55 -0
- examples/sample_trace_redacted.jsonl +1 -1
- frontend/static/data.js +1 -2
- tests/test_redaction.py +3 -2
.env.example
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Optional local settings for Trace Field Notes.
|
| 2 |
+
# No secrets are required for the default local run.
|
| 3 |
+
|
| 4 |
+
# Local Gradio port. Hugging Face Spaces sets PORT itself.
|
| 5 |
+
PORT=7860
|
| 6 |
+
|
| 7 |
+
# Server log level: DEBUG, INFO, WARNING, ERROR.
|
| 8 |
+
TFN_LOG_LEVEL=INFO
|
| 9 |
+
|
.gitignore
CHANGED
|
@@ -182,3 +182,7 @@ cython_debug/
|
|
| 182 |
|
| 183 |
# macOS
|
| 184 |
.DS_Store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
# macOS
|
| 184 |
.DS_Store
|
| 185 |
+
|
| 186 |
+
# Generated demo-video working assets. The final demo video is uploaded to the
|
| 187 |
+
# Hugging Face Space as a public asset instead of being committed to GitHub.
|
| 188 |
+
demo_video/
|
README.md
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
---
|
| 2 |
title: Trace Field Notes
|
|
|
|
| 3 |
colorFrom: green
|
| 4 |
colorTo: gray
|
| 5 |
sdk: gradio
|
|
@@ -7,122 +8,167 @@ sdk_version: 6.16.0
|
|
| 7 |
app_file: app.py
|
| 8 |
pinned: false
|
| 9 |
license: mit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
# Trace Field Notes
|
| 13 |
|
| 14 |
-
Trace Field Notes turns coding-agent session logs into qualitative field
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
```bash
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
pip install -r requirements.txt
|
| 51 |
-
python app.py
|
| 52 |
-
```
|
| 53 |
|
| 54 |
-
#
|
|
|
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
```
|
| 59 |
|
| 60 |
-
##
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
- `Rule-based — instant, no model`: local codebook analyzer, no model or GPU.
|
| 66 |
|
| 67 |
-
|
| 68 |
-
in model notes and returns the deterministic analysis instead of failing the
|
| 69 |
-
whole Space.
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
`
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
-
|
|
|
|
| 80 |
|
| 81 |
-
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
slower, but it still works when ZeroGPU quota is exhausted. The frontend exposes
|
| 86 |
-
this as a **Run on** choice so users without quota can still use the app.
|
| 87 |
|
| 88 |
-
|
| 89 |
-
locally for development; on a Mac the small models run on MPS, and the
|
| 90 |
-
deterministic engine needs no model at all. Because of the slower paths, the
|
| 91 |
-
frontend streams real progress — current stage, % complete, messages processed,
|
| 92 |
-
elapsed time, and a best-effort ETA — so a long run never looks stuck.
|
| 93 |
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
GPU/MPS memory). Set the level with `TFN_LOG_LEVEL` (default `INFO`; use `DEBUG`
|
| 100 |
-
for per-stage detail). Example summary line:
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
```bash
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
|
| 115 |
-
ls ~/.claude/projects
|
| 116 |
|
| 117 |
-
|
| 118 |
-
|
| 119 |
```
|
| 120 |
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
-
Agent traces can contain prompts, tool inputs, command outputs, local file paths,
|
| 124 |
-
screenshots, secrets, private source code, and personal data. Review and redact
|
| 125 |
-
before uploading or sharing publicly. Redaction defaults to regex patterns plus a
|
| 126 |
-
model pass (`openai/privacy-filter`) that flags names, contacts, and other
|
| 127 |
-
personal data on the Space GPU; the regex pass is the always-available fallback
|
| 128 |
-
when the model is not loaded. The app exports only a redacted narrative text file.
|
|
|
|
| 1 |
---
|
| 2 |
title: Trace Field Notes
|
| 3 |
+
emoji: 🧭
|
| 4 |
colorFrom: green
|
| 5 |
colorTo: gray
|
| 6 |
sdk: gradio
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
+
short_description: Qualitative field reports for coding-agent session traces.
|
| 12 |
+
tags:
|
| 13 |
+
- build-small
|
| 14 |
+
- backyard-ai
|
| 15 |
+
- best-demo
|
| 16 |
+
- off-brand
|
| 17 |
+
- best-use-of-codex
|
| 18 |
+
- best-minicpm-build
|
| 19 |
+
- nemotron-hardware-prize
|
| 20 |
+
- gradio-server
|
| 21 |
+
- zerogpu
|
| 22 |
+
- coding-agents
|
| 23 |
+
models:
|
| 24 |
+
- openbmb/MiniCPM5-1B
|
| 25 |
+
- nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
|
| 26 |
+
- openai/privacy-filter
|
| 27 |
---
|
| 28 |
|
| 29 |
# Trace Field Notes
|
| 30 |
|
| 31 |
+
Trace Field Notes turns long coding-agent session logs into qualitative field
|
| 32 |
+
reports: where the agent got stuck, how it detoured, what it tried, how it
|
| 33 |
+
recovered, and whether its final claim matched its own evidence.
|
| 34 |
+
|
| 35 |
+
Most agent traces are too long to read after the fact. Tool telemetry is noisy,
|
| 36 |
+
private, and often the wrong level of detail. This app focuses on a narrower
|
| 37 |
+
question: what did the agent *say* about its own work while it was solving a
|
| 38 |
+
task? The answer becomes a field notebook, not a benchmark.
|
| 39 |
+
|
| 40 |
+
## Links
|
| 41 |
+
|
| 42 |
+
- Live Space: https://huggingface.co/spaces/build-small-hackathon/trace-field-notes
|
| 43 |
+
- App runtime: https://build-small-hackathon-trace-field-notes.hf.space/
|
| 44 |
+
- GitHub: https://github.com/JacobLinCool/trace-field-notes
|
| 45 |
+
- Demo video: https://huggingface.co/spaces/build-small-hackathon/trace-field-notes/resolve/main/assets/trace-field-notes-demo.mp4
|
| 46 |
+
- Article draft: [`docs/article.md`](docs/article.md)
|
| 47 |
+
- Social post draft: [`docs/social-post.md`](docs/social-post.md)
|
| 48 |
+
- Public social post: **pending manual publish**. After publishing, replace this
|
| 49 |
+
line with the post URL before final submission.
|
| 50 |
+
|
| 51 |
+
## Who it is for
|
| 52 |
+
|
| 53 |
+
Trace Field Notes is for developers, researchers, and hackathon builders who use
|
| 54 |
+
Codex, Claude Code, Pi Agent, or similar coding agents and want to understand
|
| 55 |
+
the session narrative after the code is written:
|
| 56 |
+
|
| 57 |
+
- Was the agent blocked, or just exploring?
|
| 58 |
+
- Did it change strategy for a good reason?
|
| 59 |
+
- Did a detour produce a better route?
|
| 60 |
+
- Did the closeout claim overstate what was verified?
|
| 61 |
+
- What can the next run learn from this one?
|
| 62 |
+
|
| 63 |
+
The app does **not** claim to inspect hidden reasoning or prove that the final
|
| 64 |
+
code is correct. It reports the visible narrative the agent wrote.
|
| 65 |
+
|
| 66 |
+
## How to use it
|
| 67 |
+
|
| 68 |
+
1. Find a local coding-agent session log.
|
| 69 |
+
2. Review and redact anything sensitive before upload.
|
| 70 |
+
3. Upload `.jsonl`, `.json`, `.txt`, or `.log`.
|
| 71 |
+
4. Choose the analysis engine:
|
| 72 |
+
- **Quick analysis**: `openbmb/MiniCPM5-1B`
|
| 73 |
+
- **Deeper analysis**: `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16`
|
| 74 |
+
- **Rule-based**: deterministic codebook, no model
|
| 75 |
+
5. Choose **GPU** for the Hugging Face ZeroGPU path or **CPU** for a no-quota
|
| 76 |
+
run.
|
| 77 |
+
6. Read the report: verdict, trail map, episode detail, terrain groups, detour
|
| 78 |
+
analysis, closeout audit, and redacted narrative export.
|
| 79 |
+
|
| 80 |
+
Common local trace locations:
|
| 81 |
|
| 82 |
```bash
|
| 83 |
+
# Codex
|
| 84 |
+
ls ~/.codex/sessions
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
+
# Claude Code
|
| 87 |
+
ls ~/.claude/projects
|
| 88 |
|
| 89 |
+
# Pi Agent
|
| 90 |
+
ls ~/.pi/agent/sessions
|
| 91 |
```
|
| 92 |
|
| 93 |
+
## Technology
|
| 94 |
|
| 95 |
+
The frontend is a custom React field-notebook UI served through `gradio.Server`.
|
| 96 |
+
It deliberately avoids the default Gradio component look so the report feels
|
| 97 |
+
like a qualitative trail map rather than a form.
|
|
|
|
| 98 |
|
| 99 |
+
The backend pipeline is:
|
|
|
|
|
|
|
| 100 |
|
| 101 |
+
1. `parser.py` loads Codex, Claude Code, Pi Agent, JSONL, JSON, text, and log
|
| 102 |
+
files into visible narrative messages.
|
| 103 |
+
2. `redaction.py` applies deterministic secret and PII patterns.
|
| 104 |
+
3. `privacy_filter.py` optionally adds an `openai/privacy-filter` model pass on the Space GPU.
|
| 105 |
+
4. `analyzer.py` identifies difficulty episodes and classifies them with a
|
| 106 |
+
deterministic codebook.
|
| 107 |
+
5. `model_runtime.py` optionally asks MiniCPM5 1B or Nemotron 3 Nano 30B-A3B to
|
| 108 |
+
rewrite the analysis into a richer structured field report.
|
| 109 |
+
6. `view_model.py` adapts the result into the JSON shape rendered by the UI.
|
| 110 |
+
7. `profiling.py` logs per-stage timing and resource snapshots to server logs.
|
| 111 |
|
| 112 |
+
The app streams real progress events so long runs do not look frozen: upload,
|
| 113 |
+
extract, redact, chart, classify, synthesize, and model analysis.
|
| 114 |
|
| 115 |
+
## Build Small fit
|
| 116 |
|
| 117 |
+
Trace Field Notes targets the **Backyard AI** track: it solves a specific,
|
| 118 |
+
practical problem for people already using coding agents.
|
|
|
|
|
|
|
| 119 |
|
| 120 |
+
It also targets these Build Small prizes / badges:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
+
- **Best Use of Codex**: Codex helped develop, debug, package, document, and
|
| 123 |
+
produce the demo video. The connected GitHub history includes Codex-attributed
|
| 124 |
+
commits.
|
| 125 |
+
- **Best MiniCPM Build**: Quick analysis uses `openbmb/MiniCPM5-1B`.
|
| 126 |
+
- **Nemotron Hardware Prize**: Deeper analysis uses
|
| 127 |
+
`nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16`.
|
| 128 |
+
- **Off Brand**: the app uses `gradio.Server` with a custom React trail-map UI,
|
| 129 |
+
not stock Gradio blocks.
|
| 130 |
+
- **Best Demo**: the submission includes a polished demo video and ready-to-post
|
| 131 |
+
article/social drafts.
|
| 132 |
|
| 133 |
+
It does **not** target Tiny Titan because the optional Nemotron path is 30B, and
|
| 134 |
+
it does **not** target Best Use of Modal because the runtime is Hugging Face
|
| 135 |
+
ZeroGPU / CPU, not Modal.
|
|
|
|
|
|
|
| 136 |
|
| 137 |
+
## Privacy posture
|
| 138 |
+
|
| 139 |
+
Agent traces can include prompts, tool inputs, command output, local paths,
|
| 140 |
+
screenshots, secrets, private source code, and personal data. Review and redact
|
| 141 |
+
before uploading or sharing.
|
| 142 |
+
|
| 143 |
+
By default, Trace Field Notes:
|
| 144 |
|
| 145 |
+
- ignores raw tool-call contents;
|
| 146 |
+
- analyzes only visible assistant narrative messages plus optional user context;
|
| 147 |
+
- runs deterministic secret redaction;
|
| 148 |
+
- can run `openai/privacy-filter` for a second PII pass;
|
| 149 |
+
- exports only redacted narrative text.
|
| 150 |
+
|
| 151 |
+
## Local development
|
| 152 |
|
| 153 |
```bash
|
| 154 |
+
python3.11 -m venv .venv
|
| 155 |
+
source .venv/bin/activate
|
| 156 |
+
pip install -r requirements.txt
|
| 157 |
+
python app.py
|
| 158 |
+
```
|
| 159 |
|
| 160 |
+
Run tests:
|
|
|
|
| 161 |
|
| 162 |
+
```bash
|
| 163 |
+
python3.11 -m unittest discover -s tests
|
| 164 |
```
|
| 165 |
|
| 166 |
+
Optional environment settings are listed in [`.env.example`](.env.example).
|
| 167 |
+
|
| 168 |
+
## Codex contribution
|
| 169 |
+
|
| 170 |
+
Codex assisted with repository inspection, implementation debugging, test
|
| 171 |
+
verification, privacy/README hardening, Hugging Face deployment preparation,
|
| 172 |
+
demo-video scripting, voiceover generation, video composition, frame/ASR
|
| 173 |
+
verification, and hackathon submission packaging.
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/article.md
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Trace Field Notes: a field notebook for coding-agent sessions
|
| 2 |
+
|
| 3 |
+
Demo Space: https://huggingface.co/spaces/build-small-hackathon/trace-field-notes
|
| 4 |
+
Demo video: https://huggingface.co/spaces/build-small-hackathon/trace-field-notes/resolve/main/assets/trace-field-notes-demo.mp4
|
| 5 |
+
GitHub: https://github.com/JacobLinCool/trace-field-notes
|
| 6 |
+
|
| 7 |
+
## The problem
|
| 8 |
+
|
| 9 |
+
Coding-agent sessions are getting longer. A serious Codex or Claude Code run can
|
| 10 |
+
include planning, shell commands, failed tests, patches, retries, summaries,
|
| 11 |
+
caveats, and a confident final message. After the run, the code diff tells you
|
| 12 |
+
what changed, but it does not explain the route the agent took.
|
| 13 |
+
|
| 14 |
+
That route matters. Did the agent understand the task? Did it get blocked? Did it
|
| 15 |
+
notice when its first hypothesis was wrong? Did it take a productive detour, or
|
| 16 |
+
just wander? Did its final success claim match what it had actually verified?
|
| 17 |
+
|
| 18 |
+
Trace Field Notes is built around that narrow but real problem: make coding-agent
|
| 19 |
+
sessions readable after the fact.
|
| 20 |
+
|
| 21 |
+
## The idea
|
| 22 |
+
|
| 23 |
+
Instead of treating a trace as raw telemetry, Trace Field Notes treats it like
|
| 24 |
+
qualitative field data. It reads the visible narrative messages the agent wrote:
|
| 25 |
+
what it planned, where it got stuck, how it rerouted, what it tried, and how it
|
| 26 |
+
closed.
|
| 27 |
+
|
| 28 |
+
The result is not a leaderboard or correctness oracle. It is a field report:
|
| 29 |
+
|
| 30 |
+
- a session verdict;
|
| 31 |
+
- a trail map of difficulty episodes;
|
| 32 |
+
- per-episode intention, difficulty, reroute, evidence, and analyst memo;
|
| 33 |
+
- terrain groups showing recurring difficulty types;
|
| 34 |
+
- a detour read separating exploration from wandering;
|
| 35 |
+
- a closeout audit comparing the final claim to the agent's own evidence;
|
| 36 |
+
- a redacted narrative export.
|
| 37 |
+
|
| 38 |
+
## The experience
|
| 39 |
+
|
| 40 |
+
The first screen is the actual tool, not a landing page. You upload a Codex,
|
| 41 |
+
Claude Code, or Pi Agent log, choose whether to include user context, keep
|
| 42 |
+
redaction on, and select an engine:
|
| 43 |
+
|
| 44 |
+
- Quick analysis with `openbmb/MiniCPM5-1B`
|
| 45 |
+
- Deeper analysis with `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16`
|
| 46 |
+
- Rule-based analysis with no model
|
| 47 |
+
|
| 48 |
+
The app streams progress through the real pipeline stages, then opens the field
|
| 49 |
+
report. The custom React UI is intentionally notebook-like: quiet, dense,
|
| 50 |
+
scan-friendly, and centered on the trail map rather than a chat transcript.
|
| 51 |
+
|
| 52 |
+
## How it works
|
| 53 |
+
|
| 54 |
+
Trace Field Notes is a Gradio Space, but the UI is not built from stock Gradio
|
| 55 |
+
blocks. `app.py` uses `gradio.Server` to serve a custom React frontend and expose
|
| 56 |
+
an `analyze_trace` endpoint compatible with `@gradio/client`.
|
| 57 |
+
|
| 58 |
+
The backend pipeline is small and explicit:
|
| 59 |
+
|
| 60 |
+
1. `parser.py` normalizes Codex, Claude Code, Pi Agent, JSONL, JSON, log, and text
|
| 61 |
+
files into visible narrative messages.
|
| 62 |
+
2. `redaction.py` masks likely secrets and private data with deterministic
|
| 63 |
+
patterns.
|
| 64 |
+
3. `privacy_filter.py` can add a second model pass with `openai/privacy-filter`.
|
| 65 |
+
4. `analyzer.py` charts difficulty episodes and classifies them against a
|
| 66 |
+
codebook.
|
| 67 |
+
5. `model_runtime.py` can ask MiniCPM5 1B or Nemotron 3 Nano 30B-A3B to write a
|
| 68 |
+
richer structured analysis.
|
| 69 |
+
6. `view_model.py` packages the verdict, trail map, sections, and export text for
|
| 70 |
+
the frontend.
|
| 71 |
+
|
| 72 |
+
The small-model paths run under Hugging Face ZeroGPU when GPU mode is selected.
|
| 73 |
+
CPU mode remains available for no-quota runs, and the deterministic analyzer is
|
| 74 |
+
tested independently.
|
| 75 |
+
|
| 76 |
+
## Why it fits Build Small
|
| 77 |
+
|
| 78 |
+
This is a Backyard AI project: it solves a specific problem for a specific group
|
| 79 |
+
of people, using small enough models and a focused interface. It is also a good
|
| 80 |
+
fit for several Build Small quests:
|
| 81 |
+
|
| 82 |
+
- Best Use of Codex: Codex helped build, debug, document, package, and demo the
|
| 83 |
+
project, with Codex-attributed commits in the connected GitHub repo.
|
| 84 |
+
- Best MiniCPM Build: the quick analysis path uses MiniCPM5 1B.
|
| 85 |
+
- Nemotron Hardware Prize: the deeper analysis path uses Nemotron 3 Nano
|
| 86 |
+
30B-A3B.
|
| 87 |
+
- Off Brand: the app uses a custom React trail-map interface through
|
| 88 |
+
`gradio.Server`.
|
| 89 |
+
- Best Demo: the submission includes a polished narrated demo and social post
|
| 90 |
+
draft.
|
| 91 |
+
|
| 92 |
+
## Challenges
|
| 93 |
+
|
| 94 |
+
The hardest part was defining the right unit of analysis. A tool call is too
|
| 95 |
+
low-level. A full trace is too broad. The useful unit became a "difficulty
|
| 96 |
+
episode": the span where the agent intended to do something, encountered a
|
| 97 |
+
problem, appraised it, rerouted, attempted a resolution, and made an outcome
|
| 98 |
+
claim.
|
| 99 |
+
|
| 100 |
+
Another challenge was privacy. Agent traces can contain secrets, paths, user
|
| 101 |
+
prompts, screenshots, and private code. The app therefore ignores raw tool
|
| 102 |
+
contents by default, redacts before analysis, and frames its output as a report
|
| 103 |
+
on visible narrative rather than hidden reasoning.
|
| 104 |
+
|
| 105 |
+
## Codex's role
|
| 106 |
+
|
| 107 |
+
Codex was used throughout the project: inspecting the repository, implementing
|
| 108 |
+
backend and frontend changes, debugging model/runtime behavior, writing tests,
|
| 109 |
+
checking privacy handling, preparing hackathon documentation, generating the demo
|
| 110 |
+
storyboard, recording app footage, composing the video, and validating the final
|
| 111 |
+
output with frames and ASR.
|
| 112 |
+
|
| 113 |
+
That is part of the story: Trace Field Notes is an app about understanding coding
|
| 114 |
+
agents, built with help from a coding agent, and submitted with an audit trail on
|
| 115 |
+
GitHub.
|
| 116 |
+
|
docs/social-post.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Social post draft
|
| 2 |
+
|
| 3 |
+
> Replace the URLs if you publish on a platform that shortens or rewrites links.
|
| 4 |
+
|
| 5 |
+
I built **Trace Field Notes** for the Build Small Hackathon: a Gradio app that
|
| 6 |
+
turns long coding-agent session logs into readable qualitative field reports.
|
| 7 |
+
|
| 8 |
+
Instead of drowning in tool-call telemetry, upload a Codex, Claude Code, or Pi
|
| 9 |
+
Agent trace and see:
|
| 10 |
+
|
| 11 |
+
- where the agent got stuck
|
| 12 |
+
- what detours it took
|
| 13 |
+
- how it recovered
|
| 14 |
+
- whether its final success claim matched its own evidence
|
| 15 |
+
|
| 16 |
+
It uses a custom React UI served through `gradio.Server`, a deterministic
|
| 17 |
+
codebook analyzer, optional MiniCPM5 1B quick analysis, optional Nemotron 3 Nano
|
| 18 |
+
30B-A3B deeper analysis, and privacy redaction before analysis.
|
| 19 |
+
|
| 20 |
+
Codex helped build, debug, document, package, and demo the project.
|
| 21 |
+
|
| 22 |
+
Demo Space: https://huggingface.co/spaces/build-small-hackathon/trace-field-notes
|
| 23 |
+
Demo video: https://huggingface.co/spaces/build-small-hackathon/trace-field-notes/resolve/main/assets/trace-field-notes-demo.mp4
|
| 24 |
+
GitHub: https://github.com/JacobLinCool/trace-field-notes
|
| 25 |
+
|
| 26 |
+
#BuildSmall #Gradio #HuggingFace #Codex #MiniCPM #Nemotron #OpenSource
|
| 27 |
+
|
docs/submission-notes.md
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Build Small submission notes
|
| 2 |
+
|
| 3 |
+
## Project
|
| 4 |
+
|
| 5 |
+
- Name: Trace Field Notes
|
| 6 |
+
- Track: Backyard AI
|
| 7 |
+
- Space: https://huggingface.co/spaces/build-small-hackathon/trace-field-notes
|
| 8 |
+
- Runtime: https://build-small-hackathon-trace-field-notes.hf.space/
|
| 9 |
+
- GitHub: https://github.com/JacobLinCool/trace-field-notes
|
| 10 |
+
- Demo video: https://huggingface.co/spaces/build-small-hackathon/trace-field-notes/resolve/main/assets/trace-field-notes-demo.mp4
|
| 11 |
+
- Social post: pending manual publish; draft in `docs/social-post.md`
|
| 12 |
+
|
| 13 |
+
## Official pre-flight checklist
|
| 14 |
+
|
| 15 |
+
- [x] Every model is under 32B total parameters:
|
| 16 |
+
- `openbmb/MiniCPM5-1B`
|
| 17 |
+
- `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16`
|
| 18 |
+
- `openai/privacy-filter`
|
| 19 |
+
- [x] Gradio app deployed as a Space in `build-small-hackathon`.
|
| 20 |
+
- [x] Demo video produced and prepared for public hosting on the Space.
|
| 21 |
+
- [ ] Social-media post published and linked from README.
|
| 22 |
+
- [x] ZeroGPU usage is limited to one Space for this app.
|
| 23 |
+
- [x] README frontmatter includes track / quest tags and model metadata.
|
| 24 |
+
|
| 25 |
+
## Quest / challenge eligibility
|
| 26 |
+
|
| 27 |
+
- Backyard AI: eligible. The app solves a concrete workflow problem for coding
|
| 28 |
+
agent users.
|
| 29 |
+
- Best Use of Codex: eligible. Codex helped build, package, document, demo, and
|
| 30 |
+
verify the project; GitHub commits include Codex co-author trailers.
|
| 31 |
+
- Best MiniCPM Build: eligible. Quick analysis uses `openbmb/MiniCPM5-1B`.
|
| 32 |
+
- Nemotron Hardware Prize: eligible. Deeper analysis uses
|
| 33 |
+
`nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16`.
|
| 34 |
+
- Off Brand: eligible. The UI is a custom React field-notebook/trail-map
|
| 35 |
+
experience served through `gradio.Server`, not stock Gradio components.
|
| 36 |
+
- Best Demo: eligible after the public social post is published. The demo video
|
| 37 |
+
and social draft are prepared.
|
| 38 |
+
- Tiny Titan: not eligible because the project offers an optional 30B Nemotron
|
| 39 |
+
path.
|
| 40 |
+
- Best Use of Modal: not eligible; the runtime is Hugging Face ZeroGPU / CPU.
|
| 41 |
+
- Best Agent: not targeted. The app is agent-callable, but the app itself is not
|
| 42 |
+
a multi-step autonomous tool-use agent.
|
| 43 |
+
|
| 44 |
+
## Submission-form summary
|
| 45 |
+
|
| 46 |
+
Trace Field Notes is a Build Small Backyard AI app for people using coding
|
| 47 |
+
agents. Upload a Codex, Claude Code, or Pi Agent session log and the app turns
|
| 48 |
+
the agent's visible narrative messages into a qualitative field report: where it
|
| 49 |
+
got stuck, how it detoured, how it recovered, and whether its final claim matched
|
| 50 |
+
its own evidence. It uses a custom React UI served by `gradio.Server`, a
|
| 51 |
+
deterministic codebook analyzer, optional MiniCPM5 1B quick analysis, optional
|
| 52 |
+
Nemotron 3 Nano 30B-A3B deeper analysis, and privacy redaction before analysis.
|
| 53 |
+
Codex helped build, debug, document, package for deployment, and produce the narrated
|
| 54 |
+
demo video, with Codex-attributed commits in the connected GitHub repository.
|
| 55 |
+
|
examples/sample_trace_redacted.jsonl
CHANGED
|
@@ -2,4 +2,4 @@
|
|
| 2 |
{"timestamp":"2026-06-06T10:00:20Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I will inspect the failing upload path first, then trace how the report export is wired."}]}}
|
| 3 |
{"timestamp":"2026-06-06T10:01:14Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"The issue is not where I expected. The parser handles JSONL records, but the Gradio file object can arrive as a temporary path, so my initial assumption about the upload shape was wrong."}]}}
|
| 4 |
{"timestamp":"2026-06-06T10:02:30Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"Instead of changing the parser, I will narrow the fix to the upload boundary and add a small helper that normalizes filepath, name, or path attributes."}]}}
|
| 5 |
-
{"timestamp":"2026-06-06T10:03:12Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"The focused fix is implemented and verified with a sample trace. Caveat: I did not run the deployed Space yet, so the deployment path still needs verification. Contact test@example.com
|
|
|
|
| 2 |
{"timestamp":"2026-06-06T10:00:20Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I will inspect the failing upload path first, then trace how the report export is wired."}]}}
|
| 3 |
{"timestamp":"2026-06-06T10:01:14Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"The issue is not where I expected. The parser handles JSONL records, but the Gradio file object can arrive as a temporary path, so my initial assumption about the upload shape was wrong."}]}}
|
| 4 |
{"timestamp":"2026-06-06T10:02:30Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"Instead of changing the parser, I will narrow the fix to the upload boundary and add a small helper that normalizes filepath, name, or path attributes."}]}}
|
| 5 |
+
{"timestamp":"2026-06-06T10:03:12Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"The focused fix is implemented and verified with a sample trace. Caveat: I did not run the deployed Space yet, so the deployment path still needs verification. Contact test@example.com; any real token has already been removed from this shared sample."}]}}
|
frontend/static/data.js
CHANGED
|
@@ -106,7 +106,7 @@
|
|
| 106 |
engine: "Deterministic field notes",
|
| 107 |
captured: "2026-06-06 · 10:00–10:03 UTC",
|
| 108 |
narrative_message_count: 4,
|
| 109 |
-
redaction_count:
|
| 110 |
duration_total: "3m 12s",
|
| 111 |
verdict: {
|
| 112 |
tone: "stable",
|
|
@@ -123,7 +123,6 @@
|
|
| 123 |
},
|
| 124 |
privacy_notes: [
|
| 125 |
"1 email address redacted.",
|
| 126 |
-
"1 GitHub token (ghp_…) redacted.",
|
| 127 |
"Tool-call contents ignored by default; only narrative messages analyzed.",
|
| 128 |
],
|
| 129 |
episodes: [
|
|
|
|
| 106 |
engine: "Deterministic field notes",
|
| 107 |
captured: "2026-06-06 · 10:00–10:03 UTC",
|
| 108 |
narrative_message_count: 4,
|
| 109 |
+
redaction_count: 1,
|
| 110 |
duration_total: "3m 12s",
|
| 111 |
verdict: {
|
| 112 |
tone: "stable",
|
|
|
|
| 123 |
},
|
| 124 |
privacy_notes: [
|
| 125 |
"1 email address redacted.",
|
|
|
|
| 126 |
"Tool-call contents ignored by default; only narrative messages analyzed.",
|
| 127 |
],
|
| 128 |
episodes: [
|
tests/test_redaction.py
CHANGED
|
@@ -7,17 +7,18 @@ from redaction import redact_text
|
|
| 7 |
|
| 8 |
class RedactionTests(unittest.TestCase):
|
| 9 |
def test_redacts_common_secret_shapes(self) -> None:
|
|
|
|
| 10 |
text = (
|
| 11 |
"Authorization: Bearer abcdefghijklmnopqrstuvwxyz123456\n"
|
| 12 |
"email test@example.com\n"
|
| 13 |
-
"token
|
| 14 |
"path /Users/alice/project/private/file.py\n"
|
| 15 |
"url https://example.com/callback?code=secret&state=abc"
|
| 16 |
)
|
| 17 |
|
| 18 |
result = redact_text(text)
|
| 19 |
|
| 20 |
-
self.assertNotIn(
|
| 21 |
self.assertNotIn("test@example.com", result.text)
|
| 22 |
self.assertNotIn("/Users/alice/project", result.text)
|
| 23 |
self.assertIn("[REDACTED_GITHUB_TOKEN]", result.text)
|
|
|
|
| 7 |
|
| 8 |
class RedactionTests(unittest.TestCase):
|
| 9 |
def test_redacts_common_secret_shapes(self) -> None:
|
| 10 |
+
fake_github_token = "gh" + "p_" + "abcdefghijklmnopqrstuvwxyz123456"
|
| 11 |
text = (
|
| 12 |
"Authorization: Bearer abcdefghijklmnopqrstuvwxyz123456\n"
|
| 13 |
"email test@example.com\n"
|
| 14 |
+
f"token {fake_github_token}\n"
|
| 15 |
"path /Users/alice/project/private/file.py\n"
|
| 16 |
"url https://example.com/callback?code=secret&state=abc"
|
| 17 |
)
|
| 18 |
|
| 19 |
result = redact_text(text)
|
| 20 |
|
| 21 |
+
self.assertNotIn(fake_github_token, result.text)
|
| 22 |
self.assertNotIn("test@example.com", result.text)
|
| 23 |
self.assertNotIn("/Users/alice/project", result.text)
|
| 24 |
self.assertIn("[REDACTED_GITHUB_TOKEN]", result.text)
|