Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- app.py +2 -1
- index.html +4 -0
- matchday/app.py +30 -212
- matchday/record_trace.py +129 -55
- matchday/traces/session_001.jsonl +5 -4
- matchday/traces/session_002.jsonl +4 -4
app.py
CHANGED
|
@@ -10,7 +10,8 @@ explains. This is the Off-Brand path: a bespoke UI powered by Gradio's backend
|
|
| 10 |
Brain + Hands: Nemotron (on Modal) never calls an API or names a price; Python
|
| 11 |
executes every call and scores every value. Every figure carries provenance.
|
| 12 |
|
| 13 |
-
|
|
|
|
| 14 |
|
| 15 |
Reference patterns (3-codebase study, see MATCHDAY_UNCONSTRAINED_PLAN.md):
|
| 16 |
- N1 gradio.Server custom-frontend architecture (Off-Brand badge):
|
|
|
|
| 10 |
Brain + Hands: Nemotron (on Modal) never calls an API or names a price; Python
|
| 11 |
executes every call and scores every value. Every figure carries provenance.
|
| 12 |
|
| 13 |
+
``matchday/app.py`` is a compatibility shim that imports and launches this same
|
| 14 |
+
app, so ``python3 -m matchday.app`` runs the identical non-decorative path.
|
| 15 |
|
| 16 |
Reference patterns (3-codebase study, see MATCHDAY_UNCONSTRAINED_PLAN.md):
|
| 17 |
- N1 gradio.Server custom-frontend architecture (Off-Brand badge):
|
index.html
CHANGED
|
@@ -84,6 +84,9 @@
|
|
| 84 |
.hero h2 .hero-accent{background:linear-gradient(135deg,var(--wc-gold),var(--wc-pink));
|
| 85 |
-webkit-background-clip:text;background-clip:text;-webkit-text-fill-color:transparent;color:transparent;}
|
| 86 |
.hero p{margin:0;font-size:13px;color:rgba(255,255,255,.92);line-height:1.55;max-width:48ch;}
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
.msg{display:flex;gap:10px;max-width:94%;animation:fade .3s ease;}
|
| 89 |
.msg.user{align-self:flex-end;flex-direction:row-reverse;}
|
|
@@ -223,6 +226,7 @@
|
|
| 223 |
<span class="hero-badge">⚽ FIFA World Cup 2026 · Vancouver</span>
|
| 224 |
<h2>Plan your <span class="hero-accent">World Cup</span> trip</h2>
|
| 225 |
<p>Tell me where you're flying from, the match, your dates & budget. I'll build 3 ranked packages — cheapest flight, safest arrival, closest hotel to BC Place — with live prices & honest provenance.</p>
|
|
|
|
| 226 |
</div>
|
| 227 |
</div>
|
| 228 |
</div>
|
|
|
|
| 84 |
.hero h2 .hero-accent{background:linear-gradient(135deg,var(--wc-gold),var(--wc-pink));
|
| 85 |
-webkit-background-clip:text;background-clip:text;-webkit-text-fill-color:transparent;color:transparent;}
|
| 86 |
.hero p{margin:0;font-size:13px;color:rgba(255,255,255,.92);line-height:1.55;max-width:48ch;}
|
| 87 |
+
.hero-cold{margin-top:11px;font-size:11.5px;font-weight:600;color:#fde68a;
|
| 88 |
+
display:inline-flex;align-items:center;gap:6px;background:rgba(0,0,0,.24);
|
| 89 |
+
padding:5px 11px;border-radius:999px;border:1px solid rgba(251,191,36,.28);}
|
| 90 |
|
| 91 |
.msg{display:flex;gap:10px;max-width:94%;animation:fade .3s ease;}
|
| 92 |
.msg.user{align-self:flex-end;flex-direction:row-reverse;}
|
|
|
|
| 226 |
<span class="hero-badge">⚽ FIFA World Cup 2026 · Vancouver</span>
|
| 227 |
<h2>Plan your <span class="hero-accent">World Cup</span> trip</h2>
|
| 228 |
<p>Tell me where you're flying from, the match, your dates & budget. I'll build 3 ranked packages — cheapest flight, safest arrival, closest hotel to BC Place — with live prices & honest provenance.</p>
|
| 229 |
+
<span class="hero-cold">⏳ First request may take a few minutes while the model warms up.</span>
|
| 230 |
</div>
|
| 231 |
</div>
|
| 232 |
</div>
|
matchday/app.py
CHANGED
|
@@ -1,221 +1,39 @@
|
|
| 1 |
-
"""MatchDay —
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"""
|
| 13 |
from __future__ import annotations
|
| 14 |
|
| 15 |
-
import
|
| 16 |
-
import
|
| 17 |
-
from
|
| 18 |
-
|
| 19 |
-
import gradio as gr
|
| 20 |
-
|
| 21 |
-
from matchday.agent import MatchDayAgent
|
| 22 |
-
from matchday.agent_loop import BuildTripPackagesArgs
|
| 23 |
-
from matchday.intent import parse_intent
|
| 24 |
-
from matchday.models import TripRequest
|
| 25 |
-
from matchday.prompts import EXPLANATION_HINT
|
| 26 |
-
from matchday.render import _CSS, render_full
|
| 27 |
-
from matchday.trip_tool import build_trip_packages, format_for_nemotron
|
| 28 |
-
|
| 29 |
-
logger = logging.getLogger(__name__)
|
| 30 |
-
|
| 31 |
-
# Nemotron primary, deterministic fallback. Flip to False to force the
|
| 32 |
-
# deterministic path (useful if Modal is down or for a lightning-fast demo).
|
| 33 |
-
USE_AGENT = True
|
| 34 |
-
|
| 35 |
-
_HEADER = """
|
| 36 |
-
<div style="font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;
|
| 37 |
-
background:linear-gradient(135deg,#0f172a,#1e3a8a);color:#fff;
|
| 38 |
-
padding:22px 26px;border-radius:14px;margin-bottom:6px">
|
| 39 |
-
<div style="font-size:28px;font-weight:800;letter-spacing:-.02em">⚽ MatchDay</div>
|
| 40 |
-
<div style="font-size:14px;opacity:.88;margin-top:4px">
|
| 41 |
-
Your AI trip planner for the <b>2026 FIFA World Cup in Vancouver</b>.
|
| 42 |
-
Powered by Nemotron-3-Nano-30B (the brain) + deterministic Python (the hands)
|
| 43 |
-
on Modal. Tell me where you're flying from and which match — I'll build 3
|
| 44 |
-
ranked packages (flights · hotels · weather · what's near the stadium) with
|
| 45 |
-
<b>live data and honest provenance</b>.
|
| 46 |
-
</div>
|
| 47 |
-
</div>
|
| 48 |
-
"""
|
| 49 |
-
|
| 50 |
-
_EXAMPLES = [
|
| 51 |
-
"Flying from Montreal, want Canada vs Qatar, mid-range, June 26-29, just me",
|
| 52 |
-
"I want to fly from Toronto to see Brazil vs Germany, premium, July 12, 2 adults",
|
| 53 |
-
"Take me from Ottawa to Vancouver for Canada vs Qatar on 2026-06-26, budget",
|
| 54 |
-
"From Halifax, Canada vs Morocco June 18, couple, luxury",
|
| 55 |
-
]
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
def _greenlight_html(summary: str) -> str:
|
| 59 |
-
return (
|
| 60 |
-
'<div style="font-family:-apple-system,Roboto,sans-serif;padding:12px 16px;'
|
| 61 |
-
'background:#f0fdf4;border:1px solid #bbf7d0;border-radius:12px;'
|
| 62 |
-
f'color:#166534;font-size:14px;margin:8px 0">✅ <b>Planning your trip:</b> '
|
| 63 |
-
f"{summary}</div>"
|
| 64 |
-
)
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
def _clarify_html(question: str) -> str:
|
| 68 |
-
return (
|
| 69 |
-
'<div style="font-family:-apple-system,Roboto,sans-serif;padding:16px;'
|
| 70 |
-
'background:#eff6ff;border:1px solid #bfdbfe;border-radius:12px;'
|
| 71 |
-
f'color:#1e3a8a;font-size:15px">💬 {question}</div>'
|
| 72 |
-
)
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
def _explain_html(text: str) -> str:
|
| 76 |
-
safe = text.replace("<", "&lt;").replace(">", "&gt;").replace("\n", "<br>")
|
| 77 |
-
return (
|
| 78 |
-
'<div style="font-family:-apple-system,Roboto,sans-serif;padding:14px 16px;'
|
| 79 |
-
'background:#f8fafc;border:1px solid #e2e8f0;border-radius:12px;'
|
| 80 |
-
f'color:#0f172a;font-size:14px;margin-top:10px;line-height:1.5">'
|
| 81 |
-
f'<b style="color:#1e3a8a">🤖 Nemotron compares your options:</b><br>{safe}</div>'
|
| 82 |
-
)
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
def _progress_html(msg: str) -> str:
|
| 86 |
-
"""Staged status card between phases (U4 + G1 — narration tied to real progress)."""
|
| 87 |
-
return _CSS + (
|
| 88 |
-
'<div class="md-wrap"><div class="md-progress">'
|
| 89 |
-
f'<span class="spin">⏳</span> <span>{msg}</span></div></div>'
|
| 90 |
-
)
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
def _args_to_trip(a: BuildTripPackagesArgs) -> TripRequest:
|
| 94 |
-
return TripRequest(
|
| 95 |
-
origin_airport=a.origin_airport,
|
| 96 |
-
match_name=a.match_name or "the match",
|
| 97 |
-
match_date=date.fromisoformat(a.match_date),
|
| 98 |
-
check_in=date.fromisoformat(a.check_in),
|
| 99 |
-
check_out=date.fromisoformat(a.check_out),
|
| 100 |
-
travelers=a.travelers,
|
| 101 |
-
budget_tier=a.budget_tier,
|
| 102 |
-
)
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
async def _agent_explain(agent, user_text: str, trip: TripRequest, result) -> str:
|
| 106 |
-
"""Round 2: Nemotron compares the packages. Best-effort ('' on failure)."""
|
| 107 |
-
args_json = json.dumps(trip.model_dump(mode="json"))
|
| 108 |
-
convo = [
|
| 109 |
-
{"role": "user", "content": user_text},
|
| 110 |
-
{
|
| 111 |
-
"role": "assistant",
|
| 112 |
-
"content": "",
|
| 113 |
-
"tool_calls": [{
|
| 114 |
-
"id": "call_build",
|
| 115 |
-
"type": "function",
|
| 116 |
-
"function": {"name": "build_trip_packages", "arguments": args_json},
|
| 117 |
-
}],
|
| 118 |
-
},
|
| 119 |
-
{
|
| 120 |
-
"role": "tool",
|
| 121 |
-
"tool_call_id": "call_build",
|
| 122 |
-
"name": "build_trip_packages",
|
| 123 |
-
"content": format_for_nemotron(result),
|
| 124 |
-
},
|
| 125 |
-
{"role": "user", "content": EXPLANATION_HINT},
|
| 126 |
-
]
|
| 127 |
-
try:
|
| 128 |
-
r2 = await agent.run(convo, tools=[]) # no tools -> Nemotron must write text
|
| 129 |
-
return (r2.get("text") or "").strip()
|
| 130 |
-
except Exception as exc:
|
| 131 |
-
logger.warning("explanation round failed: %s", exc)
|
| 132 |
-
return ""
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
async def plan_trip(user_text: str):
|
| 136 |
-
"""Streaming handler: yields staged status during the real agent phases
|
| 137 |
-
(deciding -> searching -> scoring -> final cards+map+timeline). Falls back
|
| 138 |
-
to the deterministic parser if Nemotron is unavailable or hedges.
|
| 139 |
-
"""
|
| 140 |
-
yield _progress_html("Reading your trip request…")
|
| 141 |
-
|
| 142 |
-
agent = None
|
| 143 |
-
if USE_AGENT:
|
| 144 |
-
try:
|
| 145 |
-
agent = MatchDayAgent()
|
| 146 |
-
except Exception as exc:
|
| 147 |
-
logger.warning("agent init failed (%s); deterministic path.", exc)
|
| 148 |
-
|
| 149 |
-
# Round 1 — Nemotron decides which tool to call.
|
| 150 |
-
r1: dict = {"tool_calls": []}
|
| 151 |
-
if agent is not None:
|
| 152 |
-
yield _progress_html("🤖 Nemotron is choosing your best options…")
|
| 153 |
-
try:
|
| 154 |
-
r1 = await agent.run([{"role": "user", "content": user_text}])
|
| 155 |
-
except Exception as exc:
|
| 156 |
-
logger.warning("agent round 1 failed (%s).", exc)
|
| 157 |
-
r1 = {"tool_calls": []}
|
| 158 |
-
|
| 159 |
-
# Resolve a validated trip: Nemotron's args first, then deterministic parse.
|
| 160 |
-
tool_calls = r1.get("tool_calls") or []
|
| 161 |
-
trip: TripRequest | None = None
|
| 162 |
-
if tool_calls and tool_calls[0].get("name") == "build_trip_packages":
|
| 163 |
-
try:
|
| 164 |
-
trip = _args_to_trip(
|
| 165 |
-
BuildTripPackagesArgs.model_validate(tool_calls[0].get("arguments", {}))
|
| 166 |
-
)
|
| 167 |
-
except Exception:
|
| 168 |
-
trip = None
|
| 169 |
-
if trip is None:
|
| 170 |
-
parsed = parse_intent(user_text)
|
| 171 |
-
if parsed.trip_request is not None:
|
| 172 |
-
trip = parsed.trip_request
|
| 173 |
-
|
| 174 |
-
if trip is None:
|
| 175 |
-
clarify_q = ""
|
| 176 |
-
if tool_calls and tool_calls[0].get("name") == "clarify":
|
| 177 |
-
clarify_q = tool_calls[0].get("arguments", {}).get("question", "")
|
| 178 |
-
if not clarify_q:
|
| 179 |
-
clarify_q = parse_intent(user_text).question
|
| 180 |
-
yield _clarify_html(
|
| 181 |
-
clarify_q or "Tell me where you're flying from and which match you want to see."
|
| 182 |
-
)
|
| 183 |
-
return
|
| 184 |
-
|
| 185 |
-
green = _greenlight_html(trip.summary())
|
| 186 |
-
yield green + _progress_html(
|
| 187 |
-
"✈️ Scanning airlines · 🏨 Finding hotels near BC Place · 🌤️ Checking the match-day forecast…"
|
| 188 |
-
)
|
| 189 |
-
try:
|
| 190 |
-
result = await build_trip_packages(trip)
|
| 191 |
-
except Exception as exc:
|
| 192 |
-
yield green + f'<div style="color:#991b1b;padding:12px">⚠️ {exc}</div>'
|
| 193 |
-
return
|
| 194 |
-
|
| 195 |
-
yield green + _progress_html("🗺️ Scoring 3 packages · Nemotron is writing your comparison…")
|
| 196 |
-
explanation = ""
|
| 197 |
-
if agent is not None:
|
| 198 |
-
explanation = await _agent_explain(agent, user_text, trip, result)
|
| 199 |
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
|
|
|
|
|
|
|
|
|
| 202 |
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
with gr.Row():
|
| 206 |
-
user_input = gr.Textbox(
|
| 207 |
-
label="Tell me about your trip",
|
| 208 |
-
placeholder="e.g. Flying from Montreal, want Canada vs Qatar, mid-range, June 26-29, just me",
|
| 209 |
-
lines=2,
|
| 210 |
-
value=_EXAMPLES[0],
|
| 211 |
-
scale=4,
|
| 212 |
-
)
|
| 213 |
-
plan_btn = gr.Button("🏈 Plan my trip", variant="primary", scale=1)
|
| 214 |
-
output = gr.HTML(label="Your trip packages")
|
| 215 |
-
gr.Examples(examples=_EXAMPLES, inputs=user_input)
|
| 216 |
-
plan_btn.click(fn=plan_trip, inputs=user_input, outputs=output)
|
| 217 |
-
user_input.submit(fn=plan_trip, inputs=user_input, outputs=output)
|
| 218 |
|
| 219 |
|
| 220 |
if __name__ == "__main__":
|
| 221 |
-
|
|
|
|
| 1 |
+
"""MatchDay — local launch entry (``python3 -m matchday.app``).
|
| 2 |
+
|
| 3 |
+
Compatibility shim. Delegates to the canonical smart entry at the repo root,
|
| 4 |
+
``app.py`` — the ``gradio.Server`` app that runs the bounded ``run_agent_loop``:
|
| 5 |
+
Nemotron *understands* the request → may *ground* via ``web_search`` / *clarify*
|
| 6 |
+
to capture intent → calls ``build_trip_packages`` → *explains* the ranked
|
| 7 |
+
packages, with deterministic fallback only as graceful degradation. The HF
|
| 8 |
+
Space runs that same root ``app.py``; this shim makes the local
|
| 9 |
+
``-m matchday.app`` command launch the identical, non-decorative path.
|
| 10 |
+
|
| 11 |
+
History: this file previously held a ``gradio.Blocks`` handler that used the
|
| 12 |
+
single-shot agent bypass (a single inference call only to pick a tool, no
|
| 13 |
+
grounding loop, no cold-start ``_pulse``). That path was *decorative* — the model did not truly
|
| 14 |
+
participate in understanding/grounding/clarification. It was removed so that no
|
| 15 |
+
entry point can serve it; there is now a single source of truth (root
|
| 16 |
+
``app.py``) and no second copy to drift out of sync.
|
| 17 |
"""
|
| 18 |
from __future__ import annotations
|
| 19 |
|
| 20 |
+
import os
|
| 21 |
+
import sys
|
| 22 |
+
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
# Make the repo root importable so `import app` resolves to the root entry no
|
| 25 |
+
# matter the current working directory.
|
| 26 |
+
_REPO_ROOT = Path(__file__).resolve().parent.parent
|
| 27 |
+
if str(_REPO_ROOT) not in sys.path:
|
| 28 |
+
sys.path.insert(0, str(_REPO_ROOT))
|
| 29 |
|
| 30 |
+
# Import the canonical smart entry (repo-root app.py). Its module name (`app`)
|
| 31 |
+
# differs from this file's (`matchday.app`), so there is no collision.
|
| 32 |
+
import app as _root_app # noqa: E402
|
| 33 |
|
| 34 |
+
# Re-export the gradio.Server app so `from matchday.app import app` keeps working.
|
| 35 |
+
app = _root_app.app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
|
| 38 |
if __name__ == "__main__":
|
| 39 |
+
app.launch(server_port=int(os.environ.get("PORT", "7860")), show_error=True)
|
matchday/record_trace.py
CHANGED
|
@@ -1,12 +1,21 @@
|
|
| 1 |
"""Record a MatchDay agent trace (Build Small "Sharing is Caring" badge).
|
| 2 |
|
| 3 |
-
Runs one agentic trip-planning turn
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
write-scoped HF token is available.
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
Usage:
|
| 11 |
python3 -m matchday.record_trace "<query>" [out.jsonl]
|
| 12 |
"""
|
|
@@ -15,11 +24,11 @@ from __future__ import annotations
|
|
| 15 |
import asyncio
|
| 16 |
import json
|
| 17 |
import sys
|
| 18 |
-
from datetime import
|
| 19 |
from pathlib import Path
|
| 20 |
|
| 21 |
from matchday.agent import MatchDayAgent
|
| 22 |
-
from matchday.agent_loop import
|
| 23 |
from matchday.intent import parse_intent
|
| 24 |
from matchday.models import TripRequest
|
| 25 |
from matchday.prompts import EXPLANATION_HINT
|
|
@@ -30,18 +39,6 @@ def _ts() -> str:
|
|
| 30 |
return datetime.now(timezone.utc).isoformat()
|
| 31 |
|
| 32 |
|
| 33 |
-
def _args_to_trip(a: BuildTripPackagesArgs) -> TripRequest:
|
| 34 |
-
return TripRequest(
|
| 35 |
-
origin_airport=a.origin_airport,
|
| 36 |
-
match_name=a.match_name or "the match",
|
| 37 |
-
match_date=date.fromisoformat(a.match_date),
|
| 38 |
-
check_in=date.fromisoformat(a.check_in),
|
| 39 |
-
check_out=date.fromisoformat(a.check_out),
|
| 40 |
-
travelers=a.travelers,
|
| 41 |
-
budget_tier=a.budget_tier,
|
| 42 |
-
)
|
| 43 |
-
|
| 44 |
-
|
| 45 |
def _packages_view(result) -> list[dict]:
|
| 46 |
return [
|
| 47 |
{
|
|
@@ -58,61 +55,138 @@ def _packages_view(result) -> list[dict]:
|
|
| 58 |
]
|
| 59 |
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
async def record(query: str, out_path: str) -> None:
|
| 62 |
agent = MatchDayAgent()
|
| 63 |
trace: list[dict] = []
|
| 64 |
log = lambda **e: trace.append({"ts": _ts(), **e}) # noqa: E731
|
| 65 |
|
| 66 |
log(event="user", content=query)
|
| 67 |
-
base = [{"role": "user", "content": query}]
|
| 68 |
-
r1 = await agent.run(base)
|
| 69 |
-
tool_calls = r1.get("tool_calls") or []
|
| 70 |
log(
|
| 71 |
-
event="
|
| 72 |
-
|
| 73 |
-
|
| 74 |
)
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
trip: TripRequest | None = None
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
parsed = parse_intent(query)
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
log(event="
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
log(
|
| 92 |
event="tool_result",
|
| 93 |
tool="build_trip_packages",
|
| 94 |
-
trip=trip.summary(),
|
| 95 |
status=result.status,
|
| 96 |
degradation=result.degradation_notices,
|
| 97 |
packages=_packages_view(result),
|
| 98 |
compact=format_for_nemotron(result),
|
| 99 |
)
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
}],
|
| 110 |
-
},
|
| 111 |
-
{"role": "tool", "tool_call_id": "call_build", "name": "build_trip_packages", "content": format_for_nemotron(result)},
|
| 112 |
-
{"role": "user", "content": EXPLANATION_HINT},
|
| 113 |
-
]
|
| 114 |
-
r2 = await agent.run(convo, tools=[])
|
| 115 |
-
log(event="nemotron_explanation", text=r2.get("text") or "")
|
| 116 |
|
| 117 |
out = Path(out_path)
|
| 118 |
out.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
| 1 |
"""Record a MatchDay agent trace (Build Small "Sharing is Caring" badge).
|
| 2 |
|
| 3 |
+
Runs one agentic trip-planning turn through the SAME bounded agent loop the
|
| 4 |
+
deployed Space uses — ``matchday.agent_loop.run_agent_loop`` — and writes a
|
| 5 |
+
JSONL trace of every step: the user message, each agent round (understand →
|
| 6 |
+
maybe ground via ``web_search`` → maybe ``clarify`` → call
|
| 7 |
+
``build_trip_packages``), the executed tool result (scored packages +
|
| 8 |
+
provenance + degradation), and the final explanation. This is the artifact
|
| 9 |
+
others can learn from. Publish via ``publish_agent_trace.py`` once a
|
| 10 |
write-scoped HF token is available.
|
| 11 |
|
| 12 |
+
This is the honest path: the trace is produced by ``run_agent_loop`` (validate
|
| 13 |
+
→ dedup → self-correct → ground → build), NOT by a single-shot tool-picking
|
| 14 |
+
inference call. For a clear request the agent decides ``build_trip_packages``
|
| 15 |
+
in round 1; for an ambiguous request it ``clarify``-asks; for a factual
|
| 16 |
+
question it grounds with ``web_search`` — all of which the single-shot bypass
|
| 17 |
+
could not represent, so none of that is reproduced here.
|
| 18 |
+
|
| 19 |
Usage:
|
| 20 |
python3 -m matchday.record_trace "<query>" [out.jsonl]
|
| 21 |
"""
|
|
|
|
| 24 |
import asyncio
|
| 25 |
import json
|
| 26 |
import sys
|
| 27 |
+
from datetime import datetime, timezone
|
| 28 |
from pathlib import Path
|
| 29 |
|
| 30 |
from matchday.agent import MatchDayAgent
|
| 31 |
+
from matchday.agent_loop import run_agent_loop
|
| 32 |
from matchday.intent import parse_intent
|
| 33 |
from matchday.models import TripRequest
|
| 34 |
from matchday.prompts import EXPLANATION_HINT
|
|
|
|
| 39 |
return datetime.now(timezone.utc).isoformat()
|
| 40 |
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
def _packages_view(result) -> list[dict]:
|
| 43 |
return [
|
| 44 |
{
|
|
|
|
| 55 |
]
|
| 56 |
|
| 57 |
|
| 58 |
+
async def _explain(agent, user_text: str, trip: TripRequest, result) -> str:
|
| 59 |
+
"""Final round — Nemotron compares the packages. Mirrors app._agent_explain."""
|
| 60 |
+
if trip is None:
|
| 61 |
+
return ""
|
| 62 |
+
args_json = json.dumps(trip.model_dump(mode="json"))
|
| 63 |
+
convo = [
|
| 64 |
+
{"role": "user", "content": user_text},
|
| 65 |
+
{
|
| 66 |
+
"role": "assistant",
|
| 67 |
+
"content": "",
|
| 68 |
+
"tool_calls": [{
|
| 69 |
+
"id": "call_build",
|
| 70 |
+
"type": "function",
|
| 71 |
+
"function": {"name": "build_trip_packages", "arguments": args_json},
|
| 72 |
+
}],
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"role": "tool",
|
| 76 |
+
"tool_call_id": "call_build",
|
| 77 |
+
"name": "build_trip_packages",
|
| 78 |
+
"content": format_for_nemotron(result),
|
| 79 |
+
},
|
| 80 |
+
{"role": "user", "content": EXPLANATION_HINT},
|
| 81 |
+
]
|
| 82 |
+
try:
|
| 83 |
+
r = await agent.run(convo, tools=[]) # no tools → Nemotron must write text
|
| 84 |
+
return (r.get("text") or "").strip()
|
| 85 |
+
except Exception as exc: # noqa: BLE001 — best-effort, must not abort the trace
|
| 86 |
+
return f"(explanation round failed: {exc})"
|
| 87 |
+
|
| 88 |
+
|
| 89 |
async def record(query: str, out_path: str) -> None:
|
| 90 |
agent = MatchDayAgent()
|
| 91 |
trace: list[dict] = []
|
| 92 |
log = lambda **e: trace.append({"ts": _ts(), **e}) # noqa: E731
|
| 93 |
|
| 94 |
log(event="user", content=query)
|
|
|
|
|
|
|
|
|
|
| 95 |
log(
|
| 96 |
+
event="agent_path",
|
| 97 |
+
entry="run_agent_loop",
|
| 98 |
+
note="bounded agent loop the deployed Space uses: understand -> ground/clarify -> build",
|
| 99 |
)
|
| 100 |
|
| 101 |
+
# ── Smart path: the bounded agent loop (mirrors app.plan_trip). Nemotron
|
| 102 |
+
# UNDERSTANDS the request, may GROUND with web_search, may CLARIFY, and
|
| 103 |
+
# calls build_trip_packages when ready. The loop validates args, dedups,
|
| 104 |
+
# and self-corrects one malformed call. No single-shot bypass.
|
| 105 |
+
messages: list[dict] = [{"role": "user", "content": query}]
|
| 106 |
trip: TripRequest | None = None
|
| 107 |
+
result = None
|
| 108 |
+
agent_text = ""
|
| 109 |
+
|
| 110 |
+
for attempt in range(3): # cap grounding rounds (web_search -> build)
|
| 111 |
+
res = await run_agent_loop(agent, messages)
|
| 112 |
+
log(
|
| 113 |
+
event="agent_round",
|
| 114 |
+
attempt=attempt,
|
| 115 |
+
outcome=res.type,
|
| 116 |
+
tool=res.tool or None,
|
| 117 |
+
reason=res.reason or None,
|
| 118 |
+
text=(res.text or "")[:240],
|
| 119 |
+
)
|
| 120 |
+
|
| 121 |
+
if res.type == "tool_called" and res.tool == "build_trip_packages":
|
| 122 |
+
result = res.result.get("full_result")
|
| 123 |
+
trip = res.result.get("trip")
|
| 124 |
+
break
|
| 125 |
+
|
| 126 |
+
if res.type == "tool_called" and res.tool == "web_search":
|
| 127 |
+
# Brain grounded itself — thread the result back so it can build.
|
| 128 |
+
tcid = f"call_ws_{attempt}"
|
| 129 |
+
messages.append({
|
| 130 |
+
"role": "assistant", "content": "",
|
| 131 |
+
"tool_calls": [{
|
| 132 |
+
"id": tcid, "type": "function",
|
| 133 |
+
"function": {
|
| 134 |
+
"name": "web_search",
|
| 135 |
+
"arguments": json.dumps(res.result.get("query") or {}),
|
| 136 |
+
},
|
| 137 |
+
}],
|
| 138 |
+
})
|
| 139 |
+
messages.append({
|
| 140 |
+
"role": "tool", "tool_call_id": tcid, "name": "web_search",
|
| 141 |
+
"content": json.dumps(res.result, ensure_ascii=False)[:1200],
|
| 142 |
+
})
|
| 143 |
+
log(event="agent_grounded", tool="web_search", query=res.result.get("query"))
|
| 144 |
+
continue
|
| 145 |
+
|
| 146 |
+
if res.type == "final_answer":
|
| 147 |
+
agent_text = res.text or ""
|
| 148 |
+
break
|
| 149 |
+
|
| 150 |
+
# fallback_to_deterministic — record it HONESTLY, never silently swap.
|
| 151 |
+
break
|
| 152 |
+
|
| 153 |
+
# ── Deterministic fallback (mirrors app.plan_trip degrade path). Used when
|
| 154 |
+
# the loop fell back (e.g. Modal cold-start timeout) or produced no build.
|
| 155 |
+
if result is None and not agent_text:
|
| 156 |
parsed = parse_intent(query)
|
| 157 |
+
trip = parsed.trip_request
|
| 158 |
+
if trip is not None:
|
| 159 |
+
log(event="deterministic_build", trip=trip.summary())
|
| 160 |
+
try:
|
| 161 |
+
result = await build_trip_packages(trip)
|
| 162 |
+
except Exception as exc: # noqa: BLE001
|
| 163 |
+
log(event="deterministic_build_error", error=str(exc))
|
| 164 |
+
else:
|
| 165 |
+
agent_text = (
|
| 166 |
+
parsed.question
|
| 167 |
+
or "Tell me where you're flying from and which match you want to see."
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
# ── Record the outcome.
|
| 171 |
+
if result is not None:
|
| 172 |
log(
|
| 173 |
event="tool_result",
|
| 174 |
tool="build_trip_packages",
|
| 175 |
+
trip=trip.summary() if trip else None,
|
| 176 |
status=result.status,
|
| 177 |
degradation=result.degradation_notices,
|
| 178 |
packages=_packages_view(result),
|
| 179 |
compact=format_for_nemotron(result),
|
| 180 |
)
|
| 181 |
+
explanation = await _explain(agent, query, trip, result)
|
| 182 |
+
log(event="nemotron_explanation", text=explanation)
|
| 183 |
+
elif agent_text:
|
| 184 |
+
log(event="agent_answer", text=agent_text)
|
| 185 |
+
else:
|
| 186 |
+
log(
|
| 187 |
+
event="no_packages",
|
| 188 |
+
reason="agent loop + deterministic fallback produced no packages",
|
| 189 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
out = Path(out_path)
|
| 192 |
out.parent.mkdir(parents=True, exist_ok=True)
|
matchday/traces/session_001.jsonl
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
-
{"ts": "2026-06-
|
| 2 |
-
{"ts": "2026-06-
|
| 3 |
-
{"ts": "2026-06-
|
| 4 |
-
{"ts": "2026-06-
|
|
|
|
|
|
| 1 |
+
{"ts": "2026-06-14T23:04:15.400506+00:00", "event": "user", "content": "Flying from Montreal, want Canada vs Qatar, mid-range, June 26-29, just me"}
|
| 2 |
+
{"ts": "2026-06-14T23:04:15.401159+00:00", "event": "agent_path", "entry": "run_agent_loop", "note": "bounded agent loop the deployed Space uses: understand -> ground/clarify -> build"}
|
| 3 |
+
{"ts": "2026-06-14T23:08:19.519551+00:00", "event": "agent_round", "attempt": 0, "outcome": "tool_called", "tool": "build_trip_packages", "reason": null, "text": ""}
|
| 4 |
+
{"ts": "2026-06-14T23:08:19.519695+00:00", "event": "tool_result", "tool": "build_trip_packages", "trip": "YUL -> Vancouver, 2026-06-26 to 2026-06-29, mid_range budget, 1 traveler, watching Canada vs Qatar on 2026-06-26.", "status": "complete", "degradation": ["Photos unavailable"], "packages": [{"label": "Cheapest", "total_cost_cad": 910.0, "flight": "Porter Airlines PD 102 (lands 08:13)", "hotel": "Hotel At The Waldorf", "provenance": {"flight": "serpapi", "hotel": "serpapi"}}, {"label": "Safest Arrival", "total_cost_cad": 1289.0, "flight": "Porter Airlines PD 102 (lands 08:13)", "hotel": "Quality Inn & Suites Downtown Vancouver", "provenance": {"flight": "serpapi", "hotel": "serpapi"}}, {"label": "Closest to Stadium", "total_cost_cad": 1376.0, "flight": "Porter Airlines PD 102 (lands 08:13)", "hotel": "Best Western Premier Chateau Granville Hotel & Suites & Conf. Centre", "provenance": {"flight": "serpapi", "hotel": "serpapi"}}], "compact": "STATUS: complete | 1 degradation(s) | 3 package(s) scored\n DEGRADATION: Photos unavailable\n#1 Cheapest | $910.00 CAD | Flight Porter Airlines PD 102 lands 08:13 from YUL (source=serpapi) | Hotel Hotel At The Waldorf 3.367km to BC Place 3.6\u2605 (source=serpapi) | +10.8h buffer | 40 min walk | rain risk 36%\n#2 Safest Arrival | $1,289.00 CAD | Flight Porter Airlines PD 102 lands 08:13 from YUL (source=serpapi) | Hotel Quality Inn & Suites Downtown Vancouver 0.774km to BC Place 4.4\u2605 (source=serpapi) | +10.8h buffer | 9 min walk | rain risk 36%\n#3 Closest to Stadium | $1,376.00 CAD | Flight Porter Airlines PD 102 lands 08:13 from YUL (source=serpapi) | Hotel Best Western Premier Chateau Granville Hotel & Suites & Conf. Centre 0.615km to BC Place 4.2\u2605 (source=serpapi) | +10.8h buffer | 7 min walk | rain risk 36%"}
|
| 5 |
+
{"ts": "2026-06-14T23:08:22.230251+00:00", "event": "nemotron_explanation", "text": "**Package #1 \u2013 Cheapest** \n- **Price:** $910.00 CAD \n- **Flight:** Porter Airlines\u202fPD\u202f102 arriving\u202f08:13 \n- **Hotel:** Hotel At The Waldorf \u2013 3.367\u202fkm to BC\u202fPlace (example data) \n- **Weather:** rain risk\u202f36% (example data) \n\n**Package #2 \u2013 Safest Arrival** \n- **Price:** $1,289.00 CAD \n- **Flight:** Porter Airlines\u202fPD\u202f102 arriving\u202f08:13 \n- **Hotel:** Quality Inn & Suites Downtown Vancouver \u2013 0.774\u202fkm to BC\u202fPlace \n- **Weather:** rain risk\u202f36% (example data) \n\n**Package #3 \u2013 Closest to Stadium** \n- **Price:** $1,376.00 CAD \n- **Flight:** Porter Airlines\u202fPD\u202f102 arriving\u202f08:13 \n- **Hotel:** Best Western Premier Chateau Granville Hotel & Suites & Conf. Centre \u2013 0.615\u202fkm to BC\u202fPlace \n- **Weather:** rain risk\u202f36% (example data) \n\n**Recommendation:** Choose Package\u202f#2 for a mid\u2011range balance of price, proximity, and a safe arrival."}
|
matchday/traces/session_002.jsonl
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
{"ts": "2026-06-
|
| 2 |
-
{"ts": "2026-06-
|
| 3 |
-
{"ts": "2026-06-
|
| 4 |
-
{"ts": "2026-06-
|
|
|
|
| 1 |
+
{"ts": "2026-06-14T23:10:27.843275+00:00", "event": "user", "content": "I want to fly from Toronto to see Brazil vs Germany, premium, 2 adults"}
|
| 2 |
+
{"ts": "2026-06-14T23:10:27.843852+00:00", "event": "agent_path", "entry": "run_agent_loop", "note": "bounded agent loop the deployed Space uses: understand -> ground/clarify -> build"}
|
| 3 |
+
{"ts": "2026-06-14T23:10:29.126410+00:00", "event": "agent_round", "attempt": 0, "outcome": "final_answer", "tool": null, "reason": null, "text": "Flying from Toronto (YYZ) to see Brazil vs Germany match at BC Place, premium budget tier, for 2 adults\u2014what travel dates are you considering?"}
|
| 4 |
+
{"ts": "2026-06-14T23:10:29.126427+00:00", "event": "agent_answer", "text": "Flying from Toronto (YYZ) to see Brazil vs Germany match at BC Place, premium budget tier, for 2 adults\u2014what travel dates are you considering?"}
|