Spaces:
Running
Running
File size: 32,094 Bytes
002831b 46f87c1 002831b 433072e 1af0b6e 46f87c1 002831b 033fbb4 002831b 46f87c1 5ad9595 002831b 46f87c1 002831b 46f87c1 002831b 9f26f45 02b8de5 ea60d1a 02b8de5 20fd636 002831b 15e0487 002831b 02b8de5 002831b 033fbb4 002831b 5ad9595 342f056 15e0487 20fd636 02b8de5 6638954 002831b 6638954 02b8de5 002831b 342f056 002831b 20fd636 15e0487 002831b 4d052ec 002831b 9f26f45 002831b 9f26f45 02b8de5 9f26f45 02b8de5 9f26f45 4d052ec 9f26f45 f40be87 9f26f45 4d052ec 02b8de5 4d052ec 9f26f45 002831b 02b8de5 9f26f45 342f056 9f26f45 02b8de5 9f26f45 02b8de5 9f26f45 02b8de5 4d052ec 02b8de5 4d052ec 9f26f45 02b8de5 342f056 9f26f45 002831b 9f26f45 02b8de5 342f056 9f26f45 002831b 342f056 ea60d1a 9f26f45 002831b 5ad9595 9f26f45 5ad9595 002831b 02b8de5 342f056 02b8de5 002831b 46f87c1 002831b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 
333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 | """MatchDay — HF Space entry point (gradio.Server mode, N1 / Off-Brand).
The Space runs THIS file. It is a `gradio.Server` app: a fully custom
``index.html`` frontend is served at ``/`` while ``@app.api("plan_trip")`` is an
async generator that streams N12-typed JSON events through Gradio's queue (SSE),
so the frontend updates live as Nemotron decides → Python scores → Nemotron
explains. This is the Off-Brand path: a bespoke UI powered by Gradio's backend
(queuing, concurrency, Spaces hosting) — not stock Gradio components.
Brain + Hands: Nemotron (on Modal) never calls an API or names a price; Python
executes every call and scores every value. Every figure carries provenance.
``matchday/app.py`` is a compatibility shim that imports and launches this same
app, so ``python3 -m matchday.app`` runs the identical non-decorative path.
Reference patterns (3-codebase study, see MATCHDAY_UNCONSTRAINED_PLAN.md):
- N1 gradio.Server custom-frontend architecture (Off-Brand badge):
https://huggingface.co/blog/introducing-gradio-server ("Why @app.api()…").
- N35 preflight gate (fail-fast on missing SerpApi key): Claude Code
utils/preflightChecks.tsx:1-60.
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import sys
import time
from datetime import date
from pathlib import Path
# Repo-root importability when the Space runs this file directly.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from fastapi.responses import HTMLResponse # noqa: E402
from gradio import Server # noqa: E402
from matchday.agent import MatchDayAgent # noqa: E402
from matchday.agent_loop import run_agent_loop # noqa: E402
from matchday.agent_trace import ( # noqa: E402
AgentTrace,
ToolCallRecord,
evidence_from_result,
ranking_from_result,
result_source_labels,
validate_packages,
)
from matchday.intent import parse_intent, _find_match # noqa: E402
from matchday.models import TripRequest # noqa: E402
from matchday.wc2026 import resolve_match # noqa: E402
from matchday.prompts import EXPLANATION_HINT # noqa: E402
from matchday.render import render_full # noqa: E402
from matchday.trip_tool import build_trip_packages, format_for_nemotron # noqa: E402
# Module-level logger; the root logger is configured at INFO.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# Nemotron primary, deterministic fallback. Flip to False to force the
# deterministic path (fast demo / Modal-down insurance).
USE_AGENT = True
# Directory containing this file, and the custom frontend page that lives
# next to it (read and returned by ``homepage``).
HERE = Path(__file__).parent
INDEX_HTML = HERE / "index.html"
# Model label shown in the Agent Trace drawer (Best Agent provenance). Honest:
# 30B total / ~3B active MoE — the ≤32B-cap qualifier is the 30B total weight.
# Passed as ``model=`` to every ``trace.set_outcome(...)`` call in this module.
_TRACE_MODEL = "Nemotron-3-Nano-30B-A3B · 3B-active MoE · Modal A100"
async def _warm_nemotron() -> None:
    """Send one throwaway prompt to Nemotron so its container is already hot.

    Meant to run as a background task right after the Space boots, so the first
    real user query is not stuck behind a Modal cold start. The ping is capped
    at 240 seconds; any ``Exception`` (including the timeout) is logged at INFO
    level and swallowed, so this coroutine never breaks startup.
    """
    ping = [{"role": "user", "content": "warmup ping"}]
    try:
        warm_agent = MatchDayAgent()
        await asyncio.wait_for(warm_agent.run(ping), timeout=240)
    except Exception as exc:  # noqa: BLE001 — best-effort, must not break boot
        # Only the first 80 characters of the repr are logged to keep it short.
        logger.info("startup warmup ping ended early (%s)", repr(exc)[:80])
    else:
        logger.info("startup warmup ping completed — Nemotron container is hot")
# Strong references to fire-and-forget background tasks. The event loop keeps
# only weak references to tasks, so a task nobody else references can be
# garbage-collected before it finishes. Each task removes itself when done.
_BACKGROUND_TASKS: set = set()


async def _startup_warmup() -> None:
    """Server startup hook — schedule the Nemotron warmup without blocking boot.

    The warmup runs as a separate task so this hook returns immediately. The
    task is parked in ``_BACKGROUND_TASKS`` until it completes; without that
    reference the long-running warmup could disappear mid-execution.
    """
    task = asyncio.create_task(_warm_nemotron())
    _BACKGROUND_TASKS.add(task)
    # discard() accepts the finished task as its single argument.
    task.add_done_callback(_BACKGROUND_TASKS.discard)
# The gradio ``Server`` application object. ``_startup_warmup`` is registered as
# a startup hook; the ``@app.api`` / ``@app.get`` routes below attach to it.
app = Server(on_startup=[_startup_warmup])
def _ev(**payload) -> str:
    """Encode one typed stream event (N12) as the JSON text sent over SSE.

    The keyword arguments become the fields of the event object, in the order
    given. Non-ASCII characters (emoji, accents) are written as-is instead of
    being escaped.
    """
    event = dict(payload)
    return json.dumps(event, ensure_ascii=False)
async def _pulse(coro, holder, message, interval: int = 9):
    """Run ``coro`` to completion while streaming heartbeat events.

    Every ``interval`` seconds that ``coro`` is still running, yield a
    ``commentary`` event whose text is ``message`` plus the elapsed whole
    seconds, so the SSE stream is never silent during a long Modal cold-start
    or SerpApi phase. When ``coro`` finishes, its return value is stored in
    ``holder['result']`` and the generator ends; if ``coro`` raised, that
    exception is re-raised here. Usage::

        h = {}
        async for beat in _pulse(coro, h, msg):
            yield beat
        value = h["result"]

    If the consumer stops early (the generator is closed, or the awaiting task
    is cancelled, e.g. on client disconnect), the wrapped work is cancelled as
    well instead of being left running with nobody to receive its result.

    Args:
        coro: Awaitable to run; it is wrapped with ``asyncio.ensure_future``.
        holder: Mutable mapping that receives the result under ``"result"``.
        message: Text prefix of each heartbeat.
        interval: Seconds to wait between heartbeats.
    """
    task = asyncio.ensure_future(coro)
    started = time.monotonic()
    try:
        while True:
            # wait() returns on completion or after `interval` seconds; it does
            # not cancel the task on timeout, so the work keeps running.
            done, _pending = await asyncio.wait({task}, timeout=interval)
            if task in done:
                holder["result"] = task.result()  # re-raises coro's exception
                return
            elapsed = int(time.monotonic() - started)
            yield _ev(type="commentary", text=f"{message} ({elapsed}s)")
    finally:
        # Reached on normal completion (no-op) and on early close/cancel.
        if not task.done():
            task.cancel()
def _notice_status(result, *keywords: str) -> str:
    """Report ``fallback`` or ``done`` for one data category.

    All of ``result.degradation_notices`` are joined with spaces and
    lower-cased; if any of ``keywords`` occurs as a substring of that text the
    category is ``fallback``, otherwise ``done``. A missing or empty notice list
    always yields ``done``. Keywords are matched as given, so callers pass them
    in lower case.
    """
    notices = result.degradation_notices or ""
    haystack = " ".join(notices).lower()
    for needle in keywords:
        if needle in haystack:
            return "fallback"
    return "done"
def _precheck_unrecognized_match(user_text: str):
    """Ground the named match deterministically before the agent runs.

    The request is parsed with ``parse_intent`` and the extracted match name is
    checked with ``resolve_match``. When the user named a matchup that is not
    recognized and the resolver supplied a note, ``plan_trip`` uses the returned
    pair to refuse up front and stop, without invoking the agent loop.

    Why this exists: Nemotron routes its own tools and, for some non-fixture
    matchups (e.g. "Canada vs Morocco"), can non-deterministically choose
    ``clarify`` over ``build_trip_packages``, in which case the refusal produced
    inside the build tool would never run. Checking here makes the refusal
    independent of how the model routes.

    Returns:
        ``(note, trip_request)`` when a match is named and unrecognized — the
        trip carries the cleaned match name when it can be copied. ``None`` when
        parsing or resolving fails, no match is named, the name is the
        ``"the match"`` placeholder, or the match is recognized.
    """
    try:
        intent = parse_intent(user_text)
    except Exception:  # noqa: BLE001 — must never break the turn
        return None

    request = getattr(intent, "trip_request", None)
    named = ""
    if request is not None:
        named = getattr(request, "match_name", "") or ""
    if not named or named == "the match":  # _find_match's fallback sentinel
        return None

    # Drop a trailing month that leaked into a team ("Morocco June" -> "Morocco").
    cleaned = _clean_match_name(named)
    try:
        grounding = resolve_match(cleaned)
    except Exception:  # noqa: BLE001
        return None
    if grounding.recognized or not grounding.note:
        return None

    # Carry the CLEANED name onto the trip so the trace drawer matches the
    # note; if the copy fails, the unmodified trip is returned instead.
    try:
        request = request.model_copy(update={"match_name": cleaned})
    except Exception:  # noqa: BLE001
        pass
    return grounding.note, request
# Lower-case full month names; a trailing word of a team name that matches one
# of these (after removing trailing dots) is treated as leaked date text.
_MONTH_TOKENS = {
    "january", "february", "march",
    "april", "may", "june",
    "july", "august", "september",
    "october", "november", "december",
}


def _clean_match_name(name: str) -> str:
    """Remove trailing month words from both teams of an 'A vs B' match name.

    ``parse_intent``'s ``_find_match`` can append the following capitalized word
    to a team, so 'Canada vs Morocco June 18' may come back as 'Canada vs
    Morocco June'. This trims such trailing month tokens so the refusal note
    shows the real team names. Only trailing month words are dropped; names
    without the literal ``" vs "`` separator are returned untouched. Runs of
    whitespace inside each team are collapsed to single spaces as a side effect
    of the split/join.
    """
    separator = " vs "
    if separator not in name:
        return name

    def _without_trailing_months(team: str) -> str:
        words = team.split()
        keep = len(words)
        # Walk back from the end while the last kept word is a month name.
        while keep > 0 and words[keep - 1].lower().rstrip(".") in _MONTH_TOKENS:
            keep -= 1
        return " ".join(words[:keep])

    # Only the FIRST separator splits the two sides.
    first, _, second = name.partition(separator)
    return f"{_without_trailing_months(first)} vs {_without_trailing_months(second)}"
# Reply used by ``_precheck_chitchat`` when ``parse_intent`` supplies no
# clarifying question of its own.
_DEFAULT_GREETING = (
    "I'd love to plan your FIFA 2026 World Cup trip! Tell me where you're "
    "flying from (e.g. 'Montreal' or 'YUL'), which match you'd like to see, "
    "and the dates."
)
def _precheck_chitchat(user_text: str):
    """Deterministic reply for pure chit-chat / empty prompts, BEFORE the agent.

    A greeting or content-free message ("hi", "hello", "thanks", "test") has no
    origin, date, or match to plan around. Answering with ``parse_intent``'s
    clarifying question avoids waking Nemotron for a call that would only
    clarify anyway.

    Conservative: fires ONLY when at least two slots are missing AND no match is
    named, so any real (even partial) trip request still reaches the fixture
    validator / agent.

    The whole decision runs inside one ``try`` so that reading
    ``parsed.missing`` / ``parsed.question`` or calling ``_find_match`` cannot
    raise out of this guard; on any failure the normal path is taken.

    Args:
        user_text: The raw user message.

    Returns:
        ``(reply, missing_slots)`` for chit-chat, or ``None`` to proceed
        normally. Never raises ``Exception``.
    """
    try:
        parsed = parse_intent(user_text)
        # `missing` only ever holds origin and/or date; len >= 2 => both absent.
        missing = list(parsed.missing or ())
        if len(missing) < 2:
            return None
        named = _find_match(user_text)
        # NOTE(review): the fixture validator in this module treats "the match"
        # as _find_match's fallback sentinel; mirrored here so a placeholder is
        # not mistaken for a named match — confirm against matchday.intent.
        if named and named != "the match":
            return None
        return parsed.question or _DEFAULT_GREETING, missing
    except Exception as exc:  # noqa: BLE001 — must never break the turn
        logger.debug("chit-chat precheck skipped: %s", exc)
        return None
def _finalize_trace(trace: AgentTrace, trip, result, built_by: str) -> None:
    """Write the final intent, grounding, evidence, ranking and outcome to ``trace``.

    Best-effort: any ``Exception`` raised along the way is logged as a warning
    and swallowed, so trace bookkeeping can never abort a trip build (sections
    already written before the failure stay on the trace).

    Two shapes are recorded:

    * Unrecognized match (``result.match_unrecognized`` is set): grounding is
      marked not recognized and the outcome status is ``clarify``.
    * Otherwise: grounding details, evidence, and the ranking computed with the
      budget-tier weights (cost / buffer / transit) are recorded, and the
      outcome carries ``result.status``.

    Args:
        trace: Accumulator to populate in place.
        trip: Parsed trip request, or ``None``.
        result: Object returned by the package build.
        built_by: Mode label stored on the outcome.
    """
    try:
        trace.set_intent(trip)

        refusal = getattr(result, "match_unrecognized", "")
        if refusal:
            # Honest refusal: the named match isn't a real 2026 fixture.
            trace.set_grounding(recognized=False, note=refusal)
            trace.set_outcome(
                mode=built_by,
                status="clarify",
                notes=list(result.degradation_notices),
                model=_TRACE_MODEL,
                rounds=trace.rounds,
            )
            return

        grounding_note = getattr(result, "grounding_note", "")
        # Prefer the grounded name; read the trip's own name only when needed.
        shown_match = getattr(result, "grounded_match_name", "") or ""
        if not shown_match and trip is not None:
            shown_match = trip.match_name
        trace.set_grounding(
            recognized=True,
            corrected=bool(grounding_note),
            kickoff=getattr(result, "kickoff_local", "") or "",
            venue="BC Place",
            match_name=shown_match,
            note=grounding_note or "",
        )
        trace.set_evidence(evidence_from_result(result))

        # Imported here, inside the try, so an import failure is also swallowed.
        from matchday.scoring import BUDGET_WEIGHTS

        tier = "mid_range" if trip is None else trip.budget_tier
        weights = BUDGET_WEIGHTS.get(tier, BUDGET_WEIGHTS["mid_range"])
        weight_map = {
            "cost": weights.cost,
            "buffer": weights.buffer,
            "transit": weights.transit,
        }
        ranking_rows, _unused_records = ranking_from_result(result, tier, weight_map)
        trace.ranking = ranking_rows
        trace.set_outcome(
            mode=built_by,
            status=result.status,
            notes=list(result.degradation_notices),
            model=_TRACE_MODEL,
            rounds=trace.rounds,
        )
    except Exception as exc:  # noqa: BLE001
        logger.warning("trace finalization skipped: %s", exc)
# Cached per-boot preflight (N35). Fail-fast ONLY on genuinely-doomed config
# (missing SerpApi key — build_trip_packages cannot fetch live flights/hotels).
# Modal cold-start is NOT a hard failure: it streams via _pulse heartbeats and
# the loop degrades to the deterministic parser, so we don't gate on it.
# None until the first successful check, then True for the rest of the process.
_PREFLIGHT_OK: bool | None = None


def _preflight() -> tuple[bool, str]:
    """Check that the configuration needed for live search is present.

    Returns:
        ``(True, "")`` when ``SERPAPI_API_KEY`` is set to a non-empty value, or
        ``(False, reason)`` with a user-facing explanation when it is not.

    Only a positive result is cached: after one success the environment is not
    read again, while a failure is re-checked on every call.
    """
    global _PREFLIGHT_OK
    if not _PREFLIGHT_OK:
        if not os.environ.get("SERPAPI_API_KEY"):
            reason = (
                "SerpApi key is not set on this Space — live flight & hotel search is "
                "unavailable. Add SERPAPI_API_KEY in Settings → Secrets, then restart."
            )
            return False, reason
        _PREFLIGHT_OK = True
    return True, ""
async def _agent_explain(agent, user_text: str, trip: TripRequest, result) -> str:
    """Round 2 — ask Nemotron to compare the packages that were built.

    A four-message transcript is replayed to the model: the user's request, an
    assistant turn containing a ``build_trip_packages`` tool call with the trip
    as its JSON arguments, the tool's formatted output, and a final user turn
    carrying ``EXPLANATION_HINT``. The model is called with an empty tool list
    so it has to answer in text.

    Returns:
        The stripped reply text, or ``""`` when the model call fails or returns
        no text. Errors raised while building the transcript (serializing the
        trip, formatting the result) are not caught here.
    """
    call_id = "call_build"
    tool_name = "build_trip_packages"
    serialized_args = json.dumps(trip.model_dump(mode="json"))

    user_turn = {"role": "user", "content": user_text}
    assistant_turn = {
        "role": "assistant",
        "content": "",
        "tool_calls": [
            {
                "id": call_id,
                "type": "function",
                "function": {"name": tool_name, "arguments": serialized_args},
            },
        ],
    }
    tool_turn = {
        "role": "tool",
        "tool_call_id": call_id,
        "name": tool_name,
        "content": format_for_nemotron(result),
    }
    hint_turn = {"role": "user", "content": EXPLANATION_HINT}
    transcript = [user_turn, assistant_turn, tool_turn, hint_turn]

    try:
        # no tools → Nemotron must write text
        reply = await agent.run(transcript, tools=[])
        text = reply.get("text") or ""
        return text.strip()
    except Exception as exc:
        logger.warning("explanation round failed: %s", exc)
        return ""
@app.api(name="plan_trip", concurrency_limit=4, stream_every=0.5)
async def plan_trip(user_text: str) -> str:
    """Stream the agentic trip build as typed events (N12 + N10).

    This is an async generator. Every yielded item is one JSON string produced
    by ``_ev``; its ``type`` field is one of ``commentary``, ``progress``,
    ``trace``, ``greenlight``, ``clarify``, ``error`` or ``result``.

    Stages, in order (each early exit ends the stream):

    1. Preflight gate — an ``error`` event if the configuration is unusable.
    2. Chit-chat guard — an immediate ``clarify`` for content-free prompts.
    3. Fixture validator — an immediate ``clarify`` when the named match is
       not recognized.
    4. Agent loop (at most 3 attempts) when an agent could be created: the
       model may search the web, build packages, or answer directly.
    5. Deterministic fallback — ``parse_intent`` + ``build_trip_packages``
       when the agent produced neither a result nor an answer.
    6. Per-category progress, validation, optional explanation round, and the
       final ``result`` event carrying the rendered HTML and explanation.

    Args:
        user_text: The raw trip request typed by the user.

    Yields:
        JSON-encoded event strings, as described above.
    """
    ok, why = _preflight()  # N35 — fail-fast on doomed config (missing key)
    if not ok:
        yield _ev(type="error", text=f"⚠️ {why}")
        return
    # The visible Agent Trace accumulator (Best Agent proof). Populated live as
    # intent is extracted, the match is grounded, tools run, and packages are
    # ranked — emitted to the Evidence drawer in REAL TIME via `trace` events.
    trace = AgentTrace()
    built_by = "agent"  # flips to "deterministic" if the loop doesn't build
    yield _ev(type="commentary", text="Reading your trip request…")
    yield _ev(type="progress", step="read", status="done", text="Read your request")
    yield _ev(type="progress", step="extract", status="running", text="Understanding your trip")
    # ── Pre-agent chit-chat guard: a greeting / empty prompt ("hi", "thanks")
    # has no origin, date, or match. Reply deterministically and instantly
    # instead of waking Nemotron for a call that would only clarify — turns a
    # Modal cold-start wait into an immediate answer. Conservative: only fires
    # when nothing trip-related was said, so real (even partial) requests still
    # reach the fixture validator / agent.
    _chat = _precheck_chitchat(user_text)
    if _chat is not None:
        _chat_text, _chat_missing = _chat
        trace.set_intent(None, missing=_chat_missing)
        trace.set_outcome(
            mode="deterministic", status="clarify",
            notes=["Pre-agent chit-chat check: no trip details (origin / date / match) yet."],
            model=_TRACE_MODEL, rounds=0,
        )
        yield _ev(type="trace", data=trace.to_dict())
        yield _ev(type="progress", step="extract", status="done", text="Heard you")
        yield _ev(type="progress", step="ready", status="fallback", text="Tell me your trip")
        yield _ev(type="clarify", text=_chat_text)
        return
    # ── Generic pre-agent fixture validator (grounding honesty). Ground the named
    # match deterministically BEFORE the agent picks a tool: a non-real 2026
    # fixture is refused with the closest real alternatives and we stop, so the
    # refusal never depends on Nemotron choosing build_trip_packages over clarify.
    _pre = _precheck_unrecognized_match(user_text)
    if _pre is not None:
        _refusal_note, _pre_trip = _pre
        trace.set_intent(_pre_trip)
        trace.set_grounding(recognized=False, note=_refusal_note)
        trace.set_outcome(
            mode="deterministic", status="clarify",
            notes=["Pre-agent fixture check: named match is not a real 2026 fixture."],
            model=_TRACE_MODEL, rounds=0,
        )
        yield _ev(type="trace", data=trace.to_dict())  # grounding-refusal proof
        yield _ev(type="progress", step="extract", status="done", text="Match checked")
        yield _ev(type="progress", step="ready", status="fallback", text="Match not found")
        yield _ev(type="clarify", text=_refusal_note)
        return
    # `agent` stays None when USE_AGENT is off or construction fails; every
    # later agent step is guarded by `agent is not None`.
    agent = None
    if USE_AGENT:
        try:
            # Nemotron reasoning toggle (NVIDIA Nemotron Quest + Best Agent): the
            # official Nemotron-3-Nano usage guide serves complex planning turns
            # with thinking ON (chain-of-thought before the tool call). Default
            # OFF to preserve the verified fast tool-routing path; set
            # MATCHDAY_THINKING=1 on the Space to turn on reasoning for the
            # agent's decide/ground/explain turns.
            thinking = os.environ.get("MATCHDAY_THINKING", "").lower() in ("1", "true", "yes")
            agent = MatchDayAgent(thinking=thinking)
        except Exception as exc:
            logger.warning("agent init failed (%s); deterministic path.", exc)
    # ── Smart path: the bounded agent loop (K1). Nemotron UNDERSTANDS the
    # request, may GROUND itself with web_search (I6), may CLARIFY to capture
    # intent (P7), and calls build_trip_packages when ready. The loop validates
    # args, dedups, and self-corrects one malformed call (A4). No more bypass.
    messages: list[dict] = [{"role": "user", "content": user_text}]
    trip: TripRequest | None = None
    result = None  # TripPackageResult produced inside the loop's build call
    agent_text = ""  # a clarify question or direct answer from the Brain
    if agent is not None:
        yield _ev(type="commentary", text="🧠 Nemotron is understanding your request…")
        for attempt in range(3):  # cap grounding rounds (web_search → build)
            h = {}
            try:
                async for beat in _pulse(
                    run_agent_loop(agent, messages, trace=trace),
                    h,
                    "🧠 Nemotron is understanding your request & choosing tools",
                ):
                    yield beat
                res = h.get("result")
                # REAL-TIME trace: push the tool-call log to the drawer after
                # each agent decision so the user sees the multi-step reasoning
                # unfold (web_search → build_trip_packages), not just the end.
                if res is not None:
                    yield _ev(type="trace", data=trace.to_dict())
            except Exception as exc:
                logger.warning("agent loop attempt %d failed (%s).", attempt, exc)
                res = None
            # A failed or empty attempt ends the loop; the deterministic
            # fallback below takes over because `result` is still None.
            if res is None:
                break
            if res.type == "tool_called" and res.tool == "build_trip_packages":
                result = res.result.get("full_result")
                trip = res.result.get("trip")
                # Sync the display trip to the GROUNDED dates + canonical match
                # name (the match was re-centered on the real WC fixture inside
                # the tool), and surface any correction note to the user.
                if result is not None and getattr(result, "grounded_match_date", None) and trip is not None:
                    upd = {
                        "match_date": result.grounded_match_date,
                        "check_in": result.grounded_check_in,
                        "check_out": result.grounded_check_out,
                    }
                    if getattr(result, "grounded_match_name", ""):
                        upd["match_name"] = result.grounded_match_name
                    # Best-effort: if the copy fails, the un-synced trip is kept.
                    try:
                        trip = trip.model_copy(update=upd)
                    except Exception:
                        pass
                if result is not None and getattr(result, "grounding_note", ""):
                    yield _ev(type="commentary", text="📅 " + result.grounding_note)
                break
            if res.type == "tool_called" and res.tool == "web_search":
                # Brain grounded itself — thread the result back so it can build.
                tcid = f"call_ws_{attempt}"
                messages.append({
                    "role": "assistant", "content": "",
                    "tool_calls": [{
                        "id": tcid, "type": "function",
                        "function": {
                            "name": "web_search",
                            "arguments": json.dumps(res.result.get("query") or {}),
                        },
                    }],
                })
                # The serialized search result is cut to its first 1200 characters.
                messages.append({
                    "role": "tool", "tool_call_id": tcid, "name": "web_search",
                    "content": json.dumps(res.result, ensure_ascii=False)[:1200],
                })
                yield _ev(
                    type="commentary",
                    text="🔎 Grounded with a web search — now building your packages…",
                )
                continue
            if res.type == "final_answer":
                agent_text = res.text or ""
                break
            # fallback_to_deterministic → EXPLICIT + user-visible degrade. Never
            # silently swap in the deterministic path. Most commonly this is a
            # Modal cold-start timeout (see the agent_loop reason) — tell the user
            # honestly so the wait / fast-mode result is understood, not hidden.
            if res.type == "fallback_to_deterministic":
                yield _ev(
                    type="commentary",
                    text="🌡️ Nemotron is warming up on Modal (cold start) — "
                    "building your packages in fast mode now, then I'll "
                    "still compare them live.",
                )
            break
    # If the agent's build already flagged an unrecognized match, surface it as a
    # clarification with real alternatives (Best-Agent honesty: never silently
    # plan a trip around a nonexistent fixture like "Canada vs Morocco").
    if result is not None and getattr(result, "match_unrecognized", ""):
        _finalize_trace(trace, trip, result, built_by)
        yield _ev(type="trace", data=trace.to_dict())  # grounding-refusal proof
        yield _ev(type="progress", step="ready", status="fallback", text="Match not found")
        yield _ev(type="clarify", text=result.match_unrecognized)
        return
    # ── Deterministic fallback (K3): parse intent + build directly. Used when
    # the agent is unavailable, hedged to a non-build answer, or the loop failed.
    if result is None and not agent_text:
        parsed = parse_intent(user_text)
        built_by = "deterministic"
        trace.set_intent(parsed.trip_request, missing=parsed.missing)
        yield _ev(type="trace", data=trace.to_dict())  # show extracted intent live
        trip = parsed.trip_request
        if trip is not None:
            yield _ev(type="greenlight", text=trip.summary())
            yield _ev(
                type="commentary",
                text="✈️ Scanning airlines · 🏨 Finding hotels near BC Place · 🌤️ Checking the match-day forecast…",
            )
            yield _ev(type="progress", step="flights", status="running")
            yield _ev(type="progress", step="hotels", status="running")
            yield _ev(type="progress", step="weather", status="running")
            yield _ev(type="progress", step="nearby", status="running")
            hb = {}
            _db_t0 = time.monotonic()  # start of the build, for duration_ms below
            try:
                async for beat in _pulse(
                    build_trip_packages(trip),
                    hb,
                    "✈️ Scanning airlines · 🏨 hotels near BC Place · 🌤️ weather",
                ):
                    yield beat
                result = hb["result"]
            except Exception as exc:
                trace.set_outcome(mode="deterministic", status="error",
                                  notes=[f"build_trip_packages raised: {exc}"],
                                  model=_TRACE_MODEL)
                yield _ev(type="trace", data=trace.to_dict())  # honest error in the trace
                yield _ev(type="error", text=f"⚠️ {exc}")
                return
            # The deterministic path bypasses the loop, so record its single
            # build tool call here (honest: same canonical build_trip_packages).
            if result is not None:
                _db_dur = int((time.monotonic() - _db_t0) * 1000)
                trace.add_tool_call(ToolCallRecord(
                    name="build_trip_packages",
                    args={"mode": "deterministic", **(trip.model_dump(mode="json") if hasattr(trip, "model_dump") else {})},
                    status="ok" if result.packages else "failed",
                    duration_ms=_db_dur,
                    detail=f"{len(result.packages)} package(s) scored",
                    sources=result_source_labels(result),
                ))
                yield _ev(type="trace", data=trace.to_dict())
            # Sync the display trip to the GROUNDED dates so greenlight +
            # itinerary match the packages (match was re-centered on the real
            # WC fixture inside build_trip_packages). Honesty: show the note.
            if getattr(result, "grounded_match_date", None):
                upd = {
                    "match_date": result.grounded_match_date,
                    "check_in": result.grounded_check_in,
                    "check_out": result.grounded_check_out,
                }
                if getattr(result, "grounded_match_name", ""):
                    upd["match_name"] = result.grounded_match_name
                # Best-effort: if the copy fails, the un-synced trip is kept.
                try:
                    trip = trip.model_copy(update=upd)
                except Exception:
                    pass
            if getattr(result, "match_unrecognized", ""):
                _finalize_trace(trace, trip, result, built_by)
                yield _ev(type="trace", data=trace.to_dict())  # grounding-refusal proof
                yield _ev(type="progress", step="ready", status="fallback", text="Match not found")
                yield _ev(type="clarify", text=result.match_unrecognized)
                return
            if getattr(result, "grounding_note", ""):
                yield _ev(type="commentary", text="📅 " + result.grounding_note)
        else:
            # The parser could not produce a trip request: ask for the gap.
            trace.set_outcome(mode="deterministic", status="clarify",
                              notes=["Intent incomplete — asked for the missing detail."],
                              model=_TRACE_MODEL)
            yield _ev(type="trace", data=trace.to_dict())  # show the missing slots honestly
            yield _ev(type="progress", step="ready", status="fallback", text="Need a detail from you")
            yield _ev(
                type="clarify",
                text=parsed.question
                or "Tell me where you're flying from and which match you want to see.",
            )
            return
    # Clarify / direct answer from the Brain (no packages to show).
    if result is None:
        trace.set_outcome(mode="agent", status="clarify",
                          notes=["Brain answered without building packages."],
                          model=_TRACE_MODEL, rounds=trace.rounds)
        yield _ev(type="trace", data=trace.to_dict())  # the reasoning that led to the question
        yield _ev(type="progress", step="ready", status="fallback", text="Need a detail from you")
        yield _ev(type="clarify", text=agent_text)
        return
    # ── Honest per-category progress from the REAL dispatch outcome (N10/I1):
    # each data step is done/fallback according to build_trip_packages' own
    # degradation notices — never a cosmetic timer.
    if trip is not None:
        yield _ev(type="progress", step="extract", status="done", text="Trip details captured")
    yield _ev(type="progress", step="flights", status=_notice_status(result, "flight"))
    yield _ev(type="progress", step="hotels", status=_notice_status(result, "hotels unavailable", "hotels,"))
    yield _ev(type="progress", step="weather", status=_notice_status(result, "weather"))
    yield _ev(type="progress", step="nearby", status=_notice_status(result, "amenities", "nearby"))
    yield _ev(type="progress", step="score", status="done" if result.packages else "fallback")
    yield _ev(type="progress", step="itinerary", status="running", text="Building itinerary")
    yield _ev(type="progress", step="links", status="running", text="Preparing links")
    # ── Self-check / validation gate (core agentic #8 self-check / #10 safe
    # recommendation): verify the built output before recommending — no invented
    # match, sane price band, every flight lands before kickoff, stay brackets
    # the match day. Pure + defensive; surfaces honest pass/warn/fail, never blocks.
    _val = validate_packages(result, trip)
    trace.set_validation(_val)
    # Tally the checks by their "status" field for the one-line summary.
    _vpass = sum(1 for c in _val if c.get("status") == "pass")
    _vfail = sum(1 for c in _val if c.get("status") == "fail")
    _vwarn = sum(1 for c in _val if c.get("status") == "warn")
    _vtxt = (f"Validated {_vpass}/{len(_val)}"
             + (" · flagged" if (_vfail or _vwarn) else " · all clear"))
    yield _ev(type="progress", step="validate", status="done", text=_vtxt)
    yield _ev(type="trace", data=trace.to_dict())  # stream the ⑥ section live
    # ── Render. greenlight confirms the captured intent just before the packages.
    if trip is not None:
        yield _ev(type="greenlight", text=trip.summary())
    yield _ev(type="commentary", text="🗺️ Nemotron is comparing your 3 packages…")
    # The explanation is optional: it stays empty when there is no agent or
    # when the explanation round fails.
    explanation = ""
    if agent is not None:
        he = {}
        try:
            async for beat in _pulse(
                _agent_explain(agent, user_text, trip, result),
                he,
                "🗺️ Nemotron is comparing your 3 packages…",
            ):
                yield beat
            explanation = he["result"]
        except Exception as exc:
            logger.warning("explanation round failed (%s).", exc)
            explanation = ""
    # Final: the full Layla-competitive render (status + cards + map + timeline).
    # leaflet_preloaded=True → the frontend already loaded Leaflet in <head>; the
    # map's inline init script is re-run after injection (see index.html).
    _finalize_trace(trace, trip, result, built_by)
    yield _ev(type="trace", data=trace.to_dict())  # final, complete trace for the drawer
    yield _ev(type="progress", step="itinerary", status="done")
    yield _ev(type="progress", step="links", status="done")
    yield _ev(type="progress", step="ready", status="done", text="Your packages are ready")
    yield _ev(type="result", html=render_full(result, trip, leaflet_preloaded=True, trace=trace), explanation=explanation)
@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Serve the custom frontend: return the UTF-8 text of ``index.html`` for ``/``.

    The file is read from disk on every request.
    """
    return INDEX_HTML.read_text(encoding="utf-8")
# Direct-run entry point: launch the server on the port given by the PORT
# environment variable (7860 when unset), with error display enabled.
if __name__ == "__main__":
    app.launch(server_port=int(os.environ.get("PORT", "7860")), show_error=True)
|