File size: 32,094 Bytes
002831b
46f87c1
002831b
 
 
 
 
 
 
 
 
 
433072e
 
1af0b6e
 
 
 
 
 
46f87c1
002831b
 
033fbb4
002831b
 
46f87c1
 
5ad9595
002831b
 
46f87c1
002831b
46f87c1
 
002831b
 
 
 
9f26f45
02b8de5
 
 
 
 
 
ea60d1a
02b8de5
20fd636
002831b
15e0487
002831b
 
 
 
 
 
 
 
 
 
 
 
 
02b8de5
 
 
002831b
033fbb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
002831b
 
 
 
 
 
 
5ad9595
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342f056
 
 
 
 
 
 
 
 
 
 
15e0487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20fd636
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02b8de5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6638954
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
002831b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6638954
 
 
 
 
02b8de5
 
 
 
 
 
002831b
342f056
 
002831b
20fd636
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15e0487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
002831b
 
 
4d052ec
 
 
 
 
 
 
 
002831b
 
 
9f26f45
 
 
 
 
002831b
9f26f45
 
 
 
 
 
 
 
 
02b8de5
9f26f45
 
 
 
 
02b8de5
 
 
 
 
9f26f45
 
 
 
 
 
 
 
 
 
4d052ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f26f45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f40be87
 
 
 
 
 
 
 
 
 
 
9f26f45
 
4d052ec
 
 
 
02b8de5
 
4d052ec
 
 
 
9f26f45
 
 
002831b
02b8de5
 
 
9f26f45
 
 
 
 
 
 
342f056
 
 
 
9f26f45
02b8de5
9f26f45
 
 
 
 
 
 
 
 
02b8de5
 
 
 
9f26f45
 
02b8de5
 
 
 
 
 
 
 
 
 
 
 
 
4d052ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02b8de5
 
4d052ec
 
 
 
 
9f26f45
02b8de5
 
 
 
342f056
9f26f45
 
 
 
 
 
002831b
9f26f45
 
02b8de5
 
 
 
342f056
9f26f45
002831b
 
342f056
 
 
 
 
 
 
 
 
 
 
 
 
ea60d1a
 
 
 
 
 
 
 
 
 
 
 
 
 
9f26f45
 
 
 
002831b
 
5ad9595
 
 
 
 
9f26f45
5ad9595
 
 
 
 
 
002831b
 
 
 
02b8de5
 
342f056
 
 
02b8de5
002831b
 
 
 
 
 
 
46f87c1
 
002831b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
"""MatchDay — HF Space entry point (gradio.Server mode, N1 / Off-Brand).

The Space runs THIS file. It is a `gradio.Server` app: a fully custom
``index.html`` frontend is served at ``/`` while ``@app.api("plan_trip")`` is an
async generator that streams N12-typed JSON events through Gradio's queue (SSE),
so the frontend updates live as Nemotron decides → Python scores → Nemotron
explains. This is the Off-Brand path: a bespoke UI powered by Gradio's backend
(queuing, concurrency, Spaces hosting) — not stock Gradio components.

Brain + Hands: Nemotron (on Modal) never calls an API or names a price; Python
executes every call and scores every value. Every figure carries provenance.

``matchday/app.py`` is a compatibility shim that imports and launches this same
app, so ``python3 -m matchday.app`` runs the identical non-decorative path.

Reference patterns (3-codebase study, see MATCHDAY_UNCONSTRAINED_PLAN.md):
  - N1 gradio.Server custom-frontend architecture (Off-Brand badge):
    https://huggingface.co/blog/introducing-gradio-server ("Why @app.api()…").
  - N35 preflight gate (fail-fast on missing SerpApi key): Claude Code
    utils/preflightChecks.tsx:1-60.
"""
from __future__ import annotations

import asyncio
import json
import logging
import os
import sys
import time
from datetime import date
from pathlib import Path

# Repo-root importability when the Space runs this file directly.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from fastapi.responses import HTMLResponse  # noqa: E402
from gradio import Server  # noqa: E402

from matchday.agent import MatchDayAgent  # noqa: E402
from matchday.agent_loop import run_agent_loop  # noqa: E402
from matchday.agent_trace import (  # noqa: E402
    AgentTrace,
    ToolCallRecord,
    evidence_from_result,
    ranking_from_result,
    result_source_labels,
    validate_packages,
)
from matchday.intent import parse_intent, _find_match  # noqa: E402
from matchday.models import TripRequest  # noqa: E402
from matchday.wc2026 import resolve_match  # noqa: E402
from matchday.prompts import EXPLANATION_HINT  # noqa: E402
from matchday.render import render_full  # noqa: E402
from matchday.trip_tool import build_trip_packages, format_for_nemotron  # noqa: E402

# Module-level logger; the root logger is configured at INFO so the startup
# warmup and agent-loop diagnostics emitted below are visible.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Nemotron primary, deterministic fallback. Flip to False to force the
# deterministic path (fast demo / Modal-down insurance).
USE_AGENT = True

# Directory containing this file, and the custom frontend page that lives
# next to it.
HERE = Path(__file__).parent
INDEX_HTML = HERE / "index.html"
# Model label shown in the Agent Trace drawer (Best Agent provenance). Honest:
# 30B total / ~3B active MoE — the ≤32B-cap qualifier is the 30B total weight.
_TRACE_MODEL = "Nemotron-3-Nano-30B-A3B · 3B-active MoE · Modal A100"


async def _warm_nemotron() -> None:
    """Send one throwaway prompt to Nemotron so the container is hot.

    Meant to run in the background at Space startup so the first real user
    query does not sit behind a Modal cold start. The ping is capped at 240
    seconds. Any ``Exception`` (agent construction, the call itself, or the
    timeout) is logged at INFO level and swallowed — a failed warmup must not
    break boot.
    """
    warmup_messages = [{"role": "user", "content": "warmup ping"}]
    try:
        ping = MatchDayAgent().run(warmup_messages)
        await asyncio.wait_for(ping, timeout=240)
        logger.info("startup warmup ping completed — Nemotron container is hot")
    except Exception as exc:  # noqa: BLE001 — best-effort, must not break boot
        # Truncate the repr so a long traceback-ish message can't flood the log.
        reason = repr(exc)[:80]
        logger.info("startup warmup ping ended early (%s)", reason)


# Strong references to fire-and-forget background tasks. The event loop keeps
# only weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes.
_BACKGROUND_TASKS: set[asyncio.Task] = set()


async def _startup_warmup() -> None:
    """Server startup hook — schedule the warmup without blocking boot.

    The warmup runs as a background task; this hook returns immediately. The
    task is parked in ``_BACKGROUND_TASKS`` until it completes so it cannot be
    collected mid-flight, and removes itself from the set when done.
    """
    task = asyncio.create_task(_warm_nemotron())
    _BACKGROUND_TASKS.add(task)
    # Drop the reference as soon as the task finishes so the set stays empty.
    task.add_done_callback(_BACKGROUND_TASKS.discard)


# The gradio.Server application object; `_startup_warmup` is registered as a
# startup hook so the Nemotron warmup is scheduled when the server boots.
app = Server(on_startup=[_startup_warmup])


def _ev(**payload) -> str:
    """Encode one typed stream event (N12) as a JSON string.

    The keyword arguments become the keys of the JSON object. Non-ASCII
    characters (emoji, accents) are emitted verbatim rather than as ``\\u``
    escapes.
    """
    encoded = json.dumps(payload, ensure_ascii=False)
    return encoded


async def _pulse(coro, holder, message, interval: int = 9):
    """Run ``coro`` to completion, yielding a commentary heartbeat every
    ``interval`` seconds (carrying elapsed seconds) so the SSE stream is never
    silent during a long Modal cold-start or SerpApi phase. Stashes the result
    in ``holder['result']``; re-raises if ``coro`` raised. Usage::

        h = {}
        async for beat in _pulse(coro, h, msg):
            yield beat
        value = h["result"]

    If the consumer stops iterating early — it is cancelled, or the generator
    is closed while suspended at a heartbeat — the wrapped task is cancelled
    too, so abandoned requests do not keep running in the background.
    """
    task = asyncio.ensure_future(coro)
    start = time.monotonic()
    try:
        while True:
            # asyncio.wait returns on timeout WITHOUT cancelling the task, which
            # is what lets us emit a heartbeat and then keep waiting.
            done, _ = await asyncio.wait({task}, timeout=interval)
            if task in done:
                # .result() re-raises the coroutine's exception, if any.
                holder["result"] = task.result()
                return
            yield _ev(type="commentary", text=f"{message} ({int(time.monotonic() - start)}s)")
    finally:
        # Normal completion and failure both leave the task done; this only
        # fires when the generator is abandoned mid-flight.
        if not task.done():
            task.cancel()


def _notice_status(result, *keywords: str) -> str:
    """Classify one data category as ``done`` or ``fallback``.

    The degradation notices on ``result`` are joined into a single lowercase
    string; if any of ``keywords`` occurs in it as a substring the category is
    reported as ``fallback``, otherwise ``done``. Missing or empty notices mean
    ``done``. Keywords are matched as given, so callers pass them lowercase.
    """
    notices = result.degradation_notices or ""
    haystack = " ".join(notices).lower()
    for keyword in keywords:
        if keyword in haystack:
            return "fallback"
    return "done"


def _precheck_unrecognized_match(user_text: str):
    """Ground the named match before the agent runs (grounding honesty).

    The request is parsed deterministically and the match it names is resolved
    with ``resolve_match``. When the resolver reports the match as not
    recognized and supplies a note, this returns ``(note, trip_request)`` so
    the caller can refuse with that note instead of invoking the agent loop.

    Rationale: Nemotron routes its own tools and, for some non-fixture
    matchups (e.g. "Canada vs Morocco"), may pick ``clarify`` rather than
    ``build_trip_packages``; the refusal produced inside the build tool would
    then never surface. Checking up front makes the refusal independent of how
    the model routes.

    Returns ``None`` in every other case: parsing failed, no match was named
    (or only the ``"the match"`` placeholder), resolution failed, the match is
    recognized, or the resolver gave no note. Errors from parsing, resolving
    and copying the trip are swallowed.
    """
    try:
        intent = parse_intent(user_text)
    except Exception:  # noqa: BLE001 — must never break the turn
        return None

    request = getattr(intent, "trip_request", None)
    named = ""
    if request is not None:
        named = getattr(request, "match_name", "") or ""
    # "the match" is _find_match's fallback sentinel, i.e. nothing was named.
    if not named or named == "the match":
        return None

    # Drop a trailing month that leaked into a team ("Morocco June" -> "Morocco").
    named = _clean_match_name(named)
    try:
        grounding = resolve_match(named)
    except Exception:  # noqa: BLE001
        return None
    if grounding.recognized or not grounding.note:
        return None

    # Carry the CLEANED name onto the trip so the trace drawer matches the
    # note; if the copy fails, the uncleaned trip is returned instead.
    try:
        request = request.model_copy(update={"match_name": named})
    except Exception:  # noqa: BLE001
        pass
    return grounding.note, request


# Lowercase full month names; used to spot a month that leaked into a team name.
_MONTH_TOKENS = {
    "january", "february", "march",
    "april", "may", "june",
    "july", "august", "september",
    "october", "november", "december",
}


def _clean_match_name(name: str) -> str:
    """Remove trailing month words from both sides of an 'A vs B' match name.

    ``parse_intent``'s ``_find_match`` can greedily glue the next capitalized
    word onto a team, so 'Canada vs Morocco June 18' may arrive here as
    'Canada vs Morocco June'. Stripping trailing month words (case-insensitive,
    an optional trailing '.' ignored) restores the real team names.

    Only the first ' vs ' separates the two sides. A name without ' vs ' is
    returned unchanged. Each side is re-joined with single spaces, and
    everything other than trailing month words is kept.
    """
    home, separator, away = name.partition(" vs ")
    if not separator:
        return name

    cleaned_sides = []
    for side in (home, away):
        words = side.split()
        keep = len(words)
        # Walk back from the end while the last kept word is a month name.
        while keep > 0 and words[keep - 1].lower().rstrip(".") in _MONTH_TOKENS:
            keep -= 1
        cleaned_sides.append(" ".join(words[:keep]))
    return " vs ".join(cleaned_sides)


# Fallback reply for chit-chat prompts: `_precheck_chitchat` returns this text
# when `parse_intent` did not supply a clarifying question of its own.
_DEFAULT_GREETING = (
    "I'd love to plan your FIFA 2026 World Cup trip! Tell me where you're "
    "flying from (e.g. 'Montreal' or 'YUL'), which match you'd like to see, "
    "and the dates."
)


def _precheck_chitchat(user_text: str):
    """Answer greetings / content-free prompts instantly, before the agent.

    A message such as "hi", "thanks" or "test" carries no origin, date or
    match to plan around. Replying with ``parse_intent``'s clarifying question
    (or ``_DEFAULT_GREETING`` when there is none) avoids waking Nemotron — and
    possibly waiting out a Modal cold start — for a call that would only ask
    the same question.

    Deliberately conservative: it fires only when the parser reports at least
    two missing slots AND ``_find_match`` finds no match in the text, so any
    real (even partial) trip request still reaches the fixture validator and
    the agent.

    Returns ``(reply, missing_slots)`` for chit-chat, ``None`` otherwise. A
    failure inside ``parse_intent`` is swallowed and treated as "not chit-chat".
    """
    try:
        intent = parse_intent(user_text)
    except Exception:  # noqa: BLE001 — must never break the turn
        return None

    # `missing` only ever holds origin and/or date; len >= 2 => both absent.
    both_slots_missing = len(intent.missing) >= 2
    if not both_slots_missing or _find_match(user_text):
        return None

    reply = intent.question or _DEFAULT_GREETING
    return reply, list(intent.missing)


def _finalize_trace(trace: AgentTrace, trip, result, built_by: str) -> None:
    """Write the closing intent / grounding / evidence / ranking / outcome.

    Two shapes are produced:

    * ``result.match_unrecognized`` is set — the grounding is recorded as not
      recognized with that note, the outcome status is ``"clarify"``, and
      nothing else is filled in.
    * otherwise — the grounding is recorded as recognized (flagged as
      corrected when ``result.grounding_note`` is non-empty), evidence is
      attached, and the ranking is computed from the budget-tier weights
      (cost / buffer / transit) so a reader can see HOW the order was decided.
      The tier defaults to ``"mid_range"`` when there is no trip or the tier
      has no weights entry.

    Best-effort: any exception is logged as a warning and swallowed, so a
    trace problem never aborts a trip build.
    """
    try:
        trace.set_intent(trip)

        refusal = getattr(result, "match_unrecognized", "")
        if refusal:
            # Honest refusal: the named match isn't a real 2026 fixture.
            trace.set_grounding(recognized=False, note=refusal)
            trace.set_outcome(
                mode=built_by,
                status="clarify",
                notes=list(result.degradation_notices),
                model=_TRACE_MODEL,
                rounds=trace.rounds,
            )
            return

        correction_note = getattr(result, "grounding_note", "")
        trace.set_grounding(
            recognized=True,
            corrected=bool(correction_note),
            kickoff=getattr(result, "kickoff_local", "") or "",
            venue="BC Place",
            # Prefer the grounded name; fall back to what the trip carried.
            match_name=(
                getattr(result, "grounded_match_name", "")
                or (trip.match_name if trip is not None else "")
            ),
            note=correction_note or "",
        )
        trace.set_evidence(evidence_from_result(result))

        from matchday.scoring import BUDGET_WEIGHTS

        tier = "mid_range" if trip is None else trip.budget_tier
        weights = BUDGET_WEIGHTS.get(tier, BUDGET_WEIGHTS["mid_range"])
        weight_map = {
            "cost": weights.cost,
            "buffer": weights.buffer,
            "transit": weights.transit,
        }
        ranking, _unused_records = ranking_from_result(result, tier, weight_map)
        trace.ranking = ranking

        trace.set_outcome(
            mode=built_by,
            status=result.status,
            notes=list(result.degradation_notices),
            model=_TRACE_MODEL,
            rounds=trace.rounds,
        )
    except Exception as exc:  # noqa: BLE001
        logger.warning("trace finalization skipped: %s", exc)


# Cached per-boot preflight (N35). Fail-fast ONLY on genuinely-doomed config
# (missing SerpApi key — build_trip_packages cannot fetch live flights/hotels).
# Modal cold-start is NOT a hard failure: it streams via _pulse heartbeats and
# the loop degrades to the deterministic parser, so we don't gate on it.
# Set to True after the first successful check; never set to False, so a
# failing check is re-evaluated on every call.
_PREFLIGHT_OK: bool | None = None


def _preflight() -> tuple[bool, str]:
    """Check that the SerpApi key is configured.

    Returns ``(True, "")`` when ``SERPAPI_API_KEY`` is set to a non-empty
    value, otherwise ``(False, reason)`` with a user-facing explanation. Only
    a passing check is cached: once it has passed, later calls return success
    without reading the environment again.
    """
    global _PREFLIGHT_OK
    if not _PREFLIGHT_OK:
        key_present = bool(os.environ.get("SERPAPI_API_KEY"))
        if not key_present:
            reason = (
                "SerpApi key is not set on this Space — live flight & hotel search is "
                "unavailable. Add SERPAPI_API_KEY in Settings → Secrets, then restart."
            )
            return False, reason
        _PREFLIGHT_OK = True
    return True, ""


async def _agent_explain(agent, user_text: str, trip: TripRequest, result) -> str:
    """Round 2 — ask Nemotron to compare the packages that were built.

    A synthetic conversation is replayed to the model: the user's request, an
    assistant turn that "called" ``build_trip_packages`` with the trip as its
    arguments, the tool result rendered by ``format_for_nemotron``, and a final
    user turn carrying ``EXPLANATION_HINT``. The model is run with an empty
    tool list so it has to answer in text.

    Returns the stripped reply text, or ``""`` when the model call fails or
    yields no text. Note that only the model call is guarded: an error while
    serializing ``trip`` or formatting ``result`` propagates to the caller.
    """
    call_id = "call_build"
    tool_name = "build_trip_packages"

    serialized_trip = json.dumps(trip.model_dump(mode="json"))
    user_turn = {"role": "user", "content": user_text}
    assistant_turn = {
        "role": "assistant",
        "content": "",
        "tool_calls": [{
            "id": call_id,
            "type": "function",
            "function": {"name": tool_name, "arguments": serialized_trip},
        }],
    }
    tool_turn = {
        "role": "tool",
        "tool_call_id": call_id,
        "name": tool_name,
        "content": format_for_nemotron(result),
    }
    hint_turn = {"role": "user", "content": EXPLANATION_HINT}
    convo = [user_turn, assistant_turn, tool_turn, hint_turn]

    try:
        # No tools offered → Nemotron must write text.
        reply = await agent.run(convo, tools=[])
        text = reply.get("text") or ""
        return text.strip()
    except Exception as exc:
        logger.warning("explanation round failed: %s", exc)
        return ""


@app.api(name="plan_trip", concurrency_limit=4, stream_every=0.5)
async def plan_trip(user_text: str) -> str:
    """Stream the agentic trip build as typed events (N12 + N10).

    Yields: commentary (progress beats, sent immediately) → greenlight
    (parsed trip) | clarify | error → result (full cards+map+timeline render
    + Nemotron's explanation). Falls back to the deterministic parser if the
    agent is unavailable or hedges.
    """
    ok, why = _preflight()  # N35 — fail-fast on doomed config (missing key)
    if not ok:
        yield _ev(type="error", text=f"⚠️ {why}")
        return

    # The visible Agent Trace accumulator (Best Agent proof). Populated live as
    # intent is extracted, the match is grounded, tools run, and packages are
    # ranked — emitted to the Evidence drawer in REAL TIME via `trace` events.
    trace = AgentTrace()
    built_by = "agent"  # flips to "deterministic" if the loop doesn't build

    yield _ev(type="commentary", text="Reading your trip request…")
    yield _ev(type="progress", step="read", status="done", text="Read your request")
    yield _ev(type="progress", step="extract", status="running", text="Understanding your trip")

    # ── Pre-agent chit-chat guard: a greeting / empty prompt ("hi", "thanks")
    # has no origin, date, or match. Reply deterministically and instantly
    # instead of waking Nemotron for a call that would only clarify — turns a
    # Modal cold-start wait into an immediate answer. Conservative: only fires
    # when nothing trip-related was said, so real (even partial) requests still
    # reach the fixture validator / agent.
    _chat = _precheck_chitchat(user_text)
    if _chat is not None:
        _chat_text, _chat_missing = _chat
        trace.set_intent(None, missing=_chat_missing)
        trace.set_outcome(
            mode="deterministic", status="clarify",
            notes=["Pre-agent chit-chat check: no trip details (origin / date / match) yet."],
            model=_TRACE_MODEL, rounds=0,
        )
        yield _ev(type="trace", data=trace.to_dict())
        yield _ev(type="progress", step="extract", status="done", text="Heard you")
        yield _ev(type="progress", step="ready", status="fallback", text="Tell me your trip")
        yield _ev(type="clarify", text=_chat_text)
        return

    # ── Generic pre-agent fixture validator (grounding honesty). Ground the named
    # match deterministically BEFORE the agent picks a tool: a non-real 2026
    # fixture is refused with the closest real alternatives and we stop, so the
    # refusal never depends on Nemotron choosing build_trip_packages over clarify.
    _pre = _precheck_unrecognized_match(user_text)
    if _pre is not None:
        _refusal_note, _pre_trip = _pre
        trace.set_intent(_pre_trip)
        trace.set_grounding(recognized=False, note=_refusal_note)
        trace.set_outcome(
            mode="deterministic", status="clarify",
            notes=["Pre-agent fixture check: named match is not a real 2026 fixture."],
            model=_TRACE_MODEL, rounds=0,
        )
        yield _ev(type="trace", data=trace.to_dict())  # grounding-refusal proof
        yield _ev(type="progress", step="extract", status="done", text="Match checked")
        yield _ev(type="progress", step="ready", status="fallback", text="Match not found")
        yield _ev(type="clarify", text=_refusal_note)
        return

    agent = None
    if USE_AGENT:
        try:
            # Nemotron reasoning toggle (NVIDIA Nemotron Quest + Best Agent): the
            # official Nemotron-3-Nano usage guide serves complex planning turns
            # with thinking ON (chain-of-thought before the tool call). Default
            # OFF to preserve the verified fast tool-routing path; set
            # MATCHDAY_THINKING=1 on the Space to turn on reasoning for the
            # agent's decide/ground/explain turns.
            thinking = os.environ.get("MATCHDAY_THINKING", "").lower() in ("1", "true", "yes")
            agent = MatchDayAgent(thinking=thinking)
        except Exception as exc:
            logger.warning("agent init failed (%s); deterministic path.", exc)

    # ── Smart path: the bounded agent loop (K1). Nemotron UNDERSTANDS the
    # request, may GROUND itself with web_search (I6), may CLARIFY to capture
    # intent (P7), and calls build_trip_packages when ready. The loop validates
    # args, dedups, and self-corrects one malformed call (A4). No more bypass.
    messages: list[dict] = [{"role": "user", "content": user_text}]
    trip: TripRequest | None = None
    result = None          # TripPackageResult produced inside the loop's build call
    agent_text = ""        # a clarify question or direct answer from the Brain

    if agent is not None:
        yield _ev(type="commentary", text="🧠 Nemotron is understanding your request…")
        for attempt in range(3):  # cap grounding rounds (web_search → build)
            h = {}
            try:
                async for beat in _pulse(
                    run_agent_loop(agent, messages, trace=trace),
                    h,
                    "🧠 Nemotron is understanding your request & choosing tools",
                ):
                    yield beat
                res = h.get("result")
                # REAL-TIME trace: push the tool-call log to the drawer after
                # each agent decision so the user sees the multi-step reasoning
                # unfold (web_search → build_trip_packages), not just the end.
                if res is not None:
                    yield _ev(type="trace", data=trace.to_dict())
            except Exception as exc:
                logger.warning("agent loop attempt %d failed (%s).", attempt, exc)
                res = None

            if res is None:
                break

            if res.type == "tool_called" and res.tool == "build_trip_packages":
                result = res.result.get("full_result")
                trip = res.result.get("trip")
                # Sync the display trip to the GROUNDED dates + canonical match
                # name (the match was re-centered on the real WC fixture inside
                # the tool), and surface any correction note to the user.
                if result is not None and getattr(result, "grounded_match_date", None) and trip is not None:
                    upd = {
                        "match_date": result.grounded_match_date,
                        "check_in": result.grounded_check_in,
                        "check_out": result.grounded_check_out,
                    }
                    if getattr(result, "grounded_match_name", ""):
                        upd["match_name"] = result.grounded_match_name
                    try:
                        trip = trip.model_copy(update=upd)
                    except Exception:
                        pass
                if result is not None and getattr(result, "grounding_note", ""):
                    yield _ev(type="commentary", text="📅 " + result.grounding_note)
                break

            if res.type == "tool_called" and res.tool == "web_search":
                # Brain grounded itself — thread the result back so it can build.
                tcid = f"call_ws_{attempt}"
                messages.append({
                    "role": "assistant", "content": "",
                    "tool_calls": [{
                        "id": tcid, "type": "function",
                        "function": {
                            "name": "web_search",
                            "arguments": json.dumps(res.result.get("query") or {}),
                        },
                    }],
                })
                messages.append({
                    "role": "tool", "tool_call_id": tcid, "name": "web_search",
                    "content": json.dumps(res.result, ensure_ascii=False)[:1200],
                })
                yield _ev(
                    type="commentary",
                    text="🔎 Grounded with a web search — now building your packages…",
                )
                continue

            if res.type == "final_answer":
                agent_text = res.text or ""
                break
            # fallback_to_deterministic → EXPLICIT + user-visible degrade. Never
            # silently swap in the deterministic path. Most commonly this is a
            # Modal cold-start timeout (see the agent_loop reason) — tell the user
            # honestly so the wait / fast-mode result is understood, not hidden.
            if res.type == "fallback_to_deterministic":
                yield _ev(
                    type="commentary",
                    text="🌡️ Nemotron is warming up on Modal (cold start) — "
                         "building your packages in fast mode now, then I'll "
                         "still compare them live.",
                )
            break

    # If the agent's build already flagged an unrecognized match, surface it as a
    # clarification with real alternatives (Best-Agent honesty: never silently
    # plan a trip around a nonexistent fixture like "Canada vs Morocco").
    if result is not None and getattr(result, "match_unrecognized", ""):
        _finalize_trace(trace, trip, result, built_by)
        yield _ev(type="trace", data=trace.to_dict())  # grounding-refusal proof
        yield _ev(type="progress", step="ready", status="fallback", text="Match not found")
        yield _ev(type="clarify", text=result.match_unrecognized)
        return

    # ── Deterministic fallback (K3): parse intent + build directly. Used when
    # the agent is unavailable, hedged to a non-build answer, or the loop failed.
    if result is None and not agent_text:
        parsed = parse_intent(user_text)
        built_by = "deterministic"
        trace.set_intent(parsed.trip_request, missing=parsed.missing)
        yield _ev(type="trace", data=trace.to_dict())  # show extracted intent live
        trip = parsed.trip_request
        if trip is not None:
            yield _ev(type="greenlight", text=trip.summary())
            yield _ev(
                type="commentary",
                text="✈️ Scanning airlines · 🏨 Finding hotels near BC Place · 🌤️ Checking the match-day forecast…",
            )
            yield _ev(type="progress", step="flights", status="running")
            yield _ev(type="progress", step="hotels", status="running")
            yield _ev(type="progress", step="weather", status="running")
            yield _ev(type="progress", step="nearby", status="running")
            hb = {}
            _db_t0 = time.monotonic()
            try:
                async for beat in _pulse(
                    build_trip_packages(trip),
                    hb,
                    "✈️ Scanning airlines · 🏨 hotels near BC Place · 🌤️ weather",
                ):
                    yield beat
                result = hb["result"]
            except Exception as exc:
                trace.set_outcome(mode="deterministic", status="error",
                                  notes=[f"build_trip_packages raised: {exc}"],
                                  model=_TRACE_MODEL)
                yield _ev(type="trace", data=trace.to_dict())  # honest error in the trace
                yield _ev(type="error", text=f"⚠️ {exc}")
                return
            # The deterministic path bypasses the loop, so record its single
            # build tool call here (honest: same canonical build_trip_packages).
            if result is not None:
                _db_dur = int((time.monotonic() - _db_t0) * 1000)
                trace.add_tool_call(ToolCallRecord(
                    name="build_trip_packages",
                    args={"mode": "deterministic", **(trip.model_dump(mode="json") if hasattr(trip, "model_dump") else {})},
                    status="ok" if result.packages else "failed",
                    duration_ms=_db_dur,
                    detail=f"{len(result.packages)} package(s) scored",
                    sources=result_source_labels(result),
                ))
                yield _ev(type="trace", data=trace.to_dict())
            # Sync the display trip to the GROUNDED dates so greenlight +
            # itinerary match the packages (match was re-centered on the real
            # WC fixture inside build_trip_packages). Honesty: show the note.
            if getattr(result, "grounded_match_date", None):
                upd = {
                    "match_date": result.grounded_match_date,
                    "check_in": result.grounded_check_in,
                    "check_out": result.grounded_check_out,
                }
                if getattr(result, "grounded_match_name", ""):
                    upd["match_name"] = result.grounded_match_name
                try:
                    trip = trip.model_copy(update=upd)
                except Exception:
                    pass
            if getattr(result, "match_unrecognized", ""):
                _finalize_trace(trace, trip, result, built_by)
                yield _ev(type="trace", data=trace.to_dict())  # grounding-refusal proof
                yield _ev(type="progress", step="ready", status="fallback", text="Match not found")
                yield _ev(type="clarify", text=result.match_unrecognized)
                return
            if getattr(result, "grounding_note", ""):
                yield _ev(type="commentary", text="📅 " + result.grounding_note)
        else:
            trace.set_outcome(mode="deterministic", status="clarify",
                              notes=["Intent incomplete — asked for the missing detail."],
                              model=_TRACE_MODEL)
            yield _ev(type="trace", data=trace.to_dict())  # show the missing slots honestly
            yield _ev(type="progress", step="ready", status="fallback", text="Need a detail from you")
            yield _ev(
                type="clarify",
                text=parsed.question
                or "Tell me where you're flying from and which match you want to see.",
            )
            return

    # Clarify / direct answer from the Brain (no packages to show).
    if result is None:
        trace.set_outcome(mode="agent", status="clarify",
                          notes=["Brain answered without building packages."],
                          model=_TRACE_MODEL, rounds=trace.rounds)
        yield _ev(type="trace", data=trace.to_dict())  # the reasoning that led to the question
        yield _ev(type="progress", step="ready", status="fallback", text="Need a detail from you")
        yield _ev(type="clarify", text=agent_text)
        return

    # ── Honest per-category progress from the REAL dispatch outcome (N10/I1):
    # each data step is done/fallback according to build_trip_packages' own
    # degradation notices — never a cosmetic timer.
    if trip is not None:
        yield _ev(type="progress", step="extract", status="done", text="Trip details captured")
    yield _ev(type="progress", step="flights", status=_notice_status(result, "flight"))
    yield _ev(type="progress", step="hotels", status=_notice_status(result, "hotels unavailable", "hotels,"))
    yield _ev(type="progress", step="weather", status=_notice_status(result, "weather"))
    yield _ev(type="progress", step="nearby", status=_notice_status(result, "amenities", "nearby"))
    yield _ev(type="progress", step="score", status="done" if result.packages else "fallback")
    yield _ev(type="progress", step="itinerary", status="running", text="Building itinerary")
    yield _ev(type="progress", step="links", status="running", text="Preparing links")

    # ── Self-check / validation gate (core agentic #8 self-check / #10 safe
    # recommendation): verify the built output before recommending — no invented
    # match, sane price band, every flight lands before kickoff, stay brackets
    # the match day. Pure + defensive; surfaces honest pass/warn/fail, never blocks.
    _val = validate_packages(result, trip)
    trace.set_validation(_val)
    _vpass = sum(1 for c in _val if c.get("status") == "pass")
    _vfail = sum(1 for c in _val if c.get("status") == "fail")
    _vwarn = sum(1 for c in _val if c.get("status") == "warn")
    _vtxt = (f"Validated {_vpass}/{len(_val)}"
             + (" · flagged" if (_vfail or _vwarn) else " · all clear"))
    yield _ev(type="progress", step="validate", status="done", text=_vtxt)
    yield _ev(type="trace", data=trace.to_dict())  # stream the ⑥ section live

    # ── Render. greenlight confirms the captured intent just before the packages.
    if trip is not None:
        yield _ev(type="greenlight", text=trip.summary())
    yield _ev(type="commentary", text="🗺️ Nemotron is comparing your 3 packages…")
    explanation = ""
    if agent is not None:
        he = {}
        try:
            async for beat in _pulse(
                _agent_explain(agent, user_text, trip, result),
                he,
                "🗺️ Nemotron is comparing your 3 packages…",
            ):
                yield beat
            explanation = he["result"]
        except Exception as exc:
            logger.warning("explanation round failed (%s).", exc)
            explanation = ""

    # Final: the full Layla-competitive render (status + cards + map + timeline).
    # leaflet_preloaded=True → the frontend already loaded Leaflet in <head>; the
    # map's inline init script is re-run after injection (see index.html).
    _finalize_trace(trace, trip, result, built_by)
    yield _ev(type="trace", data=trace.to_dict())  # final, complete trace for the drawer
    yield _ev(type="progress", step="itinerary", status="done")
    yield _ev(type="progress", step="links", status="done")
    yield _ev(type="progress", step="ready", status="done", text="Your packages are ready")
    yield _ev(type="result", html=render_full(result, trip, leaflet_preloaded=True, trace=trace), explanation=explanation)


@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Handle GET "/" by returning the contents of the INDEX_HTML file.

    The file is opened in text mode and decoded as UTF-8 on every call; the
    whole document is returned as a single string.
    """
    with open(INDEX_HTML, encoding="utf-8") as index_file:
        page_markup = index_file.read()
    return page_markup


if __name__ == "__main__":
    # Script entry point: the listening port comes from the PORT environment
    # variable when it is set, otherwise it defaults to 7860.
    app.launch(
        server_port=int(os.getenv("PORT", "7860")),
        show_error=True,
    )