# Hugging Face Space: build-small-hackathon/matchday (status: Running; file size: 32,094 bytes)

"""MatchDay — HF Space entry point (gradio.Server mode, N1 / Off-Brand).

The Space runs THIS file. It is a `gradio.Server` app: a fully custom
``index.html`` frontend is served at ``/`` while ``@app.api("plan_trip")`` is an
async generator that streams N12-typed JSON events through Gradio's queue (SSE),
so the frontend updates live as Nemotron decides → Python scores → Nemotron
explains. This is the Off-Brand path: a bespoke UI powered by Gradio's backend
(queuing, concurrency, Spaces hosting) — not stock Gradio components.

Brain + Hands: Nemotron (on Modal) never calls an API or names a price; Python
executes every call and scores every value. Every figure carries provenance.

``matchday/app.py`` is a compatibility shim that imports and launches this same
app, so ``python3 -m matchday.app`` runs the identical non-decorative path.

Reference patterns (3-codebase study, see MATCHDAY_UNCONSTRAINED_PLAN.md):
  - N1 gradio.Server custom-frontend architecture (Off-Brand badge):
    https://huggingface.co/blog/introducing-gradio-server ("Why @app.api()…").
  - N35 preflight gate (fail-fast on missing SerpApi key): Claude Code
    utils/preflightChecks.tsx:1-60.
"""
from __future__ import annotations

import asyncio
import json
import logging
import os
import sys
import time
from datetime import date
from pathlib import Path

# Repo-root importability when the Space runs this file directly.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from fastapi.responses import HTMLResponse  # noqa: E402
from gradio import Server  # noqa: E402

from matchday.agent import MatchDayAgent  # noqa: E402
from matchday.agent_loop import run_agent_loop  # noqa: E402
from matchday.agent_trace import (  # noqa: E402
    AgentTrace,
    ToolCallRecord,
    evidence_from_result,
    ranking_from_result,
    result_source_labels,
    validate_packages,
)
from matchday.intent import parse_intent, _find_match  # noqa: E402
from matchday.models import TripRequest  # noqa: E402
from matchday.wc2026 import resolve_match  # noqa: E402
from matchday.prompts import EXPLANATION_HINT  # noqa: E402
from matchday.render import render_full  # noqa: E402
from matchday.trip_tool import build_trip_packages, format_for_nemotron  # noqa: E402

# Module-level logger; root logging is configured at INFO when this file is imported.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Nemotron primary, deterministic fallback. Flip to False to force the
# deterministic path (fast demo / Modal-down insurance).
USE_AGENT = True

# Directory holding this file, and the custom frontend page that lives beside it.
HERE = Path(__file__).parent
INDEX_HTML = HERE / "index.html"
# Model label shown in the Agent Trace drawer (Best Agent provenance). Honest:
# 30B total / ~3B active MoE — the ≤32B-cap qualifier is the 30B total weight.
_TRACE_MODEL = "Nemotron-3-Nano-30B-A3B · 3B-active MoE · Modal A100"


async def _warm_nemotron() -> None:
    """Send one throwaway prompt to Nemotron so the container is hot early.

    Intended to run in the background at Space startup so the first real user
    query does not pay for a Modal cold start. The ping is capped at 240
    seconds; any failure (including the timeout) is logged at INFO level and
    swallowed, so this coroutine never propagates an ordinary exception.
    """
    ping_messages = [{"role": "user", "content": "warmup ping"}]
    try:
        warm_agent = MatchDayAgent()
        ping = warm_agent.run(ping_messages)
        await asyncio.wait_for(ping, timeout=240)
        logger.info("startup warmup ping completed — Nemotron container is hot")
    except Exception as exc:  # noqa: BLE001 — best-effort, must not break boot
        # Truncate the repr so a long upstream error cannot flood the log line.
        reason = repr(exc)[:80]
        logger.info("startup warmup ping ended early (%s)", reason)


# Strong references to fire-and-forget background tasks. The event loop keeps
# only weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes; holding it here until it completes
# prevents that.
_BACKGROUND_TASKS: set[asyncio.Task] = set()


async def _startup_warmup() -> None:
    """Server startup hook — schedule the warmup without blocking boot.

    The warmup runs as a background task. A reference to it is kept in
    ``_BACKGROUND_TASKS`` until it finishes and is dropped by a done-callback,
    so the task cannot be collected while it is still running.
    """
    task = asyncio.create_task(_warm_nemotron())
    _BACKGROUND_TASKS.add(task)
    task.add_done_callback(_BACKGROUND_TASKS.discard)


# The gradio Server instance the routes below attach to; `_startup_warmup` is
# registered as a startup hook.
app = Server(on_startup=[_startup_warmup])


def _ev(**payload) -> str:
    """Encode one typed stream event (N12) as the JSON text sent over SSE.

    The keyword arguments become the keys of a JSON object. Non-ASCII
    characters are written as-is rather than escaped.
    """
    event = dict(payload)
    encoded = json.dumps(event, ensure_ascii=False)
    return encoded


async def _pulse(coro, holder, message, interval: int = 9):
    """Run ``coro`` to completion, yielding a commentary heartbeat every
    ``interval`` seconds (carrying elapsed seconds) so the SSE stream is never
    silent during a long Modal cold-start or SerpApi phase. Stashes the result
    in ``holder['result']``; re-raises if ``coro`` raised. Usage::

        h = {}
        async for beat in _pulse(coro, h, msg):
            yield beat
        value = h["result"]

    If the consumer stops iterating before ``coro`` finishes (the generator is
    closed or the awaiting task is cancelled), the wrapped task is cancelled
    too, so abandoned requests do not keep running in the background.

    Args:
        coro: Awaitable to run; it is wrapped in a task.
        holder: Mutable mapping that receives the value under ``"result"``.
        message: Heartbeat text; the elapsed seconds are appended.
        interval: Seconds to wait between heartbeats.

    Yields:
        JSON ``commentary`` events, one per elapsed ``interval``.
    """
    task = asyncio.ensure_future(coro)
    start = time.monotonic()
    try:
        while True:
            # asyncio.wait returns on timeout without cancelling the task, which
            # is what lets us emit a heartbeat and keep waiting.
            done, _ = await asyncio.wait({task}, timeout=interval)
            if task in done:
                holder["result"] = task.result()  # re-raises coro's exception
                return
            yield _ev(type="commentary", text=f"{message} ({int(time.monotonic() - start)}s)")
    finally:
        # Reached with a pending task only on early exit (generator closed or
        # cancelled); asyncio.wait leaves the task running, so stop it here.
        if not task.done():
            task.cancel()


def _notice_status(result, *keywords: str) -> str:
    """Report ``fallback`` or ``done`` for one data category.

    The degradation notices on ``result`` are joined with spaces and
    lower-cased; if any of ``keywords`` occurs in that text the category is
    reported as ``fallback``, otherwise as ``done``. A missing or empty notice
    collection always yields ``done``. Keywords are matched as given, so callers
    pass them in lower case.
    """
    notices = result.degradation_notices or ""
    haystack = " ".join(notices).lower()
    for needle in keywords:
        if needle in haystack:
            return "fallback"
    return "done"


def _precheck_unrecognized_match(user_text: str):
    """Ground the named match before the agent runs (grounding honesty).

    The request is parsed deterministically and the match it names is resolved
    with ``resolve_match``. When the user named a matchup that does not resolve
    to a recognized fixture and the resolver supplied a note, that note is
    returned together with the parsed trip so ``plan_trip`` can refuse up front
    and stop, without invoking the agent loop.

    Why this exists: Nemotron routes its own tools and, for some non-fixture
    matchups (e.g. "Canada vs Morocco"), can non-deterministically choose
    ``clarify`` over ``build_trip_packages``. In that case the refusal produced
    inside the build tool never runs. Resolving the match here makes the
    refusal independent of how the model routes.

    Returns:
        ``(note, trip_request)`` when a match is named and unrecognized;
        ``None`` when no match is named, parsing or resolving fails, or the
        match is recognized.
    """
    try:
        intent = parse_intent(user_text)
    except Exception:  # noqa: BLE001 — must never break the turn
        return None

    request = getattr(intent, "trip_request", None)
    if request is None:
        named = ""
    else:
        named = getattr(request, "match_name", "") or ""
    # "the match" is _find_match's fallback sentinel, i.e. nothing was named.
    if not named or named == "the match":
        return None

    # Drop a trailing month ("Morocco June" -> "Morocco") before resolving.
    cleaned = _clean_match_name(named)
    try:
        resolution = resolve_match(cleaned)
    except Exception:  # noqa: BLE001
        return None
    if resolution.recognized or not resolution.note:
        return None

    # Carry the cleaned name onto the trip so the trace drawer matches the
    # note; if the copy fails the unmodified trip is returned instead.
    try:
        request = request.model_copy(update={"match_name": cleaned})
    except Exception:  # noqa: BLE001
        pass
    return resolution.note, request


# Lower-case month words that may trail a team name. Abbreviations are included
# because users write dates like "Jun 18" or "Sept. 3" at least as often as the
# full month name.
_MONTH_TOKENS = {
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
    "jan", "feb", "mar", "apr", "jun", "jul", "aug", "sep", "sept",
    "oct", "nov", "dec",
}


def _clean_match_name(name: str) -> str:
    """Strip trailing month tokens from each team in an 'A vs B' match name.

    ``parse_intent``'s ``_find_match`` greedily appends the next capitalized word
    to a team name, so 'Canada vs Morocco June 18' parses to 'Canada vs Morocco
    June' — the month leaks into the team and would surface in the refusal note
    as "Morocco June plays: Brazil". Trimming trailing month tokens (full names
    and common abbreviations, with or without a trailing period) restores the
    real team names. Conservative: only trailing month tokens are removed, and
    a side that consists solely of month tokens is left as typed rather than
    reduced to an empty team.

    Args:
        name: Match name as parsed; returned unchanged when it has no ' vs '.

    Returns:
        The match name with trailing month tokens removed from both sides.
    """
    if " vs " not in name:
        return name

    def _strip(team: str) -> str:
        words = team.split()
        kept = list(words)
        while kept and kept[-1].lower().rstrip(".") in _MONTH_TOKENS:
            kept.pop()
        # Never return an empty team: fall back to the words as typed.
        return " ".join(kept or words)

    home, away = name.split(" vs ", 1)
    return f"{_strip(home)} vs {_strip(away)}"


# Reply used when the parser offers no clarifying question of its own.
_DEFAULT_GREETING = (
    "I'd love to plan your FIFA 2026 World Cup trip! "
    "Tell me where you're flying from (e.g. 'Montreal' or 'YUL'), "
    "which match you'd like to see, and the dates."
)


def _precheck_chitchat(user_text: str):
    """Answer greetings and content-free prompts without waking the agent.

    A message such as "hi", "thanks" or "test" carries no origin, date or
    match. Replying with the parser's clarifying question (or the default
    greeting when there is none) gives an instant answer instead of waiting on
    a Nemotron call that would only clarify anyway.

    The guard is deliberately narrow: it fires only when at least two slots are
    missing AND ``_find_match`` finds no match in the text, so partial trip
    requests still reach the fixture validator and the agent.

    Returns:
        ``(reply, missing_slots)`` for chit-chat, or ``None`` to continue with
        the normal flow (also when parsing fails).
    """
    try:
        intent = parse_intent(user_text)
    except Exception:  # noqa: BLE001 — must never break the turn
        return None

    # `missing` only ever holds origin and/or date; fewer than two entries
    # means the user supplied at least one of them.
    if len(intent.missing) < 2 or _find_match(user_text):
        return None

    reply = intent.question or _DEFAULT_GREETING
    return reply, list(intent.missing)


def _finalize_trace(trace: AgentTrace, trip, result, built_by: str) -> None:
    """Fill in the closing sections of the agent trace.

    Records intent, grounding, evidence, ranking and outcome on ``trace``. For
    an unrecognized match only the refusal grounding and a ``clarify`` outcome
    are recorded. Otherwise the budget-tier weights are passed to
    ``ranking_from_result`` so the drawer can show how the order was decided.

    Best-effort: any exception is logged as a warning and swallowed, so trace
    bookkeeping never aborts a trip build.
    """

    def _record_outcome(status) -> None:
        # Notes and round count are read at call time, after the grounding /
        # evidence steps of the calling branch have run.
        trace.set_outcome(mode=built_by, status=status,
                          notes=list(result.degradation_notices),
                          model=_TRACE_MODEL, rounds=trace.rounds)

    try:
        trace.set_intent(trip)

        if getattr(result, "match_unrecognized", ""):
            # Honest refusal: the named match isn't a real 2026 fixture.
            trace.set_grounding(recognized=False, note=result.match_unrecognized)
            _record_outcome("clarify")
            return

        grounding_note = getattr(result, "grounding_note", "")
        kickoff = getattr(result, "kickoff_local", "") or ""
        # Prefer the grounded name; otherwise fall back to what the trip said.
        display_name = getattr(result, "grounded_match_name", "") or (
            trip.match_name if trip is not None else ""
        )
        trace.set_grounding(
            recognized=True,
            corrected=bool(grounding_note),
            kickoff=kickoff,
            venue="BC Place",
            match_name=display_name,
            note=getattr(result, "grounding_note", "") or "",
        )
        trace.set_evidence(evidence_from_result(result))

        from matchday.scoring import BUDGET_WEIGHTS
        tier = "mid_range" if trip is None else trip.budget_tier
        weights = BUDGET_WEIGHTS.get(tier, BUDGET_WEIGHTS["mid_range"])
        weight_table = {
            "cost": weights.cost,
            "buffer": weights.buffer,
            "transit": weights.transit,
        }
        ranking, _ = ranking_from_result(result, tier, weight_table)
        trace.ranking = ranking
        _record_outcome(result.status)
    except Exception as exc:  # noqa: BLE001
        logger.warning("trace finalization skipped: %s", exc)


# Cached per-boot preflight (N35). Fail-fast ONLY on genuinely-doomed config
# (missing SerpApi key — build_trip_packages cannot fetch live flights/hotels).
# Modal cold-start is NOT a hard failure: it streams via _pulse heartbeats and
# the loop degrades to the deterministic parser, so we don't gate on it.
_PREFLIGHT_OK: bool | None = None


def _preflight() -> tuple[bool, str]:
    """Return (ok, reason). ``reason`` is empty when ok. Cached positive."""
    global _PREFLIGHT_OK
    if _PREFLIGHT_OK:
        return True, ""
    if not os.environ.get("SERPAPI_API_KEY"):
        return False, (
            "SerpApi key is not set on this Space — live flight & hotel search is "
            "unavailable. Add SERPAPI_API_KEY in Settings → Secrets, then restart."
        )
    _PREFLIGHT_OK = True
    return True, ""


async def _agent_explain(agent, user_text: str, trip: TripRequest, result) -> str:
    """Round 2 — ask Nemotron to compare the packages that were built.

    A conversation is replayed in which the assistant already called
    ``build_trip_packages`` with the trip's arguments and received the
    formatted result, followed by the explanation hint. The agent is run with
    an empty tool list so it has to answer in text.

    Returns:
        The stripped reply text, or ``""`` when the agent call fails.
    """
    call_id = "call_build"
    tool_name = "build_trip_packages"
    serialized_args = json.dumps(trip.model_dump(mode="json"))

    tool_request = {
        "role": "assistant",
        "content": "",
        "tool_calls": [{
            "id": call_id,
            "type": "function",
            "function": {"name": tool_name, "arguments": serialized_args},
        }],
    }
    tool_reply = {
        "role": "tool",
        "tool_call_id": call_id,
        "name": tool_name,
        "content": format_for_nemotron(result),
    }
    conversation = [
        {"role": "user", "content": user_text},
        tool_request,
        tool_reply,
        {"role": "user", "content": EXPLANATION_HINT},
    ]

    try:
        # no tools → Nemotron must write text
        reply = await agent.run(conversation, tools=[])
        return (reply.get("text") or "").strip()
    except Exception as exc:
        logger.warning("explanation round failed: %s", exc)
        return ""


@app.api(name="plan_trip", concurrency_limit=4, stream_every=0.5)
async def plan_trip(user_text: str) -> str:
    """Stream the agentic trip build as typed events (N12 + N10).

    Yields: commentary (progress beats, sent immediately) → greenlight
    (parsed trip) | clarify | error → result (full cards+map+timeline render
    + Nemotron's explanation). Falls back to the deterministic parser if the
    agent is unavailable or hedges.

    Args:
        user_text: The user's free-text trip request.

    Yields:
        JSON strings produced by ``_ev``, each carrying a ``type`` of
        ``commentary``, ``progress``, ``trace``, ``greenlight``, ``clarify``,
        ``error`` or ``result``.

    Order of the stages below:
        1. Preflight — a failed check yields a single ``error`` event.
        2. Pre-agent chit-chat guard, then the pre-agent fixture validator;
           either may end the turn with a ``clarify`` event.
        3. The agent loop (at most three attempts), when ``USE_AGENT`` is set
           and the agent could be constructed.
        4. The deterministic parse-and-build fallback, when the loop produced
           neither packages nor text.
        5. Per-category progress, validation, the explanation round and the
           final ``result`` event.
    """
    ok, why = _preflight()  # N35 — fail-fast on doomed config (missing key)
    if not ok:
        yield _ev(type="error", text=f"⚠️ {why}")
        return

    # The visible Agent Trace accumulator (Best Agent proof). Populated live as
    # intent is extracted, the match is grounded, tools run, and packages are
    # ranked — emitted to the Evidence drawer in REAL TIME via `trace` events.
    trace = AgentTrace()
    built_by = "agent"  # flips to "deterministic" if the loop doesn't build

    yield _ev(type="commentary", text="Reading your trip request…")
    yield _ev(type="progress", step="read", status="done", text="Read your request")
    yield _ev(type="progress", step="extract", status="running", text="Understanding your trip")

    # ── Pre-agent chit-chat guard: a greeting / empty prompt ("hi", "thanks")
    # has no origin, date, or match. Reply deterministically and instantly
    # instead of waking Nemotron for a call that would only clarify — turns a
    # Modal cold-start wait into an immediate answer. Conservative: only fires
    # when nothing trip-related was said, so real (even partial) requests still
    # reach the fixture validator / agent.
    _chat = _precheck_chitchat(user_text)
    if _chat is not None:
        _chat_text, _chat_missing = _chat
        trace.set_intent(None, missing=_chat_missing)
        trace.set_outcome(
            mode="deterministic", status="clarify",
            notes=["Pre-agent chit-chat check: no trip details (origin / date / match) yet."],
            model=_TRACE_MODEL, rounds=0,
        )
        yield _ev(type="trace", data=trace.to_dict())
        yield _ev(type="progress", step="extract", status="done", text="Heard you")
        yield _ev(type="progress", step="ready", status="fallback", text="Tell me your trip")
        yield _ev(type="clarify", text=_chat_text)
        return

    # ── Generic pre-agent fixture validator (grounding honesty). Ground the named
    # match deterministically BEFORE the agent picks a tool: a non-real 2026
    # fixture is refused with the closest real alternatives and we stop, so the
    # refusal never depends on Nemotron choosing build_trip_packages over clarify.
    _pre = _precheck_unrecognized_match(user_text)
    if _pre is not None:
        _refusal_note, _pre_trip = _pre
        trace.set_intent(_pre_trip)
        trace.set_grounding(recognized=False, note=_refusal_note)
        trace.set_outcome(
            mode="deterministic", status="clarify",
            notes=["Pre-agent fixture check: named match is not a real 2026 fixture."],
            model=_TRACE_MODEL, rounds=0,
        )
        yield _ev(type="trace", data=trace.to_dict())  # grounding-refusal proof
        yield _ev(type="progress", step="extract", status="done", text="Match checked")
        yield _ev(type="progress", step="ready", status="fallback", text="Match not found")
        yield _ev(type="clarify", text=_refusal_note)
        return

    agent = None
    if USE_AGENT:
        try:
            # Nemotron reasoning toggle (NVIDIA Nemotron Quest + Best Agent): the
            # official Nemotron-3-Nano usage guide serves complex planning turns
            # with thinking ON (chain-of-thought before the tool call). Default
            # OFF to preserve the verified fast tool-routing path; set
            # MATCHDAY_THINKING=1 on the Space to turn on reasoning for the
            # agent's decide/ground/explain turns.
            thinking = os.environ.get("MATCHDAY_THINKING", "").lower() in ("1", "true", "yes")
            agent = MatchDayAgent(thinking=thinking)
        except Exception as exc:
            # A failed construction leaves `agent` as None, which routes the
            # turn to the deterministic fallback further down.
            logger.warning("agent init failed (%s); deterministic path.", exc)

    # ── Smart path: the bounded agent loop (K1). Nemotron UNDERSTANDS the
    # request, may GROUND itself with web_search (I6), may CLARIFY to capture
    # intent (P7), and calls build_trip_packages when ready. The loop validates
    # args, dedups, and self-corrects one malformed call (A4). No more bypass.
    messages: list[dict] = [{"role": "user", "content": user_text}]
    trip: TripRequest | None = None
    result = None          # TripPackageResult produced inside the loop's build call
    agent_text = ""        # a clarify question or direct answer from the Brain

    if agent is not None:
        yield _ev(type="commentary", text="🧠 Nemotron is understanding your request…")
        for attempt in range(3):  # cap grounding rounds (web_search → build)
            h = {}
            try:
                async for beat in _pulse(
                    run_agent_loop(agent, messages, trace=trace),
                    h,
                    "🧠 Nemotron is understanding your request & choosing tools",
                ):
                    yield beat
                res = h.get("result")
                # REAL-TIME trace: push the tool-call log to the drawer after
                # each agent decision so the user sees the multi-step reasoning
                # unfold (web_search → build_trip_packages), not just the end.
                if res is not None:
                    yield _ev(type="trace", data=trace.to_dict())
            except Exception as exc:
                logger.warning("agent loop attempt %d failed (%s).", attempt, exc)
                res = None

            # Nothing usable came back: leave `result` / `agent_text` unset so
            # the deterministic fallback below takes over.
            if res is None:
                break

            if res.type == "tool_called" and res.tool == "build_trip_packages":
                result = res.result.get("full_result")
                trip = res.result.get("trip")
                # Sync the display trip to the GROUNDED dates + canonical match
                # name (the match was re-centered on the real WC fixture inside
                # the tool), and surface any correction note to the user.
                if result is not None and getattr(result, "grounded_match_date", None) and trip is not None:
                    upd = {
                        "match_date": result.grounded_match_date,
                        "check_in": result.grounded_check_in,
                        "check_out": result.grounded_check_out,
                    }
                    if getattr(result, "grounded_match_name", ""):
                        upd["match_name"] = result.grounded_match_name
                    try:
                        trip = trip.model_copy(update=upd)
                    except Exception:
                        # Best-effort sync: keep the un-synced trip on failure.
                        pass
                if result is not None and getattr(result, "grounding_note", ""):
                    yield _ev(type="commentary", text="📅 " + result.grounding_note)
                break

            if res.type == "tool_called" and res.tool == "web_search":
                # Brain grounded itself — thread the result back so it can build.
                tcid = f"call_ws_{attempt}"
                messages.append({
                    "role": "assistant", "content": "",
                    "tool_calls": [{
                        "id": tcid, "type": "function",
                        "function": {
                            "name": "web_search",
                            "arguments": json.dumps(res.result.get("query") or {}),
                        },
                    }],
                })
                messages.append({
                    "role": "tool", "tool_call_id": tcid, "name": "web_search",
                    # The serialized search result is cut to its first 1200 characters.
                    "content": json.dumps(res.result, ensure_ascii=False)[:1200],
                })
                yield _ev(
                    type="commentary",
                    text="🔎 Grounded with a web search — now building your packages…",
                )
                continue

            if res.type == "final_answer":
                agent_text = res.text or ""
                break
            # fallback_to_deterministic → EXPLICIT + user-visible degrade. Never
            # silently swap in the deterministic path. Most commonly this is a
            # Modal cold-start timeout (see the agent_loop reason) — tell the user
            # honestly so the wait / fast-mode result is understood, not hidden.
            if res.type == "fallback_to_deterministic":
                yield _ev(
                    type="commentary",
                    text="🌡️ Nemotron is warming up on Modal (cold start) — "
                         "building your packages in fast mode now, then I'll "
                         "still compare them live.",
                )
            # Any outcome not handled above also ends the loop.
            break

    # If the agent's build already flagged an unrecognized match, surface it as a
    # clarification with real alternatives (Best-Agent honesty: never silently
    # plan a trip around a nonexistent fixture like "Canada vs Morocco").
    if result is not None and getattr(result, "match_unrecognized", ""):
        _finalize_trace(trace, trip, result, built_by)
        yield _ev(type="trace", data=trace.to_dict())  # grounding-refusal proof
        yield _ev(type="progress", step="ready", status="fallback", text="Match not found")
        yield _ev(type="clarify", text=result.match_unrecognized)
        return

    # ── Deterministic fallback (K3): parse intent + build directly. Used when
    # the agent is unavailable, hedged to a non-build answer, or the loop failed.
    if result is None and not agent_text:
        parsed = parse_intent(user_text)
        built_by = "deterministic"
        trace.set_intent(parsed.trip_request, missing=parsed.missing)
        yield _ev(type="trace", data=trace.to_dict())  # show extracted intent live
        trip = parsed.trip_request
        if trip is not None:
            yield _ev(type="greenlight", text=trip.summary())
            yield _ev(
                type="commentary",
                text="✈️ Scanning airlines · 🏨 Finding hotels near BC Place · 🌤️ Checking the match-day forecast…",
            )
            yield _ev(type="progress", step="flights", status="running")
            yield _ev(type="progress", step="hotels", status="running")
            yield _ev(type="progress", step="weather", status="running")
            yield _ev(type="progress", step="nearby", status="running")
            hb = {}
            _db_t0 = time.monotonic()  # start of the build, for the trace's duration_ms
            try:
                async for beat in _pulse(
                    build_trip_packages(trip),
                    hb,
                    "✈️ Scanning airlines · 🏨 hotels near BC Place · 🌤️ weather",
                ):
                    yield beat
                result = hb["result"]
            except Exception as exc:
                trace.set_outcome(mode="deterministic", status="error",
                                  notes=[f"build_trip_packages raised: {exc}"],
                                  model=_TRACE_MODEL)
                yield _ev(type="trace", data=trace.to_dict())  # honest error in the trace
                yield _ev(type="error", text=f"⚠️ {exc}")
                return
            # The deterministic path bypasses the loop, so record its single
            # build tool call here (honest: same canonical build_trip_packages).
            if result is not None:
                _db_dur = int((time.monotonic() - _db_t0) * 1000)
                trace.add_tool_call(ToolCallRecord(
                    name="build_trip_packages",
                    args={"mode": "deterministic", **(trip.model_dump(mode="json") if hasattr(trip, "model_dump") else {})},
                    status="ok" if result.packages else "failed",
                    duration_ms=_db_dur,
                    detail=f"{len(result.packages)} package(s) scored",
                    sources=result_source_labels(result),
                ))
                yield _ev(type="trace", data=trace.to_dict())
            # Sync the display trip to the GROUNDED dates so greenlight +
            # itinerary match the packages (match was re-centered on the real
            # WC fixture inside build_trip_packages). Honesty: show the note.
            if getattr(result, "grounded_match_date", None):
                upd = {
                    "match_date": result.grounded_match_date,
                    "check_in": result.grounded_check_in,
                    "check_out": result.grounded_check_out,
                }
                if getattr(result, "grounded_match_name", ""):
                    upd["match_name"] = result.grounded_match_name
                try:
                    trip = trip.model_copy(update=upd)
                except Exception:
                    # Best-effort sync: keep the un-synced trip on failure.
                    pass
            if getattr(result, "match_unrecognized", ""):
                _finalize_trace(trace, trip, result, built_by)
                yield _ev(type="trace", data=trace.to_dict())  # grounding-refusal proof
                yield _ev(type="progress", step="ready", status="fallback", text="Match not found")
                yield _ev(type="clarify", text=result.match_unrecognized)
                return
            if getattr(result, "grounding_note", ""):
                yield _ev(type="commentary", text="📅 " + result.grounding_note)
        else:
            trace.set_outcome(mode="deterministic", status="clarify",
                              notes=["Intent incomplete — asked for the missing detail."],
                              model=_TRACE_MODEL)
            yield _ev(type="trace", data=trace.to_dict())  # show the missing slots honestly
            yield _ev(type="progress", step="ready", status="fallback", text="Need a detail from you")
            yield _ev(
                type="clarify",
                text=parsed.question
                or "Tell me where you're flying from and which match you want to see.",
            )
            return

    # Clarify / direct answer from the Brain (no packages to show).
    if result is None:
        trace.set_outcome(mode="agent", status="clarify",
                          notes=["Brain answered without building packages."],
                          model=_TRACE_MODEL, rounds=trace.rounds)
        yield _ev(type="trace", data=trace.to_dict())  # the reasoning that led to the question
        yield _ev(type="progress", step="ready", status="fallback", text="Need a detail from you")
        yield _ev(type="clarify", text=agent_text)
        return

    # ── Honest per-category progress from the REAL dispatch outcome (N10/I1):
    # each data step is done/fallback according to build_trip_packages' own
    # degradation notices — never a cosmetic timer.
    if trip is not None:
        yield _ev(type="progress", step="extract", status="done", text="Trip details captured")
    yield _ev(type="progress", step="flights", status=_notice_status(result, "flight"))
    yield _ev(type="progress", step="hotels", status=_notice_status(result, "hotels unavailable", "hotels,"))
    yield _ev(type="progress", step="weather", status=_notice_status(result, "weather"))
    yield _ev(type="progress", step="nearby", status=_notice_status(result, "amenities", "nearby"))
    yield _ev(type="progress", step="score", status="done" if result.packages else "fallback")
    yield _ev(type="progress", step="itinerary", status="running", text="Building itinerary")
    yield _ev(type="progress", step="links", status="running", text="Preparing links")

    # ── Self-check / validation gate (core agentic #8 self-check / #10 safe
    # recommendation): verify the built output before recommending — no invented
    # match, sane price band, every flight lands before kickoff, stay brackets
    # the match day. Pure + defensive; surfaces honest pass/warn/fail, never blocks.
    _val = validate_packages(result, trip)
    trace.set_validation(_val)
    # Tally the checks by their "status" field for the one-line progress summary.
    _vpass = sum(1 for c in _val if c.get("status") == "pass")
    _vfail = sum(1 for c in _val if c.get("status") == "fail")
    _vwarn = sum(1 for c in _val if c.get("status") == "warn")
    _vtxt = (f"Validated {_vpass}/{len(_val)}"
             + (" · flagged" if (_vfail or _vwarn) else " · all clear"))
    yield _ev(type="progress", step="validate", status="done", text=_vtxt)
    yield _ev(type="trace", data=trace.to_dict())  # stream the ⑥ section live

    # ── Render. greenlight confirms the captured intent just before the packages.
    if trip is not None:
        yield _ev(type="greenlight", text=trip.summary())
    yield _ev(type="commentary", text="🗺️ Nemotron is comparing your 3 packages…")
    explanation = ""
    # The explanation round only runs when an agent exists; otherwise the
    # result is sent with an empty explanation.
    if agent is not None:
        he = {}
        try:
            async for beat in _pulse(
                _agent_explain(agent, user_text, trip, result),
                he,
                "🗺️ Nemotron is comparing your 3 packages…",
            ):
                yield beat
            explanation = he["result"]
        except Exception as exc:
            logger.warning("explanation round failed (%s).", exc)
            explanation = ""

    # Final: the full Layla-competitive render (status + cards + map + timeline).
    # leaflet_preloaded=True → the frontend already loaded Leaflet in <head>; the
    # map's inline init script is re-run after injection (see index.html).
    _finalize_trace(trace, trip, result, built_by)
    yield _ev(type="trace", data=trace.to_dict())  # final, complete trace for the drawer
    yield _ev(type="progress", step="itinerary", status="done")
    yield _ev(type="progress", step="links", status="done")
    yield _ev(type="progress", step="ready", status="done", text="Your packages are ready")
    yield _ev(type="result", html=render_full(result, trip, leaflet_preloaded=True, trace=trace), explanation=explanation)


@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Serve the custom frontend: the UTF-8 text of ``index.html`` at ``/``.

    The file is read on every request, so edits to it show up without a
    restart.
    """
    return INDEX_HTML.read_text(encoding="utf-8")


# Direct execution: launch the server on $PORT (7860 when unset) with
# show_error enabled.
if __name__ == "__main__":
    app.launch(server_port=int(os.environ.get("PORT", "7860")), show_error=True)