Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- app.py +54 -1
- index.html +0 -0
- matchday/modal_spike.py +1 -1
app.py
CHANGED
|
@@ -45,7 +45,7 @@ from matchday.agent_trace import ( # noqa: E402
|
|
| 45 |
ranking_from_result,
|
| 46 |
result_source_labels,
|
| 47 |
)
|
| 48 |
-
from matchday.intent import parse_intent # noqa: E402
|
| 49 |
from matchday.models import TripRequest # noqa: E402
|
| 50 |
from matchday.wc2026 import resolve_match # noqa: E402
|
| 51 |
from matchday.prompts import EXPLANATION_HINT # noqa: E402
|
|
@@ -198,6 +198,38 @@ def _clean_match_name(name: str) -> str:
|
|
| 198 |
return f"{_strip(a)} vs {_strip(b)}"
|
| 199 |
|
| 200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
def _finalize_trace(trace: AgentTrace, trip, result, built_by: str) -> None:
|
| 202 |
"""Populate the final intent/grounding/evidence/ranking/outcome on the trace.
|
| 203 |
|
|
@@ -315,6 +347,27 @@ async def plan_trip(user_text: str) -> str:
|
|
| 315 |
yield _ev(type="progress", step="read", status="done", text="Read your request")
|
| 316 |
yield _ev(type="progress", step="extract", status="running", text="Understanding your trip")
|
| 317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
# ── Generic pre-agent fixture validator (grounding honesty). Ground the named
|
| 319 |
# match deterministically BEFORE the agent picks a tool: a non-real 2026
|
| 320 |
# fixture is refused with the closest real alternatives and we stop, so the
|
|
|
|
| 45 |
ranking_from_result,
|
| 46 |
result_source_labels,
|
| 47 |
)
|
| 48 |
+
from matchday.intent import parse_intent, _find_match # noqa: E402
|
| 49 |
from matchday.models import TripRequest # noqa: E402
|
| 50 |
from matchday.wc2026 import resolve_match # noqa: E402
|
| 51 |
from matchday.prompts import EXPLANATION_HINT # noqa: E402
|
|
|
|
| 198 |
return f"{_strip(a)} vs {_strip(b)}"
|
| 199 |
|
| 200 |
|
| 201 |
+
# Fallback clarifying prompt: used by the pre-agent chit-chat guard when the
# parsed intent does not supply a question of its own.
_DEFAULT_GREETING = (
    "I'd love to plan your FIFA 2026 World Cup trip! "
    "Tell me where you're flying from (e.g. 'Montreal' or 'YUL'), "
    "which match you'd like to see, and the dates."
)
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def _precheck_chitchat(user_text: str) -> "tuple[str, list] | None":
    """Deterministic reply for pure chit-chat / empty prompts, BEFORE the agent.

    A greeting or content-free message ("hi", "hello", "thanks", "test") has no
    origin, date, or match to plan around. Answering it here with
    ``parse_intent``'s clarifying question avoids waking the model for a call
    that would only clarify anyway, turning a Modal cold-start wait into an
    instant answer. Same pre-agent seam as the fixture validator — no
    agent_loop / Modal change.

    Conservative: fires ONLY when BOTH origin and date are absent AND no
    'X vs Y' match is named, so any real (even partial) trip request still
    reaches the fixture validator / agent.

    Args:
        user_text: The raw user message for this turn.

    Returns:
        ``(reply, missing_slots)`` when the message is chit-chat, where
        ``reply`` is the parsed clarifying question (or ``_DEFAULT_GREETING``
        when the parser supplies none) and ``missing_slots`` is a list copy of
        the parser's missing slots. ``None`` to proceed normally.

    Never raises: the whole decision runs inside the guard, so a failure in
    ``parse_intent``, in ``_find_match``, or in reading the parsed result falls
    through to the normal agent path instead of breaking the turn.
    """
    try:
        parsed = parse_intent(user_text)
        # Tolerate a parser that reports "nothing missing" as None.
        missing = list(parsed.missing or ())
        # `missing` only ever holds origin and/or date; len >= 2 => both absent.
        if len(missing) < 2 or _find_match(user_text):
            return None
        return parsed.question or _DEFAULT_GREETING, missing
    except Exception:  # noqa: BLE001 — must never break the turn
        return None
|
| 231 |
+
|
| 232 |
+
|
| 233 |
def _finalize_trace(trace: AgentTrace, trip, result, built_by: str) -> None:
|
| 234 |
"""Populate the final intent/grounding/evidence/ranking/outcome on the trace.
|
| 235 |
|
|
|
|
| 347 |
yield _ev(type="progress", step="read", status="done", text="Read your request")
|
| 348 |
yield _ev(type="progress", step="extract", status="running", text="Understanding your trip")
|
| 349 |
|
| 350 |
+
# ── Pre-agent chit-chat guard: a greeting / empty prompt ("hi", "thanks")
|
| 351 |
+
# has no origin, date, or match. Reply deterministically and instantly
|
| 352 |
+
# instead of waking Nemotron for a call that would only clarify — turns a
|
| 353 |
+
# Modal cold-start wait into an immediate answer. Conservative: only fires
|
| 354 |
+
# when nothing trip-related was said, so real (even partial) requests still
|
| 355 |
+
# reach the fixture validator / agent.
|
| 356 |
+
_chat = _precheck_chitchat(user_text)
|
| 357 |
+
if _chat is not None:
|
| 358 |
+
_chat_text, _chat_missing = _chat
|
| 359 |
+
trace.set_intent(None, missing=_chat_missing)
|
| 360 |
+
trace.set_outcome(
|
| 361 |
+
mode="deterministic", status="clarify",
|
| 362 |
+
notes=["Pre-agent chit-chat check: no trip details (origin / date / match) yet."],
|
| 363 |
+
model=_TRACE_MODEL, rounds=0,
|
| 364 |
+
)
|
| 365 |
+
yield _ev(type="trace", data=trace.to_dict())
|
| 366 |
+
yield _ev(type="progress", step="extract", status="done", text="Heard you")
|
| 367 |
+
yield _ev(type="progress", step="ready", status="fallback", text="Tell me your trip")
|
| 368 |
+
yield _ev(type="clarify", text=_chat_text)
|
| 369 |
+
return
|
| 370 |
+
|
| 371 |
# ── Generic pre-agent fixture validator (grounding honesty). Ground the named
|
| 372 |
# match deterministically BEFORE the agent picks a tool: a non-real 2026
|
| 373 |
# fixture is refused with the closest real alternatives and we stop, so the
|
index.html
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
matchday/modal_spike.py
CHANGED
|
@@ -99,7 +99,7 @@ app = modal.App("matchday-spike")
|
|
| 99 |
volumes={HF_CACHE_PATH: HF_CACHE_VOL},
|
| 100 |
secrets=[hf_secret],
|
| 101 |
startup_timeout=120 * MINUTES, # time to download + load 60GB model
|
| 102 |
-
scaledown_window=
|
| 103 |
)
|
| 104 |
class Server:
|
| 105 |
@modal.enter()
|
|
|
|
| 99 |
volumes={HF_CACHE_PATH: HF_CACHE_VOL},
|
| 100 |
secrets=[hf_secret],
|
| 101 |
startup_timeout=120 * MINUTES, # time to download + load 60GB model
|
| 102 |
+
scaledown_window=1800, # keep the replica warm 30 min after each request so an active demo stays hot (NOT min_containers — no 24/7 billing)
|
| 103 |
)
|
| 104 |
class Server:
|
| 105 |
@modal.enter()
|