Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- matchday/agent_trace.py +8 -1
- matchday/record_trace.py +15 -0
matchday/agent_trace.py
CHANGED
|
@@ -433,7 +433,14 @@ def validate_packages(result: Any, trip: Any | None = None) -> list[dict[str, st
|
|
| 433 |
f"{late} of {checked} flight(s) land after kickoff" if late
|
| 434 |
else f"{checked} flight(s) land before kickoff")
|
| 435 |
else:
|
| 436 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
else:
|
| 438 |
late = [p for p in packages
|
| 439 |
if float((getattr(p, "scores", {}) or {}).get("arrival_buffer", 1) or 1) <= 0]
|
|
|
|
| 433 |
f"{late} of {checked} flight(s) land after kickoff" if late
|
| 434 |
else f"{checked} flight(s) land before kickoff")
|
| 435 |
else:
|
| 436 |
+
# kickoff present but not directly comparable to the arrival time
|
| 437 |
+
# (e.g. a bare time/str) — fall back to the normalized arrival-buffer
|
| 438 |
+
# score (>0 means the package lands before kickoff). Honest: it
|
| 439 |
+
# evaluates instead of skipping when a strict datetime compare can't.
|
| 440 |
+
late = [p for p in packages
|
| 441 |
+
if float((getattr(p, "scores", {}) or {}).get("arrival_buffer", 1) or 1) <= 0]
|
| 442 |
+
add("arrival before kickoff", "fail" if late else "pass",
|
| 443 |
+
"arrival-buffer score <= 0" if late else "positive arrival buffer on every package")
|
| 444 |
else:
|
| 445 |
late = [p for p in packages
|
| 446 |
if float((getattr(p, "scores", {}) or {}).get("arrival_buffer", 1) or 1) <= 0]
|
matchday/record_trace.py
CHANGED
|
@@ -29,6 +29,7 @@ from pathlib import Path
|
|
| 29 |
|
| 30 |
from matchday.agent import MatchDayAgent
|
| 31 |
from matchday.agent_loop import run_agent_loop
|
|
|
|
| 32 |
from matchday.intent import parse_intent
|
| 33 |
from matchday.models import TripRequest
|
| 34 |
from matchday.prompts import EXPLANATION_HINT
|
|
@@ -178,6 +179,20 @@ async def record(query: str, out_path: str) -> None:
|
|
| 178 |
packages=_packages_view(result),
|
| 179 |
compact=format_for_nemotron(result),
|
| 180 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
explanation = await _explain(agent, query, trip, result)
|
| 182 |
log(event="nemotron_explanation", text=explanation)
|
| 183 |
elif agent_text:
|
|
|
|
| 29 |
|
| 30 |
from matchday.agent import MatchDayAgent
|
| 31 |
from matchday.agent_loop import run_agent_loop
|
| 32 |
+
from matchday.agent_trace import validate_packages
|
| 33 |
from matchday.intent import parse_intent
|
| 34 |
from matchday.models import TripRequest
|
| 35 |
from matchday.prompts import EXPLANATION_HINT
|
|
|
|
| 179 |
packages=_packages_view(result),
|
| 180 |
compact=format_for_nemotron(result),
|
| 181 |
)
|
| 182 |
+
# Self-check / validation gate — the SAME deterministic gate the deployed
|
| 183 |
+
# plan_trip runs live (core agentic #8 self-check / #10 safe final
|
| 184 |
+
# recommendation). Recorded here so the artifact proves the output was
|
| 185 |
+
# validated before recommending: no invented match, sane prices, every
|
| 186 |
+
# flight lands before kickoff, stay brackets the match day.
|
| 187 |
+
validation = validate_packages(result, trip)
|
| 188 |
+
_vpass = sum(1 for c in validation if c.get("status") == "pass")
|
| 189 |
+
_vfail = sum(1 for c in validation if c.get("status") == "fail")
|
| 190 |
+
log(
|
| 191 |
+
event="self_check",
|
| 192 |
+
status="flagged" if _vfail else "all_clear",
|
| 193 |
+
passed=f"{_vpass}/{len(validation)}",
|
| 194 |
+
checks=validation,
|
| 195 |
+
)
|
| 196 |
explanation = await _explain(agent, query, trip, result)
|
| 197 |
log(event="nemotron_explanation", text=explanation)
|
| 198 |
elif agent_text:
|