"""Golden-case evaluation runner for PacketCourt.

Loads the cases stored in ``data/golden_cases.jsonl`` (one JSON object per
non-blank line), audits each packet with ``packetcourt.audit_packet``,
compares the audit against the expectations recorded in the case, prints a
PASS/FAIL line per case, and exits with status 1 if any check failed.
"""
from __future__ import annotations

import json
import sys
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
# Put the in-repo ``src`` directory first on the import path. This has to
# happen before the ``packetcourt`` import below.
sys.path.insert(0, str(ROOT / "src"))

from packetcourt import audit_packet  # noqa: E402


def load_cases(path: Path) -> list[dict]:
    """Parse a JSON Lines file into a list of case dictionaries.

    Args:
        path: File holding one JSON document per line. Blank and
            whitespace-only lines are skipped.

    Returns:
        The decoded JSON values, in file order.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the fixture reads the same on every
    # platform instead of depending on the locale's default encoding.
    text = path.read_text(encoding="utf-8")
    return [json.loads(line) for line in text.splitlines() if line.strip()]


def evaluate_case(case: dict) -> tuple[int, int, list[str]]:
    """Audit one golden case and compare the result with its expectations.

    Required keys in ``case`` are ``front_text``, ``back_text`` and
    ``expected_claims``. The keys ``expected_verdicts``,
    ``expected_gap_terms``, ``expected_best_before``,
    ``expected_after_opening`` and ``expected_sugar_teaspoons`` are
    optional; each one that is present adds further checks.

    Args:
        case: One decoded golden-case record.

    Returns:
        A ``(passed, total, failures)`` tuple: the number of checks that
        passed, the number of checks run, and the labels of the failed ones.

    Raises:
        KeyError: If a required key is missing from ``case``.
    """
    audit = audit_packet(case["front_text"], case["back_text"])
    checks: list[tuple[bool, str]] = []

    # The detected claims must match the expected set exactly (order and
    # duplicates are ignored).
    actual_claims = {claim.claim for claim in audit.claims}
    checks.append((actual_claims == set(case["expected_claims"]), "claim set"))

    # A claim that is missing from the audit yields None here and therefore
    # fails its verdict check.
    verdicts = {claim.claim: claim.verdict.value for claim in audit.claims}
    for claim, expected in case.get("expected_verdicts", {}).items():
        checks.append((verdicts.get(claim) == expected, f"{claim} verdict"))

    # Gap terms are matched case-insensitively as substrings of the
    # concatenated text of all persuasion-gap findings.
    gap_text = " ".join(
        f"{finding.headline} {finding.front_impression} {finding.quiet_context}"
        for finding in audit.persuasion_gap
    ).lower()
    for term in case.get("expected_gap_terms", []):
        checks.append((term.lower() in gap_text, f"gap contains {term}"))

    if "expected_best_before" in case:
        checks.append(
            (
                audit.expiry.best_before == case["expected_best_before"],
                "best-before date",
            )
        )
    if "expected_after_opening" in case:
        checks.append(
            (
                audit.expiry.after_opening_instruction
                == case["expected_after_opening"],
                "after-opening instruction",
            )
        )
    if "expected_sugar_teaspoons" in case:
        checks.append(
            (
                audit.whole_packet.sugar_teaspoons
                == case["expected_sugar_teaspoons"],
                "sugar teaspoons",
            )
        )

    failures = [label for passed, label in checks if not passed]
    return len(checks) - len(failures), len(checks), failures


def main() -> int:
    """Run every golden case and print a report.

    Returns:
        0 when every check passed, otherwise 1.
    """
    cases = load_cases(ROOT / "data" / "golden_cases.jsonl")
    passed = total = 0
    for case in cases:
        case_passed, case_total, failures = evaluate_case(case)
        passed += case_passed
        total += case_total
        marker = "PASS" if not failures else "FAIL"
        print(f"{marker} {case['id']} {case['title']}: {case_passed}/{case_total}")
        for failure in failures:
            print(f" - {failure}")
    print(
        f"\nPacketCourt golden evaluation: {passed}/{total} checks passed "
        f"across {len(cases)} cases."
    )
    return 0 if passed == total else 1


if __name__ == "__main__":
    raise SystemExit(main())