#!/usr/bin/env bash
#
# Load the 10K eval cases into Postgres, verify the load, then run the eval
# runners in sequence.
#
# Usage:
#   LIMIT=<n> <this-script>   # pass "--limit <n>" to every runner
#   <this-script>             # no --limit flag (full datasets)
#
# Environment:
#   LIMIT  Optional. When non-empty it is forwarded to each runner as
#          "--limit <value>".
#   .env   If a .env file exists in the repository root (the parent of this
#          script's directory) it is sourced with auto-export enabled, so
#          every variable it assigns is exported to the Python processes.
#
# NOTE(review): the previous header said "all five eval runners", but only the
# four runners listed in RUNNERS below are invoked here — confirm whether a
# fifth runner is missing from this script.
set -euo pipefail

# Resolve the repository root relative to this script so it can be launched
# from any working directory; all paths below are relative to that root.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"

if [[ -f "$ROOT/.env" ]]; then
  # set -a exports every variable assigned while sourcing .env.
  set -a
  # shellcheck disable=SC1091
  source "$ROOT/.env"
  set +a
fi

LIMIT="${LIMIT:-}"
LIMIT_ARG=()
if [[ -n "${LIMIT}" ]]; then
  LIMIT_ARG=(--limit "${LIMIT}")
fi

echo "=== Loading 10,000 eval cases to Postgres ==="
python3 scripts/load_eval_pack_to_db.py

echo ""
echo "=== Eval pack status ==="
python3 scripts/load_eval_pack_to_db.py --verify-only

echo ""
echo "=== Running eval runners ${LIMIT_ARG[*]:-(full datasets)} ==="

# Runners are executed in this order; under set -e the first failing runner
# aborts the script, so later runners do not run.
RUNNERS=(
  run_golden_memory_eval.py
  run_adversarial_memory_eval.py
  run_governance_policy_eval.py
  run_retrieval_stress_eval.py
)

for runner in "${RUNNERS[@]}"; do
  # ${arr[@]+"${arr[@]}"} expands to nothing when LIMIT_ARG is empty. A bare
  # "${LIMIT_ARG[@]}" on an empty array is an "unbound variable" error under
  # set -u on Bash older than 4.4 (e.g. the Bash 3.2 shipped with macOS).
  python3 "eval/runners/${runner}" ${LIMIT_ARG[@]+"${LIMIT_ARG[@]}"}
done

echo ""
echo "Done. Summaries in eval/dashboards/"