pharmaspine-backend / scripts /run_full_eval_pack.sh
ashish1265659565's picture
Upload folder using huggingface_hub
08fd094 verified
Raw
History Blame Contribute Delete
968 Bytes
#!/usr/bin/env bash
# Load the eval pack into Postgres, then run the eval runners.
# Set LIMIT in the environment to pass `--limit <LIMIT>` to each runner.
set -e -u -o pipefail

# Resolve the repository root (parent of this script's directory) and work
# from there so the relative script paths used later resolve.
script_dir="$(dirname "$0")"
ROOT="$(cd "${script_dir}/.." && pwd)"
cd "${ROOT}"

# Optional .env at the repo root: while allexport is on, every variable the
# file assigns is exported to the child processes started later.
if [[ -f "${ROOT}/.env" ]]; then
  set -o allexport
  # shellcheck disable=SC1091
  . "${ROOT}/.env"
  set +o allexport
fi
# Optional limit taken from the environment; an unset LIMIT is treated the
# same as an empty one.
LIMIT="${LIMIT:-}"

# Held as an array so that "no limit" contributes zero words to a command
# line, and a set limit contributes exactly two: --limit and its value.
LIMIT_ARG=()
[[ -z "${LIMIT}" ]] || LIMIT_ARG+=(--limit "${LIMIT}")
# Step 1: load the eval cases into Postgres.
echo "=== Loading 10,000 eval cases to Postgres ==="
python3 scripts/load_eval_pack_to_db.py
echo ""

# Step 2: re-run the loader in verify-only mode to print the pack status.
echo "=== Eval pack status ==="
python3 scripts/load_eval_pack_to_db.py --verify-only
echo ""

# Step 3: run each eval runner in order. Under `set -e` the first failing
# runner aborts the script, so later runners are skipped.
# NOTE(review): four runners are listed here; confirm the set is complete.
echo "=== Running eval runners ${LIMIT_ARG[*]:-(full datasets)} ==="
runners=(
  eval/runners/run_golden_memory_eval.py
  eval/runners/run_adversarial_memory_eval.py
  eval/runners/run_governance_policy_eval.py
  eval/runners/run_retrieval_stress_eval.py
)
for runner in "${runners[@]}"; do
  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array. A plain
  # "${LIMIT_ARG[@]}" under `set -u` aborts with "unbound variable" on
  # Bash older than 4.4 (e.g. the 3.2 shipped with macOS) when no limit is set.
  python3 "${runner}" ${LIMIT_ARG[@]+"${LIMIT_ARG[@]}"}
done
echo ""
echo "Done. Summaries in eval/dashboards/"