#!/usr/bin/env bash
#
# Eval suite runner.
#
# Runs, in order, from the directory containing this script:
#   1. scripts/setup_eval_corpus.py   (corpus seeding)
#   2. scripts/seed_eval_claims.py    (claims / evidence seeding)
#   3. four eval runners under eval.runners (golden, adversarial,
#      governance policy, retrieval stress)
#   4. eval.runners.run_release_gate
#
# Environment:
#   EMBED_FLAG  Optional. Extra argument(s) forwarded verbatim to
#               scripts/setup_eval_corpus.py. Its meaning is defined by
#               that script — NOTE(review): confirm accepted values there.
#
# Exit status:
#   The release gate's exit status when every earlier step succeeded.
#   If a seeding step or an eval suite fails, `set -e` aborts the run
#   immediately with that step's status.

set -euo pipefail

# Resolve the directory holding this script so relative paths below work
# no matter where the script is invoked from.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$REPO_ROOT"

# Case limit passed to every eval runner and to the release gate.
readonly EVAL_CASE_LIMIT=25

echo ""
echo "╔══════════════════════════════════════════════════════════╗"
echo "║ AS-Application • Eval Suite Runner ║"
echo "╚══════════════════════════════════════════════════════════╝"
echo ""

# ── Step 1: corpus ───────────────────────────────────────────────────────
echo "▶ Step 1/3 Seeding eval corpus (14 sources)…"
# EMBED_FLAG is optional: default it to empty so `set -u` does not abort
# when the caller has not exported it. It is left unquoted on purpose so an
# empty value contributes no argument and a multi-word value is split into
# separate arguments.
# shellcheck disable=SC2086
python3 scripts/setup_eval_corpus.py ${EMBED_FLAG:-}
echo " ✓ Corpus seeded"
echo ""

# ── Step 2: claims ───────────────────────────────────────────────────────
echo "▶ Step 2/3 Seeding claims and evidence assessments…"
python3 scripts/seed_eval_claims.py
echo " ✓ Claims seeded"
echo ""

# ── Step 3: eval suites ──────────────────────────────────────────────────
echo "▶ Step 3/3 Running eval suites…"
echo ""

echo " [1/4] Golden medical Q&A…"
python3 -m eval.runners.run_golden_memory_eval --limit "$EVAL_CASE_LIMIT"
echo ""

echo " [2/4] Adversarial safety…"
python3 -m eval.runners.run_adversarial_memory_eval --limit "$EVAL_CASE_LIMIT"
echo ""

echo " [3/4] Governance policy routing…"
python3 -m eval.runners.run_governance_policy_eval --limit "$EVAL_CASE_LIMIT"
echo ""

echo " [4/4] Retrieval stress…"
python3 -m eval.runners.run_retrieval_stress_eval --limit "$EVAL_CASE_LIMIT"
echo ""

# ── Release gate ─────────────────────────────────────────────────────────
echo "▶ Release gate…"
# Capture the status with `||` so a failing gate does not trip `set -e`;
# otherwise the script would exit here and the FAILED banner and dashboard
# listing below could never be printed.
GATE_EXIT=0
python3 -m eval.runners.run_release_gate --limit "$EVAL_CASE_LIMIT" || GATE_EXIT=$?
echo ""

if (( GATE_EXIT == 0 )); then
  echo "╔══════════════════════════════════════╗"
  echo "║ ✅ RELEASE GATE PASSED ║"
  echo "╚══════════════════════════════════════╝"
else
  echo "╔══════════════════════════════════════╗"
  echo "║ ❌ RELEASE GATE FAILED ║"
  echo "║ See eval/dashboards/ for details ║"
  echo "╚══════════════════════════════════════╝"
fi

echo ""
echo "Dashboard files written to eval/dashboards/:"
# Glob instead of `ls | sed`: with pipefail + errexit, a directory without
# any *.json would make the pipeline fail and replace the gate's exit
# status. An unmatched glob stays literal, hence the existence check.
for dashboard_file in eval/dashboards/*.json; do
  [[ -e "$dashboard_file" ]] || continue
  printf ' %s\n' "$dashboard_file"
done
echo ""

# Propagate the release gate verdict to the caller (e.g. CI).
exit "$GATE_EXIT"