#!/usr/bin/env bash
# run_eval_suite.sh — Seed the database and regenerate all eval dashboards.
#
# Usage (from repo root):
#   chmod +x run_eval_suite.sh
#   ./run_eval_suite.sh
#
# The script will:
#   1. Register all 14 eval corpus sources + chunks in PostgreSQL (with embeddings)
#   2. Seed claims, evidence assessments, and graph edges
#   3. Run all four eval suites and write dashboard JSON/MD files
#   4. Run the release gate and print the final pass/fail verdict
#
# Prerequisites:
#   - PostgreSQL running at localhost with database ai_knowledge_spine
#   - Ollama running at the URL in .env with the embedding model pulled
#     (default: qwen3-embedding:8b — pull once with: ollama pull qwen3-embedding:8b)
#   - .env present at repo root with AKS_DATABASE_URL and OLLAMA_* set
#   - Python 3.11+ activated in your shell
#
# Optional environment:
#   EMBED_FLAG  Extra argument(s) appended to the scripts/setup_eval_corpus.py
#               invocation. May be left unset, in which case nothing is added.
#
# Exit status:
#   The exit status of the release gate runner. Because of `set -e`, a failure
#   in any seeding step or eval suite before the gate aborts the script
#   immediately with that command's status.

set -euo pipefail

# Resolve the directory containing this script and run everything from there,
# so the relative paths below work regardless of the caller's cwd.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$REPO_ROOT"

echo ""
echo "╔══════════════════════════════════════════════════════════╗"
echo "║ AS-Application • Eval Suite Runner ║"
echo "╚══════════════════════════════════════════════════════════╝"
echo ""

# ── Step 1: Seed eval corpus (sources + source_versions + chunks) ─────────────
echo "▶ Step 1/3 Seeding eval corpus (14 sources)…"
# EMBED_FLAG is optional; `:-` keeps `set -u` from aborting when it is unset.
# shellcheck disable=SC2086 # intentional word-splitting: may carry several flags
python3 scripts/setup_eval_corpus.py ${EMBED_FLAG:-}
echo " ✓ Corpus seeded"
echo ""

# ── Step 2: Seed claims, evidence assessments, graph entities ─────────────────
echo "▶ Step 2/3 Seeding claims and evidence assessments…"
python3 scripts/seed_eval_claims.py
echo " ✓ Claims seeded"
echo ""

# ── Step 3: Run eval suites + release gate ────────────────────────────────────
echo "▶ Step 3/3 Running eval suites…"
echo ""

echo " [1/4] Golden medical Q&A…"
python3 -m eval.runners.run_golden_memory_eval --limit 25
echo ""

echo " [2/4] Adversarial safety…"
python3 -m eval.runners.run_adversarial_memory_eval --limit 25
echo ""

echo " [3/4] Governance policy routing…"
python3 -m eval.runners.run_governance_policy_eval --limit 25
echo ""

echo " [4/4] Retrieval stress…"
python3 -m eval.runners.run_retrieval_stress_eval --limit 25
echo ""

echo "▶ Release gate…"
# Capture the gate's status with `||` so that `set -e` does not terminate the
# script on a failing gate before the verdict banner can be printed.
GATE_EXIT=0
python3 -m eval.runners.run_release_gate --limit 25 || GATE_EXIT=$?
echo ""

if (( GATE_EXIT == 0 )); then
  echo "╔══════════════════════════════════════╗"
  echo "║ ✅ RELEASE GATE PASSED ║"
  echo "╚══════════════════════════════════════╝"
else
  echo "╔══════════════════════════════════════╗"
  echo "║ ❌ RELEASE GATE FAILED ║"
  echo "║ See eval/dashboards/ for details ║"
  echo "╚══════════════════════════════════════╝"
fi

echo ""
echo "Dashboard files written to eval/dashboards/:"
# Glob instead of `ls | sed`: an empty or missing directory must not trip
# `set -e`/`pipefail` and replace the gate's exit status with a listing error.
for dashboard_file in eval/dashboards/*.json; do
  # With no matches the pattern stays literal; skip it.
  [[ -e "$dashboard_file" ]] || continue
  printf ' %s\n' "$dashboard_file"
done
echo ""

exit "$GATE_EXIT"