Spaces:

ashish1265659565
/

pharmaspine-backend

Sleeping

App Files Files Community

pharmaspine-backend / run_eval_suite.sh

ashish1265659565

Upload folder using huggingface_hub

08fd094 verified 3 days ago

Raw

History Blame Contribute Delete

3.52 kB

	#!/usr/bin/env bash
	# run_eval_suite.sh — Seed the database and regenerate all eval dashboards.
	#
	# Usage (from repo root):
	# chmod +x run_eval_suite.sh
	# ./run_eval_suite.sh
	#
	# The script will:
	# 1. Register all 14 eval corpus sources + chunks in PostgreSQL (with embeddings)
	# 2. Seed claims, evidence assessments, and graph edges
	# 3. Run all four eval suites and write dashboard JSON/MD files
	# 4. Run the release gate and print the final pass/fail verdict
	#
	# Prerequisites:
	# - PostgreSQL running at localhost with database ai_knowledge_spine
	# - Ollama running at the URL in .env with the embedding model pulled
	# (default: qwen3-embedding:8b — pull once with: ollama pull qwen3-embedding:8b)
	# - .env present at repo root with AKS_DATABASE_URL and OLLAMA_* set
	# - Python 3.11+ activated in your shell

	# Strict mode: stop on any failing command, on use of an unset variable, and
	# on a failure in any stage of a pipeline.
	set -o errexit
	set -o nounset
	set -o pipefail

	# Work from the directory that contains this script so the relative paths
	# used further down (scripts/, eval/) resolve regardless of the caller's cwd.
	REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
	cd "$REPO_ROOT"

	# Banner, surrounded by one blank line above and below.
	printf '%s\n' \
	  "" \
	  "╔══════════════════════════════════════════════════════════╗" \
	  "║ AS-Application • Eval Suite Runner ║" \
	  "╚══════════════════════════════════════════════════════════╝" \
	  ""

	# ── Step 1: Seed eval corpus (sources + source_versions + chunks) ─────────────
	# EMBED_FLAG is never assigned in this script, so it can only come from the
	# caller's environment. It is expanded with a `:-` default because the script
	# runs under `set -u`: a bare ${EMBED_FLAG} aborts with "unbound variable"
	# whenever the caller has not exported it. When unset or empty, no extra
	# argument is passed to the seeding script.
	echo "▶ Step 1/3 Seeding eval corpus (14 sources)…"
	# Left unquoted on purpose so an empty value contributes no argument at all
	# and a value holding several flags is split into separate arguments.
	# shellcheck disable=SC2086
	python3 scripts/setup_eval_corpus.py ${EMBED_FLAG:-}
	echo " ✓ Corpus seeded"
	echo ""

	# ── Step 2 of 3 ── claims, evidence assessments, graph entities ───────────────
	# Announce the step, run the seeding script, then print the confirmation
	# followed by a blank separator line.
	printf '%s\n' "▶ Step 2/3 Seeding claims and evidence assessments…"
	python3 scripts/seed_eval_claims.py
	printf '%s\n' " ✓ Claims seeded" ""

	# ── Step 3 of 3 ── eval suites (the release gate follows separately) ──────────
	printf '%s\n' "▶ Step 3/3 Running eval suites…" ""

	# One entry per suite, in execution order: "<progress label>|<python module>".
	# The label is everything before the first '|', the module everything after.
	eval_suites=(
	  " [1/4] Golden medical Q&A…|eval.runners.run_golden_memory_eval"
	  " [2/4] Adversarial safety…|eval.runners.run_adversarial_memory_eval"
	  " [3/4] Governance policy routing…|eval.runners.run_governance_policy_eval"
	  " [4/4] Retrieval stress…|eval.runners.run_retrieval_stress_eval"
	)

	for suite in "${eval_suites[@]}"; do
	  echo "${suite%%|*}"
	  python3 -m "${suite#*|}" --limit 25
	  echo ""
	done

	# ── Release gate ──────────────────────────────────────────────────────────────
	# Runs the release-gate module, prints a PASSED/FAILED banner, lists the
	# dashboard JSON files, and exits with the gate's own status.
	echo "▶ Release gate…"
	# The script runs under `set -e`, so a bare failing command would abort right
	# here and the FAILED banner below could never be printed. Capturing the
	# status through `||` keeps the script alive while preserving the exit code.
	GATE_EXIT=0
	python3 -m eval.runners.run_release_gate --limit 25 || GATE_EXIT=$?
	echo ""

	if [[ "$GATE_EXIT" -eq 0 ]]; then
	  echo "╔══════════════════════════════════════╗"
	  echo "║ ✅ RELEASE GATE PASSED ║"
	  echo "╚══════════════════════════════════════╝"
	else
	  echo "╔══════════════════════════════════════╗"
	  echo "║ ❌ RELEASE GATE FAILED ║"
	  echo "║ See eval/dashboards/ for details ║"
	  echo "╚══════════════════════════════════════╝"
	fi

	echo ""
	echo "Dashboard files written to eval/dashboards/:"
	# Let the shell expand the glob instead of piping `ls` through `sed`: with
	# `pipefail` + `set -e`, an `ls` that finds no *.json would terminate the
	# script before the gate status is returned. When nothing matches, the glob
	# stays literal and the -e test skips it, so nothing is listed.
	for dashboard in eval/dashboards/*.json; do
	  [[ -e "$dashboard" ]] || continue
	  printf ' %s\n' "$dashboard"
	done
	echo ""

	# Hand the gate's status back to the caller (0 = passed, non-zero = failed).
	exit "$GATE_EXIT"