pharmaspine-backend / scripts /check_llm_setup.sh
ashish1265659565's picture
Upload folder using huggingface_hub
08fd094 verified
Raw
History Blame Contribute Delete
1.79 kB
#!/usr/bin/env bash
# Sanity check for the hybrid LLM stack: Groq API key, the Ollama routing
# model, and the Ollama embedding + synthesis-fallback models.
set -euo pipefail

# Project root is the parent of the directory that holds this script.
ROOT="$(dirname "$0")"
ROOT="$(cd "$ROOT/.." && pwd)"

# Pull in an optional .env from the project root. While allexport (-a) is on,
# every variable the file assigns is exported to child processes as well.
if [[ -f "$ROOT/.env" ]]; then
  set -a
  # shellcheck disable=SC1091
  . "$ROOT/.env"
  set +a
fi

# Groq settings: GW_-prefixed variable first, then the unprefixed one, then
# the built-in default (the key has no default and may end up empty).
GROQ_KEY="${GW_GROQ_API_KEY:-${GROQ_API_KEY:-}}"
GROQ_MODEL="${GW_GROQ_MODEL:-${GROQ_MODEL:-llama-3.3-70b-versatile}}"

# Ollama settings: the base URL follows the same precedence; the model names
# are overridable only through their GW_-prefixed variables.
BASE_URL="${GW_OLLAMA_BASE_URL:-${OLLAMA_BASE_URL:-http://127.0.0.1:11434}}"
EMBED_MODEL="${GW_OLLAMA_EMBEDDING_MODEL:-qwen3-embedding:8b}"
FALLBACK_MODEL="${GW_OLLAMA_SYNTHESIS_FALLBACK_MODEL:-qwen3.5:9b}"
ROUTING_MODEL="${GW_OLLAMA_ROUTING_MODEL:-phi3.5:latest}"

# Summarise the effective configuration before any checks run.
printf '%s\n' \
  "Hybrid LLM check" \
  " Ollama: ${BASE_URL}" \
  " Routing: ${ROUTING_MODEL} (Self-RAG + CRAG)" \
  " Synthesis: groq:${GROQ_MODEL} → fallback ollama:${FALLBACK_MODEL}" \
  " Embedding: ${EMBED_MODEL}"
# Fetch the list of installed models from the Ollama server (GET /api/tags).
# A missing curl binary is reported separately so it is not mistaken for an
# unreachable server.
if ! command -v curl >/dev/null 2>&1; then
  echo "ERROR: curl is required to query Ollama but was not found in PATH." >&2
  exit 1
fi
# -s silences the progress meter; -f makes HTTP error statuses fail without
# printing a body. The timeouts stop the check from hanging forever on a host
# that accepts no connection or never answers. `|| true` keeps errexit from
# aborting here so the friendlier message below is printed instead.
TAGS_JSON="$(curl -sf --connect-timeout 5 --max-time 15 "${BASE_URL%/}/api/tags" || true)"
if [[ -z "${TAGS_JSON}" ]]; then
  echo "ERROR: Ollama not reachable. Run: ollama serve" >&2
  exit 1
fi
#######################################
# Verify that every required Ollama model is installed.
# Globals (read, and exported for the Python child process):
#   TAGS_JSON       - body of the Ollama /api/tags response
#   ROUTING_MODEL   - model expected for routing
#   FALLBACK_MODEL  - model expected for synthesis fallback
#   EMBED_MODEL     - model expected for embedding
# Outputs:
#   stdout: one "OK:" line per model that is installed
#   stderr: an "ERROR:" line plus an `ollama pull` hint per missing model,
#           or a single "ERROR:" line if TAGS_JSON cannot be interpreted
# Returns:
#   0 when all models are installed, 1 otherwise.
#######################################
check_ollama_models() {
  export TAGS_JSON ROUTING_MODEL FALLBACK_MODEL EMBED_MODEL
  # PYTHONIOENCODING keeps the "→" in the OK lines from raising
  # UnicodeEncodeError when the current locale is not UTF-8.
  PYTHONIOENCODING=utf-8 python3 <<'PY'
import json, os, sys


def canonical(name):
    # Ollama treats a model name without an explicit tag as "<name>:latest",
    # so "phi3.5" and "phi3.5:latest" must compare equal. Only the last path
    # segment is inspected so a "host:port/" registry prefix is not mistaken
    # for a tag.
    return name if ":" in name.rsplit("/", 1)[-1] else name + ":latest"


try:
    payload = json.loads(os.environ.get("TAGS_JSON", ""))
    installed = {canonical(m["name"]) for m in (payload.get("models") or [])}
except (ValueError, AttributeError, KeyError, TypeError) as exc:
    print(f"ERROR: Unexpected response from Ollama /api/tags: {exc!r}", file=sys.stderr)
    sys.exit(1)

missing = False
for label, env_var in [
    ("routing", "ROUTING_MODEL"),
    ("synthesis fallback", "FALLBACK_MODEL"),
    ("embedding", "EMBED_MODEL"),
]:
    model = os.environ.get(env_var, "")
    if not model:
        print(f"ERROR: No Ollama model configured for {label}", file=sys.stderr)
        missing = True
    elif canonical(model) not in installed:
        # Keep going so every missing model is reported in a single run.
        print(f"ERROR: Missing Ollama model for {label}: {model}", file=sys.stderr)
        print(f" ollama pull {model}", file=sys.stderr)
        missing = True
    else:
        print(f"OK: {label} → {model}")

sys.exit(1 if missing else 0)
PY
}

# Under errexit a non-zero return here aborts the script.
check_ollama_models
# The Groq key is optional: report whether one was found, then finish.
if [[ -n "${GROQ_KEY}" ]]; then
  printf '%s\n' "OK: GROQ_API_KEY is set (groq:${GROQ_MODEL})"
else
  printf '%s\n' "WARNING: GROQ_API_KEY not set — synthesis will use Ollama fallback only."
fi
printf '%s\n' "Done."