#!/usr/bin/env bash
# Verify hybrid LLM setup: Groq key, Ollama phi3.5 (routing), Qwen embed + fallback.
#
# Environment (all optional; may also be supplied by "$ROOT/.env", where ROOT
# is the parent of this script's directory):
#   GW_OLLAMA_BASE_URL / OLLAMA_BASE_URL   Ollama endpoint (default http://127.0.0.1:11434)
#   GW_OLLAMA_ROUTING_MODEL                routing model (default phi3.5:latest)
#   GW_OLLAMA_SYNTHESIS_FALLBACK_MODEL     synthesis fallback model (default qwen3.5:9b)
#   GW_OLLAMA_EMBEDDING_MODEL              embedding model (default qwen3-embedding:8b)
#   GW_GROQ_MODEL / GROQ_MODEL             Groq model (default llama-3.3-70b-versatile)
#   GW_GROQ_API_KEY / GROQ_API_KEY         Groq API key (only a warning when absent)
#
# Exit status: 0 when Ollama answers and all three models are listed,
# 1 otherwise. Diagnostics (ERROR/WARNING) are written to stderr.
set -euo pipefail

# Print each argument on its own line to stderr, then exit 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

# Fail early with an accurate message instead of letting a missing binary be
# reported further down as "Ollama not reachable".
for tool in curl python3; do
  command -v "${tool}" >/dev/null 2>&1 \
    || die "ERROR: Required tool not found on PATH: ${tool}"
done

ROOT="$(cd "$(dirname "$0")/.." && pwd)"

# Load project-level settings; `set -a` exports everything the file assigns.
if [[ -f "${ROOT}/.env" ]]; then
  set -a
  # shellcheck disable=SC1091
  source "${ROOT}/.env"
  set +a
fi

# GW_-prefixed variables take precedence over the generic names.
BASE_URL="${GW_OLLAMA_BASE_URL:-${OLLAMA_BASE_URL:-http://127.0.0.1:11434}}"
ROUTING_MODEL="${GW_OLLAMA_ROUTING_MODEL:-phi3.5:latest}"
FALLBACK_MODEL="${GW_OLLAMA_SYNTHESIS_FALLBACK_MODEL:-qwen3.5:9b}"
EMBED_MODEL="${GW_OLLAMA_EMBEDDING_MODEL:-qwen3-embedding:8b}"
GROQ_MODEL="${GW_GROQ_MODEL:-${GROQ_MODEL:-llama-3.3-70b-versatile}}"
GROQ_KEY="${GW_GROQ_API_KEY:-${GROQ_API_KEY:-}}"

echo "Hybrid LLM check"
echo " Ollama: ${BASE_URL}"
echo " Routing: ${ROUTING_MODEL} (Self-RAG + CRAG)"
echo " Synthesis: groq:${GROQ_MODEL} → fallback ollama:${FALLBACK_MODEL}"
echo " Embedding: ${EMBED_MODEL}"

# `|| true` is intentional: a failed request must reach the friendly error
# below rather than abort via `set -e`. The timeouts keep an unresponsive
# endpoint from hanging the check forever.
TAGS_JSON="$(
  curl -sf --connect-timeout 5 --max-time 15 "${BASE_URL%/}/api/tags" || true
)"
if [[ -z "${TAGS_JSON}" ]]; then
  die "ERROR: Ollama not reachable. Run: ollama serve"
fi

# The Python helper reads its inputs from the environment because the quoted
# heredoc below is passed verbatim (no shell expansion) on stdin.
export TAGS_JSON ROUTING_MODEL FALLBACK_MODEL EMBED_MODEL

python3 <<'PY'
import json
import os
import sys


def canonical(ref):
    """Return ref with an explicit tag, treating a missing tag as ':latest'.

    Only the last path component is inspected so that a registry host with a
    port (host:5000/ns/model) is not mistaken for a tagged name.
    """
    last = ref.rsplit("/", 1)[-1]
    return ref if ":" in last else ref + ":latest"


try:
    payload = json.loads(os.environ["TAGS_JSON"])
except ValueError:
    print("ERROR: Ollama /api/tags returned a non-JSON response", file=sys.stderr)
    sys.exit(1)

# Tolerate a missing or null "models" list and entries without a name.
models = payload.get("models") if isinstance(payload, dict) else None
installed = {
    canonical(entry["name"])
    for entry in (models or [])
    if isinstance(entry, dict) and entry.get("name")
}

for label, model in [
    ("routing", os.environ["ROUTING_MODEL"]),
    ("synthesis fallback", os.environ["FALLBACK_MODEL"]),
    ("embedding", os.environ["EMBED_MODEL"]),
]:
    if canonical(model) not in installed:
        print(f"ERROR: Missing Ollama model for {label}: {model}", file=sys.stderr)
        print(f" ollama pull {model}", file=sys.stderr)
        sys.exit(1)
    print(f"OK: {label} → {model}")
PY

# A missing Groq key is not fatal: synthesis falls back to the Ollama model.
if [[ -z "${GROQ_KEY}" ]]; then
  echo "WARNING: GROQ_API_KEY not set — synthesis will use Ollama fallback only." >&2
else
  echo "OK: GROQ_API_KEY is set (groq:${GROQ_MODEL})"
fi

echo "Done."