#!/bin/bash
# run.sh — convenience wrapper around docker compose
#
# Usage: run.sh {up|down|logs|rebuild|exec|test|bench} [args...]
#   With no command, "up" is assumed. Remaining arguments are forwarded to
#   the underlying docker compose subcommand (for "exec": they form the
#   command run inside the "vllm" service, defaulting to bash).
#
# Environment:
#   ENV_FILE       Optional path handed to docker compose via --env-file.
#   VLLM_BASE_URL  Base URL probed by "test" and "bench"
#                  (default: http://localhost:8000).
#
# pipefail makes "curl ... | jq ." report a curl failure instead of
# returning jq's success on empty input.
set -e
set -o pipefail

# Work from the directory containing this script, whatever the caller's cwd.
cd -- "$(dirname -- "$0")"

ENV_FILE="${ENV_FILE:-}"
COMPOSE_ARGS=()
if [[ -n "$ENV_FILE" ]]; then
  COMPOSE_ARGS+=(--env-file "$ENV_FILE")
fi

BASE_URL="${VLLM_BASE_URL:-http://localhost:8000}"
MODELS_URL="${BASE_URL}/v1/models"
CHAT_URL="${BASE_URL}/v1/chat/completions"

#######################################
# Run a docker compose subcommand with the shared global options.
# Globals:   COMPOSE_ARGS (read)
# Arguments: the compose subcommand followed by its arguments
# Returns:   exit status of docker compose
#######################################
compose() {
  docker compose "${COMPOSE_ARGS[@]}" "$@"
}

#######################################
# Print the command summary to stderr.
#######################################
usage() {
  cat >&2 <<EOF
Usage: $0 {up|down|logs|rebuild|exec|test|bench}

  up        Start container (detached)
  down      Stop container
  logs      Tail logs
  rebuild   Rebuild image and restart
  exec      Execute command in container (default: bash)
  test      Ping /v1/models until ready
  bench     Quick inference smoke test
EOF
}

# First positional argument selects the command; everything after it is
# forwarded. Only shift when there is something to shift so that set -e is
# not tripped by an empty argument list.
CMD="${1:-up}"
if (( $# > 0 )); then
  shift
fi

case "$CMD" in
  up)
    compose up -d "$@"
    echo "vLLM container started. Logs: docker compose logs -f"
    ;;
  down)
    compose down "$@"
    ;;
  logs)
    compose logs -f "$@"
    ;;
  rebuild)
    compose build --no-cache "$@"
    compose up -d
    ;;
  exec)
    # The command word has already been shifted off above. A second shift
    # here would drop the first word of the user's command and, with no
    # arguments at all, abort the script under set -e before the bash
    # default could apply.
    compose exec vllm "${@:-bash}"
    ;;
  test)
    echo "Waiting for server..."
    # Up to 30 probes, 10 seconds apart.
    for (( attempt = 1; attempt <= 30; attempt++ )); do
      # curl exits non-zero while the server is unreachable; "|| true"
      # keeps set -e from aborting the wait loop on that.
      status="$(curl -s -o /dev/null -w '%{http_code}' "$MODELS_URL" 2>/dev/null)" || true
      if [[ "$status" == "200" ]]; then
        echo "Ready!"
        curl -s "$MODELS_URL" | jq .
        exit 0
      fi
      sleep 10
    done
    echo "Timeout waiting for server" >&2
    exit 1
    ;;
  bench)
    curl -s "$CHAT_URL" \
      -H 'Content-Type: application/json' \
      -d '{
        "model": "huihui-qwen36-27b-local",
        "messages": [{"role": "user", "content": "Hello, what is 2+2? Answer briefly."}],
        "max_tokens": 100,
        "temperature": 0
      }' | jq .
    ;;
  *)
    usage
    exit 1
    ;;
esac