# LLM Provider Selection
# Options: nvidia, ollama
LLM_PROVIDER=ollama

# MCP Settings
MCP_ENABLED=true
MCP_SERVER_URL=https://huggingface.co/mcp
# Comma-separated list of extra MCP servers (set empty to disable)
MCP_EXTRA_SERVER_URLS=https://docs.livekit.io/mcp

# STT Provider Selection
# Options: moonshine, nvidia, deepgram
STT_PROVIDER=moonshine

# Moonshine STT Settings (local speech-to-text)
MOONSHINE_MODEL_ID=usefulsensors/moonshine-streaming-medium
MOONSHINE_LANGUAGE=en

# Deepgram STT Settings (cloud speech-to-text)
DEEPGRAM_STT_MODEL=nova-3
DEEPGRAM_STT_LANGUAGE=en-US

# NVIDIA STT Settings (cloud speech-to-text)
# Optional: NVIDIA_API_KEY is used if NVIDIA_STT_API_KEY is not set
NVIDIA_STT_API_KEY=
NVIDIA_STT_MODEL=parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer
NVIDIA_STT_LANGUAGE_CODE=en-US

# NVIDIA API Key (shared by LLM and STT unless NVIDIA_STT_API_KEY is set)
NVIDIA_API_KEY=your_nvidia_api_key_here

# NVIDIA LLM Settings
NVIDIA_MODEL=meta/llama-3.1-8b-instruct

# Ollama LLM Settings
# Set OLLAMA_CLOUD_MODE=false to use local Ollama at http://localhost:11434/v1.
# Local mode can keep OLLAMA_API_KEY=ollama if your server ignores auth.
# Note: do not use ":cloud" aliases with the /v1 endpoint.
OLLAMA_CLOUD_MODE=true
OLLAMA_MODEL=qwen3-next:80b
OLLAMA_API_KEY=your_ollama_api_key_here

# Langfuse Tracing Settings (optional)
LANGFUSE_ENABLED=false
LANGFUSE_HOST=https://cloud.langfuse.com
# Optional alternative to LANGFUSE_HOST
LANGFUSE_BASE_URL=
# Required for frontend deep links: project/<project_id>/...
LANGFUSE_PROJECT_ID=
# Langfuse credentials and trace tuning
LANGFUSE_PUBLIC_KEY=
LANGFUSE_SECRET_KEY=
# Mark traces public so non-members can open shared links
LANGFUSE_PUBLIC_TRACES=false
# Short wait for assistant text on normal turns
LANGFUSE_ASSISTANT_TEXT_GRACE_TIMEOUT_MS=500
# Legacy fallback retained for compatibility
LANGFUSE_TRACE_FINALIZE_TIMEOUT_MS=8000
LANGFUSE_POST_TOOL_RESPONSE_TIMEOUT_MS=30000
LANGFUSE_MAX_PENDING_TRACE_TASKS=200
LANGFUSE_TRACE_FLUSH_TIMEOUT_MS=1000
# Merge immediate continuation turns into one trace; 0 disables it
LANGFUSE_CONTINUATION_COALESCE_WINDOW_MS=1500

# Common LLM Parameters
LLM_TEMPERATURE=0.7
LLM_MAX_TOKENS=1024

# LLM/MCP API timeout/retry tuning
LLM_CONN_TIMEOUT_SEC=20.0
# Timeout for one MCP tool request/response cycle
MCP_CONN_TIMEOUT_SEC=20.0
LLM_CONN_MAX_RETRY=1
LLM_CONN_RETRY_INTERVAL_SEC=1.0
TURN_LLM_STALL_TIMEOUT_SEC=12.0
# Set >0 to force-interrupt slow startup greetings; 0 disables the cutoff
MCP_STARTUP_GREETING_TIMEOUT_SEC=0.0

# TTS Provider Selection
# Options: pocket, deepgram, nvidia
TTS_PROVIDER=pocket
# Required when STT_PROVIDER=deepgram or TTS_PROVIDER=deepgram
DEEPGRAM_API_KEY=
# Optional: NVIDIA_API_KEY is used if NVIDIA_TTS_API_KEY is not set
NVIDIA_TTS_API_KEY=

# NVIDIA TTS Settings (cloud or self-hosted Riva)
NVIDIA_TTS_VOICE=Magpie-Multilingual.EN-US.Leo
NVIDIA_TTS_LANGUAGE_CODE=en-US
NVIDIA_TTS_SERVER=grpc.nvcf.nvidia.com:443
NVIDIA_TTS_FUNCTION_ID=877104f7-e885-42b9-8de8-f6e4c6303969
# Set false for self-hosted Riva without TLS
NVIDIA_TTS_USE_SSL=true

# Pocket TTS Settings (local text-to-speech)
# PocketTTS output sample rate is fixed to native 24kHz.
POCKET_TTS_VOICE=alba
POCKET_TTS_TEMPERATURE=0.7
POCKET_TTS_LSD_DECODE_STEPS=1
# Timeout for one PocketTTS synthesis attempt
POCKET_TTS_CONN_TIMEOUT_SEC=45.0
# LiveKit Settings
LIVEKIT_URL=wss://your-livekit-server.example.com
LIVEKIT_API_KEY=your_livekit_api_key_here
LIVEKIT_API_SECRET=your_livekit_api_secret_here
# Use a unique name per environment to avoid worker collisions
LIVEKIT_AGENT_NAME=open-voice-agent-local
# Use 0-1 locally to reduce memory pressure
LIVEKIT_NUM_IDLE_PROCESSES=1
# Idle worker bootstrap timeout; increase to give workers more time to initialize
LIVEKIT_INITIALIZE_PROCESS_TIMEOUT_SEC=20.0
# Per-job memory warning threshold (6 GB)
LIVEKIT_JOB_MEMORY_WARN_MB=6144

# LiveKit audio input configuration
LIVEKIT_SAMPLE_RATE=24000
LIVEKIT_NUM_CHANNELS=1
# Larger frames slightly reduce responsiveness but avoid over-eager VAD transitions
LIVEKIT_FRAME_SIZE_MS=60
LIVEKIT_PRE_CONNECT_AUDIO=true
LIVEKIT_PRE_CONNECT_TIMEOUT=3.0

# Voice Activity Detection (VAD) configuration
# Require 180ms of speech before activation
VAD_MIN_SPEECH_DURATION=0.18
# Wait longer before treating a pause as end of speech
VAD_MIN_SILENCE_DURATION=0.55
# Silero default; keeps sensitivity balanced between speech and background noise
VAD_THRESHOLD=0.5

# Turn endpointing tuning
# Default turn commit delay before endpointing
MIN_ENDPOINTING_DELAY=0.5
# Let the detector wait longer when phrasing suggests continuation
MAX_ENDPOINTING_DELAY=3.0
# Wait for the committed turn before generating a reply
PREEMPTIVE_GENERATION=false