"""Tests for backend bug fixes in LiteLLM and any-llm integrations. Tests tool forwarding, tool argument parsing, streaming param forwarding, message conversion (tool_use/tool_result), streaming tool_calls, and Vertex AI model mapping. """ import json from unittest.mock import AsyncMock, MagicMock, patch import pytest pytest.importorskip("litellm") from headroom.backends.litellm import ( _VERTEX_MODEL_MAP, LiteLLMBackend, _convert_anthropic_tool, _convert_tool_choice, _parse_tool_arguments, ) # ============================================================================= # Tool Format Conversion (Bug 1) # ============================================================================= class TestConvertAnthropicTool: """Test Anthropic → OpenAI tool format conversion.""" def test_basic_tool_conversion(self): anthropic_tool = { "name": "get_weather", "description": "Get the weather for a location", "input_schema": { "type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"], }, } result = _convert_anthropic_tool(anthropic_tool) assert result == { "type": "function", "function": { "name": "get_weather", "description": "Get the weather for a location", "parameters": { "type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"], }, }, } def test_tool_without_description(self): tool = {"name": "do_thing", "input_schema": {"type": "object"}} result = _convert_anthropic_tool(tool) assert result["function"]["name"] == "do_thing" assert "description" not in result["function"] assert result["function"]["parameters"] == {"type": "object"} def test_tool_without_input_schema(self): tool = {"name": "simple_tool", "description": "No params"} result = _convert_anthropic_tool(tool) assert result["function"]["name"] == "simple_tool" assert "parameters" not in result["function"] class TestConvertToolChoice: """Test Anthropic → OpenAI tool_choice conversion.""" def test_auto(self): assert _convert_tool_choice({"type": "auto"}) == "auto" def test_any_to_required(self): assert _convert_tool_choice({"type": "any"}) == "required" def test_specific_tool(self): result = _convert_tool_choice({"type": "tool", "name": "get_weather"}) assert result == {"type": "function", "function": {"name": "get_weather"}} def test_string_passthrough(self): assert _convert_tool_choice("auto") == "auto" assert _convert_tool_choice("none") == "none" # ============================================================================= # Tool Argument Parsing (Bug 2) # ============================================================================= class TestParseToolArguments: """Test that tool arguments are parsed from JSON string to dict.""" def test_json_string_parsed(self): result = _parse_tool_arguments('{"location": "Paris"}') assert result == {"location": "Paris"} def test_dict_passthrough(self): d = {"location": "Paris"} result = _parse_tool_arguments(d) assert result == d def test_invalid_json_returns_original(self): result = _parse_tool_arguments("not json") assert result == "not json" def test_empty_string(self): result = _parse_tool_arguments("") assert result == "" def test_none_passthrough(self): result = _parse_tool_arguments(None) assert result is None # ============================================================================= # LiteLLM send_message Tools Forwarding (Bug 1) # ============================================================================= class TestLiteLLMToolsForwarding: """Test that tools are forwarded through LiteLLM send_message.""" @pytest.mark.asyncio async def test_tools_forwarded_in_send_message(self): """Tools should be converted and passed to litellm.acompletion.""" mock_response = MagicMock() mock_response.choices = [ MagicMock( message=MagicMock(content="Hello", tool_calls=None), finish_reason="stop", ) ] mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=5) with ( patch("headroom.backends.litellm.acompletion", new_callable=AsyncMock) as mock_acomp, patch("headroom.backends.litellm._fetch_bedrock_inference_profiles", return_value={}), ): mock_acomp.return_value = mock_response backend = LiteLLMBackend(provider="openrouter") body = { "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "hello"}], "max_tokens": 100, "tools": [ { "name": "get_weather", "description": "Get weather", "input_schema": {"type": "object", "properties": {}}, } ], "tool_choice": {"type": "auto"}, } await backend.send_message(body, {}) call_kwargs = mock_acomp.call_args[1] assert "tools" in call_kwargs assert call_kwargs["tools"][0]["type"] == "function" assert call_kwargs["tools"][0]["function"]["name"] == "get_weather" assert call_kwargs["tool_choice"] == "auto" @pytest.mark.asyncio async def test_tool_arguments_parsed_in_response(self): """Tool call arguments should be parsed from JSON string to dict.""" mock_tc = MagicMock() mock_tc.id = "call_123" mock_tc.function.name = "get_weather" mock_tc.function.arguments = '{"location": "Paris"}' mock_response = MagicMock() mock_response.choices = [ MagicMock( message=MagicMock(content=None, tool_calls=[mock_tc]), finish_reason="tool_calls", ) ] mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=5) with ( patch("headroom.backends.litellm.acompletion", new_callable=AsyncMock) as mock_acomp, patch("headroom.backends.litellm._fetch_bedrock_inference_profiles", return_value={}), ): mock_acomp.return_value = mock_response backend = LiteLLMBackend(provider="openrouter") result = await backend.send_message( {"model": "test", "messages": [{"role": "user", "content": "hi"}]}, {}, ) tool_block = result.body["content"][0] assert tool_block["type"] == "tool_use" assert tool_block["input"] == {"location": "Paris"} assert isinstance(tool_block["input"], dict) # ============================================================================= # Message Conversion: tool_use / tool_result (GitHub Issue — Bug 2) # ============================================================================= class TestConvertMessagesToolBlocks: """Test that _convert_messages_for_litellm converts Anthropic tool blocks to OpenAI format.""" def _make_backend(self): with patch("headroom.backends.litellm._fetch_bedrock_inference_profiles", return_value={}): return LiteLLMBackend(provider="openrouter") def test_tool_result_converted_to_tool_role(self): """Anthropic tool_result blocks must become role=tool messages.""" backend = self._make_backend() messages = [ {"role": "user", "content": "Weather in Paris?"}, { "role": "assistant", "content": [ { "type": "tool_use", "id": "toolu_01", "name": "get_weather", "input": {"city": "Paris"}, }, ], }, { "role": "user", "content": [ {"type": "tool_result", "tool_use_id": "toolu_01", "content": "Sunny, 22C"}, ], }, ] converted = backend._convert_messages_for_litellm(messages) # assistant message should have tool_calls assistant = converted[1] assert assistant["role"] == "assistant" assert "tool_calls" in assistant assert assistant["tool_calls"][0]["id"] == "toolu_01" assert assistant["tool_calls"][0]["type"] == "function" assert assistant["tool_calls"][0]["function"]["name"] == "get_weather" assert json.loads(assistant["tool_calls"][0]["function"]["arguments"]) == {"city": "Paris"} # tool_result should become role=tool tool_msg = converted[2] assert tool_msg["role"] == "tool" assert tool_msg["tool_call_id"] == "toolu_01" assert tool_msg["content"] == "Sunny, 22C" def test_tool_result_with_list_content(self): """tool_result with list content should be flattened to string.""" backend = self._make_backend() messages = [ { "role": "user", "content": [ { "type": "tool_result", "tool_use_id": "toolu_02", "content": [ {"type": "text", "text": "Line 1"}, {"type": "text", "text": "Line 2"}, ], }, ], }, ] converted = backend._convert_messages_for_litellm(messages) assert converted[0]["role"] == "tool" assert converted[0]["content"] == "Line 1\nLine 2" def test_assistant_tool_use_with_text(self): """Assistant message with both text and tool_use blocks.""" backend = self._make_backend() messages = [ { "role": "assistant", "content": [ {"type": "text", "text": "Let me check the weather."}, { "type": "tool_use", "id": "toolu_03", "name": "get_weather", "input": {"city": "Tokyo"}, }, ], }, ] converted = backend._convert_messages_for_litellm(messages) assert len(converted) == 1 assert converted[0]["role"] == "assistant" assert converted[0]["content"] == "Let me check the weather." assert converted[0]["tool_calls"][0]["function"]["name"] == "get_weather" def test_simple_text_messages_unchanged(self): """Plain string messages pass through.""" backend = self._make_backend() messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi!"}, ] converted = backend._convert_messages_for_litellm(messages) assert converted == messages def test_multiple_tool_results(self): """Multiple tool_result blocks in one user message → multiple role=tool messages.""" backend = self._make_backend() messages = [ { "role": "user", "content": [ {"type": "tool_result", "tool_use_id": "toolu_a", "content": "Result A"}, {"type": "tool_result", "tool_use_id": "toolu_b", "content": "Result B"}, ], }, ] converted = backend._convert_messages_for_litellm(messages) assert len(converted) == 2 assert converted[0]["role"] == "tool" assert converted[0]["tool_call_id"] == "toolu_a" assert converted[1]["role"] == "tool" assert converted[1]["tool_call_id"] == "toolu_b" def test_tool_result_immediately_follows_tool_calls(self): """Bedrock requires role=tool immediately after assistant tool_calls — no intervening messages. Regression test for GitHub issue #70: a stray user text message was inserted between the assistant tool_calls and the tool results, causing Bedrock to reject the request with 'tool_use ids were found without tool_result blocks immediately after'. """ backend = self._make_backend() messages = [ {"role": "user", "content": "What's the weather in Paris and Tokyo?"}, { "role": "assistant", "content": [ { "type": "tool_use", "id": "toolu_01", "name": "get_weather", "input": {"city": "Paris"}, }, { "type": "tool_use", "id": "toolu_02", "name": "get_weather", "input": {"city": "Tokyo"}, }, ], }, { "role": "user", "content": [ {"type": "tool_result", "tool_use_id": "toolu_01", "content": "Sunny, 22C"}, {"type": "tool_result", "tool_use_id": "toolu_02", "content": "Rainy, 18C"}, ], }, ] converted = backend._convert_messages_for_litellm(messages) # Find the assistant message with tool_calls assistant_idx = next(i for i, m in enumerate(converted) if m.get("tool_calls")) # Every message after the assistant tool_calls must be role=tool # with no intervening user/assistant messages for i in range(assistant_idx + 1, len(converted)): assert converted[i]["role"] == "tool", ( f"Message at index {i} has role={converted[i]['role']!r}, " f"expected 'tool' — Bedrock requires tool results immediately " f"after assistant tool_calls with no intervening messages" ) def test_tool_result_with_text_does_not_insert_user_message(self): """Text alongside tool_result should NOT produce a separate user message. Bedrock rejects any message between assistant tool_calls and tool results. """ backend = self._make_backend() messages = [ { "role": "user", "content": [ {"type": "text", "text": "Here are the results:"}, {"type": "tool_result", "tool_use_id": "toolu_01", "content": "42"}, ], }, ] converted = backend._convert_messages_for_litellm(messages) # Should only have the tool message, no user text message assert len(converted) == 1 assert converted[0]["role"] == "tool" assert converted[0]["tool_call_id"] == "toolu_01" assert converted[0]["content"] == "42" # ============================================================================= # Streaming tool_calls (GitHub Issue — Bug 1) # ============================================================================= class TestStreamMessageToolCalls: """Test that stream_message emits tool_use blocks and correct stop_reason.""" @pytest.mark.asyncio async def test_stream_emits_tool_use_blocks(self): """Tool calls in streaming should produce content_block_start with type=tool_use.""" async def mock_stream(): # First chunk: tool call start (id + name) tc = MagicMock() tc.index = 0 tc.id = "toolu_stream_01" tc.function = MagicMock() tc.function.name = "get_weather" tc.function.arguments = "" chunk1 = MagicMock() chunk1.choices = [ MagicMock(delta=MagicMock(content=None, tool_calls=[tc]), finish_reason=None) ] yield chunk1 # Second chunk: arguments delta tc2 = MagicMock() tc2.index = 0 tc2.id = None tc2.function = MagicMock() tc2.function.name = None tc2.function.arguments = '{"city":"Paris"}' chunk2 = MagicMock() chunk2.choices = [ MagicMock(delta=MagicMock(content=None, tool_calls=[tc2]), finish_reason=None) ] yield chunk2 # Final chunk: finish_reason=tool_calls chunk3 = MagicMock() chunk3.choices = [ MagicMock( delta=MagicMock(content=None, tool_calls=None), finish_reason="tool_calls" ) ] yield chunk3 with ( patch("headroom.backends.litellm.acompletion", new_callable=AsyncMock) as mock_acomp, patch("headroom.backends.litellm._fetch_bedrock_inference_profiles", return_value={}), ): mock_acomp.return_value = mock_stream() backend = LiteLLMBackend(provider="openrouter") events = [] async for event in backend.stream_message( { "model": "test", "messages": [{"role": "user", "content": "weather?"}], "tools": [ { "name": "get_weather", "description": "Get weather", "input_schema": {"type": "object"}, } ], }, {}, ): events.append(event) # Find content_block_start events block_starts = [e for e in events if e.event_type == "content_block_start"] assert len(block_starts) == 1 assert block_starts[0].data["content_block"]["type"] == "tool_use" assert block_starts[0].data["content_block"]["id"] == "toolu_stream_01" assert block_starts[0].data["content_block"]["name"] == "get_weather" # Find input_json_delta events json_deltas = [ e for e in events if e.event_type == "content_block_delta" and e.data.get("delta", {}).get("type") == "input_json_delta" ] assert len(json_deltas) == 1 assert json_deltas[0].data["delta"]["partial_json"] == '{"city":"Paris"}' # Check stop_reason is "tool_use" msg_delta = [e for e in events if e.event_type == "message_delta"] assert len(msg_delta) == 1 assert msg_delta[0].data["delta"]["stop_reason"] == "tool_use" @pytest.mark.asyncio async def test_stream_text_still_works(self): """Pure text streaming should still work correctly.""" async def mock_stream(): chunk = MagicMock() chunk.choices = [ MagicMock(delta=MagicMock(content="Hello!", tool_calls=None), finish_reason=None) ] yield chunk chunk2 = MagicMock() chunk2.choices = [ MagicMock(delta=MagicMock(content=None, tool_calls=None), finish_reason="stop") ] yield chunk2 with ( patch("headroom.backends.litellm.acompletion", new_callable=AsyncMock) as mock_acomp, patch("headroom.backends.litellm._fetch_bedrock_inference_profiles", return_value={}), ): mock_acomp.return_value = mock_stream() backend = LiteLLMBackend(provider="openrouter") events = [] async for event in backend.stream_message( {"model": "test", "messages": [{"role": "user", "content": "hi"}]}, {}, ): events.append(event) block_starts = [e for e in events if e.event_type == "content_block_start"] assert len(block_starts) == 1 assert block_starts[0].data["content_block"]["type"] == "text" text_deltas = [e for e in events if e.event_type == "content_block_delta"] assert len(text_deltas) == 1 assert text_deltas[0].data["delta"]["text"] == "Hello!" msg_delta = [e for e in events if e.event_type == "message_delta"] assert msg_delta[0].data["delta"]["stop_reason"] == "end_turn" @pytest.mark.asyncio async def test_stream_text_then_tool(self): """Text followed by tool call should produce two blocks.""" async def mock_stream(): # Text chunk chunk1 = MagicMock() chunk1.choices = [ MagicMock( delta=MagicMock(content="I'll check. ", tool_calls=None), finish_reason=None ) ] yield chunk1 # Tool call chunk tc = MagicMock() tc.index = 0 tc.id = "toolu_mixed" tc.function = MagicMock() tc.function.name = "search" tc.function.arguments = '{"q":"test"}' chunk2 = MagicMock() chunk2.choices = [ MagicMock(delta=MagicMock(content=None, tool_calls=[tc]), finish_reason=None) ] yield chunk2 # Finish chunk3 = MagicMock() chunk3.choices = [ MagicMock( delta=MagicMock(content=None, tool_calls=None), finish_reason="tool_calls" ) ] yield chunk3 with ( patch("headroom.backends.litellm.acompletion", new_callable=AsyncMock) as mock_acomp, patch("headroom.backends.litellm._fetch_bedrock_inference_profiles", return_value={}), ): mock_acomp.return_value = mock_stream() backend = LiteLLMBackend(provider="openrouter") events = [] async for event in backend.stream_message( {"model": "test", "messages": [{"role": "user", "content": "hi"}]}, {}, ): events.append(event) block_starts = [e for e in events if e.event_type == "content_block_start"] assert len(block_starts) == 2 assert block_starts[0].data["content_block"]["type"] == "text" assert block_starts[1].data["content_block"]["type"] == "tool_use" # Two content_block_stop events (one per block) block_stops = [e for e in events if e.event_type == "content_block_stop"] assert len(block_stops) == 2 # stop_reason should be tool_use msg_delta = [e for e in events if e.event_type == "message_delta"] assert msg_delta[0].data["delta"]["stop_reason"] == "tool_use" # ============================================================================= # Streaming Params (Bugs 3-4) # ============================================================================= class TestLiteLLMStreamingParams: """Test that streaming forwards all params.""" @pytest.mark.asyncio async def test_streaming_forwards_all_params(self): """stream_message should forward top_p, stop, and tools.""" # Create an async iterator for the mock streaming response async def mock_stream(): chunk = MagicMock() chunk.choices = [MagicMock(delta=MagicMock(content="Hi"))] yield chunk with ( patch("headroom.backends.litellm.acompletion", new_callable=AsyncMock) as mock_acomp, patch("headroom.backends.litellm._fetch_bedrock_inference_profiles", return_value={}), ): mock_acomp.return_value = mock_stream() backend = LiteLLMBackend(provider="openrouter") body = { "model": "test", "messages": [{"role": "user", "content": "hi"}], "max_tokens": 100, "temperature": 0.7, "top_p": 0.9, "stop_sequences": ["\n"], "tools": [ { "name": "test_tool", "description": "A test", "input_schema": {"type": "object"}, } ], } events = [] async for event in backend.stream_message(body, {}): events.append(event) call_kwargs = mock_acomp.call_args[1] assert call_kwargs["top_p"] == 0.9 assert call_kwargs["stop"] == ["\n"] assert "tools" in call_kwargs assert call_kwargs["tools"][0]["function"]["name"] == "test_tool" # ============================================================================= # Vertex AI Model Map (Bug 6) # ============================================================================= class TestVertexModelMap: """Test that Vertex AI model map includes all current models. Model IDs sourced from: https://platform.claude.com/docs/en/build-with-claude/claude-on-vertex-ai """ def test_claude_46_models(self): assert _VERTEX_MODEL_MAP["claude-opus-4-6"] == "vertex_ai/claude-opus-4-6" assert _VERTEX_MODEL_MAP["claude-sonnet-4-6"] == "vertex_ai/claude-sonnet-4-6" def test_claude_45_models(self): assert ( _VERTEX_MODEL_MAP["claude-sonnet-4-5-20250929"] == "vertex_ai/claude-sonnet-4-5@20250929" ) assert _VERTEX_MODEL_MAP["claude-opus-4-5-20251101"] == "vertex_ai/claude-opus-4-5@20251101" def test_claude_4_models(self): assert _VERTEX_MODEL_MAP["claude-sonnet-4-20250514"] == "vertex_ai/claude-sonnet-4@20250514" assert _VERTEX_MODEL_MAP["claude-opus-4-20250514"] == "vertex_ai/claude-opus-4@20250514" def test_claude_35_models(self): assert ( _VERTEX_MODEL_MAP["claude-3-5-sonnet-20241022"] == "vertex_ai/claude-3-5-sonnet-v2@20241022" ) assert ( _VERTEX_MODEL_MAP["claude-3-5-haiku-20241022"] == "vertex_ai/claude-3-5-haiku@20241022" ) def test_claude_haiku_45(self): assert ( _VERTEX_MODEL_MAP["claude-haiku-4-5-20251001"] == "vertex_ai/claude-haiku-4-5@20251001" ) def test_claude_3_legacy(self): assert "claude-3-haiku-20240307" in _VERTEX_MODEL_MAP # ============================================================================= # URL Normalization (trailing /v1 stripping) # ============================================================================= pytest.importorskip("fastapi") class TestOpenAIURLNormalization: """Test that OPENAI_TARGET_API_URL with /v1 suffix is normalized.""" def test_v1_suffix_stripped(self): from headroom.proxy.server import HeadroomProxy, ProxyConfig original = HeadroomProxy.OPENAI_API_URL try: config = ProxyConfig( openai_api_url="http://localhost:4000/v1", optimize=False, cache_enabled=False, rate_limit_enabled=False, ) proxy = HeadroomProxy(config) assert proxy.OPENAI_API_URL == "http://localhost:4000" finally: HeadroomProxy.OPENAI_API_URL = original def test_v1_slash_suffix_stripped(self): from headroom.proxy.server import HeadroomProxy, ProxyConfig original = HeadroomProxy.OPENAI_API_URL try: config = ProxyConfig( openai_api_url="http://localhost:4000/v1/", optimize=False, cache_enabled=False, rate_limit_enabled=False, ) proxy = HeadroomProxy(config) assert proxy.OPENAI_API_URL == "http://localhost:4000" finally: HeadroomProxy.OPENAI_API_URL = original def test_no_v1_unchanged(self): from headroom.proxy.server import HeadroomProxy, ProxyConfig original = HeadroomProxy.OPENAI_API_URL try: config = ProxyConfig( openai_api_url="http://localhost:4000", optimize=False, cache_enabled=False, rate_limit_enabled=False, ) proxy = HeadroomProxy(config) assert proxy.OPENAI_API_URL == "http://localhost:4000" finally: HeadroomProxy.OPENAI_API_URL = original # ============================================================================= # Bedrock API Key Forwarding Regression (#105) # ============================================================================= class TestBedrockApiKeyNotForwarded: """Bedrock uses AWS SigV4 auth, not API keys. Forwarding x-api-key (e.g. sk-ant-dummy) to LiteLLM overrides AWS credentials and breaks Bedrock auth. """ def test_bedrock_does_not_forward_api_key(self): """api_key should NOT be in kwargs for Bedrock provider.""" backend = LiteLLMBackend(provider="bedrock", region="us-west-2") kwargs = {} headers = { "x-api-key": "sk-ant-dummy-key", "authorization": "Bearer sk-ant-dummy-key", } # Simulate what the handler does: build kwargs then check _env_auth_providers = ("bedrock", "vertex_ai", "vertex_ai_beta", "sagemaker") if backend.provider not in _env_auth_providers: auth_header = headers.get("authorization", headers.get("Authorization", "")) if auth_header.startswith("Bearer "): kwargs["api_key"] = auth_header[7:] elif headers.get("x-api-key"): kwargs["api_key"] = headers["x-api-key"] assert "api_key" not in kwargs, ( f"Bedrock should not have api_key in kwargs, got: {kwargs.get('api_key')}" ) def test_openai_does_forward_api_key(self): """api_key SHOULD be in kwargs for non-Bedrock providers.""" backend = LiteLLMBackend(provider="openai") kwargs = {} headers = {"authorization": "Bearer sk-real-key-123"} _env_auth_providers = ("bedrock", "vertex_ai", "vertex_ai_beta", "sagemaker") if backend.provider not in _env_auth_providers: auth_header = headers.get("authorization", headers.get("Authorization", "")) if auth_header.startswith("Bearer "): kwargs["api_key"] = auth_header[7:] assert kwargs.get("api_key") == "sk-real-key-123" def test_vertex_does_not_forward_api_key(self): """Vertex AI also uses env-based auth (Google ADC).""" backend = LiteLLMBackend(provider="vertex_ai") kwargs = {} headers = {"x-api-key": "sk-ant-dummy"} _env_auth_providers = ("bedrock", "vertex_ai", "vertex_ai_beta", "sagemaker") if backend.provider not in _env_auth_providers: if headers.get("x-api-key"): kwargs["api_key"] = headers["x-api-key"] assert "api_key" not in kwargs