Spaces:
Build error
Build error
Commit 189bff3 · 1 Parent(s): 4dca774
Fix CI regressions for cache benchmark work
Browse files
benchmarks/claude_session_mode_benchmark.py
CHANGED
|
@@ -11,7 +11,7 @@ import logging
|
|
| 11 |
import os
|
| 12 |
from collections import Counter
|
| 13 |
from dataclasses import asdict, dataclass, field
|
| 14 |
-
from datetime import UTC, datetime, timedelta
|
| 15 |
from pathlib import Path
|
| 16 |
from typing import Any
|
| 17 |
|
|
@@ -236,10 +236,10 @@ def decode_project_key(project_key: str) -> str:
|
|
| 236 |
|
| 237 |
def _parse_timestamp(value: str | None) -> datetime:
|
| 238 |
if not value:
|
| 239 |
-
return datetime.min.replace(tzinfo=UTC)
|
| 240 |
if value.endswith("Z"):
|
| 241 |
value = value[:-1] + "+00:00"
|
| 242 |
-
return datetime.fromisoformat(value).astimezone(UTC)
|
| 243 |
|
| 244 |
|
| 245 |
def _canonical_block_key(block: Any) -> str:
|
|
|
|
| 11 |
import os
|
| 12 |
from collections import Counter
|
| 13 |
from dataclasses import asdict, dataclass, field
|
| 14 |
+
from datetime import datetime, timedelta, timezone
|
| 15 |
from pathlib import Path
|
| 16 |
from typing import Any
|
| 17 |
|
|
|
|
| 236 |
|
| 237 |
def _parse_timestamp(value: str | None) -> datetime:
|
| 238 |
if not value:
|
| 239 |
+
return datetime.min.replace(tzinfo=timezone.utc)
|
| 240 |
if value.endswith("Z"):
|
| 241 |
value = value[:-1] + "+00:00"
|
| 242 |
+
return datetime.fromisoformat(value).astimezone(timezone.utc)
|
| 243 |
|
| 244 |
|
| 245 |
def _canonical_block_key(block: Any) -> str:
|
tests/test_provider_model_fallback.py
CHANGED
|
@@ -151,16 +151,14 @@ class TestAnthropicConfigLoading:
|
|
| 151 |
"""Test loading config from file path in env var."""
|
| 152 |
config = {"context_limits": {"file-model": 400000}}
|
| 153 |
|
| 154 |
-
with tempfile.
|
| 155 |
-
|
| 156 |
-
|
| 157 |
|
| 158 |
-
with patch.dict(os.environ, {"HEADROOM_MODEL_LIMITS": f.name}):
|
| 159 |
loaded = anthropic_load_config()
|
| 160 |
assert loaded["context_limits"]["file-model"] == 400000
|
| 161 |
|
| 162 |
-
os.unlink(f.name)
|
| 163 |
-
|
| 164 |
def test_load_from_config_file(self):
|
| 165 |
"""Test loading from ~/.headroom/models.json."""
|
| 166 |
config = {
|
|
@@ -291,16 +289,14 @@ class TestOpenAIConfigLoading:
|
|
| 291 |
"""Test loading pricing from config."""
|
| 292 |
config = {"openai": {"pricing": {"test-model": [5.0, 15.0]}}}
|
| 293 |
|
| 294 |
-
with tempfile.
|
| 295 |
-
|
| 296 |
-
|
| 297 |
|
| 298 |
-
with patch.dict(os.environ, {"HEADROOM_MODEL_LIMITS": f.name}):
|
| 299 |
loaded = openai_load_config()
|
| 300 |
assert loaded["pricing"]["test-model"] == [5.0, 15.0]
|
| 301 |
|
| 302 |
-
os.unlink(f.name)
|
| 303 |
-
|
| 304 |
|
| 305 |
class TestCrossProviderConsistency:
|
| 306 |
"""Tests for consistency across providers."""
|
|
@@ -328,10 +324,8 @@ class TestCrossProviderConsistency:
|
|
| 328 |
anthropic.get_context_limit("claude-future-model-xyz")
|
| 329 |
openai.get_context_limit("gpt-future-model-xyz")
|
| 330 |
|
| 331 |
-
def test_both_providers_warn_for_unknown_models(self
|
| 332 |
"""Test that both providers warn for unknown models."""
|
| 333 |
-
import logging
|
| 334 |
-
|
| 335 |
# Clear warning caches
|
| 336 |
from headroom.providers import anthropic as anthropic_module
|
| 337 |
from headroom.providers import openai as openai_module
|
|
@@ -339,12 +333,17 @@ class TestCrossProviderConsistency:
|
|
| 339 |
anthropic_module._UNKNOWN_MODEL_WARNINGS.clear()
|
| 340 |
openai_module._UNKNOWN_MODEL_WARNINGS.clear()
|
| 341 |
|
| 342 |
-
with
|
|
|
|
|
|
|
|
|
|
| 343 |
anthropic = AnthropicProvider()
|
| 344 |
anthropic.get_context_limit("claude-test-unknown-model")
|
| 345 |
|
| 346 |
openai = OpenAIProvider()
|
| 347 |
openai.get_context_limit("gpt-test-unknown-model")
|
| 348 |
|
| 349 |
-
|
| 350 |
-
|
|
|
|
|
|
|
|
|
| 151 |
"""Test loading config from file path in env var."""
|
| 152 |
config = {"context_limits": {"file-model": 400000}}
|
| 153 |
|
| 154 |
+
with tempfile.TemporaryDirectory() as tmpdir:
|
| 155 |
+
config_path = Path(tmpdir) / "model_limits.json"
|
| 156 |
+
config_path.write_text(json.dumps(config))
|
| 157 |
|
| 158 |
+
with patch.dict(os.environ, {"HEADROOM_MODEL_LIMITS": str(config_path)}):
|
| 159 |
loaded = anthropic_load_config()
|
| 160 |
assert loaded["context_limits"]["file-model"] == 400000
|
| 161 |
|
|
|
|
|
|
|
| 162 |
def test_load_from_config_file(self):
|
| 163 |
"""Test loading from ~/.headroom/models.json."""
|
| 164 |
config = {
|
|
|
|
| 289 |
"""Test loading pricing from config."""
|
| 290 |
config = {"openai": {"pricing": {"test-model": [5.0, 15.0]}}}
|
| 291 |
|
| 292 |
+
with tempfile.TemporaryDirectory() as tmpdir:
|
| 293 |
+
config_path = Path(tmpdir) / "model_limits.json"
|
| 294 |
+
config_path.write_text(json.dumps(config))
|
| 295 |
|
| 296 |
+
with patch.dict(os.environ, {"HEADROOM_MODEL_LIMITS": str(config_path)}):
|
| 297 |
loaded = openai_load_config()
|
| 298 |
assert loaded["pricing"]["test-model"] == [5.0, 15.0]
|
| 299 |
|
|
|
|
|
|
|
| 300 |
|
| 301 |
class TestCrossProviderConsistency:
|
| 302 |
"""Tests for consistency across providers."""
|
|
|
|
| 324 |
anthropic.get_context_limit("claude-future-model-xyz")
|
| 325 |
openai.get_context_limit("gpt-future-model-xyz")
|
| 326 |
|
| 327 |
+
def test_both_providers_warn_for_unknown_models(self):
|
| 328 |
"""Test that both providers warn for unknown models."""
|
|
|
|
|
|
|
| 329 |
# Clear warning caches
|
| 330 |
from headroom.providers import anthropic as anthropic_module
|
| 331 |
from headroom.providers import openai as openai_module
|
|
|
|
| 333 |
anthropic_module._UNKNOWN_MODEL_WARNINGS.clear()
|
| 334 |
openai_module._UNKNOWN_MODEL_WARNINGS.clear()
|
| 335 |
|
| 336 |
+
with (
|
| 337 |
+
patch.object(anthropic_module.logger, "warning") as anthropic_warning,
|
| 338 |
+
patch.object(openai_module.logger, "warning") as openai_warning,
|
| 339 |
+
):
|
| 340 |
anthropic = AnthropicProvider()
|
| 341 |
anthropic.get_context_limit("claude-test-unknown-model")
|
| 342 |
|
| 343 |
openai = OpenAIProvider()
|
| 344 |
openai.get_context_limit("gpt-test-unknown-model")
|
| 345 |
|
| 346 |
+
anthropic_warning.assert_called_once()
|
| 347 |
+
openai_warning.assert_called_once()
|
| 348 |
+
assert "claude-test-unknown-model" in anthropic_warning.call_args.args[0]
|
| 349 |
+
assert "gpt-test-unknown-model" in openai_warning.call_args.args[0]
|
tests/test_proxy_openai_cache_stability.py
CHANGED
|
@@ -49,8 +49,8 @@ def test_openai_cache_mode_freezes_previous_turns() -> None:
|
|
| 49 |
proxy.config.mode = "cache"
|
| 50 |
|
| 51 |
fake_tracker = _FakePrefixTracker(frozen_count=0)
|
| 52 |
-
proxy.session_tracker_store.compute_session_id = (
|
| 53 |
-
lambda request, model, messages: "stable-session"
| 54 |
)
|
| 55 |
proxy.session_tracker_store.get_or_create = lambda session_id, provider: fake_tracker
|
| 56 |
|
|
@@ -110,8 +110,8 @@ def test_openai_cache_mode_restores_mutated_frozen_prefix() -> None:
|
|
| 110 |
proxy.config.mode = "cache"
|
| 111 |
|
| 112 |
fake_tracker = _FakePrefixTracker(frozen_count=0)
|
| 113 |
-
proxy.session_tracker_store.compute_session_id = (
|
| 114 |
-
lambda request, model, messages: "stable-session"
| 115 |
)
|
| 116 |
proxy.session_tracker_store.get_or_create = lambda session_id, provider: fake_tracker
|
| 117 |
|
|
|
|
| 49 |
proxy.config.mode = "cache"
|
| 50 |
|
| 51 |
fake_tracker = _FakePrefixTracker(frozen_count=0)
|
| 52 |
+
proxy.session_tracker_store.compute_session_id = lambda request, model, messages: (
|
| 53 |
+
"stable-session"
|
| 54 |
)
|
| 55 |
proxy.session_tracker_store.get_or_create = lambda session_id, provider: fake_tracker
|
| 56 |
|
|
|
|
| 110 |
proxy.config.mode = "cache"
|
| 111 |
|
| 112 |
fake_tracker = _FakePrefixTracker(frozen_count=0)
|
| 113 |
+
proxy.session_tracker_store.compute_session_id = lambda request, model, messages: (
|
| 114 |
+
"stable-session"
|
| 115 |
)
|
| 116 |
proxy.session_tracker_store.get_or_create = lambda session_id, provider: fake_tracker
|
| 117 |
|