Spaces:
Build error
Build error
Commit ·
90d3aea
1
Parent(s): c1feb60
Publish headroom-ai v0.2.0 to PyPI with DevEx fixes
Browse files
- Renamed package from 'headroom' to 'headroom-ai' (PyPI name conflict)
- Fixed numpy/jinja2 imports to be lazy (core install no longer crashes)
- Fixed SQLite default path (now uses temp directory)
- Fixed f-string {tool} crash in proxy server
- Updated README with correct package name and examples
- Added quickstart and troubleshooting docs
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- README.md +413 -100
- docs/quickstart.md +330 -0
- docs/troubleshooting.md +442 -0
- examples/basic_usage.py +65 -1
- headroom/__init__.py +50 -20
- headroom/client.py +11 -2
- headroom/proxy/server.py +1 -1
- headroom/relevance/embedding.py +24 -6
- headroom/reporting/generator.py +20 -4
- pyproject.toml +2 -2
README.md
CHANGED
|
@@ -25,7 +25,7 @@
|
|
| 25 |
|
| 26 |
---
|
| 27 |
|
| 28 |
-
##
|
| 29 |
|
| 30 |
AI coding agents and tool-using applications generate **massive contexts**:
|
| 31 |
|
|
@@ -35,9 +35,7 @@ AI coding agents and tool-using applications generate **massive contexts**:
|
|
| 35 |
|
| 36 |
**Result**: You pay for tokens you don't need, and cache hits are rare.
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
Headroom is a **smart compression layer** that sits between your app and LLM providers. It applies three transforms:
|
| 41 |
|
| 42 |
| Transform | What It Does | Savings |
|
| 43 |
|-----------|--------------|---------|
|
|
@@ -47,217 +45,532 @@ Headroom is a **smart compression layer** that sits between your app and LLM pro
|
|
| 47 |
|
| 48 |
**Zero accuracy loss** - we keep what matters: errors, anomalies, relevant items.
|
| 49 |
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
|
| 52 |
-
### Option 1: Proxy (Recommended)
|
| 53 |
|
| 54 |
-
|
| 55 |
|
| 56 |
```bash
|
| 57 |
-
|
|
|
|
| 58 |
|
| 59 |
# Start the proxy
|
| 60 |
headroom proxy --port 8787
|
| 61 |
|
| 62 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
ANTHROPIC_BASE_URL=http://localhost:8787 claude
|
| 64 |
|
| 65 |
-
#
|
| 66 |
OPENAI_BASE_URL=http://localhost:8787/v1 your-app
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
```
|
| 68 |
|
| 69 |
### Option 2: Python SDK
|
| 70 |
|
| 71 |
-
Wrap your existing client:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
```python
|
| 74 |
-
from headroom import HeadroomClient
|
| 75 |
from openai import OpenAI
|
| 76 |
|
|
|
|
| 77 |
client = HeadroomClient(
|
| 78 |
original_client=OpenAI(),
|
| 79 |
-
|
|
|
|
| 80 |
)
|
| 81 |
|
| 82 |
# Use exactly like the original client
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
response = client.chat.completions.create(
|
| 84 |
model="gpt-4o",
|
| 85 |
messages=[...],
|
|
|
|
|
|
|
|
|
|
| 86 |
)
|
| 87 |
```
|
| 88 |
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
```python
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
```
|
| 97 |
|
| 98 |
-
|
| 99 |
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
```python
|
| 103 |
# Before: 50KB tool response with 1000 items
|
| 104 |
-
{"results": [{"id": 1,
|
| 105 |
|
| 106 |
# After: ~2KB with important items preserved
|
|
|
|
| 107 |
# - First 3 items (context)
|
| 108 |
# - Last 2 items (recency)
|
| 109 |
-
# - All error items
|
| 110 |
-
# -
|
| 111 |
-
# - Items matching user's query
|
| 112 |
```
|
| 113 |
|
| 114 |
-
###
|
| 115 |
|
| 116 |
```python
|
| 117 |
# Before: Cache miss every day due to changing date
|
| 118 |
"You are helpful. Today is January 7, 2025."
|
| 119 |
|
| 120 |
-
# After: Stable prefix (cache hit!) + dynamic context
|
| 121 |
"You are helpful."
|
| 122 |
-
#
|
| 123 |
```
|
| 124 |
|
| 125 |
-
###
|
| 126 |
|
| 127 |
```python
|
| 128 |
-
#
|
| 129 |
-
#
|
| 130 |
-
#
|
| 131 |
-
#
|
| 132 |
```
|
| 133 |
|
| 134 |
-
|
| 135 |
|
| 136 |
-
|
| 137 |
-
- **Rate Limiting**: Token bucket (requests + tokens per minute)
|
| 138 |
-
- **Cost Tracking**: Budget enforcement (hourly/daily/monthly)
|
| 139 |
-
- **Prometheus Metrics**: `/metrics` endpoint for monitoring
|
| 140 |
-
- **Request Logging**: JSONL logs for debugging
|
| 141 |
|
| 142 |
-
##
|
| 143 |
|
| 144 |
```bash
|
| 145 |
-
|
| 146 |
-
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
|
|
|
| 150 |
|
| 151 |
-
#
|
| 152 |
-
|
| 153 |
|
| 154 |
-
#
|
| 155 |
-
|
| 156 |
```
|
| 157 |
|
| 158 |
-
##
|
| 159 |
-
|
| 160 |
-
### Audit Mode (Observe Only)
|
| 161 |
|
| 162 |
```python
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
```
|
| 166 |
|
| 167 |
-
###
|
| 168 |
|
| 169 |
```python
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
```
|
| 173 |
|
| 174 |
-
###
|
| 175 |
|
| 176 |
```python
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
```
|
| 180 |
|
| 181 |
-
##
|
| 182 |
|
| 183 |
```python
|
| 184 |
-
|
|
|
|
| 185 |
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
keep_last=2, # Always keep last 2
|
| 194 |
-
relevance_threshold=0.3, # Keep items with relevance > 0.3
|
| 195 |
-
),
|
| 196 |
)
|
| 197 |
```
|
| 198 |
|
|
|
|
|
|
|
| 199 |
## Supported Providers
|
| 200 |
|
| 201 |
-
| Provider | Token Counting | Status |
|
| 202 |
-
|----------|----------------|--------|
|
| 203 |
-
| OpenAI | tiktoken |
|
| 204 |
-
| Anthropic | Official API |
|
| 205 |
-
| Google | Official API |
|
| 206 |
-
| Cohere | Official API |
|
| 207 |
-
| Mistral | Official tokenizer |
|
| 208 |
-
| LiteLLM | Via provider |
|
|
|
|
|
|
|
| 209 |
|
| 210 |
## Safety Guarantees
|
| 211 |
|
| 212 |
Headroom follows strict safety rules:
|
| 213 |
|
| 214 |
-
1. **Never removes human content** - User/assistant
|
| 215 |
-
2. **Never breaks tool ordering** - Tool calls and responses stay paired
|
| 216 |
3. **Parse failures are no-ops** - Malformed content passes through unchanged
|
| 217 |
4. **Preserves recency** - Last N turns are always kept
|
|
|
|
| 218 |
|
| 219 |
-
|
|
|
|
|
|
|
| 220 |
|
| 221 |
-
| Scenario | Before | After | Savings |
|
| 222 |
-
|----------|--------|-------|---------|
|
| 223 |
-
| Search results (1000 items) | 45,000 tokens | 4,500 tokens | 90% |
|
| 224 |
-
| Log analysis (500 entries) | 22,000 tokens | 3,300 tokens | 85% |
|
| 225 |
-
| API response (nested JSON) | 15,000 tokens | 2,250 tokens | 85% |
|
| 226 |
-
| Long conversation (50 turns) | 80,000 tokens | 32,000 tokens | 60% |
|
|
|
|
|
|
|
| 227 |
|
| 228 |
## Documentation
|
| 229 |
|
| 230 |
-
- [
|
| 231 |
-
- [Proxy
|
| 232 |
-
- [Transform Reference](docs/transforms.md)
|
| 233 |
-
- [API Reference](docs/api.md)
|
| 234 |
-
- [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
## Contributing
|
| 237 |
|
| 238 |
-
We welcome contributions!
|
| 239 |
|
| 240 |
```bash
|
| 241 |
# Development setup
|
| 242 |
git clone https://github.com/headroom-sdk/headroom.git
|
| 243 |
cd headroom
|
| 244 |
pip install -e ".[dev]"
|
|
|
|
|
|
|
| 245 |
pytest
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
```
|
| 247 |
|
| 248 |
-
|
| 249 |
|
| 250 |
-
|
| 251 |
|
| 252 |
-
##
|
| 253 |
|
| 254 |
-
- [
|
| 255 |
-
- [PyPI](https://pypi.org/project/headroom/)
|
| 256 |
-
- [Documentation](https://headroom.dev/docs)
|
| 257 |
-
- [Discord](https://discord.gg/headroom)
|
| 258 |
|
| 259 |
---
|
| 260 |
|
| 261 |
<p align="center">
|
| 262 |
-
<sub>Built
|
| 263 |
</p>
|
|
|
|
| 25 |
|
| 26 |
---
|
| 27 |
|
| 28 |
+
## Why Headroom?
|
| 29 |
|
| 30 |
AI coding agents and tool-using applications generate **massive contexts**:
|
| 31 |
|
|
|
|
| 35 |
|
| 36 |
**Result**: You pay for tokens you don't need, and cache hits are rare.
|
| 37 |
|
| 38 |
+
Headroom is a **smart compression layer** that sits between your app and LLM providers:
|
|
|
|
|
|
|
| 39 |
|
| 40 |
| Transform | What It Does | Savings |
|
| 41 |
|-----------|--------------|---------|
|
|
|
|
| 45 |
|
| 46 |
**Zero accuracy loss** - we keep what matters: errors, anomalies, relevant items.
|
| 47 |
|
| 48 |
+
---
|
| 49 |
+
|
| 50 |
+
## 5-Minute Quickstart
|
| 51 |
|
| 52 |
+
### Option 1: Proxy Server (Recommended)
|
| 53 |
|
| 54 |
+
Works with **any** OpenAI-compatible client without code changes:
|
| 55 |
|
| 56 |
```bash
|
| 57 |
+
# Install
|
| 58 |
+
pip install "headroom-ai[proxy]"
|
| 59 |
|
| 60 |
# Start the proxy
|
| 61 |
headroom proxy --port 8787
|
| 62 |
|
| 63 |
+
# Verify it's running
|
| 64 |
+
curl http://localhost:8787/health
|
| 65 |
+
# Expected: {"status": "healthy", ...}
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
**Use with your tools:**
|
| 69 |
+
|
| 70 |
+
```bash
|
| 71 |
+
# Claude Code
|
| 72 |
ANTHROPIC_BASE_URL=http://localhost:8787 claude
|
| 73 |
|
| 74 |
+
# Cursor / Continue / any OpenAI client
|
| 75 |
OPENAI_BASE_URL=http://localhost:8787/v1 your-app
|
| 76 |
+
|
| 77 |
+
# Python OpenAI SDK
|
| 78 |
+
export OPENAI_BASE_URL=http://localhost:8787/v1
|
| 79 |
+
python your_script.py
|
| 80 |
```
|
| 81 |
|
| 82 |
### Option 2: Python SDK
|
| 83 |
|
| 84 |
+
Wrap your existing client for fine-grained control:
|
| 85 |
+
|
| 86 |
+
```bash
|
| 87 |
+
pip install headroom-ai openai
|
| 88 |
+
```
|
| 89 |
|
| 90 |
```python
|
| 91 |
+
from headroom import HeadroomClient, OpenAIProvider
|
| 92 |
from openai import OpenAI
|
| 93 |
|
| 94 |
+
# Create wrapped client
|
| 95 |
client = HeadroomClient(
|
| 96 |
original_client=OpenAI(),
|
| 97 |
+
provider=OpenAIProvider(),
|
| 98 |
+
default_mode="optimize", # or "audit" to observe only
|
| 99 |
)
|
| 100 |
|
| 101 |
# Use exactly like the original client
|
| 102 |
+
response = client.chat.completions.create(
|
| 103 |
+
model="gpt-4o-mini",
|
| 104 |
+
messages=[
|
| 105 |
+
{"role": "user", "content": "Hello!"},
|
| 106 |
+
],
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
print(response.choices[0].message.content)
|
| 110 |
+
|
| 111 |
+
# Check what happened
|
| 112 |
+
stats = client.get_stats()
|
| 113 |
+
print(f"Tokens saved this session: {stats['session']['tokens_saved_total']}")
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
**With tool outputs (where real savings happen):**
|
| 117 |
+
|
| 118 |
+
```python
|
| 119 |
+
import json
|
| 120 |
+
|
| 121 |
+
# Conversation with large tool output
|
| 122 |
+
messages = [
|
| 123 |
+
{"role": "user", "content": "Search for Python tutorials"},
|
| 124 |
+
{
|
| 125 |
+
"role": "assistant",
|
| 126 |
+
"content": None,
|
| 127 |
+
"tool_calls": [{
|
| 128 |
+
"id": "call_123",
|
| 129 |
+
"type": "function",
|
| 130 |
+
"function": {"name": "search", "arguments": '{"q": "python"}'},
|
| 131 |
+
}],
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"role": "tool",
|
| 135 |
+
"tool_call_id": "call_123",
|
| 136 |
+
"content": json.dumps({
|
| 137 |
+
"results": [{"title": f"Tutorial {i}", "score": 100-i} for i in range(500)]
|
| 138 |
+
}),
|
| 139 |
+
},
|
| 140 |
+
{"role": "user", "content": "What are the top 3?"},
|
| 141 |
+
]
|
| 142 |
+
|
| 143 |
+
# Headroom compresses 500 results to ~15, keeping highest-scoring items
|
| 144 |
+
response = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
|
| 145 |
+
print(f"Tokens saved: {client.get_stats()['session']['tokens_saved_total']}")
|
| 146 |
+
# Typical output: "Tokens saved: 3500"
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
### Option 3: LangChain Integration (Coming Soon)
|
| 150 |
+
|
| 151 |
+
```python
|
| 152 |
+
# Coming soon - use proxy server for now
|
| 153 |
+
# OPENAI_BASE_URL=http://localhost:8787/v1 python your_langchain_app.py
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
---
|
| 157 |
+
|
| 158 |
+
## Verify It's Working
|
| 159 |
+
|
| 160 |
+
### Check Proxy Stats
|
| 161 |
+
|
| 162 |
+
```bash
|
| 163 |
+
curl http://localhost:8787/stats
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
```json
|
| 167 |
+
{
|
| 168 |
+
"requests": {"total": 42, "cached": 5, "rate_limited": 0, "failed": 0},
|
| 169 |
+
"tokens": {"input": 50000, "output": 8000, "saved": 12500, "savings_percent": 25.0},
|
| 170 |
+
"cost": {"total_cost_usd": 0.15, "total_savings_usd": 0.04},
|
| 171 |
+
"cache": {"entries": 10, "total_hits": 5}
|
| 172 |
+
}
|
| 173 |
+
```
|
| 174 |
+
|
| 175 |
+
### Check SDK Stats
|
| 176 |
+
|
| 177 |
+
```python
|
| 178 |
+
# Quick session stats (no database query)
|
| 179 |
+
stats = client.get_stats()
|
| 180 |
+
print(stats)
|
| 181 |
+
# {
|
| 182 |
+
# "session": {"requests_total": 10, "tokens_saved_total": 5000, ...},
|
| 183 |
+
# "config": {"mode": "optimize", "provider": "openai", ...},
|
| 184 |
+
# "transforms": {"smart_crusher_enabled": True, ...}
|
| 185 |
+
# }
|
| 186 |
+
|
| 187 |
+
# Validate setup is correct
|
| 188 |
+
result = client.validate_setup()
|
| 189 |
+
if not result["valid"]:
|
| 190 |
+
print("Setup issues:", result)
|
| 191 |
+
```
|
| 192 |
+
|
| 193 |
+
### Enable Logging
|
| 194 |
+
|
| 195 |
+
```python
|
| 196 |
+
import logging
|
| 197 |
+
logging.basicConfig(level=logging.INFO)
|
| 198 |
+
|
| 199 |
+
# Now you'll see:
|
| 200 |
+
# INFO:headroom.transforms.pipeline:Pipeline complete: 45000 -> 4500 tokens (saved 40500, 90.0% reduction)
|
| 201 |
+
# INFO:headroom.transforms.smart_crusher:SmartCrusher applied top_n strategy: kept 15 of 1000 items
|
| 202 |
+
```
|
| 203 |
+
|
| 204 |
+
---
|
| 205 |
+
|
| 206 |
+
## Installation
|
| 207 |
+
|
| 208 |
+
```bash
|
| 209 |
+
# Core only (minimal dependencies: tiktoken, pydantic)
|
| 210 |
+
pip install headroom-ai
|
| 211 |
+
|
| 212 |
+
# With semantic relevance scoring (adds sentence-transformers)
|
| 213 |
+
pip install "headroom-ai[relevance]"
|
| 214 |
+
|
| 215 |
+
# With proxy server (adds fastapi, uvicorn)
|
| 216 |
+
pip install "headroom-ai[proxy]"
|
| 217 |
+
|
| 218 |
+
# With HTML reports (adds jinja2)
|
| 219 |
+
pip install "headroom-ai[reports]"
|
| 220 |
+
|
| 221 |
+
# Everything
|
| 222 |
+
pip install "headroom-ai[all]"
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
**Requirements**: Python 3.10+
|
| 226 |
+
|
| 227 |
+
---
|
| 228 |
+
|
| 229 |
+
## Configuration
|
| 230 |
+
|
| 231 |
+
### SDK Configuration
|
| 232 |
+
|
| 233 |
+
```python
|
| 234 |
+
from headroom import HeadroomClient, OpenAIProvider
|
| 235 |
+
from openai import OpenAI
|
| 236 |
+
|
| 237 |
+
# Full configuration example
|
| 238 |
+
client = HeadroomClient(
|
| 239 |
+
original_client=OpenAI(),
|
| 240 |
+
provider=OpenAIProvider(),
|
| 241 |
+
default_mode="optimize", # "audit" (observe only) or "optimize" (apply transforms)
|
| 242 |
+
enable_cache_optimizer=True, # Enable provider-specific cache optimization
|
| 243 |
+
enable_semantic_cache=False, # Enable query-level semantic caching
|
| 244 |
+
model_context_limits={ # Override default context limits
|
| 245 |
+
"gpt-4o": 128000,
|
| 246 |
+
"gpt-4o-mini": 128000,
|
| 247 |
+
},
|
| 248 |
+
# store_url defaults to temp directory; override with absolute path if needed:
|
| 249 |
+
# store_url="sqlite:////absolute/path/to/headroom.db",
|
| 250 |
+
)
|
| 251 |
+
```
|
| 252 |
+
|
| 253 |
+
### Proxy Configuration
|
| 254 |
+
|
| 255 |
+
```bash
|
| 256 |
+
# Via command line
|
| 257 |
+
headroom proxy \
|
| 258 |
+
--port 8787 \
|
| 259 |
+
--budget 10.00 \
|
| 260 |
+
--log-file headroom.jsonl
|
| 261 |
+
|
| 262 |
+
# Disable optimization (passthrough mode)
|
| 263 |
+
headroom proxy --no-optimize
|
| 264 |
+
|
| 265 |
+
# Disable semantic caching
|
| 266 |
+
headroom proxy --no-cache
|
| 267 |
+
|
| 268 |
+
# See all options
|
| 269 |
+
headroom proxy --help
|
| 270 |
+
```
|
| 271 |
+
|
| 272 |
+
### Per-Request Overrides
|
| 273 |
+
|
| 274 |
+
```python
|
| 275 |
+
# Override mode for specific requests
|
| 276 |
response = client.chat.completions.create(
|
| 277 |
model="gpt-4o",
|
| 278 |
messages=[...],
|
| 279 |
+
headroom_mode="audit", # Just observe, don't optimize
|
| 280 |
+
headroom_output_buffer_tokens=8000, # Reserve more for output
|
| 281 |
+
headroom_keep_turns=5, # Keep last 5 turns
|
| 282 |
)
|
| 283 |
```
|
| 284 |
|
| 285 |
+
---
|
| 286 |
+
|
| 287 |
+
## Modes
|
| 288 |
+
|
| 289 |
+
| Mode | Behavior | Use Case |
|
| 290 |
+
|------|----------|----------|
|
| 291 |
+
| `audit` | Observes and logs, no modifications | Production monitoring, baseline measurement |
|
| 292 |
+
| `optimize` | Applies safe, deterministic transforms | Production optimization |
|
| 293 |
+
| `simulate` | Returns plan without API call | Testing, cost estimation |
|
| 294 |
|
| 295 |
```python
|
| 296 |
+
# Simulate to see what would happen
|
| 297 |
+
plan = client.chat.completions.simulate(
|
| 298 |
+
model="gpt-4o",
|
| 299 |
+
messages=large_conversation,
|
| 300 |
+
)
|
| 301 |
+
print(f"Would save {plan.tokens_saved} tokens")
|
| 302 |
+
print(f"Transforms: {plan.transforms}")
|
| 303 |
+
print(f"Estimated savings: {plan.estimated_savings}")
|
| 304 |
+
```
|
| 305 |
+
|
| 306 |
+
---
|
| 307 |
+
|
| 308 |
+
## Error Handling
|
| 309 |
+
|
| 310 |
+
Headroom provides explicit exceptions for debugging:
|
| 311 |
|
| 312 |
+
```python
|
| 313 |
+
from headroom import (
|
| 314 |
+
HeadroomClient,
|
| 315 |
+
HeadroomError, # Base class - catch all Headroom errors
|
| 316 |
+
ConfigurationError, # Invalid configuration
|
| 317 |
+
ProviderError, # Provider issues (unknown model, etc.)
|
| 318 |
+
StorageError, # Database/storage failures
|
| 319 |
+
CompressionError, # Compression failures (rare - we fail safe)
|
| 320 |
+
ValidationError, # Setup validation failures
|
| 321 |
+
)
|
| 322 |
+
|
| 323 |
+
try:
|
| 324 |
+
client = HeadroomClient(...)
|
| 325 |
+
response = client.chat.completions.create(...)
|
| 326 |
+
except ConfigurationError as e:
|
| 327 |
+
print(f"Config issue: {e}")
|
| 328 |
+
print(f"Details: {e.details}") # Additional context
|
| 329 |
+
except StorageError as e:
|
| 330 |
+
print(f"Storage issue: {e}")
|
| 331 |
+
# Headroom continues to work, just without metrics persistence
|
| 332 |
+
except HeadroomError as e:
|
| 333 |
+
print(f"Headroom error: {e}")
|
| 334 |
```
|
| 335 |
|
| 336 |
+
**Safety guarantee**: If compression fails, the original content passes through unchanged. Your LLM calls never fail due to Headroom.
|
| 337 |
|
| 338 |
+
---
|
| 339 |
+
|
| 340 |
+
## How It Works
|
| 341 |
+
|
| 342 |
+
### SmartCrusher: Statistical Compression
|
| 343 |
|
| 344 |
```python
|
| 345 |
# Before: 50KB tool response with 1000 items
|
| 346 |
+
{"results": [{"id": 1, "status": "ok", ...}, ... 1000 items ...]}
|
| 347 |
|
| 348 |
# After: ~2KB with important items preserved
|
| 349 |
+
# Headroom keeps:
|
| 350 |
# - First 3 items (context)
|
| 351 |
# - Last 2 items (recency)
|
| 352 |
+
# - All error items (status != "ok")
|
| 353 |
+
# - Statistical anomalies (values > 2 std dev from mean)
|
| 354 |
+
# - Items matching user's query (BM25/embedding similarity)
|
| 355 |
```
|
| 356 |
|
| 357 |
+
### CacheAligner: Prefix Stabilization
|
| 358 |
|
| 359 |
```python
|
| 360 |
# Before: Cache miss every day due to changing date
|
| 361 |
"You are helpful. Today is January 7, 2025."
|
| 362 |
|
| 363 |
+
# After: Stable prefix (cache hit!) + dynamic context moved to end
|
| 364 |
"You are helpful."
|
| 365 |
+
# Dynamic content: "Current date: January 7, 2025"
|
| 366 |
```
|
| 367 |
|
| 368 |
+
### RollingWindow: Context Management
|
| 369 |
|
| 370 |
```python
|
| 371 |
+
# When context exceeds limit:
|
| 372 |
+
# 1. Drop oldest tool outputs first (as atomic units with their calls)
|
| 373 |
+
# 2. Drop oldest conversation turns
|
| 374 |
+
# 3. NEVER drop: system prompt or last N turns; never leave a tool response orphaned from its call
|
| 375 |
```
|
| 376 |
|
| 377 |
+
---
|
| 378 |
|
| 379 |
+
## Metrics & Monitoring
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
|
| 381 |
+
### Prometheus Metrics (Proxy)
|
| 382 |
|
| 383 |
```bash
|
| 384 |
+
curl http://localhost:8787/metrics
|
| 385 |
+
```
|
| 386 |
|
| 387 |
+
```
|
| 388 |
+
# HELP headroom_requests_total Total requests processed
|
| 389 |
+
headroom_requests_total{mode="optimize"} 1234
|
| 390 |
|
| 391 |
+
# HELP headroom_tokens_saved_total Total tokens saved
|
| 392 |
+
headroom_tokens_saved_total 5678900
|
| 393 |
|
| 394 |
+
# HELP headroom_compression_ratio Compression ratio histogram
|
| 395 |
+
headroom_compression_ratio_bucket{le="0.5"} 890
|
| 396 |
```
|
| 397 |
|
| 398 |
+
### Query Stored Metrics (SDK)
|
|
|
|
|
|
|
| 399 |
|
| 400 |
```python
|
| 401 |
+
from datetime import datetime, timedelta
|
| 402 |
+
|
| 403 |
+
# Get recent metrics
|
| 404 |
+
metrics = client.get_metrics(
|
| 405 |
+
start_time=datetime.utcnow() - timedelta(hours=1),
|
| 406 |
+
limit=100,
|
| 407 |
+
)
|
| 408 |
+
|
| 409 |
+
for m in metrics:
|
| 410 |
+
print(f"{m.timestamp}: {m.tokens_input_before} -> {m.tokens_input_after}")
|
| 411 |
+
|
| 412 |
+
# Get summary statistics
|
| 413 |
+
summary = client.get_summary()
|
| 414 |
+
print(f"Total requests: {summary['total_requests']}")
|
| 415 |
+
print(f"Total tokens saved: {summary['total_tokens_saved']}")
|
| 416 |
+
```
|
| 417 |
+
|
| 418 |
+
---
|
| 419 |
+
|
| 420 |
+
## Troubleshooting
|
| 421 |
+
|
| 422 |
+
### "Proxy won't start"
|
| 423 |
+
|
| 424 |
+
```bash
|
| 425 |
+
# Check if port is in use
|
| 426 |
+
lsof -i :8787
|
| 427 |
+
|
| 428 |
+
# Try a different port
|
| 429 |
+
headroom proxy --port 8788
|
| 430 |
+
|
| 431 |
+
# Check logs
|
| 432 |
+
headroom proxy --log-level debug
|
| 433 |
```
|
| 434 |
|
| 435 |
+
### "No token savings"
|
| 436 |
|
| 437 |
```python
|
| 438 |
+
# 1. Verify mode is "optimize"
|
| 439 |
+
stats = client.get_stats()
|
| 440 |
+
print(stats["config"]["mode"]) # Should be "optimize"
|
| 441 |
+
|
| 442 |
+
# 2. Check if transforms are enabled
|
| 443 |
+
print(stats["transforms"]) # smart_crusher_enabled should be True
|
| 444 |
+
|
| 445 |
+
# 3. Enable logging to see what's happening
|
| 446 |
+
import logging
|
| 447 |
+
logging.basicConfig(level=logging.DEBUG)
|
| 448 |
+
|
| 449 |
+
# 4. Use simulate to see what WOULD happen
|
| 450 |
+
plan = client.chat.completions.simulate(model="gpt-4o", messages=msgs)
|
| 451 |
+
print(f"Transforms that would apply: {plan.transforms}")
|
| 452 |
```
|
| 453 |
|
| 454 |
+
### "High latency"
|
| 455 |
|
| 456 |
```python
|
| 457 |
+
# Headroom adds ~1-5ms overhead. If you see more:
|
| 458 |
+
|
| 459 |
+
# 1. Check if embedding scorer is enabled (slower but better relevance)
|
| 460 |
+
# Switch to BM25 for faster scoring:
|
| 461 |
+
config.smart_crusher.relevance.tier = "bm25"
|
| 462 |
+
|
| 463 |
+
# 2. Disable transforms you don't need
|
| 464 |
+
config.cache_aligner.enabled = False # If you don't need cache alignment
|
| 465 |
+
|
| 466 |
+
# 3. Increase min_tokens_to_crush to skip small payloads
|
| 467 |
+
config.smart_crusher.min_tokens_to_crush = 500
|
| 468 |
```
|
| 469 |
|
| 470 |
+
### "Compression too aggressive"
|
| 471 |
|
| 472 |
```python
|
| 473 |
+
# Keep more items
|
| 474 |
+
config.smart_crusher.max_items_after_crush = 50 # Default is 15
|
| 475 |
|
| 476 |
+
# Or disable compression for specific tools
|
| 477 |
+
response = client.chat.completions.create(
|
| 478 |
+
model="gpt-4o",
|
| 479 |
+
messages=[...],
|
| 480 |
+
headroom_tool_profiles={
|
| 481 |
+
"important_tool": {"skip_compression": True}
|
| 482 |
+
}
|
|
|
|
|
|
|
|
|
|
| 483 |
)
|
| 484 |
```
|
| 485 |
|
| 486 |
+
---
|
| 487 |
+
|
| 488 |
## Supported Providers
|
| 489 |
|
| 490 |
+
| Provider | Token Counting | Cache Optimization | Status |
|
| 491 |
+
|----------|----------------|-------------------|--------|
|
| 492 |
+
| OpenAI | tiktoken (exact) | Automatic prefix caching | Full |
|
| 493 |
+
| Anthropic | Official API | cache_control blocks | Full |
|
| 494 |
+
| Google | Official API | Context caching | Full |
|
| 495 |
+
| Cohere | Official API | - | Full |
|
| 496 |
+
| Mistral | Official tokenizer | - | Full |
|
| 497 |
+
| LiteLLM | Via underlying provider | - | Full |
|
| 498 |
+
|
| 499 |
+
---
|
| 500 |
|
| 501 |
## Safety Guarantees
|
| 502 |
|
| 503 |
Headroom follows strict safety rules:
|
| 504 |
|
| 505 |
+
1. **Never removes human content** - User/assistant messages are never compressed
|
| 506 |
+
2. **Never breaks tool ordering** - Tool calls and responses stay paired as atomic units
|
| 507 |
3. **Parse failures are no-ops** - Malformed content passes through unchanged
|
| 508 |
4. **Preserves recency** - Last N turns are always kept
|
| 509 |
+
5. **Errors surface, don't hide** - Explicit exceptions with context
|
| 510 |
|
| 511 |
+
---
|
| 512 |
+
|
| 513 |
+
## Performance
|
| 514 |
|
| 515 |
+
| Scenario | Before | After | Savings | Overhead |
|
| 516 |
+
|----------|--------|-------|---------|----------|
|
| 517 |
+
| Search results (1000 items) | 45,000 tokens | 4,500 tokens | 90% | ~2ms |
|
| 518 |
+
| Log analysis (500 entries) | 22,000 tokens | 3,300 tokens | 85% | ~1ms |
|
| 519 |
+
| API response (nested JSON) | 15,000 tokens | 2,250 tokens | 85% | ~1ms |
|
| 520 |
+
| Long conversation (50 turns) | 80,000 tokens | 32,000 tokens | 60% | ~3ms |
|
| 521 |
+
|
| 522 |
+
---
|
| 523 |
|
| 524 |
## Documentation
|
| 525 |
|
| 526 |
+
- **[Quickstart Guide](docs/quickstart.md)** - Complete working examples
|
| 527 |
+
- **[Proxy Documentation](docs/proxy.md)** - Production deployment
|
| 528 |
+
- **[Transform Reference](docs/transforms.md)** - How each transform works
|
| 529 |
+
- **[API Reference](docs/api.md)** - Complete API documentation
|
| 530 |
+
- **[Troubleshooting](docs/troubleshooting.md)** - Common issues and solutions
|
| 531 |
+
- **[Architecture](docs/ARCHITECTURE.md)** - How Headroom works internally
|
| 532 |
+
|
| 533 |
+
---
|
| 534 |
+
|
| 535 |
+
## Examples
|
| 536 |
+
|
| 537 |
+
See the [`examples/`](examples/) directory for complete, runnable examples:
|
| 538 |
+
|
| 539 |
+
- `basic_usage.py` - Simple SDK usage
|
| 540 |
+
- `proxy_integration.py` - Using the proxy with different clients
|
| 541 |
+
- `custom_compression.py` - Advanced compression configuration
|
| 542 |
+
- `metrics_dashboard.py` - Building a metrics dashboard
|
| 543 |
+
|
| 544 |
+
---
|
| 545 |
|
| 546 |
## Contributing
|
| 547 |
|
| 548 |
+
We welcome contributions!
|
| 549 |
|
| 550 |
```bash
|
| 551 |
# Development setup
|
| 552 |
git clone https://github.com/headroom-sdk/headroom.git
|
| 553 |
cd headroom
|
| 554 |
pip install -e ".[dev]"
|
| 555 |
+
|
| 556 |
+
# Run tests
|
| 557 |
pytest
|
| 558 |
+
|
| 559 |
+
# Run linting
|
| 560 |
+
ruff check .
|
| 561 |
+
mypy headroom
|
| 562 |
```
|
| 563 |
|
| 564 |
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for details.
|
| 565 |
|
| 566 |
+
---
|
| 567 |
|
| 568 |
+
## License
|
| 569 |
|
| 570 |
+
Apache License 2.0 - see [LICENSE](LICENSE) for details.
|
|
|
|
|
|
|
|
|
|
| 571 |
|
| 572 |
---
|
| 573 |
|
| 574 |
<p align="center">
|
| 575 |
+
<sub>Built for the AI developer community</sub>
|
| 576 |
</p>
|
docs/quickstart.md
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Quickstart Guide
|
| 2 |
+
|
| 3 |
+
Get Headroom running in 5 minutes with these copy-paste examples.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## Installation
|
| 8 |
+
|
| 9 |
+
```bash
|
| 10 |
+
# Core only (minimal dependencies)
|
| 11 |
+
pip install headroom-ai
|
| 12 |
+
|
| 13 |
+
# With proxy server
|
| 14 |
+
pip install "headroom-ai[proxy]"
|
| 15 |
+
|
| 16 |
+
# Everything
|
| 17 |
+
pip install "headroom-ai[all]"
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
---
|
| 21 |
+
|
| 22 |
+
## Option 1: Proxy Server (Zero Code Changes)
|
| 23 |
+
|
| 24 |
+
The fastest way to start saving tokens. Works with any OpenAI-compatible client.
|
| 25 |
+
|
| 26 |
+
### Step 1: Start the Proxy
|
| 27 |
+
|
| 28 |
+
```bash
|
| 29 |
+
headroom proxy --port 8787
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
### Step 2: Verify It's Running
|
| 33 |
+
|
| 34 |
+
```bash
|
| 35 |
+
curl http://localhost:8787/health
|
| 36 |
+
# Expected: {"status": "healthy", "mode": "optimize", ...}
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
### Step 3: Point Your Client
|
| 40 |
+
|
| 41 |
+
```bash
|
| 42 |
+
# Claude Code
|
| 43 |
+
ANTHROPIC_BASE_URL=http://localhost:8787 claude
|
| 44 |
+
|
| 45 |
+
# Cursor / Continue / any OpenAI client
|
| 46 |
+
OPENAI_BASE_URL=http://localhost:8787/v1 your-app
|
| 47 |
+
|
| 48 |
+
# Python
|
| 49 |
+
export OPENAI_BASE_URL=http://localhost:8787/v1
|
| 50 |
+
python your_script.py
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
### Step 4: Check Savings
|
| 54 |
+
|
| 55 |
+
```bash
|
| 56 |
+
curl http://localhost:8787/stats
|
| 57 |
+
# {"requests": {"total": 42, ...}, "tokens": {"saved": 12500, "savings_percent": 25.0, ...}, ...}
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
---
|
| 61 |
+
|
| 62 |
+
## Option 2: Python SDK
|
| 63 |
+
|
| 64 |
+
Wrap your existing client for fine-grained control.
|
| 65 |
+
|
| 66 |
+
### Basic Example
|
| 67 |
+
|
| 68 |
+
```python
|
| 69 |
+
from headroom import HeadroomClient, OpenAIProvider
|
| 70 |
+
from openai import OpenAI
|
| 71 |
+
|
| 72 |
+
# Create wrapped client
|
| 73 |
+
client = HeadroomClient(
|
| 74 |
+
original_client=OpenAI(),
|
| 75 |
+
provider=OpenAIProvider(),
|
| 76 |
+
default_mode="optimize",
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
# Use exactly like OpenAI client
|
| 80 |
+
response = client.chat.completions.create(
|
| 81 |
+
model="gpt-4o",
|
| 82 |
+
messages=[
|
| 83 |
+
{"role": "system", "content": "You are a helpful assistant."},
|
| 84 |
+
{"role": "user", "content": "Hello!"},
|
| 85 |
+
],
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
print(response.choices[0].message.content)
|
| 89 |
+
|
| 90 |
+
# Check what happened
|
| 91 |
+
stats = client.get_stats()
|
| 92 |
+
print(f"Tokens saved: {stats['session']['tokens_saved_total']}")
|
| 93 |
+
```
|
| 94 |
+
|
| 95 |
+
### With Tool Outputs (Where Savings Happen)
|
| 96 |
+
|
| 97 |
+
```python
|
| 98 |
+
from headroom import HeadroomClient, OpenAIProvider
|
| 99 |
+
from openai import OpenAI
|
| 100 |
+
import json
|
| 101 |
+
|
| 102 |
+
client = HeadroomClient(
|
| 103 |
+
original_client=OpenAI(),
|
| 104 |
+
provider=OpenAIProvider(),
|
| 105 |
+
default_mode="optimize",
|
| 106 |
+
)
|
| 107 |
+
|
| 108 |
+
# Simulate a conversation with large tool outputs
|
| 109 |
+
messages = [
|
| 110 |
+
{"role": "system", "content": "You analyze search results."},
|
| 111 |
+
{"role": "user", "content": "Search for Python tutorials."},
|
| 112 |
+
{
|
| 113 |
+
"role": "assistant",
|
| 114 |
+
"content": None,
|
| 115 |
+
"tool_calls": [{
|
| 116 |
+
"id": "call_1",
|
| 117 |
+
"type": "function",
|
| 118 |
+
"function": {"name": "search", "arguments": '{"q": "python"}'},
|
| 119 |
+
}],
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"role": "tool",
|
| 123 |
+
"tool_call_id": "call_1",
|
| 124 |
+
# This is where Headroom shines - compressing large outputs
|
| 125 |
+
"content": json.dumps({
|
| 126 |
+
"results": [{"title": f"Result {i}", "score": 100-i} for i in range(500)]
|
| 127 |
+
}),
|
| 128 |
+
},
|
| 129 |
+
{"role": "user", "content": "What are the top 3 results?"},
|
| 130 |
+
]
|
| 131 |
+
|
| 132 |
+
# Headroom compresses the 500 results to ~15 (the default max_items_after_crush), keeping the most relevant
|
| 133 |
+
response = client.chat.completions.create(
|
| 134 |
+
model="gpt-4o",
|
| 135 |
+
messages=messages,
|
| 136 |
+
)
|
| 137 |
+
|
| 138 |
+
print(response.choices[0].message.content)
|
| 139 |
+
```
|
| 140 |
+
|
| 141 |
+
### Simulate Before Sending
|
| 142 |
+
|
| 143 |
+
Preview optimizations without making an API call:
|
| 144 |
+
|
| 145 |
+
```python
|
| 146 |
+
# See what would happen without calling the API
|
| 147 |
+
plan = client.chat.completions.simulate(
|
| 148 |
+
model="gpt-4o",
|
| 149 |
+
messages=messages,
|
| 150 |
+
)
|
| 151 |
+
|
| 152 |
+
print(f"Tokens before: {plan.tokens_before}")
|
| 153 |
+
print(f"Tokens after: {plan.tokens_after}")
|
| 154 |
+
print(f"Would save: {plan.tokens_saved} tokens ({plan.tokens_saved/plan.tokens_before*100:.0f}%)")
|
| 155 |
+
print(f"Transforms: {plan.transforms}")
|
| 156 |
+
print(f"Estimated savings: {plan.estimated_savings}")
|
| 157 |
+
```
|
| 158 |
+
|
| 159 |
+
---
|
| 160 |
+
|
| 161 |
+
## Option 3: Anthropic SDK
|
| 162 |
+
|
| 163 |
+
```python
|
| 164 |
+
from headroom import HeadroomClient, AnthropicProvider
|
| 165 |
+
from anthropic import Anthropic
|
| 166 |
+
|
| 167 |
+
client = HeadroomClient(
|
| 168 |
+
original_client=Anthropic(),
|
| 169 |
+
provider=AnthropicProvider(),
|
| 170 |
+
default_mode="optimize",
|
| 171 |
+
)
|
| 172 |
+
|
| 173 |
+
# Use Anthropic-style API
|
| 174 |
+
response = client.messages.create(
|
| 175 |
+
model="claude-sonnet-4-20250514",
|
| 176 |
+
max_tokens=1024,
|
| 177 |
+
messages=[
|
| 178 |
+
{"role": "user", "content": "Hello, Claude!"},
|
| 179 |
+
],
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
print(response.content[0].text)
|
| 183 |
+
```
|
| 184 |
+
|
| 185 |
+
---
|
| 186 |
+
|
| 187 |
+
## Verify It's Working
|
| 188 |
+
|
| 189 |
+
### Method 1: Enable Logging
|
| 190 |
+
|
| 191 |
+
```python
|
| 192 |
+
import logging
|
| 193 |
+
logging.basicConfig(level=logging.INFO)
|
| 194 |
+
|
| 195 |
+
# Now you'll see:
|
| 196 |
+
# INFO:headroom.transforms.pipeline:Pipeline complete: 45000 -> 4500 tokens (saved 40500, 90.0% reduction)
|
| 197 |
+
# INFO:headroom.transforms.smart_crusher:SmartCrusher: keeping 15 of 500 items
|
| 198 |
+
```
|
| 199 |
+
|
| 200 |
+
### Method 2: Check Session Stats
|
| 201 |
+
|
| 202 |
+
```python
|
| 203 |
+
stats = client.get_stats()
|
| 204 |
+
print(stats)
|
| 205 |
+
# {
|
| 206 |
+
# "session": {"requests_total": 10, "tokens_saved_total": 5000, ...},
|
| 207 |
+
# "config": {"mode": "optimize", "provider": "openai", ...},
|
| 208 |
+
# "transforms": {"smart_crusher_enabled": True, ...}
|
| 209 |
+
# }
|
| 210 |
+
```
|
| 211 |
+
|
| 212 |
+
### Method 3: Validate Setup
|
| 213 |
+
|
| 214 |
+
```python
|
| 215 |
+
result = client.validate_setup()
|
| 216 |
+
if not result["valid"]:
|
| 217 |
+
print("Setup issues:", result)
|
| 218 |
+
else:
|
| 219 |
+
print("Setup OK!")
|
| 220 |
+
print(f"Provider: {result['provider']['name']}")
|
| 221 |
+
print(f"Storage: {result['storage']['url']}")
|
| 222 |
+
```
|
| 223 |
+
|
| 224 |
+
---
|
| 225 |
+
|
| 226 |
+
## Common Configuration
|
| 227 |
+
|
| 228 |
+
### Adjust Compression
|
| 229 |
+
|
| 230 |
+
```python
|
| 231 |
+
from headroom import HeadroomClient, OpenAIProvider, HeadroomConfig
from openai import OpenAI
|
| 232 |
+
|
| 233 |
+
config = HeadroomConfig()
|
| 234 |
+
|
| 235 |
+
# Keep more items after compression (default: 15)
|
| 236 |
+
config.smart_crusher.max_items_after_crush = 30
|
| 237 |
+
|
| 238 |
+
# Only compress if tool output has > 500 tokens (default: 200)
|
| 239 |
+
config.smart_crusher.min_tokens_to_crush = 500
|
| 240 |
+
|
| 241 |
+
client = HeadroomClient(
|
| 242 |
+
original_client=OpenAI(),
|
| 243 |
+
provider=OpenAIProvider(),
|
| 244 |
+
config=config, # Pass custom config
|
| 245 |
+
default_mode="optimize",
|
| 246 |
+
)
|
| 247 |
+
```
|
| 248 |
+
|
| 249 |
+
### Skip Compression for Specific Tools
|
| 250 |
+
|
| 251 |
+
```python
|
| 252 |
+
response = client.chat.completions.create(
|
| 253 |
+
model="gpt-4o",
|
| 254 |
+
messages=messages,
|
| 255 |
+
headroom_tool_profiles={
|
| 256 |
+
"database_query": {"skip_compression": True}, # Never compress
|
| 257 |
+
"search": {"max_items": 50}, # Keep more items
|
| 258 |
+
},
|
| 259 |
+
)
|
| 260 |
+
```
|
| 261 |
+
|
| 262 |
+
### Audit Mode (Observe Only)
|
| 263 |
+
|
| 264 |
+
```python
|
| 265 |
+
# Start in audit mode - see what WOULD be optimized
|
| 266 |
+
client = HeadroomClient(
|
| 267 |
+
original_client=OpenAI(),
|
| 268 |
+
provider=OpenAIProvider(),
|
| 269 |
+
default_mode="audit", # No modifications, just logging
|
| 270 |
+
)
|
| 271 |
+
|
| 272 |
+
# Override per-request
|
| 273 |
+
response = client.chat.completions.create(
|
| 274 |
+
model="gpt-4o",
|
| 275 |
+
messages=messages,
|
| 276 |
+
headroom_mode="optimize", # Enable for this request only
|
| 277 |
+
)
|
| 278 |
+
```
|
| 279 |
+
|
| 280 |
+
---
|
| 281 |
+
|
| 282 |
+
## What Gets Optimized?
|
| 283 |
+
|
| 284 |
+
| Content Type | What Headroom Does | Typical Savings |
|
| 285 |
+
|--------------|-------------------|-----------------|
|
| 286 |
+
| **Tool outputs with lists** | Keeps errors, anomalies, high-score items | 70-90% |
|
| 287 |
+
| **Repeated search results** | Deduplicates and samples | 60-80% |
|
| 288 |
+
| **Long conversations** | Drops old turns, keeps recent | 40-60% |
|
| 289 |
+
| **System prompts with dates** | Stabilizes for cache hits | Cache savings |
|
| 290 |
+
|
| 291 |
+
---
|
| 292 |
+
|
| 293 |
+
## Next Steps
|
| 294 |
+
|
| 295 |
+
- **[Configuration Reference](api.md)** - All configuration options
|
| 296 |
+
- **[Transform Reference](transforms.md)** - How each transform works
|
| 297 |
+
- **[Troubleshooting](troubleshooting.md)** - Common issues and solutions
|
| 298 |
+
- **[Examples](../examples/)** - More complete examples
|
| 299 |
+
|
| 300 |
+
---
|
| 301 |
+
|
| 302 |
+
## Quick Troubleshooting
|
| 303 |
+
|
| 304 |
+
### "No token savings"
|
| 305 |
+
|
| 306 |
+
```python
|
| 307 |
+
# 1. Check mode
|
| 308 |
+
stats = client.get_stats()
|
| 309 |
+
print(stats["config"]["mode"]) # Should be "optimize"
|
| 310 |
+
|
| 311 |
+
# 2. Enable logging to see what's happening
|
| 312 |
+
import logging
|
| 313 |
+
logging.basicConfig(level=logging.DEBUG)
|
| 314 |
+
```
|
| 315 |
+
|
| 316 |
+
### "High latency"
|
| 317 |
+
|
| 318 |
+
```python
|
| 319 |
+
# Use BM25 instead of embeddings for faster relevance scoring
|
| 320 |
+
config.smart_crusher.relevance.tier = "bm25"
|
| 321 |
+
```
|
| 322 |
+
|
| 323 |
+
### "Compression too aggressive"
|
| 324 |
+
|
| 325 |
+
```python
|
| 326 |
+
# Keep more items
|
| 327 |
+
config.smart_crusher.max_items_after_crush = 50
|
| 328 |
+
```
|
| 329 |
+
|
| 330 |
+
See [Troubleshooting Guide](troubleshooting.md) for more solutions.
|
docs/troubleshooting.md
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Troubleshooting Guide
|
| 2 |
+
|
| 3 |
+
Solutions for common Headroom issues.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## Proxy Server Issues
|
| 8 |
+
|
| 9 |
+
### "Proxy won't start"
|
| 10 |
+
|
| 11 |
+
**Symptom**: `headroom proxy` fails or hangs.
|
| 12 |
+
|
| 13 |
+
**Solutions**:
|
| 14 |
+
|
| 15 |
+
```bash
|
| 16 |
+
# 1. Check if port is already in use
|
| 17 |
+
lsof -i :8787
|
| 18 |
+
# If something is using the port, either kill it or use a different port
|
| 19 |
+
|
| 20 |
+
# 2. Try a different port
|
| 21 |
+
headroom proxy --port 8788
|
| 22 |
+
|
| 23 |
+
# 3. Check for missing dependencies
|
| 24 |
+
pip install "headroom-ai[proxy]"
|
| 25 |
+
|
| 26 |
+
# 4. Run with debug logging
|
| 27 |
+
headroom proxy --log-level debug
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
### "Connection refused" when calling proxy
|
| 31 |
+
|
| 32 |
+
**Symptom**: `curl: (7) Failed to connect to localhost port 8787`
|
| 33 |
+
|
| 34 |
+
**Solutions**:
|
| 35 |
+
|
| 36 |
+
```bash
|
| 37 |
+
# 1. Verify proxy is running
|
| 38 |
+
curl http://localhost:8787/health
|
| 39 |
+
|
| 40 |
+
# 2. Check if proxy started on a different port
|
| 41 |
+
ps aux | grep headroom
|
| 42 |
+
|
| 43 |
+
# 3. Check firewall settings (macOS)
|
| 44 |
+
sudo pfctl -s rules | grep 8787
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
### "Proxy returns errors for some requests"
|
| 48 |
+
|
| 49 |
+
**Symptom**: Some requests work, others fail with 502/503.
|
| 50 |
+
|
| 51 |
+
**Solutions**:
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
# 1. Check proxy logs for the actual error
|
| 55 |
+
headroom proxy --log-level debug
|
| 56 |
+
|
| 57 |
+
# 2. Verify API key is set
|
| 58 |
+
echo $OPENAI_API_KEY # or ANTHROPIC_API_KEY
|
| 59 |
+
|
| 60 |
+
# 3. Test the underlying API directly
|
| 61 |
+
curl https://api.openai.com/v1/models -H "Authorization: Bearer $OPENAI_API_KEY"
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
---
|
| 65 |
+
|
| 66 |
+
## SDK Issues
|
| 67 |
+
|
| 68 |
+
### "No token savings"
|
| 69 |
+
|
| 70 |
+
**Symptom**: `stats['session']['tokens_saved_total']` is 0.
|
| 71 |
+
|
| 72 |
+
**Diagnosis**:
|
| 73 |
+
|
| 74 |
+
```python
|
| 75 |
+
# 1. Check mode
|
| 76 |
+
stats = client.get_stats()
|
| 77 |
+
print(f"Mode: {stats['config']['mode']}") # Should be "optimize"
|
| 78 |
+
|
| 79 |
+
# 2. Check transforms are enabled
|
| 80 |
+
print(f"SmartCrusher: {stats['transforms']['smart_crusher_enabled']}")
|
| 81 |
+
|
| 82 |
+
# 3. Check if content meets threshold
|
| 83 |
+
# SmartCrusher only compresses tool outputs > 200 tokens by default
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
**Solutions**:
|
| 87 |
+
|
| 88 |
+
```python
|
| 89 |
+
# 1. Ensure mode is "optimize"
|
| 90 |
+
client = HeadroomClient(
|
| 91 |
+
original_client=OpenAI(),
|
| 92 |
+
provider=OpenAIProvider(),
|
| 93 |
+
default_mode="optimize", # NOT "audit"
|
| 94 |
+
)
|
| 95 |
+
|
| 96 |
+
# 2. Or override per-request
|
| 97 |
+
response = client.chat.completions.create(
|
| 98 |
+
model="gpt-4o",
|
| 99 |
+
messages=messages,
|
| 100 |
+
headroom_mode="optimize",
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
# 3. Lower the compression threshold
|
| 104 |
+
config = HeadroomConfig()
|
| 105 |
+
config.smart_crusher.min_tokens_to_crush = 100 # Default is 200
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
**Why It Might Be 0**:
|
| 109 |
+
- Mode is "audit" (observation only)
|
| 110 |
+
- Messages don't contain tool outputs
|
| 111 |
+
- Tool outputs are below the token threshold
|
| 112 |
+
- Data isn't compressible (high uniqueness)
|
| 113 |
+
|
| 114 |
+
### "Compression too aggressive"
|
| 115 |
+
|
| 116 |
+
**Symptom**: LLM responses are missing information that was in tool outputs.
|
| 117 |
+
|
| 118 |
+
**Solutions**:
|
| 119 |
+
|
| 120 |
+
```python
|
| 121 |
+
# 1. Keep more items
|
| 122 |
+
config = HeadroomConfig()
|
| 123 |
+
config.smart_crusher.max_items_after_crush = 50 # Default: 15
|
| 124 |
+
|
| 125 |
+
# 2. Skip compression for specific tools
|
| 126 |
+
response = client.chat.completions.create(
|
| 127 |
+
model="gpt-4o",
|
| 128 |
+
messages=messages,
|
| 129 |
+
headroom_tool_profiles={
|
| 130 |
+
"important_tool": {"skip_compression": True},
|
| 131 |
+
},
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
# 3. Disable SmartCrusher entirely
|
| 135 |
+
config.smart_crusher.enabled = False
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
### "High latency"
|
| 139 |
+
|
| 140 |
+
**Symptom**: Requests take longer than expected.
|
| 141 |
+
|
| 142 |
+
**Diagnosis**:
|
| 143 |
+
|
| 144 |
+
```python
|
| 145 |
+
import time
|
| 146 |
+
import logging
|
| 147 |
+
|
| 148 |
+
logging.basicConfig(level=logging.DEBUG)
|
| 149 |
+
|
| 150 |
+
start = time.time()
|
| 151 |
+
response = client.chat.completions.create(...)
|
| 152 |
+
print(f"Total time: {time.time() - start:.2f}s")
|
| 153 |
+
|
| 154 |
+
# Check logs for:
|
| 155 |
+
# - "SmartCrusher" timing
|
| 156 |
+
# - "EmbeddingScorer" timing (slow if using embeddings)
|
| 157 |
+
```
|
| 158 |
+
|
| 159 |
+
**Solutions**:
|
| 160 |
+
|
| 161 |
+
```python
|
| 162 |
+
# 1. Use BM25 instead of embeddings (faster)
|
| 163 |
+
config = HeadroomConfig()
|
| 164 |
+
config.smart_crusher.relevance.tier = "bm25" # Default may use embeddings
|
| 165 |
+
|
| 166 |
+
# 2. Increase threshold to skip small payloads
|
| 167 |
+
config.smart_crusher.min_tokens_to_crush = 500
|
| 168 |
+
|
| 169 |
+
# 3. Disable transforms you don't need
|
| 170 |
+
config.cache_aligner.enabled = False
|
| 171 |
+
config.rolling_window.enabled = False
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
### "ValidationError on setup"
|
| 175 |
+
|
| 176 |
+
**Symptom**: `validate_setup()` returns errors.
|
| 177 |
+
|
| 178 |
+
**Common Issues**:
|
| 179 |
+
|
| 180 |
+
```python
|
| 181 |
+
result = client.validate_setup()
|
| 182 |
+
print(result)
|
| 183 |
+
|
| 184 |
+
# Provider error:
|
| 185 |
+
# {"provider": {"ok": False, "error": "No API key"}}
|
| 186 |
+
# → Set OPENAI_API_KEY or pass api_key to OpenAI()
|
| 187 |
+
|
| 188 |
+
# Storage error:
|
| 189 |
+
# {"storage": {"ok": False, "error": "unable to open database"}}
|
| 190 |
+
# → Check path permissions, use :memory: for testing
|
| 191 |
+
|
| 192 |
+
# Config error:
|
| 193 |
+
# {"config": {"ok": False, "error": "Invalid mode"}}
|
| 194 |
+
# → Use "audit" or "optimize" only
|
| 195 |
+
```
|
| 196 |
+
|
| 197 |
+
**Solutions**:
|
| 198 |
+
|
| 199 |
+
```python
|
| 200 |
+
# 1. For testing, use in-memory storage
|
| 201 |
+
client = HeadroomClient(
|
| 202 |
+
original_client=OpenAI(),
|
| 203 |
+
provider=OpenAIProvider(),
|
| 204 |
+
store_url="sqlite:///:memory:", # No file created
|
| 205 |
+
)
|
| 206 |
+
|
| 207 |
+
# 2. For temp directory storage
|
| 208 |
+
import tempfile
|
| 209 |
+
import os
|
| 210 |
+
db_path = os.path.join(tempfile.gettempdir(), "headroom.db")
|
| 211 |
+
client = HeadroomClient(
|
| 212 |
+
original_client=OpenAI(),
|
| 213 |
+
provider=OpenAIProvider(),
|
| 214 |
+
store_url=f"sqlite:///{db_path}",
|
| 215 |
+
)
|
| 216 |
+
```
|
| 217 |
+
|
| 218 |
+
---
|
| 219 |
+
|
| 220 |
+
## Import/Installation Issues
|
| 221 |
+
|
| 222 |
+
### "ModuleNotFoundError: No module named 'headroom'"
|
| 223 |
+
|
| 224 |
+
```bash
|
| 225 |
+
# 1. Check it's installed in the right environment
|
| 226 |
+
pip show headroom-ai
|
| 227 |
+
|
| 228 |
+
# 2. If using virtual environment, ensure it's activated
|
| 229 |
+
source venv/bin/activate # or equivalent
|
| 230 |
+
|
| 231 |
+
# 3. Reinstall
|
| 232 |
+
pip install --upgrade headroom-ai
|
| 233 |
+
```
|
| 234 |
+
|
| 235 |
+
### "ImportError: cannot import name 'X' from 'headroom'"
|
| 236 |
+
|
| 237 |
+
```python
|
| 238 |
+
# Check available imports
|
| 239 |
+
import headroom
|
| 240 |
+
print(dir(headroom))
|
| 241 |
+
|
| 242 |
+
# Common imports:
|
| 243 |
+
from headroom import (
|
| 244 |
+
HeadroomClient,
|
| 245 |
+
OpenAIProvider,
|
| 246 |
+
AnthropicProvider,
|
| 247 |
+
HeadroomConfig,
|
| 248 |
+
# Exceptions
|
| 249 |
+
HeadroomError,
|
| 250 |
+
ConfigurationError,
|
| 251 |
+
ProviderError,
|
| 252 |
+
)
|
| 253 |
+
```
|
| 254 |
+
|
| 255 |
+
### "Missing optional dependency"
|
| 256 |
+
|
| 257 |
+
```bash
|
| 258 |
+
# For proxy server
|
| 259 |
+
pip install "headroom-ai[proxy]"
|
| 260 |
+
|
| 261 |
+
# For embedding-based relevance scoring
|
| 262 |
+
pip install "headroom-ai[relevance]"
|
| 263 |
+
|
| 264 |
+
# For everything
|
| 265 |
+
pip install "headroom-ai[all]"
|
| 266 |
+
```
|
| 267 |
+
|
| 268 |
+
---
|
| 269 |
+
|
| 270 |
+
## Provider-Specific Issues
|
| 271 |
+
|
| 272 |
+
### OpenAI: "Invalid API key"
|
| 273 |
+
|
| 274 |
+
```python
|
| 275 |
+
from openai import OpenAI
|
| 276 |
+
import os
|
| 277 |
+
|
| 278 |
+
# Ensure key is set
|
| 279 |
+
api_key = os.environ.get("OPENAI_API_KEY")
|
| 280 |
+
if not api_key:
|
| 281 |
+
raise ValueError("OPENAI_API_KEY not set")
|
| 282 |
+
|
| 283 |
+
client = HeadroomClient(
|
| 284 |
+
original_client=OpenAI(api_key=api_key),
|
| 285 |
+
provider=OpenAIProvider(),
|
| 286 |
+
)
|
| 287 |
+
```
|
| 288 |
+
|
| 289 |
+
### Anthropic: "Authentication error"
|
| 290 |
+
|
| 291 |
+
```python
|
| 292 |
+
from anthropic import Anthropic
|
| 293 |
+
import os
|
| 294 |
+
|
| 295 |
+
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
| 296 |
+
client = HeadroomClient(
|
| 297 |
+
original_client=Anthropic(api_key=api_key),
|
| 298 |
+
provider=AnthropicProvider(),
|
| 299 |
+
)
|
| 300 |
+
```
|
| 301 |
+
|
| 302 |
+
### "Unknown model" warnings
|
| 303 |
+
|
| 304 |
+
```python
|
| 305 |
+
# For custom/fine-tuned models, specify context limit
|
| 306 |
+
client = HeadroomClient(
|
| 307 |
+
original_client=OpenAI(),
|
| 308 |
+
provider=OpenAIProvider(),
|
| 309 |
+
model_context_limits={
|
| 310 |
+
"ft:gpt-4o-2024-08-06:my-org::abc123": 128000,
|
| 311 |
+
"my-custom-model": 32000,
|
| 312 |
+
},
|
| 313 |
+
)
|
| 314 |
+
```
|
| 315 |
+
|
| 316 |
+
---
|
| 317 |
+
|
| 318 |
+
## Debugging Techniques
|
| 319 |
+
|
| 320 |
+
### Enable Full Logging
|
| 321 |
+
|
| 322 |
+
```python
|
| 323 |
+
import logging
|
| 324 |
+
|
| 325 |
+
# See everything
|
| 326 |
+
logging.basicConfig(
|
| 327 |
+
level=logging.DEBUG,
|
| 328 |
+
format="%(asctime)s %(name)s %(levelname)s %(message)s",
|
| 329 |
+
)
|
| 330 |
+
|
| 331 |
+
# Or just Headroom logs
|
| 332 |
+
logging.getLogger("headroom").setLevel(logging.DEBUG)
|
| 333 |
+
```
|
| 334 |
+
|
| 335 |
+
### Inspect Transform Results
|
| 336 |
+
|
| 337 |
+
```python
|
| 338 |
+
# Use simulate to see what would happen
|
| 339 |
+
plan = client.chat.completions.simulate(
|
| 340 |
+
model="gpt-4o",
|
| 341 |
+
messages=messages,
|
| 342 |
+
)
|
| 343 |
+
|
| 344 |
+
print(f"Tokens: {plan.tokens_before} -> {plan.tokens_after}")
|
| 345 |
+
print(f"Transforms: {plan.transforms}")
|
| 346 |
+
print(f"Waste signals: {plan.waste_signals}")
|
| 347 |
+
|
| 348 |
+
# See the actual optimized messages
|
| 349 |
+
import json
|
| 350 |
+
print(json.dumps(plan.messages_optimized, indent=2))
|
| 351 |
+
```
|
| 352 |
+
|
| 353 |
+
### Check Storage Contents
|
| 354 |
+
|
| 355 |
+
```python
|
| 356 |
+
from datetime import datetime, timedelta
|
| 357 |
+
|
| 358 |
+
# Get recent metrics
|
| 359 |
+
metrics = client.get_metrics(
|
| 360 |
+
start_time=datetime.utcnow() - timedelta(hours=1),
|
| 361 |
+
limit=10,
|
| 362 |
+
)
|
| 363 |
+
|
| 364 |
+
for m in metrics:
|
| 365 |
+
print(f"{m.timestamp}: {m.tokens_input_before} -> {m.tokens_input_after}")
|
| 366 |
+
print(f" Transforms: {m.transforms_applied}")
|
| 367 |
+
if m.error:
|
| 368 |
+
print(f" ERROR: {m.error}")
|
| 369 |
+
```
|
| 370 |
+
|
| 371 |
+
### Manual Transform Testing
|
| 372 |
+
|
| 373 |
+
```python
|
| 374 |
+
from headroom import SmartCrusher, Tokenizer
|
| 375 |
+
from headroom.config import SmartCrusherConfig
|
| 376 |
+
import json
|
| 377 |
+
|
| 378 |
+
# Test compression directly
|
| 379 |
+
config = SmartCrusherConfig()
|
| 380 |
+
crusher = SmartCrusher(config)
|
| 381 |
+
tokenizer = Tokenizer()
|
| 382 |
+
|
| 383 |
+
messages = [
|
| 384 |
+
{"role": "tool", "content": json.dumps({"items": list(range(100))}), "tool_call_id": "1"}
|
| 385 |
+
]
|
| 386 |
+
|
| 387 |
+
result = crusher.apply(messages, tokenizer)
|
| 388 |
+
print(f"Tokens: {result.tokens_before} -> {result.tokens_after}")
|
| 389 |
+
print(f"Compressed content: {result.messages[0]['content'][:200]}...")
|
| 390 |
+
```
|
| 391 |
+
|
| 392 |
+
---
|
| 393 |
+
|
| 394 |
+
## Error Reference
|
| 395 |
+
|
| 396 |
+
| Exception | Meaning | Solution |
|
| 397 |
+
|-----------|---------|----------|
|
| 398 |
+
| `ConfigurationError` | Invalid config values | Check config parameters |
|
| 399 |
+
| `ProviderError` | Provider issue (unknown model, etc.) | Set model_context_limits |
|
| 400 |
+
| `StorageError` | Database issue | Check path/permissions |
|
| 401 |
+
| `CompressionError` | Compression failed | Rare - check data format |
|
| 402 |
+
| `TokenizationError` | Token counting failed | Check model name |
|
| 403 |
+
| `ValidationError` | Setup validation failed | Run validate_setup() |
|
| 404 |
+
|
| 405 |
+
### Handling Errors
|
| 406 |
+
|
| 407 |
+
```python
|
| 408 |
+
from headroom import (
|
| 409 |
+
HeadroomClient,
|
| 410 |
+
HeadroomError,
|
| 411 |
+
ConfigurationError,
|
| 412 |
+
StorageError,
|
| 413 |
+
)
|
| 414 |
+
|
| 415 |
+
try:
|
| 416 |
+
client = HeadroomClient(...)
|
| 417 |
+
response = client.chat.completions.create(...)
|
| 418 |
+
except ConfigurationError as e:
|
| 419 |
+
print(f"Config issue: {e}")
|
| 420 |
+
print(f"Details: {e.details}")
|
| 421 |
+
except StorageError as e:
|
| 422 |
+
print(f"Storage issue: {e}")
|
| 423 |
+
# Headroom continues to work, just without metrics persistence
|
| 424 |
+
except HeadroomError as e:
|
| 425 |
+
print(f"Headroom error: {e}")
|
| 426 |
+
```
|
| 427 |
+
|
| 428 |
+
---
|
| 429 |
+
|
| 430 |
+
## Getting Help
|
| 431 |
+
|
| 432 |
+
1. **Enable debug logging** and check the output
|
| 433 |
+
2. **Use simulate()** to see what transforms would apply
|
| 434 |
+
3. **Check validate_setup()** for configuration issues
|
| 435 |
+
4. **File an issue** at https://github.com/headroom-sdk/headroom/issues
|
| 436 |
+
|
| 437 |
+
When filing an issue, include:
|
| 438 |
+
- Headroom version (`pip show headroom-ai`)
|
| 439 |
+
- Python version
|
| 440 |
+
- Provider (OpenAI/Anthropic)
|
| 441 |
+
- Debug log output
|
| 442 |
+
- Minimal reproduction code
|
examples/basic_usage.py
CHANGED
|
@@ -4,8 +4,13 @@ Basic usage example for Headroom SDK.
|
|
| 4 |
|
| 5 |
This example shows how to wrap an OpenAI client with Headroom
|
| 6 |
and use both audit and optimize modes.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
"""
|
| 8 |
|
|
|
|
| 9 |
import os
|
| 10 |
import tempfile
|
| 11 |
|
|
@@ -14,6 +19,12 @@ from openai import OpenAI
|
|
| 14 |
|
| 15 |
from headroom import HeadroomClient, OpenAIProvider
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Load API key from .env.local
|
| 18 |
load_dotenv(".env.local")
|
| 19 |
|
|
@@ -153,7 +164,7 @@ def example_get_metrics():
|
|
| 153 |
print("METRICS EXAMPLE")
|
| 154 |
print("=" * 50)
|
| 155 |
|
| 156 |
-
# Get summary statistics
|
| 157 |
summary = client.get_summary()
|
| 158 |
print(f"Total requests: {summary['total_requests']}")
|
| 159 |
print(f"Total tokens saved: {summary['total_tokens_saved']}")
|
|
@@ -161,12 +172,65 @@ def example_get_metrics():
|
|
| 161 |
print()
|
| 162 |
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
| 165 |
# Run all examples
|
| 166 |
example_audit_mode()
|
| 167 |
example_optimize_mode()
|
| 168 |
example_simulate_mode()
|
| 169 |
example_get_metrics()
|
| 170 |
|
|
|
|
|
|
|
|
|
|
| 171 |
# Clean up
|
| 172 |
client.close()
|
|
|
|
| 4 |
|
| 5 |
This example shows how to wrap an OpenAI client with Headroom
|
| 6 |
and use both audit and optimize modes.
|
| 7 |
+
|
| 8 |
+
Run:
|
| 9 |
+
export OPENAI_API_KEY='sk-...'
|
| 10 |
+
python examples/basic_usage.py
|
| 11 |
"""
|
| 12 |
|
| 13 |
+
import logging
|
| 14 |
import os
|
| 15 |
import tempfile
|
| 16 |
|
|
|
|
| 19 |
|
| 20 |
from headroom import HeadroomClient, OpenAIProvider
|
| 21 |
|
| 22 |
+
# Enable logging to see what Headroom is doing
|
| 23 |
+
logging.basicConfig(
|
| 24 |
+
level=logging.INFO,
|
| 25 |
+
format="%(name)s: %(message)s",
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
# Load API key from .env.local
|
| 29 |
load_dotenv(".env.local")
|
| 30 |
|
|
|
|
| 164 |
print("METRICS EXAMPLE")
|
| 165 |
print("=" * 50)
|
| 166 |
|
| 167 |
+
# Get summary statistics from database
|
| 168 |
summary = client.get_summary()
|
| 169 |
print(f"Total requests: {summary['total_requests']}")
|
| 170 |
print(f"Total tokens saved: {summary['total_tokens_saved']}")
|
|
|
|
| 172 |
print()
|
| 173 |
|
| 174 |
|
| 175 |
+
def example_validate_setup():
|
| 176 |
+
"""Example of validating Headroom setup."""
|
| 177 |
+
print("=" * 50)
|
| 178 |
+
print("VALIDATE SETUP EXAMPLE")
|
| 179 |
+
print("=" * 50)
|
| 180 |
+
|
| 181 |
+
# Validate that everything is configured correctly
|
| 182 |
+
result = client.validate_setup()
|
| 183 |
+
|
| 184 |
+
if result["valid"]:
|
| 185 |
+
print("Setup is valid!")
|
| 186 |
+
print(f" Provider: {result['provider']['name']}")
|
| 187 |
+
print(f" Storage: {result['storage']['url']}")
|
| 188 |
+
print(f" Mode: {result['config']['mode']}")
|
| 189 |
+
else:
|
| 190 |
+
print("Setup issues detected:")
|
| 191 |
+
for key, val in result.items():
|
| 192 |
+
if key != "valid" and not val.get("ok"):
|
| 193 |
+
print(f" {key}: {val.get('error')}")
|
| 194 |
+
print()
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def example_get_stats():
|
| 198 |
+
"""Example of getting quick session stats."""
|
| 199 |
+
print("=" * 50)
|
| 200 |
+
print("SESSION STATS EXAMPLE")
|
| 201 |
+
print("=" * 50)
|
| 202 |
+
|
| 203 |
+
# Get quick stats without database query
|
| 204 |
+
stats = client.get_stats()
|
| 205 |
+
|
| 206 |
+
print("Session stats:")
|
| 207 |
+
print(f" Requests total: {stats['session']['requests_total']}")
|
| 208 |
+
print(f" Requests optimized: {stats['session']['requests_optimized']}")
|
| 209 |
+
print(f" Tokens saved: {stats['session']['tokens_saved_total']}")
|
| 210 |
+
|
| 211 |
+
print("\nConfiguration:")
|
| 212 |
+
print(f" Mode: {stats['config']['mode']}")
|
| 213 |
+
print(f" Provider: {stats['config']['provider']}")
|
| 214 |
+
|
| 215 |
+
print("\nTransforms enabled:")
|
| 216 |
+
print(f" SmartCrusher: {stats['transforms']['smart_crusher_enabled']}")
|
| 217 |
+
print(f" RollingWindow: {stats['transforms']['rolling_window_enabled']}")
|
| 218 |
+
print(f" CacheAligner: {stats['transforms']['cache_aligner_enabled']}")
|
| 219 |
+
print()
|
| 220 |
+
|
| 221 |
+
|
| 222 |
if __name__ == "__main__":
|
| 223 |
+
# First, validate the setup
|
| 224 |
+
example_validate_setup()
|
| 225 |
+
|
| 226 |
# Run all examples
|
| 227 |
example_audit_mode()
|
| 228 |
example_optimize_mode()
|
| 229 |
example_simulate_mode()
|
| 230 |
example_get_metrics()
|
| 231 |
|
| 232 |
+
# Show session stats
|
| 233 |
+
example_get_stats()
|
| 234 |
+
|
| 235 |
# Clean up
|
| 236 |
client.close()
|
headroom/__init__.py
CHANGED
|
@@ -1,41 +1,71 @@
|
|
| 1 |
"""
|
| 2 |
-
Headroom -
|
|
|
|
|
|
|
| 3 |
|
| 4 |
Headroom wraps LLM clients to provide:
|
| 5 |
-
-
|
| 6 |
-
-
|
| 7 |
-
-
|
| 8 |
-
-
|
| 9 |
-
- Full streaming support
|
| 10 |
|
| 11 |
-
|
| 12 |
|
| 13 |
from headroom import HeadroomClient, OpenAIProvider
|
| 14 |
from openai import OpenAI
|
| 15 |
|
| 16 |
-
|
| 17 |
-
provider = OpenAIProvider()
|
| 18 |
-
|
| 19 |
client = HeadroomClient(
|
| 20 |
-
original_client=
|
| 21 |
-
provider=
|
| 22 |
-
|
| 23 |
-
default_mode="audit",
|
| 24 |
)
|
| 25 |
|
| 26 |
-
# Use like
|
| 27 |
-
|
| 28 |
model="gpt-4o",
|
| 29 |
-
messages=[
|
| 30 |
-
|
|
|
|
| 31 |
)
|
| 32 |
|
| 33 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
plan = client.chat.completions.simulate(
|
| 35 |
model="gpt-4o",
|
| 36 |
-
messages=
|
| 37 |
)
|
| 38 |
print(f"Would save {plan.tokens_saved} tokens")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
"""
|
| 40 |
|
| 41 |
from .cache import (
|
|
|
|
| 1 |
"""
|
| 2 |
+
Headroom - The Context Optimization Layer for LLM Applications.
|
| 3 |
+
|
| 4 |
+
Cut your LLM costs by 50-90% without losing accuracy.
|
| 5 |
|
| 6 |
Headroom wraps LLM clients to provide:
|
| 7 |
+
- Smart compression of tool outputs (keeps errors, anomalies, relevant items)
|
| 8 |
+
- Cache-aligned prefix optimization for better provider cache hits
|
| 9 |
+
- Rolling window token management for long conversations
|
| 10 |
+
- Full streaming support with zero accuracy loss
|
|
|
|
| 11 |
|
| 12 |
+
Quick Start:
|
| 13 |
|
| 14 |
from headroom import HeadroomClient, OpenAIProvider
|
| 15 |
from openai import OpenAI
|
| 16 |
|
| 17 |
+
# Wrap your existing client
|
|
|
|
|
|
|
| 18 |
client = HeadroomClient(
|
| 19 |
+
original_client=OpenAI(),
|
| 20 |
+
provider=OpenAIProvider(),
|
| 21 |
+
default_mode="optimize",
|
|
|
|
| 22 |
)
|
| 23 |
|
| 24 |
+
# Use exactly like the original client
|
| 25 |
+
response = client.chat.completions.create(
|
| 26 |
model="gpt-4o",
|
| 27 |
+
messages=[
|
| 28 |
+
{"role": "user", "content": "Hello!"},
|
| 29 |
+
],
|
| 30 |
)
|
| 31 |
|
| 32 |
+
# Check savings
|
| 33 |
+
stats = client.get_stats()
|
| 34 |
+
print(f"Tokens saved: {stats['session']['tokens_saved_total']}")
|
| 35 |
+
|
| 36 |
+
Verify It's Working:
|
| 37 |
+
|
| 38 |
+
# Validate configuration
|
| 39 |
+
result = client.validate_setup()
|
| 40 |
+
if not result["valid"]:
|
| 41 |
+
print("Issues:", result)
|
| 42 |
+
|
| 43 |
+
# Enable logging to see what's happening
|
| 44 |
+
import logging
|
| 45 |
+
logging.basicConfig(level=logging.INFO)
|
| 46 |
+
# INFO:headroom.transforms.pipeline:Pipeline complete: 45000 -> 4500 tokens
|
| 47 |
+
|
| 48 |
+
Simulate Before Sending:
|
| 49 |
+
|
| 50 |
plan = client.chat.completions.simulate(
|
| 51 |
model="gpt-4o",
|
| 52 |
+
messages=large_messages,
|
| 53 |
)
|
| 54 |
print(f"Would save {plan.tokens_saved} tokens")
|
| 55 |
+
print(f"Transforms: {plan.transforms}")
|
| 56 |
+
|
| 57 |
+
Error Handling:
|
| 58 |
+
|
| 59 |
+
from headroom import HeadroomError, ConfigurationError, ProviderError
|
| 60 |
+
|
| 61 |
+
try:
|
| 62 |
+
response = client.chat.completions.create(...)
|
| 63 |
+
except ConfigurationError as e:
|
| 64 |
+
print(f"Config issue: {e.details}")
|
| 65 |
+
except HeadroomError as e:
|
| 66 |
+
print(f"Headroom error: {e}")
|
| 67 |
+
|
| 68 |
+
For more examples, see https://github.com/headroom-sdk/headroom/tree/main/examples
|
| 69 |
"""
|
| 70 |
|
| 71 |
from .cache import (
|
headroom/client.py
CHANGED
|
@@ -266,7 +266,7 @@ class HeadroomClient:
|
|
| 266 |
self,
|
| 267 |
original_client: Any,
|
| 268 |
provider: Provider,
|
| 269 |
-
store_url: str =
|
| 270 |
default_mode: str = "audit",
|
| 271 |
model_context_limits: dict[str, int] | None = None,
|
| 272 |
cache_optimizer: BaseCacheOptimizer | None = None,
|
|
@@ -279,7 +279,7 @@ class HeadroomClient:
|
|
| 279 |
Args:
|
| 280 |
original_client: The underlying LLM client (OpenAI-compatible).
|
| 281 |
provider: Provider instance for model-specific behavior.
|
| 282 |
-
store_url: Storage URL (sqlite:// or jsonl://).
|
| 283 |
default_mode: Default mode ("audit" | "optimize").
|
| 284 |
model_context_limits: Override context limits for models.
|
| 285 |
cache_optimizer: Optional custom cache optimizer. If None and
|
|
@@ -289,6 +289,15 @@ class HeadroomClient:
|
|
| 289 |
"""
|
| 290 |
self._original = original_client
|
| 291 |
self._provider = provider
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
self._store_url = store_url
|
| 293 |
self._default_mode = HeadroomMode(default_mode)
|
| 294 |
|
|
|
|
| 266 |
self,
|
| 267 |
original_client: Any,
|
| 268 |
provider: Provider,
|
| 269 |
+
store_url: str | None = None,
|
| 270 |
default_mode: str = "audit",
|
| 271 |
model_context_limits: dict[str, int] | None = None,
|
| 272 |
cache_optimizer: BaseCacheOptimizer | None = None,
|
|
|
|
| 279 |
Args:
|
| 280 |
original_client: The underlying LLM client (OpenAI-compatible).
|
| 281 |
provider: Provider instance for model-specific behavior.
|
| 282 |
+
store_url: Storage URL (sqlite:// or jsonl://). Defaults to a SQLite database file (headroom.db) in the system temp directory.
|
| 283 |
default_mode: Default mode ("audit" | "optimize").
|
| 284 |
model_context_limits: Override context limits for models.
|
| 285 |
cache_optimizer: Optional custom cache optimizer. If None and
|
|
|
|
| 289 |
"""
|
| 290 |
self._original = original_client
|
| 291 |
self._provider = provider
|
| 292 |
+
|
| 293 |
+
# Set default store_url to temp directory for better DevEx
|
| 294 |
+
if store_url is None:
|
| 295 |
+
import os
|
| 296 |
+
import tempfile
|
| 297 |
+
|
| 298 |
+
db_path = os.path.join(tempfile.gettempdir(), "headroom.db")
|
| 299 |
+
store_url = f"sqlite:///{db_path}"
|
| 300 |
+
|
| 301 |
self._store_url = store_url
|
| 302 |
self._default_mode = HeadroomMode(default_mode)
|
| 303 |
|
headroom/proxy/server.py
CHANGED
|
@@ -1779,7 +1779,7 @@ def run_server(config: ProxyConfig | None = None):
|
|
| 1779 |
║ /v1/retrieve/stats CCR: Compression store stats ║
|
| 1780 |
║ /v1/retrieve/tool_call CCR: Handle LLM tool calls ║
|
| 1781 |
║ /v1/feedback CCR: Feedback loop stats & patterns ║
|
| 1782 |
-
║ /v1/feedback/{tool}
|
| 1783 |
║ /v1/telemetry Data flywheel: Telemetry stats ║
|
| 1784 |
║ /v1/telemetry/export Data flywheel: Export for aggregation ║
|
| 1785 |
║ /v1/telemetry/tools Data flywheel: Per-tool stats ║
|
|
|
|
| 1779 |
║ /v1/retrieve/stats CCR: Compression store stats ║
|
| 1780 |
║ /v1/retrieve/tool_call CCR: Handle LLM tool calls ║
|
| 1781 |
║ /v1/feedback CCR: Feedback loop stats & patterns ║
|
| 1782 |
+
║ /v1/feedback/{{tool}} CCR: Compression hints for a tool ║
|
| 1783 |
║ /v1/telemetry Data flywheel: Telemetry stats ║
|
| 1784 |
║ /v1/telemetry/export Data flywheel: Export for aggregation ║
|
| 1785 |
║ /v1/telemetry/tools Data flywheel: Per-tool stats ║
|
headroom/relevance/embedding.py
CHANGED
|
@@ -22,26 +22,44 @@ from __future__ import annotations
|
|
| 22 |
import logging
|
| 23 |
from typing import TYPE_CHECKING
|
| 24 |
|
| 25 |
-
import numpy as np
|
| 26 |
-
|
| 27 |
from .base import RelevanceScore, RelevanceScorer
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
if TYPE_CHECKING:
|
| 30 |
from sentence_transformers import SentenceTransformer
|
| 31 |
|
| 32 |
logger = logging.getLogger(__name__)
|
| 33 |
|
| 34 |
|
| 35 |
-
def _cosine_similarity(a
|
| 36 |
"""Compute cosine similarity between two vectors.
|
| 37 |
|
| 38 |
Args:
|
| 39 |
-
a: First vector.
|
| 40 |
-
b: Second vector.
|
| 41 |
|
| 42 |
Returns:
|
| 43 |
Cosine similarity clamped to [0, 1] (the raw value lies in [-1, 1]).
|
| 44 |
"""
|
|
|
|
| 45 |
norm_a = np.linalg.norm(a)
|
| 46 |
norm_b = np.linalg.norm(b)
|
| 47 |
|
|
@@ -144,7 +162,7 @@ class EmbeddingScorer(RelevanceScorer):
|
|
| 144 |
|
| 145 |
return self._model
|
| 146 |
|
| 147 |
-
def _encode(self, texts: list[str])
|
| 148 |
"""Encode texts to embeddings.
|
| 149 |
|
| 150 |
Args:
|
|
|
|
| 22 |
import logging
|
| 23 |
from typing import TYPE_CHECKING
|
| 24 |
|
|
|
|
|
|
|
| 25 |
from .base import RelevanceScore, RelevanceScorer
|
| 26 |
|
| 27 |
+
# numpy is an optional dependency - import lazily
|
| 28 |
+
_numpy = None
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _get_numpy():
|
| 32 |
+
"""Lazily import numpy."""
|
| 33 |
+
global _numpy
|
| 34 |
+
if _numpy is None:
|
| 35 |
+
try:
|
| 36 |
+
import numpy as np
|
| 37 |
+
|
| 38 |
+
_numpy = np
|
| 39 |
+
except ImportError:
|
| 40 |
+
raise ImportError(
|
| 41 |
+
"numpy is required for EmbeddingScorer. "
|
| 42 |
+
"Install with: pip install headroom-ai[relevance]"
|
| 43 |
+
)
|
| 44 |
+
return _numpy
|
| 45 |
+
|
| 46 |
if TYPE_CHECKING:
|
| 47 |
from sentence_transformers import SentenceTransformer
|
| 48 |
|
| 49 |
logger = logging.getLogger(__name__)
|
| 50 |
|
| 51 |
|
| 52 |
+
def _cosine_similarity(a, b) -> float:
|
| 53 |
"""Compute cosine similarity between two vectors.
|
| 54 |
|
| 55 |
Args:
|
| 56 |
+
a: First vector (numpy array).
|
| 57 |
+
b: Second vector (numpy array).
|
| 58 |
|
| 59 |
Returns:
|
| 60 |
Cosine similarity clamped to [0, 1] (the raw value lies in [-1, 1]).
|
| 61 |
"""
|
| 62 |
+
np = _get_numpy()
|
| 63 |
norm_a = np.linalg.norm(a)
|
| 64 |
norm_b = np.linalg.norm(b)
|
| 65 |
|
|
|
|
| 162 |
|
| 163 |
return self._model
|
| 164 |
|
| 165 |
+
def _encode(self, texts: list[str]):
|
| 166 |
"""Encode texts to embeddings.
|
| 167 |
|
| 168 |
Args:
|
headroom/reporting/generator.py
CHANGED
|
@@ -4,11 +4,27 @@ from __future__ import annotations
|
|
| 4 |
|
| 5 |
from datetime import datetime
|
| 6 |
from pathlib import Path
|
| 7 |
-
from typing import Any
|
| 8 |
-
|
| 9 |
-
from jinja2 import Template
|
| 10 |
|
| 11 |
from ..storage import create_storage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
from ..utils import estimate_cost, format_cost
|
| 13 |
|
| 14 |
# HTML template embedded as string
|
|
@@ -350,7 +366,7 @@ def generate_report(
|
|
| 350 |
period = "All time"
|
| 351 |
|
| 352 |
# Render template
|
| 353 |
-
template =
|
| 354 |
html = template.render(
|
| 355 |
generated_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
| 356 |
period=period,
|
|
|
|
| 4 |
|
| 5 |
from datetime import datetime
|
| 6 |
from pathlib import Path
|
| 7 |
+
from typing import TYPE_CHECKING, Any
|
|
|
|
|
|
|
| 8 |
|
| 9 |
from ..storage import create_storage
|
| 10 |
+
|
| 11 |
+
if TYPE_CHECKING:
|
| 12 |
+
from jinja2 import Template
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _get_jinja2_template(template_str: str):
|
| 16 |
+
"""Lazily import jinja2 and create template."""
|
| 17 |
+
try:
|
| 18 |
+
from jinja2 import Template
|
| 19 |
+
|
| 20 |
+
return Template(template_str)
|
| 21 |
+
except ImportError:
|
| 22 |
+
raise ImportError(
|
| 23 |
+
"jinja2 is required for report generation. "
|
| 24 |
+
"Install with: pip install headroom-ai[reports]"
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
from ..utils import estimate_cost, format_cost
|
| 29 |
|
| 30 |
# HTML template embedded as string
|
|
|
|
| 366 |
period = "All time"
|
| 367 |
|
| 368 |
# Render template
|
| 369 |
+
template = _get_jinja2_template(REPORT_TEMPLATE)
|
| 370 |
html = template.render(
|
| 371 |
generated_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
| 372 |
period=period,
|
pyproject.toml
CHANGED
|
@@ -3,7 +3,7 @@ requires = ["hatchling"]
|
|
| 3 |
build-backend = "hatchling.build"
|
| 4 |
|
| 5 |
[project]
|
| 6 |
-
name = "headroom"
|
| 7 |
version = "0.2.0"
|
| 8 |
description = "The Context Optimization Layer for LLM Applications - Cut costs by 50-90%"
|
| 9 |
readme = "README.md"
|
|
@@ -76,7 +76,7 @@ dev = [
|
|
| 76 |
]
|
| 77 |
# All optional dependencies
|
| 78 |
all = [
|
| 79 |
-
"headroom[relevance,proxy,reports]",
|
| 80 |
]
|
| 81 |
|
| 82 |
[project.scripts]
|
|
|
|
| 3 |
build-backend = "hatchling.build"
|
| 4 |
|
| 5 |
[project]
|
| 6 |
+
name = "headroom-ai"
|
| 7 |
version = "0.2.0"
|
| 8 |
description = "The Context Optimization Layer for LLM Applications - Cut costs by 50-90%"
|
| 9 |
readme = "README.md"
|
|
|
|
| 76 |
]
|
| 77 |
# All optional dependencies
|
| 78 |
all = [
|
| 79 |
+
"headroom-ai[relevance,proxy,reports]",
|
| 80 |
]
|
| 81 |
|
| 82 |
[project.scripts]
|