# Packaging, dependency, and tooling configuration for the `headroom-ai`
# distribution (built with hatchling).

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "headroom-ai"
version = "0.22.0"
description = "The Context Optimization Layer for LLM Applications - Cut costs by 50-90%"
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.10"
authors = [
    { name = "Headroom Contributors" },
]
maintainers = [
    { name = "Headroom Contributors" },
]
keywords = [
    "llm",
    "openai",
    "anthropic",
    "claude",
    "gpt",
    "context",
    "token",
    "optimization",
    "compression",
    "caching",
    "proxy",
    "ai",
    "machine-learning",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Typing :: Typed",
]
dependencies = [
    # Core: lightweight compression (SmartCrusher, ContentRouter, CCR, TOIN)
    "tiktoken>=0.5.0",  # Tokenizer for all compressors
    "pydantic>=2.0.0",  # Config and data models
    "litellm==1.82.3",  # Model registry, pricing, and provider support
    "click>=8.1.0",  # CLI framework
    "rich>=13.0.0",  # Rich terminal output
    "opentelemetry-api>=1.24.0",  # Safe no-op OTEL API for instrumentation
    "ast-grep-cli>=0.30.0",  # AST-aware code slicing (CodeCompressor); binary wheel
    "tomli>=2.0.0; python_version < '3.11'",  # tomllib backport for helper scripts
]

[project.optional-dependencies]
# Proxy server (most common install: pip install headroom-ai[proxy])
proxy = [
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0",
    "httpx[http2]>=0.24.0",
    "openai>=2.14.0",  # OpenAI API format support
    "anthropic>=0.18.0",  # Anthropic official SDK for count_tokens API
    "mcp>=1.0.0",  # MCP server (headroom_compress, retrieve, stats)
    "magika>=0.6.0",  # ML content detection for ContentRouter
    "zstandard>=0.20.0",  # Decompress zstd request bodies (Codex, etc.)
    "websockets>=13.0",  # WebSocket proxy for /v1/responses (Codex gpt-5.4+)
    "onnxruntime>=1.16.0",  # Kompress ONNX INT8 text compression (no torch needed)
    "transformers>=4.30.0",  # Tokenizer only (for Kompress)
    "watchdog>=4.0.0",  # File watcher for live code graph reindexing (--code-graph)
    "sqlite-vec>=0.1.6",  # Vector index for memory (--memory). Lightweight, no torch.
    "redis>=5.0.0",  # Shared stats aggregation for multi-worker proxy (TCP)
    "upstash-redis>=1.0.0",  # Upstash Redis (HTTP REST) for multi-worker proxy on HF Spaces
]

# GPU acceleration for ONNX models.
# Install: pip install headroom-ai[proxy,gpu]
# Requires: CUDA toolkit + cuDNN on host. Auto-detects GPU, falls back to CPU.
# Note: onnxruntime-gpu conflicts with onnxruntime; both install to same paths.
# NOTE(review): extras are additive, so [proxy,gpu] requests onnxruntime (via
# [proxy]) *and* onnxruntime-gpu rather than replacing one with the other. This
# comment previously also described the extra as "mutually exclusive with
# [proxy]". Confirm the intended install procedure (e.g. whether onnxruntime
# must be uninstalled before onnxruntime-gpu is installed).
gpu = [
    "onnxruntime-gpu>=1.16.0",
]

# AST-based code compression (tree-sitter)
code = [
    "tree-sitter-language-pack>=0.10.0",
]

# ML-based compression with Kompress (ModernBERT)
ml = [
    "torch>=2.0.0",
    "transformers>=4.30.0",
]

# Legacy ML compression (LLMLingua-2 — use [ml] instead for Kompress)
llmlingua = [
    "llmlingua>=0.2.0",
    "torch>=2.0.0",
    "transformers>=4.30.0",
]

# Memory system (hierarchical memory with vector search)
memory = [
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0",
]

# Qdrant + Neo4j memory backend helpers
memory-stack = [
    "mem0ai>=0.1.100",
    "qdrant-client>=1.9.0",
    "neo4j>=5.20.0",
]

# Semantic relevance scoring with embeddings.
# Uses `fastembed` (BAAI/bge-small-en-v1.5 by default — 33M params,
# 384 dims, ~30 MB int8-quantized ONNX). Same library + model used by
# the Rust SmartCrusher (`fastembed` crate), giving byte-equal embeddings
# across the language boundary. Replaced sentence-transformers in
# Stage 3c.1 — fastembed is faster (~2-3x), smaller (no torch
# dependency), and outranks all-MiniLM-L6-v2 on MTEB by ~6 points.
relevance = [
    "fastembed>=0.4.0",
    "numpy>=1.24.0",
]

# Image compression (ML-based routing + OCR)
image = [
    "pillow>=10.0.0",
    "sentencepiece>=0.1.99",  # Required by SigLIP tokenizer (SiglipTokenizer)
    "rapidocr-onnxruntime>=1.4.0",  # ONNX-native OCR for text extraction from images (~15MB models)
]

# Report generation
reports = [
    "jinja2>=3.0.0",
]

# OpenTelemetry metrics export
otel = [
    "opentelemetry-sdk>=1.24.0",
    "opentelemetry-exporter-otlp-proto-http>=1.24.0",
]

# any-llm multi-provider backend (requires Python 3.11+)
anyllm = [
    "any-llm-sdk>=1.0.0; python_version >= '3.11'",
]

# LangChain integration
langchain = [
    "langchain-core>=0.2.0",
    "langchain-openai>=0.1.0",
]

# Agno agent framework integration
agno = [
    "agno>=1.0.0",
]

# AWS Strands Agents SDK integration
strands = [
    "strands-agents>=0.1.0",
]

# MCP server for Claude Code integration
mcp = [
    "mcp>=1.0.0",
    "httpx>=0.24.0",
]

# Voice filler detection
voice = [
    "onnxruntime>=1.16.0",
    "transformers>=4.30.0",
    "torch>=2.0.0",
]

# Voice training (includes voice deps + training extras)
voice-train = [
    "headroom-ai[voice]",
    "datasets>=2.14.0",
    "accelerate>=0.20.0",
]

# Evaluation framework
evals = [
    "datasets>=2.14.0",
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0",
    "scikit-learn>=1.3.0",
    "anthropic>=0.18.0",
    "openai>=1.0.0",
]

# AWS Bedrock backend
bedrock = [
    "boto3>=1.28.0",
]

# HTML content extraction
html = [
    "trafilatura>=1.6.0",
]

# Comprehensive LLM benchmarks
benchmark = [
    "lm-eval>=0.4.0",
    "openai>=1.0.0",
    "anthropic>=0.18.0",
]

# Development dependencies
dev = [
    "pytest>=7.0.0",
    "pytest-cov>=4.0.0",
    "pytest-asyncio>=0.21.0",
    "ruff>=0.1.0",
    "mypy>=1.0.0",
    "pre-commit>=3.0.0",
    "openai>=1.0.0",
    "anthropic>=0.18.0",
    "litellm==1.82.3",
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0",
    "httpx[http2]>=0.24.0",
    "websockets>=13.0",
    "opentelemetry-sdk>=1.24.0",
    "opentelemetry-exporter-otlp-proto-http>=1.24.0",
    "ollama>=0.4.0",
    "langchain-ollama>=0.2.0",
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0",
]

# All optional dependencies (everything you need)
all = [
    "headroom-ai[proxy,code,ml,memory,relevance,image,reports,otel,evals,voice,html,benchmark,mcp]",
]

[project.scripts]
headroom = "headroom.cli:main"

[project.urls]
Homepage = "https://github.com/chopratejas/headroom"
Documentation = "https://github.com/chopratejas/headroom#readme"
Repository = "https://github.com/chopratejas/headroom"
Issues = "https://github.com/chopratejas/headroom/issues"
Changelog = "https://github.com/chopratejas/headroom/blob/main/CHANGELOG.md"

[tool.hatch.build.targets.wheel]
packages = ["headroom"]
# Include non-Python files (dashboard templates, etc.)
artifacts = [
    "headroom/dashboard/templates/*.html",
]

[tool.hatch.build.targets.sdist]
include = [
    "/headroom",
    "/tests",
    "/LICENSE",
    "/NOTICE",
    "/README.md",
    "/CHANGELOG.md",
]

[tool.ruff]
target-version = "py310"
line-length = 100

[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "W",  # pycodestyle warnings
    "F",  # pyflakes
    "I",  # isort
    "B",  # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]
ignore = [
    "E501",  # line too long (handled by formatter)
    "B008",  # do not perform function calls in argument defaults
    "B905",  # zip without strict parameter
]

[tool.ruff.lint.isort]
known-first-party = ["headroom"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"

[tool.mypy]
python_version = "3.10"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
ignore_missing_imports = true

# Per-module overrides for modules with dynamic typing patterns
[[tool.mypy.overrides]]
module = [
    "headroom.proxy.server",
    "headroom.proxy.cost",
    "headroom.proxy.prometheus_metrics",
    "headroom.proxy.semantic_cache",
    "headroom.proxy.rate_limiter",
    "headroom.proxy.request_logger",
    "headroom.proxy.helpers",
    "headroom.integrations.langchain",
    "headroom.integrations.mcp",
    "headroom.ccr.mcp_server",
    "headroom.relevance.embedding",
    "headroom.reporting.generator",
]
disallow_untyped_defs = false

[[tool.mypy.overrides]]
module = [
    "headroom.tokenizers.*",
    "headroom.providers.litellm",
    "headroom.providers.google",
]
disallow_untyped_defs = false
warn_return_any = false

# Handler mixins use self.* from HeadroomProxy via duck typing — mypy can't resolve these
[[tool.mypy.overrides]]
module = ["headroom.proxy.handlers.*"]
disallow_untyped_defs = false
ignore_errors = true

# Ignore third-party stubs with syntax errors
[[tool.mypy.overrides]]
module = ["mlx.*"]
ignore_errors = true

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_functions = ["test_*"]
addopts = "-v --tb=short"
asyncio_mode = "auto"
markers = [
    "slow: slow tests (model loads, large fixtures)",
    "real_llm: tests that hit real LLM APIs; skipped unless explicitly enabled",
    "live_redis: tests that hit a live Redis instance; skipped unless explicitly enabled",
]

[tool.coverage.run]
source = ["headroom"]
branch = true
omit = [
    "headroom/cli.py",
    "*/tests/*",
]

[tool.coverage.report]
# Each entry is a regex; lines matching any of them are excluded from coverage.
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "raise NotImplementedError",
    "if TYPE_CHECKING:",
    "if __name__ == .__main__.:",
]