# Packaging, dependency, and tooling configuration for the `headroom-ai`
# distribution. One table header / key per line: TOML does not allow a key to
# follow a table header on the same line, and a `#` comments out the rest of
# its line.

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "headroom-ai"
version = "0.5.18"
description = "The Context Optimization Layer for LLM Applications - Cut costs by 50-90%"
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.10"
authors = [
    { name = "Headroom Contributors" },
]
maintainers = [
    { name = "Headroom Contributors" },
]
keywords = [
    "llm",
    "openai",
    "anthropic",
    "claude",
    "gpt",
    "context",
    "token",
    "optimization",
    "compression",
    "caching",
    "proxy",
    "ai",
    "machine-learning",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Typing :: Typed",
]
dependencies = [
    # Core: lightweight compression (SmartCrusher, ContentRouter, CCR, TOIN)
    "tiktoken>=0.5.0",  # Tokenizer for all compressors
    "pydantic>=2.0.0",  # Config and data models
    # Pinned exactly; the same pin is repeated in the `dev` extra below, so
    # update both together.
    "litellm==1.82.3",  # Model registry, pricing, and provider support
    "click>=8.1.0",  # CLI framework
    "rich>=13.0.0",  # Rich terminal output
]

[project.optional-dependencies]
# Proxy server (most common install: pip install headroom-ai[proxy])
proxy = [
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0",
    "httpx[http2]>=0.24.0",
    "openai>=2.14.0",  # OpenAI API format support
    "mcp>=1.0.0",  # MCP server (headroom_compress, retrieve, stats)
    "magika>=0.6.0",  # ML content detection for ContentRouter
    "zstandard>=0.20.0",  # Decompress zstd request bodies (Codex, etc.)
    "websockets>=13.0",  # WebSocket proxy for /v1/responses (Codex gpt-5.4+)
    "onnxruntime>=1.16.0",  # Kompress ONNX INT8 text compression (no torch needed)
    "transformers>=4.30.0",  # Tokenizer only (for Kompress)
]

# AST-based code compression (tree-sitter)
code = [
    "tree-sitter-language-pack>=0.10.0",
]

# ML-based compression with Kompress (ModernBERT)
ml = [
    "torch>=2.0.0",
    "transformers>=4.30.0",
]

# Legacy ML compression (LLMLingua-2 — use [ml] instead for Kompress)
llmlingua = [
    "llmlingua>=0.2.0",
    "torch>=2.0.0",
    "transformers>=4.30.0",
]

# Memory system (hierarchical memory with vector search)
memory = [
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0",
]

# Semantic relevance scoring with embeddings
relevance = [
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0",
]

# Image compression (ML-based routing)
image = [
    "pillow>=10.0.0",
]

# Report generation
reports = [
    "jinja2>=3.0.0",
]

# any-llm multi-provider backend (requires Python 3.11+)
anyllm = [
    "any-llm-sdk>=1.0.0; python_version >= '3.11'",
]

# LangChain integration
langchain = [
    "langchain-core>=0.2.0",
    "langchain-openai>=0.1.0",
]

# Agno agent framework integration
agno = [
    "agno>=1.0.0",
]

# AWS Strands Agents SDK integration
strands = [
    "strands-agents>=0.1.0",
]

# MCP server for Claude Code integration
mcp = [
    "mcp>=1.0.0",
    "httpx>=0.24.0",
]

# Voice filler detection
voice = [
    "onnxruntime>=1.16.0",
    "transformers>=4.30.0",
    "torch>=2.0.0",
]

# Voice training (includes voice deps + training extras)
voice-train = [
    "headroom-ai[voice]",
    "datasets>=2.14.0",
    "accelerate>=0.20.0",
]

# Evaluation framework
evals = [
    "datasets>=2.14.0",
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0",
    "scikit-learn>=1.3.0",
    "anthropic>=0.18.0",
    "openai>=1.0.0",
]

# AWS Bedrock backend
bedrock = [
    "boto3>=1.28.0",
]

# HTML content extraction
html = [
    "trafilatura>=1.6.0",
]

# Comprehensive LLM benchmarks
benchmark = [
    "lm-eval>=0.4.0",
    "openai>=1.0.0",
    "anthropic>=0.18.0",
]

# Development dependencies
dev = [
    "pytest>=7.0.0",
    "pytest-cov>=4.0.0",
    "pytest-asyncio>=0.21.0",
    "ruff>=0.1.0",
    "mypy>=1.0.0",
    "pre-commit>=3.0.0",
    "openai>=1.0.0",
    "anthropic>=0.18.0",
    # Same exact pin as the core dependency above; keep the two in sync.
    "litellm==1.82.3",
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0",
    "httpx[http2]>=0.24.0",
    "ollama>=0.4.0",
    "langchain-ollama>=0.2.0",
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0",
]

# All optional dependencies (everything you need)
# NOTE(review): llmlingua, anyllm, langchain, agno, strands, bedrock,
# voice-train and dev are not included here — confirm that is intentional.
all = [
    "headroom-ai[proxy,code,ml,memory,relevance,image,reports,evals,voice,html,benchmark,mcp]",
]

[project.scripts]
headroom = "headroom.cli:main"

[project.urls]
Homepage = "https://github.com/chopratejas/headroom"
Documentation = "https://github.com/chopratejas/headroom#readme"
Repository = "https://github.com/chopratejas/headroom"
Issues = "https://github.com/chopratejas/headroom/issues"
Changelog = "https://github.com/chopratejas/headroom/blob/main/CHANGELOG.md"

[tool.hatch.build.targets.wheel]
packages = ["headroom"]
# Include non-Python files (dashboard templates, etc.)
artifacts = [
    "headroom/dashboard/templates/*.html",
]

[tool.hatch.build.targets.sdist]
include = [
    "/headroom",
    "/tests",
    "/LICENSE",
    "/NOTICE",
    "/README.md",
    "/CHANGELOG.md",
]

[tool.ruff]
target-version = "py310"
line-length = 100

[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "W",  # pycodestyle warnings
    "F",  # pyflakes
    "I",  # isort
    "B",  # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]
ignore = [
    "E501",  # line too long (handled by formatter)
    "B008",  # do not perform function calls in argument defaults
    "B905",  # zip without strict parameter
]

[tool.ruff.lint.isort]
known-first-party = ["headroom"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"

[tool.mypy]
python_version = "3.10"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
ignore_missing_imports = true

# Per-module overrides for modules with dynamic typing patterns
[[tool.mypy.overrides]]
module = [
    "headroom.proxy.server",
    "headroom.proxy.cost",
    "headroom.proxy.prometheus_metrics",
    "headroom.proxy.semantic_cache",
    "headroom.proxy.rate_limiter",
    "headroom.proxy.request_logger",
    "headroom.proxy.helpers",
    "headroom.integrations.langchain",
    "headroom.integrations.mcp",
    "headroom.ccr.mcp_server",
    "headroom.relevance.embedding",
    "headroom.reporting.generator",
]
disallow_untyped_defs = false

[[tool.mypy.overrides]]
module = [
    "headroom.tokenizers.*",
    "headroom.providers.litellm",
    "headroom.providers.google",
]
disallow_untyped_defs = false
warn_return_any = false

# Handler mixins use self.* from HeadroomProxy via duck typing — mypy can't resolve these
[[tool.mypy.overrides]]
module = ["headroom.proxy.handlers.*"]
disallow_untyped_defs = false
ignore_errors = true

# Ignore third-party stubs with syntax errors
[[tool.mypy.overrides]]
module = ["mlx.*"]
ignore_errors = true

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_functions = ["test_*"]
addopts = "-v --tb=short"
# pytest-asyncio option; pytest-asyncio is installed via the `dev` extra.
asyncio_mode = "auto"
# coverage.py settings: measure the `headroom` package with branch coverage.
[tool.coverage.run]
source = ["headroom"]
branch = true
omit = [
    "headroom/cli.py",
    "*/tests/*",
]

[tool.coverage.report]
# Entries are regular expressions matched against source lines; the `.` on
# either side of `__main__` matches a single or a double quote.
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "raise NotImplementedError",
    "if TYPE_CHECKING:",
    "if __name__ == .__main__.:",
]