# Build configuration (PEP 517/518): sdists and wheels are produced by
# hatchling. The backend version is left unpinned.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
# Distribution name on the package index; the importable package is `headroom`.
name = "headroom-ai"
version = "0.22.0"
description = "The Context Optimization Layer for LLM Applications - Cut costs by 50-90%"
readme = "README.md"
# SPDX license expression (string form).
license = "Apache-2.0"
# Minimum supported interpreter. Matches ruff `target-version` and mypy
# `python_version` in the tool sections of this file; change them together.
requires-python = ">=3.10"
authors = [
    { name = "Headroom Contributors" }
]
maintainers = [
    { name = "Headroom Contributors" }
]
# Search keywords shown on the package index.
keywords = [
    "llm",
    "openai",
    "anthropic",
    "claude",
    "gpt",
    "context",
    "token",
    "optimization",
    "compression",
    "caching",
    "proxy",
    "ai",
    "machine-learning",
]
# Trove classifiers. No "License ::" classifier is listed on purpose: the
# license is declared through the SPDX `license` expression, and PEP 639
# deprecates license classifiers when that field is present (build tools are
# allowed to reject the combination).
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Typing :: Typed",
]
# Runtime dependencies installed with a plain `pip install headroom-ai`
# (no extras).
dependencies = [
    # Core: lightweight compression (SmartCrusher, ContentRouter, CCR, TOIN)
    "tiktoken>=0.5.0",            # Tokenizer for all compressors
    "pydantic>=2.0.0",            # Config and data models
    # Exact pin rather than a range. The same pin is repeated in the `dev`
    # extra; change both together.
    "litellm==1.82.3",            # Model registry, pricing, and provider support
    "click>=8.1.0",               # CLI framework
    "rich>=13.0.0",               # Rich terminal output
    "opentelemetry-api>=1.24.0",  # Safe no-op OTEL API for instrumentation
    "ast-grep-cli>=0.30.0",       # AST-aware code slicing (CodeCompressor); binary wheel
    # Environment marker: only installed on Python 3.10, where the standard
    # library has no `tomllib`.
    "tomli>=2.0.0; python_version < '3.11'",  # tomllib backport for helper scripts
]

[project.optional-dependencies]
# Proxy server (most common install: pip install headroom-ai[proxy])
proxy = [
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0",
    "httpx[http2]>=0.24.0",
    # Higher floor than the openai>=1.0.0 used by the evals, benchmark, and
    # dev extras.
    "openai>=2.14.0",             # OpenAI API format support
    "anthropic>=0.18.0",          # Anthropic official SDK for count_tokens API
    "mcp>=1.0.0",                 # MCP server (headroom_compress, retrieve, stats)
    "magika>=0.6.0",              # ML content detection for ContentRouter
    "zstandard>=0.20.0",          # Decompress zstd request bodies (Codex, etc.)
    "websockets>=13.0",           # WebSocket proxy for /v1/responses (Codex gpt-5.4+)
    # The [gpu] extra adds onnxruntime-gpu, which installs to the same paths
    # as this package.
    "onnxruntime>=1.16.0",        # Kompress ONNX INT8 text compression (no torch needed)
    "transformers>=4.30.0",       # Tokenizer only (for Kompress)
    "watchdog>=4.0.0",            # File watcher for live code graph reindexing (--code-graph)
    "sqlite-vec>=0.1.6",          # Vector index for memory (--memory). Lightweight, no torch.
    "redis>=5.0.0",               # Shared stats aggregation for multi-worker proxy (TCP)
    "upstash-redis>=1.0.0",       # Upstash Redis (HTTP REST) for multi-worker proxy on HF Spaces
]
# GPU acceleration for ONNX models.
# Requires: CUDA toolkit + cuDNN on host. Auto-detects GPU, falls back to CPU.
# Note: onnxruntime-gpu conflicts with onnxruntime; both install to the same
# paths, so only one of them should be present in an environment.
# Extras are additive: `pip install headroom-ai[proxy,gpu]` installs BOTH
# onnxruntime (pulled in by [proxy], and also by [voice]) and onnxruntime-gpu;
# it does not swap one for the other.
# NOTE(review): document the supported install flow for GPU + proxy (for
# example, uninstalling onnxruntime afterwards) — confirm with maintainers.
gpu = [
    "onnxruntime-gpu>=1.16.0",
]
# AST-based code compression (tree-sitter)
code = [
    "tree-sitter-language-pack>=0.10.0",
]
# ML-based compression with Kompress (ModernBERT)
# Adds torch; the [proxy] extra instead ships onnxruntime for the torch-free
# Kompress ONNX path.
ml = [
    "torch>=2.0.0",
    "transformers>=4.30.0",
]
# Legacy ML compression (LLMLingua-2 — use [ml] instead for Kompress)
llmlingua = [
    "llmlingua>=0.2.0",
    "torch>=2.0.0",
    "transformers>=4.30.0",
]
# Memory system (hierarchical memory with vector search)
memory = [
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0",
]
# Qdrant + Neo4j memory backend helpers
memory-stack = [
    "mem0ai>=0.1.100",
    "qdrant-client>=1.9.0",
    "neo4j>=5.20.0",
]
# Semantic relevance scoring with embeddings.
# Uses `fastembed` (BAAI/bge-small-en-v1.5 by default — 33M params,
# 384 dims, ~30 MB int8-quantized ONNX). Same library + model used by
# the Rust SmartCrusher (`fastembed` crate), giving byte-equal embeddings
# across the language boundary. Replaced sentence-transformers in
# Stage 3c.1 — fastembed is faster (~2-3x), smaller (no torch
# dependency), and outranks all-MiniLM-L6-v2 on MTEB by ~6 points.
relevance = [
    "fastembed>=0.4.0",
    "numpy>=1.24.0",
]
# Image compression (ML-based routing + OCR)
image = [
    "pillow>=10.0.0",
    "sentencepiece>=0.1.99",  # Required by SigLIP tokenizer (SiglipTokenizer)
    "rapidocr-onnxruntime>=1.4.0",  # ONNX-native OCR for text extraction from images (~15MB models)
]
# Report generation
reports = [
    "jinja2>=3.0.0",
]
# OpenTelemetry metrics export
# The core dependency list only carries opentelemetry-api (a no-op API); this
# extra adds the SDK and the OTLP/HTTP exporter.
otel = [
    "opentelemetry-sdk>=1.24.0",
    "opentelemetry-exporter-otlp-proto-http>=1.24.0",
]
# any-llm multi-provider backend (requires Python 3.11+)
# On Python 3.10 the environment marker below evaluates to false, so this
# extra installs nothing there.
anyllm = [
    "any-llm-sdk>=1.0.0; python_version >= '3.11'",
]
# LangChain integration
langchain = [
    "langchain-core>=0.2.0",
    "langchain-openai>=0.1.0",
]
# Agno agent framework integration
agno = [
    "agno>=1.0.0",
]
# AWS Strands Agents SDK integration
strands = [
    "strands-agents>=0.1.0",
]
# MCP server for Claude Code integration
# Standalone subset: the [proxy] extra already includes mcp and httpx[http2].
mcp = [
    "mcp>=1.0.0",
    "httpx>=0.24.0",
]
# Voice filler detection
voice = [
    "onnxruntime>=1.16.0",
    "transformers>=4.30.0",
    "torch>=2.0.0",
]
# Voice training (includes voice deps + training extras)
# The self-reference below pulls in everything from the [voice] extra.
voice-train = [
    "headroom-ai[voice]",
    "datasets>=2.14.0",
    "accelerate>=0.20.0",
]
# Evaluation framework
# openai floor here is lower than in [proxy] (>=2.14.0); when both extras are
# installed together the stricter bound applies.
evals = [
    "datasets>=2.14.0",
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0",
    "scikit-learn>=1.3.0",
    "anthropic>=0.18.0",
    "openai>=1.0.0",
]
# AWS Bedrock backend
bedrock = [
    "boto3>=1.28.0",
]
# HTML content extraction
html = [
    "trafilatura>=1.6.0",
]
# Comprehensive LLM benchmarks
benchmark = [
    "lm-eval>=0.4.0",
    "openai>=1.0.0",
    "anthropic>=0.18.0",
]
# Development dependencies
# Besides test/lint tooling, this list repeats runtime packages that also
# appear in the proxy, otel, and memory extras — presumably so the test suite
# runs from a single `[dev]` install; TODO confirm.
dev = [
    "pytest>=7.0.0",
    "pytest-cov>=4.0.0",
    "pytest-asyncio>=0.21.0",
    "ruff>=0.1.0",
    "mypy>=1.0.0",
    "pre-commit>=3.0.0",
    # NOTE(review): [proxy] requires openai>=2.14.0; this lower floor looks
    # like drift — confirm and align if the proxy tests need the newer SDK.
    "openai>=1.0.0",
    "anthropic>=0.18.0",
    # Same exact pin as the core dependency list; keep the two in sync.
    "litellm==1.82.3",
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0",
    "httpx[http2]>=0.24.0",
    "websockets>=13.0",
    "opentelemetry-sdk>=1.24.0",
    "opentelemetry-exporter-otlp-proto-http>=1.24.0",
    "ollama>=0.4.0",
    "langchain-ollama>=0.2.0",
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0",
]
# Convenience bundle of the main optional features (not literally every extra).
# Left out: gpu, llmlingua, memory-stack, anyllm, langchain, agno, strands,
# voice-train, bedrock, dev — request those explicitly when needed.
all = [
    "headroom-ai[proxy,code,ml,memory,relevance,image,reports,otel,evals,voice,html,benchmark,mcp]",
]

# Console entry point: installs a `headroom` command that calls `main` in the
# `headroom.cli` module.
[project.scripts]
headroom = "headroom.cli:main"

# Project links published with the package metadata. All of them point at the
# same GitHub repository.
[project.urls]
Homepage = "https://github.com/chopratejas/headroom"
Documentation = "https://github.com/chopratejas/headroom#readme"
Repository = "https://github.com/chopratejas/headroom"
Issues = "https://github.com/chopratejas/headroom/issues"
Changelog = "https://github.com/chopratejas/headroom/blob/main/CHANGELOG.md"

[tool.hatch.build.targets.wheel]
# Ship the `headroom` package directory in the wheel.
packages = ["headroom"]
# Include non-Python files (dashboard templates, etc.)
# NOTE(review): in Hatch, `artifacts` force-includes matching files even when
# they are ignored by VCS; files tracked inside `packages` are presumably
# shipped already — confirm whether this entry is still required.
artifacts = [
    "headroom/dashboard/templates/*.html",
]

# Source distribution contents: the package, its tests, and the top-level
# license, notice, readme, and changelog files. The leading "/" anchors each
# pattern at the project root.
[tool.hatch.build.targets.sdist]
include = [
    "/headroom",
    "/tests",
    "/LICENSE",
    "/NOTICE",
    "/README.md",
    "/CHANGELOG.md",
]

# Ruff linter/formatter settings.
[tool.ruff]
# Matches `requires-python = ">=3.10"` in [project].
target-version = "py310"
# E501 is ignored in the lint section, so this width is applied by the
# formatter rather than reported as a lint error.
line-length = 100

[tool.ruff.lint]
# Enabled rule families.
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]
# Individual rules switched off within the selected families.
ignore = [
    "E501",  # line too long (handled by formatter)
    "B008",  # do not perform function calls in argument defaults
    "B905",  # zip without strict parameter
]

[tool.ruff.lint.isort]
# Treat `headroom` imports as first-party when sorting import blocks.
known-first-party = ["headroom"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"

# Global mypy settings; the override tables that follow relax them for
# specific modules.
[tool.mypy]
# Matches `requires-python = ">=3.10"` in [project].
python_version = "3.10"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
# Applies to every import, so missing third-party stubs are not reported.
ignore_missing_imports = true

# Per-module overrides for modules with dynamic typing patterns
[[tool.mypy.overrides]]
module = [
    "headroom.proxy.server",
    "headroom.proxy.cost",
    "headroom.proxy.prometheus_metrics",
    "headroom.proxy.semantic_cache",
    "headroom.proxy.rate_limiter",
    "headroom.proxy.request_logger",
    "headroom.proxy.helpers",
    "headroom.integrations.langchain",
    "headroom.integrations.mcp",
    "headroom.ccr.mcp_server",
    "headroom.relevance.embedding",
    "headroom.reporting.generator",
]
disallow_untyped_defs = false

# Same relaxation as above, and additionally allows returning `Any`.
[[tool.mypy.overrides]]
module = [
    "headroom.tokenizers.*",
    "headroom.providers.litellm",
    "headroom.providers.google",
]
disallow_untyped_defs = false
warn_return_any = false

# Handler mixins use self.* from HeadroomProxy via duck typing — mypy can't resolve these
# NOTE(review): `ignore_errors = true` suppresses all non-fatal errors for
# these modules, so `disallow_untyped_defs = false` is presumably redundant
# here — confirm before removing.
[[tool.mypy.overrides]]
module = ["headroom.proxy.handlers.*"]
disallow_untyped_defs = false
ignore_errors = true

# Ignore third-party stubs with syntax errors
[[tool.mypy.overrides]]
module = ["mlx.*"]
ignore_errors = true

# pytest configuration: discovery rules, default flags, and custom markers.
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_functions = ["test_*"]
# Verbose output with shortened tracebacks on every run.
addopts = "-v --tb=short"
# Option provided by pytest-asyncio (listed in the `dev` extra): async tests
# run without needing an explicit asyncio marker.
asyncio_mode = "auto"
# Marker registration only. The "skipped unless explicitly enabled" behavior
# is not configured in this file — presumably implemented in conftest.py;
# TODO confirm.
markers = [
    "slow: slow tests (model loads, large fixtures)",
    "real_llm: tests that hit real LLM APIs; skipped unless explicitly enabled",
    "live_redis: tests that hit a live Redis instance; skipped unless explicitly enabled",
]

# coverage.py measurement settings.
[tool.coverage.run]
source = ["headroom"]
# Measure branch coverage in addition to line coverage.
branch = true
# Files left out of measurement: the CLI module and anything under a tests
# directory.
omit = [
    "headroom/cli.py",
    "*/tests/*",
]

[tool.coverage.report]
# Each entry is a regular expression; matching lines are excluded from the
# report. Setting this option replaces coverage.py's default list, which is
# why "pragma: no cover" is restated here.
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "raise NotImplementedError",
    "if TYPE_CHECKING:",
    # The "." wildcards stand in for the quote characters, so both
    # '__main__' and "__main__" spellings match.
    "if __name__ == .__main__.:",
]