"""Demonstrate Headroom compression on LangChain tool outputs. This script shows EXACTLY what Headroom does to large tool outputs: - Before: Full 100-item JSON array - After: Compressed to ~20 relevant items No API key required - runs locally. Run: python -m examples.langchain_demo.show_compression """ import json import sys try: import tiktoken except ImportError: print("ERROR: tiktoken required. Run: uv pip install tiktoken") sys.exit(1) from headroom.providers import OpenAIProvider from headroom.transforms import SmartCrusher from .mock_tools import TOOL_FUNCTIONS ENCODER = tiktoken.get_encoding("cl100k_base") def count_tokens(text: str) -> int: """Count tokens.""" return len(ENCODER.encode(text)) def demonstrate_compression(tool_name: str, tool_arg: str, context: str): """Show before/after compression for a tool output.""" print(f"\n{'=' * 70}") print(f"TOOL: {tool_name}({tool_arg!r})") print(f"CONTEXT: {context!r}") print(f"{'=' * 70}") # Generate tool output raw_output = TOOL_FUNCTIONS[tool_name](tool_arg) raw_tokens = count_tokens(raw_output) # Parse to count items data = json.loads(raw_output) if "results" in data: item_count = len(data["results"]) elif "entries" in data: item_count = len(data["entries"]) elif "metrics" in data: item_count = len(data["metrics"]) elif "data" in data: item_count = len(data["data"]) else: item_count = "?" print("\n--- BEFORE COMPRESSION ---") print(f"Items: {item_count}") print(f"Tokens: {raw_tokens:,}") print(f"Chars: {len(raw_output):,}") print("\nFirst 500 chars:") print(raw_output[:500] + "...") # Create SmartCrusher with context from headroom.config import SmartCrusherConfig smart_config = SmartCrusherConfig( enabled=True, min_tokens_to_crush=200, max_items_after_crush=20, ) provider = OpenAIProvider() tokenizer = provider.get_token_counter("gpt-4o") crusher = SmartCrusher(config=smart_config) # Build messages with tool output (simulating agent conversation) messages = [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": context}, { "role": "assistant", "content": None, "tool_calls": [ { "id": "call_1", "function": { "name": tool_name, "arguments": json.dumps({tool_name.split("_")[-1]: tool_arg}), }, } ], }, {"role": "tool", "content": raw_output, "tool_call_id": "call_1"}, ] # Apply SmartCrusher (tokenizer is passed to apply()) result = crusher.apply(messages, tokenizer=tokenizer) compressed_messages = result.messages # Get compressed output compressed_output = compressed_messages[-1]["content"] compressed_tokens = count_tokens(compressed_output) # Parse compressed to count items try: compressed_data = json.loads(compressed_output) if "results" in compressed_data: compressed_items = len(compressed_data["results"]) elif "entries" in compressed_data: compressed_items = len(compressed_data["entries"]) elif "metrics" in compressed_data: compressed_items = len(compressed_data["metrics"]) elif "data" in compressed_data: compressed_items = len(compressed_data["data"]) else: compressed_items = "?" except json.JSONDecodeError: compressed_items = "N/A" print("\n--- AFTER COMPRESSION ---") print(f"Items: {compressed_items}") print(f"Tokens: {compressed_tokens:,}") print(f"Chars: {len(compressed_output):,}") print("\nFirst 500 chars:") print(compressed_output[:500] + "...") # Calculate savings tokens_saved = raw_tokens - compressed_tokens pct_saved = (tokens_saved / raw_tokens * 100) if raw_tokens > 0 else 0 print("\n--- SAVINGS ---") print(f"Tokens saved: {tokens_saved:,} ({pct_saved:.1f}%)") print(f"Items reduced: {item_count} -> {compressed_items}") return { "tool": tool_name, "before_tokens": raw_tokens, "after_tokens": compressed_tokens, "saved_tokens": tokens_saved, "saved_pct": pct_saved, } def main(): """Run compression demonstrations.""" print("\n" + "=" * 70) print("HEADROOM SMARTCRUSHER: BEFORE/AFTER COMPRESSION") print("=" * 70) print(""" This demonstrates how Headroom's SmartCrusher compresses large tool outputs. Key techniques: 1. Pattern detection (logs, time-series, search results) 2. Keep first/last items for context 3. Keep ERROR/anomaly items (important!) 4. Keep items matching the user's query (relevance scoring) 5. Statistical sampling for remaining slots """) results = [] # Demo 1: User database search results.append( demonstrate_compression( tool_name="search_users", tool_arg="Engineering users", context="Find all users in the Engineering department who are currently active", ) ) # Demo 2: Log search with errors results.append( demonstrate_compression( tool_name="search_logs", tool_arg="payment-service", context="Check the payment-service logs for any ERROR entries", ) ) # Demo 3: Metrics with anomalies results.append( demonstrate_compression( tool_name="get_metrics", tool_arg="api-gateway", context="Look for any CPU spikes or high error rates in the api-gateway metrics", ) ) # Demo 4: Documentation search results.append( demonstrate_compression( tool_name="search_docs", tool_arg="authentication", context="Find documentation about authentication troubleshooting", ) ) # Demo 5: API data results.append( demonstrate_compression( tool_name="fetch_api_data", tool_arg="orders", context="Get recent orders with status 'pending'", ) ) # Summary print("\n" + "=" * 70) print("SUMMARY: TOKEN SAVINGS ACROSS ALL TOOLS") print("=" * 70) print(f"\n{'Tool':<20} {'Before':>12} {'After':>12} {'Saved':>12} {'%':>8}") print("-" * 66) total_before = 0 total_after = 0 for r in results: print( f"{r['tool']:<20} {r['before_tokens']:>12,} {r['after_tokens']:>12,} {r['saved_tokens']:>12,} {r['saved_pct']:>7.1f}%" ) total_before += r["before_tokens"] total_after += r["after_tokens"] total_saved = total_before - total_after total_pct = (total_saved / total_before * 100) if total_before > 0 else 0 print("-" * 66) print( f"{'TOTAL':<20} {total_before:>12,} {total_after:>12,} {total_saved:>12,} {total_pct:>7.1f}%" ) # Cost savings input_cost_per_1m = 2.50 # gpt-4o pricing cost_before = total_before * input_cost_per_1m / 1_000_000 cost_after = total_after * input_cost_per_1m / 1_000_000 cost_saved = cost_before - cost_after print("\n--- COST IMPACT (at gpt-4o $2.50/1M input tokens) ---") print(f"Before: ${cost_before:.4f}") print(f"After: ${cost_after:.4f}") print(f"Saved: ${cost_saved:.4f} per request") print( f"\nAt 1000 requests/day: ${cost_saved * 1000:.2f}/day = ${cost_saved * 1000 * 30:.2f}/month" ) if __name__ == "__main__": main()