"""Mock tools that generate realistic large outputs.

These simulate real-world API responses that benefit from Headroom compression:
- Database queries returning many rows
- Search APIs returning many results
- Log analysis tools returning many entries
- Monitoring tools returning many metrics
"""

import json
import random
from datetime import datetime, timedelta


def generate_user_database_results(query: str, count: int = 100) -> str:
    """Simulate a database query returning user records.

    Real-world scenario: Agent searches for users matching criteria,
    database returns 100+ records but only a few are actually relevant.
    """
    users = []
    departments = ["Engineering", "Sales", "Marketing", "Support", "HR", "Finance"]
    statuses = ["active", "inactive", "pending", "suspended"]

    for i in range(count):
        user = {
            "id": f"usr_{random.randint(100000, 999999)}",
            "email": f"user{i}@example.com",
            "name": f"User {i} {'Smith' if i % 3 == 0 else 'Johnson' if i % 3 == 1 else 'Williams'}",
            "department": random.choice(departments),
            "status": random.choice(statuses),
            "created_at": (datetime.now() - timedelta(days=random.randint(1, 365))).isoformat(),
            "last_login": (datetime.now() - timedelta(hours=random.randint(1, 720))).isoformat(),
            "role": random.choice(["admin", "user", "viewer", "editor"]),
            "metadata": {
                "preferences": {
                    "theme": random.choice(["dark", "light"]),
                    "notifications": random.choice([True, False]),
                    "timezone": random.choice(["UTC", "PST", "EST", "CST"]),
                },
                "tags": random.sample(
                    ["premium", "verified", "beta", "enterprise"], k=random.randint(0, 3)
                ),
                "login_count": random.randint(1, 500),
            },
        }
        users.append(user)

    return json.dumps({"results": users, "total": count, "query": query}, indent=2)


def generate_search_results(query: str, count: int = 50) -> str:
    """Simulate a search API returning many results.

    Real-world scenario: Agent searches documentation/knowledge base,
    returns many results ranked by relevance.
    """
    results = []
    categories = ["documentation", "tutorial", "api-reference", "faq", "blog", "changelog"]

    for i in range(count):
        result = {
            "id": f"doc_{random.randint(10000, 99999)}",
            "title": f"Document {i}: {query.title()} Guide",
            "snippet": f"This document covers {query}. " * random.randint(2, 5)
            + f"Learn more about implementing {query} in your application...",
            "url": f"https://docs.example.com/{query.replace(' ', '-')}/{i}",
            "category": random.choice(categories),
            "relevance_score": round(random.uniform(0.5, 1.0), 3),
            "last_updated": (datetime.now() - timedelta(days=random.randint(1, 180))).isoformat(),
            "author": f"Author {random.randint(1, 20)}",
            "views": random.randint(100, 10000),
            "helpful_votes": random.randint(0, 500),
        }
        results.append(result)

    # Sort by relevance
    results.sort(key=lambda x: x["relevance_score"], reverse=True)

    return json.dumps({"results": results, "total": count, "query": query}, indent=2)


def generate_log_entries(service: str, count: int = 200) -> str:
    """Simulate a log analysis tool returning many entries.

    Real-world scenario: Agent investigates an issue by searching logs,
    returns many entries but only a few show the actual error.
    """
    entries = []
    levels = ["DEBUG", "INFO", "INFO", "INFO", "WARN", "ERROR"]  # Most are INFO

    for _i in range(count):
        timestamp = datetime.now() - timedelta(minutes=random.randint(1, 1440))
        level = random.choice(levels)

        if level == "ERROR":
            message = random.choice(
                [
                    f"Connection refused to {service}-db: timeout after 30s",
                    "Failed to process request: NullPointerException at line 42",
                    "Authentication failed for user: invalid token",
                    "Rate limit exceeded: 429 Too Many Requests",
                ]
            )
        elif level == "WARN":
            message = random.choice(
                [
                    "Slow query detected: took 2.5s",
                    "Memory usage high: 85% of heap",
                    "Retrying request after transient failure",
                ]
            )
        else:
            message = f"Processing request {random.randint(1000, 9999)} for {service}"

        entry = {
            "timestamp": timestamp.isoformat(),
            "level": level,
            "service": service,
            "message": message,
            "trace_id": f"trace_{random.randint(100000, 999999)}",
            "span_id": f"span_{random.randint(1000, 9999)}",
            "host": f"{service}-{random.randint(1, 5)}.prod.internal",
            "metadata": {
                "request_id": f"req_{random.randint(100000, 999999)}",
                "user_agent": "Mozilla/5.0" if random.random() > 0.5 else "API-Client/1.0",
                "duration_ms": random.randint(1, 5000),
            },
        }
        entries.append(entry)

    # Sort by timestamp
    entries.sort(key=lambda x: x["timestamp"], reverse=True)

    return json.dumps({"entries": entries, "total": count, "service": service}, indent=2)


def generate_metrics_data(service: str, count: int = 100) -> str:
    """Simulate a monitoring tool returning time-series metrics.

    Real-world scenario: Agent checks service health metrics,
    returns many data points but only anomalies matter.
    """
    metrics = []
    now = datetime.now()

    for i in range(count):
        timestamp = now - timedelta(minutes=i * 5)

        # Inject some anomalies
        is_anomaly = random.random() < 0.05

        metric = {
            "timestamp": timestamp.isoformat(),
            "service": service,
            "cpu_percent": random.uniform(60, 95) if is_anomaly else random.uniform(20, 40),
            "memory_percent": random.uniform(80, 98) if is_anomaly else random.uniform(40, 60),
            "request_rate": random.randint(800, 2000) if is_anomaly else random.randint(100, 300),
            "error_rate": random.uniform(5, 15) if is_anomaly else random.uniform(0, 1),
            "latency_p50_ms": random.randint(200, 500) if is_anomaly else random.randint(10, 50),
            "latency_p99_ms": random.randint(1000, 3000) if is_anomaly else random.randint(50, 200),
            "active_connections": random.randint(500, 1000)
            if is_anomaly
            else random.randint(50, 150),
        }
        metrics.append(metric)

    return json.dumps({"metrics": metrics, "service": service, "interval": "5m"}, indent=2)


def generate_api_response(endpoint: str, count: int = 75) -> str:
    """Simulate a generic API returning paginated data.

    Real-world scenario: Agent fetches data from an external API,
    receives large paginated response.
    """
    items = []

    for i in range(count):
        item = {
            "id": i + 1,
            "uuid": f"{random.randint(10000000, 99999999)}-{random.randint(1000, 9999)}-{random.randint(1000, 9999)}-{random.randint(1000, 9999)}-{random.randint(100000000000, 999999999999)}",
            "name": f"Item {i}",
            "description": f"This is item {i} from the {endpoint} endpoint. " * 3,
            "status": random.choice(["active", "pending", "completed", "archived"]),
            "priority": random.choice(["low", "medium", "high", "critical"]),
            "created_at": (datetime.now() - timedelta(days=random.randint(1, 90))).isoformat(),
            "updated_at": (datetime.now() - timedelta(hours=random.randint(1, 168))).isoformat(),
            "owner": {
                "id": random.randint(1, 100),
                "name": f"Owner {random.randint(1, 100)}",
                "email": f"owner{random.randint(1, 100)}@example.com",
            },
            "tags": random.sample(
                ["urgent", "review", "approved", "blocked", "in-progress"], k=random.randint(1, 3)
            ),
            "metadata": {
                "source": random.choice(["web", "api", "mobile", "import"]),
                "version": f"v{random.randint(1, 5)}.{random.randint(0, 9)}",
            },
        }
        items.append(item)

    return json.dumps(
        {
            "data": items,
            "pagination": {
                "page": 1,
                "per_page": count,
                "total": count * 10,  # Simulate more pages available
                "total_pages": 10,
            },
            "endpoint": endpoint,
        },
        indent=2,
    )


# Tool definitions for LangChain
TOOL_FUNCTIONS = {
    "search_users": lambda query: generate_user_database_results(query, count=100),
    "search_docs": lambda query: generate_search_results(query, count=50),
    "search_logs": lambda service: generate_log_entries(service, count=200),
    "get_metrics": lambda service: generate_metrics_data(service, count=100),
    "fetch_api_data": lambda endpoint: generate_api_response(endpoint, count=75),
}


if __name__ == "__main__":
    # Test output sizes
    import tiktoken

    enc = tiktoken.get_encoding("cl100k_base")

    print("Tool Output Token Counts:")
    print("=" * 50)

    for name, func in TOOL_FUNCTIONS.items():
        output = func("test")
        tokens = len(enc.encode(output))
        print(f"{name}: {tokens:,} tokens ({len(output):,} chars)")