"""Game generation module using NVIDIA Nemotron 3 Nano 4B via llama.cpp."""

import gc
import json
import uuid
from typing import Optional
from pathlib import Path


# Model initialization cache (exported for reuse by story.py recap).
# generate_game_with_model() stores each loaded llama.cpp instance here under
# a "llama_cpp_<path>" key; unload_nemotron() removes every entry.
_model_cache = {}
# Public alias: bound to the very same dict object as _model_cache.
NEMOTRON_MODEL_CACHE = _model_cache

# Model configuration: Hugging Face repo id and the GGUF file name inside it,
# passed to hf_hub_download() by _resolve_model_path().
NEMOTRON_MODEL_ID = "nvidia/NVIDIA-Nemotron-3-Nano-4B-GGUF"
NEMOTRON_GGUF_FILE = "NVIDIA-Nemotron3-Nano-4B-Q4_K_M.gguf"


# ── Lazy model path resolution ──────────────────────────────────────────
# The model is a 2.84 GB GGUF file.  We do NOT download at import time
# because that would block HF Space startup and cause OCI kill (exit 128).
# Instead the download happens lazily inside the @spaces.GPU-decorated
# function, where we have ample duration.  hf_hub_download caches to
# ~/.cache/huggingface/hub/ so subsequent calls are instant.
#
# _model_path holds the resolved local file path (None until a download
# succeeds).  _model_download_attempted is set on the first
# _resolve_model_path() call, whether it succeeds or fails, so a failed
# download is not retried by later calls.
_model_path: Optional[str] = None
_model_download_attempted = False


def _resolve_model_path() -> Optional[str]:
    """Return the local path of the GGUF model, downloading it on first use.

    The first call fetches the file with ``hf_hub_download`` and remembers
    the resulting path in the module-level ``_model_path``.  Every later
    call returns that remembered value without touching the network; this
    also holds when the first attempt failed, in which case ``None`` keeps
    being returned.

    Intended to be invoked lazily (inside ``@spaces.GPU``) so the 2.84 GB
    download does *not* block container startup.

    Returns:
        Path to the GGUF file as reported by ``hf_hub_download``, or
        ``None`` when the download attempt failed.
    """
    global _model_path, _model_download_attempted

    if not _model_download_attempted:
        # Latch first: a failure below must not trigger another attempt.
        _model_download_attempted = True
        try:
            # Opt in to hf_transfer for faster downloads unless the
            # environment already set the variable.
            import os
            os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

            from huggingface_hub import hf_hub_download

            print(f"[model] Downloading {NEMOTRON_MODEL_ID}/{NEMOTRON_GGUF_FILE} …")
            local_file = hf_hub_download(
                repo_id=NEMOTRON_MODEL_ID,
                filename=NEMOTRON_GGUF_FILE,
            )
            _model_path = local_file
            print(f"[model] Downloaded → {local_file}")
        except Exception as e:
            # Best effort: report and leave _model_path untouched.
            print(f"[model] Download failed: {type(e).__name__}: {e}")

    return _model_path


# NOTE: _resolve_model_path() is NOT called here — see docstring above.


# ── GPU detection for llama.cpp ──────────────────────────────────────────

def _get_n_gpu_layers() -> int:
    """Pick the ``n_gpu_layers`` value to hand to llama.cpp.

    Returns:
        -1 (offload every layer) when ``torch`` imports and reports CUDA as
        available; 0 (CPU only) when CUDA is unavailable or ``torch`` is
        not installed.
    """
    all_layers_on_gpu = -1
    cpu_only = 0
    try:
        import torch
        cuda_ready = torch.cuda.is_available()
    except ImportError:
        # torch is optional; without it we cannot probe for a GPU.
        cuda_ready = False
    return all_layers_on_gpu if cuda_ready else cpu_only


def unload_nemotron() -> None:
    """Unload the Nemotron llama.cpp model to free GPU memory.

    Once game generation is complete, the 2.84 GB GGUF model no longer
    needs to sit in VRAM.  Calling this frees ~3 GB so that other models
    (FLUX poster, Cohere ASR) can load on the same GPU.

    The cache dict is emptied in place, so ``NEMOTRON_MODEL_CACHE`` (an
    alias of the same object) is emptied as well.

    Safe to call outside ``@spaces.GPU`` context: the CUDA cleanup is
    best-effort and any error raised by it is ignored.
    """
    evicted = len(_model_cache)
    _model_cache.clear()
    if evicted:
        print(f"[nemotron] Cleared {evicted} cached model(s) from memory")

    # Collect right away so the dropped model objects are finalised now.
    gc.collect()

    # CUDA cleanup is optional — torch.cuda.is_available() can crash outside
    # @spaces.GPU on ZeroGPU, hence the broad, silent except.
    try:
        import torch
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
            free_bytes, total_bytes = torch.cuda.mem_get_info()
            print(f"[nemotron] GPU memory freed — {free_bytes / 1e9:.1f} GB / {total_bytes / 1e9:.1f} GB available")
    except Exception:
        pass  # CUDA not available or outside GPU context — skip silently


# ── Prompt building ──────────────────────────────────────────────────────

def _example_field(mapping: dict, key: str, default):
    """Return ``mapping[key]``, using *default* when the key is missing or ``None``."""
    value = mapping.get(key)
    return default if value is None else value


def _format_hiding_zones(ex: dict) -> list[str]:
    """Format the hide-and-seek details (zones, play area, seeker strategy) of one example."""
    lines = ["- Hiding Zones:\n"]
    for zone in ex['hiding_zones_summary'][:2]:
        lines.append(
            f"  • {zone.get('zone_id')}: {_example_field(zone, 'description', '')[:80]} "
            f"[concealment: {zone.get('concealment_rating')}]\n"
        )
    play_area = _example_field(ex, 'play_area_summary', {})
    if play_area.get('boundary_description'):
        lines.append(f"- Play Area: {play_area['boundary_description'][:100]}...\n")
    lines.append(f"- Boundary Size: {_example_field(play_area, 'boundary_size_tier', 'medium')}\n")
    if ex.get('seeker_strategy'):
        lines.append(f"- Seeker Strategy: {ex['seeker_strategy'][:120]}...\n")
    return lines


def _format_tag_arena(ex: dict) -> list[str]:
    """Format the tag details (arena, variant, safe zones, movement features) of one example."""
    lines = []
    arena = ex['arena_summary']
    if arena.get('boundary_description'):
        lines.append(f"- Arena: {arena['boundary_description'][:100]}...\n")
    lines.append(f"- Arena Size: {_example_field(arena, 'arena_size_tier', 'medium')}\n")
    lines.append(
        f"- Variant: {_example_field(ex, 'tag_variant', 'classic')} · "
        f"'It' Players: {_example_field(ex, 'it_count', 1)} · "
        f"Rounds: {_example_field(ex, 'round_count', 1)}\n"
    )
    safe_zones = _example_field(ex, 'safe_zones_summary', [])
    if safe_zones:
        lines.append("- Safe Zones:\n")
        for zone in safe_zones[:2]:
            lines.append(
                f"  • {zone.get('zone_id')}: {_example_field(zone, 'description', '')[:80]}\n"
            )
    chokepoints = _example_field(ex, 'chokepoints', [])
    if chokepoints:
        lines.append(f"- Chokepoints: {'; '.join(chokepoints[:2])}\n")
    open_zones = _example_field(ex, 'open_zones', [])
    if open_zones:
        lines.append(f"- Open Zones: {'; '.join(open_zones[:2])}\n")
    if ex.get('tag_mechanic'):
        lines.append(f"- Tag Mechanic: {ex['tag_mechanic'][:100]}...\n")
    return lines


def _format_task_patterns(ex: dict) -> list[str]:
    """Format the task patterns (and scavenger-hunt notes) of one example."""
    lines = []
    task_patterns = _example_field(ex, 'task_patterns', [])
    if task_patterns:
        lines.append("- Tasks:\n")
        for task in task_patterns[:2]:
            pts = _example_field(task, 'points', '?')
            tl = _example_field(task, 'time_limit', '?')
            diff = _example_field(task, 'difficulty', '')
            tags = _example_field(task, 'landscape_tags_used', [])
            line = (
                f"  • {task.get('task_id')}: {_example_field(task, 'title', '')} "
                f"({pts} pts, {tl} min, {diff})"
            )
            if tags:
                line += f" [{', '.join(tags)}]"
            lines.append(line + "\n")
    if ex.get('dataset_source') == 'scavenger_hunt':
        lines.append(f"- Notes: {_example_field(ex, 'notes', '')[:80]}\n")
    return lines


def _format_retrieved_examples(game_type: str, retrieved_examples: list[dict]) -> str:
    """Render at most three retrieved examples as the prompt's grounding section."""
    if not retrieved_examples:
        return ""

    parts = ["\n## Retrieved Similar Examples:\n"]
    for i, ex in enumerate(retrieved_examples[:3], 1):
        parts.append(f"\n### Example {i}: {ex.get('id')}\n")
        parts.append(f"- Type: {ex.get('game_type')}\n")
        parts.append(f"- City: {ex.get('city')} · Area: {ex.get('area')}\n")
        parts.append(
            f"- Difficulty: {ex.get('difficulty')} · Age: {ex.get('age_group')} "
            f"· Duration: {ex.get('duration_minutes')} min\n"
        )
        parts.append(
            f"- Theme: {_example_field(ex, 'theme', 'general')} "
            f"· Mobility: {_example_field(ex, 'mobility', 'standard')}\n"
        )
        parts.append(
            f"- Landscape Tags: {', '.join(_example_field(ex, 'landscape_tags', []))}\n"
        )

        rules = _example_field(ex, 'rules_summary', [])
        if rules:
            parts.append(f"- Rules: {', '.join(rules[:2])}\n")

        # The detail section follows the *requested* game type, and only
        # when the example actually carries the matching summary data.
        if game_type == 'hide_and_seek' and ex.get('hiding_zones_summary'):
            parts.extend(_format_hiding_zones(ex))
        elif game_type == 'tag' and ex.get('arena_summary'):
            parts.extend(_format_tag_arena(ex))
        else:
            parts.extend(_format_task_patterns(ex))

    return "".join(parts)


def build_generation_prompt(config: dict, retrieved_examples: list[dict]) -> str:
    """Build the game generation prompt with context and examples.

    The example section adapts to the requested game type:
    - hide_and_seek: hiding zones, concealment ratings, play area and
      seeker strategy (when the example has ``hiding_zones_summary``)
    - tag: arena, variant, safe zones, chokepoints, open zones and tag
      mechanic (when the example has ``arena_summary``)
    - anything else, or an example lacking the matching summary: task
      patterns with points, time limit, difficulty and landscape tags

    Example fields that are missing *or* ``None`` fall back to the same
    defaults, so examples carrying null values do not break formatting.

    The template is read from ``app/prompts/game_generation.txt`` and the
    schema from ``app/schemas/game_schema.json``, both relative to the
    current working directory; a minimal built-in template and an empty
    schema string are used when the respective file does not exist.

    Args:
        config: Game configuration from user
        retrieved_examples: Retrieved similar games for grounding (only the
            first three are used)

    Returns:
        Formatted prompt string

    Raises:
        KeyError: If the template file references a placeholder that is not
            supplied here (raised by ``str.format``).
    """
    game_type = config.get('game_type', 'scavenger_hunt')

    # ── Format retrieved examples (game-type-aware) ──────────────────
    examples_str = _format_retrieved_examples(game_type, retrieved_examples)

    # ── Live city context via Wikipedia ─────────────────────────────────
    city = config.get('city', 'Paris')
    city_context_str = ""
    try:
        from app.services.city_context import build_city_section
        city_context_str = build_city_section(city)
    except Exception as e:
        # Best effort: the prompt is still usable without city context.
        print(f"[prompt] Wikipedia city context unavailable: {e}")

    # ── Load prompt template ──────────────────────────────────────────
    template_path = Path("app/prompts/game_generation.txt")
    if template_path.exists():
        template = template_path.read_text(encoding='utf-8')
    else:
        template = "Generate a location-based game in strict JSON format.\n{output_schema}"

    # ── Load output schema ───────────────────────────────────────────
    schema_path = Path("app/schemas/game_schema.json")
    schema_str = ""
    if schema_path.exists():
        # Round-trip through json to normalise the schema's indentation.
        schema_str = json.dumps(json.loads(schema_path.read_text(encoding='utf-8')), indent=2)

    # ── Build prompt ─────────────────────────────────────────────────
    return template.format(
        city=city,
        area=config.get('area', 'downtown'),
        game_type=game_type,
        duration_minutes=config.get('duration_minutes', 45),
        num_players=config.get('num_players', 4),
        difficulty=config.get('difficulty', 'medium'),
        age_group=config.get('age_group', 'adults'),
        location_type=config.get('location_type', 'mixed'),
        retrieved_examples=examples_str,
        city_context=city_context_str,
        output_schema=schema_str,
    )


# ── JSON extraction from model output ────────────────────────────────────

def extract_json(text: str) -> Optional[str]:
    """Extract the first brace-balanced JSON object from generated text.

    Scanning starts at the first ``{`` and tracks brace depth.  Braces that
    appear inside double-quoted JSON strings (including after escaped
    quotes such as ``\\"``) are ignored, so a value like ``"}"`` does not
    end the object early.  The returned text is not validated as JSON;
    callers are expected to parse it.

    Args:
        text: Generated text that may contain JSON

    Returns:
        The object's source text, or ``None`` if there is no ``{`` or the
        braces never balance (e.g. truncated output).
    """
    start_idx = text.find('{')
    if start_idx == -1:
        return None

    depth = 0
    in_string = False   # currently inside a double-quoted string literal
    escaped = False     # previous character was a backslash inside a string
    for i, ch in enumerate(text[start_idx:], start=start_idx):
        if in_string:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
            continue

        if ch == '"':
            in_string = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                raw = text[start_idx:i + 1]
                # Normalize double braces from prompt echoing ({{ -> {)
                if raw.startswith('{{') and raw.endswith('}}'):
                    raw = raw[1:-1]
                return raw

    return None


# ── Model-based generation with llama.cpp ───────────────────────────────

def generate_game_with_model(
    prompt: str,
    model_path: Optional[str] = None,
    model_name: str = "nemotron",
) -> Optional[str]:
    """Run the prompt through Nemotron (llama.cpp) and return the JSON text.

    The ``Llama`` instance is created on first use and kept in
    ``_model_cache`` under a key derived from ``model_path``; later calls
    with the same path reuse it.

    Important — HF Spaces Zero GPU pattern:
    * The 2.84 GB GGUF file is lazily downloaded inside ``@spaces.GPU``
      (if not already cached on disk from a previous run).
    * ``Llama(model_path=...)`` initialisation happens here — inside the GPU
      context where CUDA is available.

    Args:
        prompt: Generation prompt, sent as the user message.
        model_path: Path to a local GGUF file (optional — resolved through
            ``_resolve_model_path()`` if omitted).
        model_name: Model identifier (unused, kept for API compat).

    Returns:
        The JSON object text extracted from the model output, or ``None``
        when llama-cpp-python is missing, no model path is available,
        generation raises, or no JSON object is found in the output.
    """
    try:
        from llama_cpp import Llama

        cache_key = f"llama_cpp_{model_path or 'module_default'}"
        if cache_key not in _model_cache:
            gguf_path = model_path or _resolve_model_path()
            if not gguf_path:
                print("[nemotron] No model path available — fall back to mock")
                return None

            gpu_layers = _get_n_gpu_layers()
            device_label = "CPU" if gpu_layers >= 0 else "GPU"
            print(f"[nemotron] Initialising llama.cpp from: {gguf_path} ({device_label})")
            # Cached only after construction succeeds.
            _model_cache[cache_key] = Llama(
                model_path=gguf_path,
                verbose=False,
                n_gpu_layers=gpu_layers,
                n_ctx=8192,
            )
        llm = _model_cache[cache_key]

        # Use create_chat_completion — this model uses a Nemotron chat template
        completion = llm.create_chat_completion(
            messages=[
                {"role": "system", "content": "You output only valid JSON. No other text."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=8192,
            temperature=0.3,
            top_p=0.9,
            stop=["```"],
        )

        generated_text = completion["choices"][0]["message"]["content"].strip()
        print(f"[nemotron] Generated {len(generated_text)} chars")

        json_str = extract_json(generated_text)
        if not json_str:
            print(f"[nemotron] JSON extraction failed on output (len={len(generated_text)})")
            print(f"[nemotron] Preview: {generated_text[:300]}...")
        return json_str

    except ImportError:
        print("[nemotron] llama-cpp-python not available. Install with: pip install llama-cpp-python")
        return None
    except Exception as e:
        # Any load or inference failure means "no model output"; the caller
        # falls back to mock generation.
        print(f"[nemotron] llama.cpp generation failed: {type(e).__name__}: {e}")
        return None


# ── Mock generation (fallback) ───────────────────────────────────────────

def generate_game_mock(config: dict, retrieved_examples: list[dict]) -> dict:
    """Generate a deterministic mock game for testing without a model.

    Builds a game dict from ``config`` alone, using fixed task templates.
    Apart from the random ``game_id`` suffix the output depends only on
    ``config``.  Tasks are always exploration tasks, whatever the
    requested ``game_type``.

    The title shows the game type in human-readable form: underscores are
    replaced by spaces before title-casing (``hide_and_seek`` becomes
    ``Hide And Seek``).

    Args:
        config: Game configuration. Keys read: ``game_type``, ``area``,
            ``city``, ``difficulty``, ``duration_minutes``, ``num_players``,
            ``age_group``; each has a default.
        retrieved_examples: Accepted for signature parity with
            ``generate_game``; not used by the mock.

    Returns:
        Game dict with ``game_id``, ``game_type``, ``title``, ``theme``,
        ``setup``, ``rules``, ``tasks``, ``global_hints``, ``score_rules``,
        ``tie_breaker``, ``safety`` and ``story_seed`` keys.
    """
    game_type = config.get('game_type', 'scavenger_hunt')
    area = config.get('area', 'downtown')
    duration = config.get('duration_minutes', 45)
    age_group = config.get('age_group')

    proof_types = ["photo", "observation", "text"]
    locations = ["main square", "city center", "park area", "landmark district", "historic district"]

    # One task per 15 minutes of play, clamped to the range 2..5.
    num_tasks = min(max(2, duration // 15), 5)
    tasks = []
    for i in range(num_tasks):
        location = locations[i % len(locations)]
        tasks.append({
            "task_id": f"t{i+1}",
            "title": f"Task {i+1}: Explore the {location}",
            "description": f"Find and document something interesting in the {location}",
            "location_hint": f"Navigate to the {location} and look for distinctive features",
            # Later tasks are worth more and get more time.
            "points": 15 + (i * 5),
            "time_limit_minutes": 8 + (i * 2),
            "proof_type": proof_types[i % len(proof_types)],
            "hint": f"Look for signs or landmarks in the {location}",
            "safety_note": "Stay on public paths and avoid restricted areas",
        })

    return {
        "game_id": f"mock-{uuid.uuid4().hex[:8]}",
        "game_type": game_type,
        # Game type ids use underscores; show them as words in the title.
        "title": f"{game_type.replace('_', ' ').title()} in {config.get('area', 'the city')}",
        "theme": f"{config.get('difficulty', 'medium').lower()} adventure",
        "setup": {
            "city": config.get('city', 'Paris'),
            "area": area,
            "meeting_point": f"Main entrance of {area}",
            "duration_minutes": duration,
            "num_players": config.get('num_players', 4),
        },
        "rules": [
            f"Complete as many tasks as possible within {duration} minutes",
            "Take photos or notes as proof of completion",
            "Stay within the designated area at all times",
            "No entering private buildings or restricted areas",
            f"This game is suitable for {config.get('age_group', 'all ages')}",
        ],
        "tasks": tasks,
        "global_hints": [
            "Explore systematically from the meeting point outward",
            "Ask locals for directions if needed",
            "Time management is key - don't spend too long on any single task",
        ],
        "score_rules": [
            "Each task completed: full points",
            "Early completion: +1 bonus point per minute under limit",
            "Hints used: -5 points per hint",
            "Late arrival at meeting point: -10 points per minute",
        ],
        "tie_breaker": "Winner is the player with the most points when time expires. Ties broken by earliest completion time.",
        "safety": {
            "allowed_zone": area,
            "forbidden_behaviors": [
                "Entering buildings without permission",
                "Crossing busy streets recklessly",
                "Approaching strangers",
                "Leaving the designated area",
            ],
            "adult_supervision": age_group in ['kids', 'teens'],
            "stop_conditions": [
                "If a player feels unsafe, the game stops immediately",
                "If weather becomes severe, relocate to shelter",
                "If anyone is injured, call emergency services",
            ],
        },
        "story_seed": {
            "tone": "playful",
            "motifs": ["exploration", "discovery", "teamwork"],
            "recap_style": "episode_recap",
        },
    }


# ── Main game generation (wrapper) ───────────────────────────────────────

def generate_game(config: dict, retrieved_examples: list[dict]) -> dict:
    """Produce a game for the given config, preferring the Nemotron model.

    The prompt is built from ``config`` and ``retrieved_examples`` and sent
    to the model.  The model's answer is accepted only if it parses as JSON
    and contains all of ``game_id``, ``title``, ``setup``, ``tasks`` and
    ``safety``; in every other case the mock generator's result is
    returned instead.

    Args:
        config: Game configuration (game_type, city, duration, etc.)
        retrieved_examples: List of similar example games for grounding

    Returns:
        The parsed model output, or the mock game as a fallback.
    """
    required_fields = ("game_id", "title", "setup", "tasks", "safety")

    prompt = build_generation_prompt(config, retrieved_examples)
    raw_json = generate_game_with_model(prompt, model_name="nemotron")

    if raw_json:
        try:
            candidate = json.loads(raw_json)
        except json.JSONDecodeError:
            print("[gen] Failed to parse generated JSON, using mock")
        else:
            if all(name in candidate for name in required_fields):
                print(f"[gen] Generated game via Nemotron: {candidate.get('game_id')}")
                return candidate

    print("[gen] Using mock generation (model unavailable or generation failed)")
    return generate_game_mock(config, retrieved_examples)