Spaces:
Running on Zero
Running on Zero
| """Game generation module using NVIDIA Nemotron 3 Nano 4B via llama.cpp.""" | |
| import gc | |
| import json | |
| import uuid | |
| from typing import Optional | |
| from pathlib import Path | |
# Model initialization cache (exported for reuse by story.py recap).
# Maps a cache-key string to a loaded llama.cpp model instance.
_model_cache = {}
# Alias of the very same dict object (not a copy): in-place changes to one
# name are visible through the other.
NEMOTRON_MODEL_CACHE = _model_cache
# Model configuration
NEMOTRON_MODEL_ID = "nvidia/NVIDIA-Nemotron-3-Nano-4B-GGUF"
NEMOTRON_GGUF_FILE = "NVIDIA-Nemotron3-Nano-4B-Q4_K_M.gguf"
# ── Lazy model path resolution ───────────────────────────────────────────
# The model is a 2.84 GB GGUF file. We do NOT download at import time
# because that would block HF Space startup and cause OCI kill (exit 128).
# Instead the download happens lazily inside the @spaces.GPU-decorated
# function, where we have ample duration. hf_hub_download caches to
# ~/.cache/huggingface/hub/ so subsequent calls are instant.
# Memoised result of _resolve_model_path(); stays None until a download succeeds.
_model_path: Optional[str] = None
# Set to True by the first _resolve_model_path() call so that the download
# is attempted at most once per process.
_model_download_attempted = False
def _resolve_model_path() -> Optional[str]:
    """Download the GGUF model to the local cache and return its path.

    Uses ``hf_hub_download``, which caches to ``~/.cache/huggingface/hub/``
    so a file that is already on disk is not downloaded again.

    The download is attempted at most once per process: the first call sets
    ``_model_download_attempted`` and every later call returns the memoised
    ``_model_path`` (which stays ``None`` if that first attempt failed).

    Call this lazily inside ``@spaces.GPU`` so the 2.84 GB download does
    *not* block container startup.

    Returns:
        Path to the GGUF file as returned by ``hf_hub_download``, or
        ``None`` on failure.
    """
    global _model_path, _model_download_attempted
    if _model_download_attempted:
        return _model_path
    _model_download_attempted = True
    try:
        import importlib.util
        import os

        # Opt in to hf_transfer only when the package is importable.
        # huggingface_hub releases that honour this flag raise an error when
        # it is set but hf_transfer is missing, which would turn an optional
        # speed-up into a guaranteed download failure.
        try:
            has_hf_transfer = importlib.util.find_spec("hf_transfer") is not None
        except (ImportError, ValueError):
            has_hf_transfer = False
        if has_hf_transfer:
            os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

        from huggingface_hub import hf_hub_download

        print(f"[model] Downloading {NEMOTRON_MODEL_ID}/{NEMOTRON_GGUF_FILE} …")
        _model_path = hf_hub_download(
            repo_id=NEMOTRON_MODEL_ID,
            filename=NEMOTRON_GGUF_FILE,
        )
        print(f"[model] Downloaded → {_model_path}")
    except Exception as e:
        # Best effort: the caller treats None as "model unavailable".
        print(f"[model] Download failed: {type(e).__name__}: {e}")
    return _model_path
# NOTE: _resolve_model_path() is NOT called here — see docstring above.
# ── GPU detection for llama.cpp ──────────────────────────────────────────
def _get_n_gpu_layers() -> int:
    """Auto-detect GPU availability for llama.cpp inference.

    Any failure while probing CUDA is treated as "no GPU" so that a broken
    or unavailable CUDA context degrades to CPU inference instead of
    aborting the caller.

    Returns:
        -1 if CUDA/GPU is available (offload all layers to the GPU),
        0 for CPU-only.
    """
    try:
        import torch

        if torch.cuda.is_available():
            return -1  # All layers on GPU
    except ImportError:
        pass  # torch not installed: CPU only
    except Exception as e:
        # The CUDA probe itself can raise outside a GPU context; fall back to CPU.
        print(f"[nemotron] CUDA probe failed ({type(e).__name__}: {e}); using CPU")
    return 0  # CPU only
def unload_nemotron() -> None:
    """Unload the cached Nemotron llama.cpp model(s) to free GPU memory.

    After game generation is complete, the 2.84 GB GGUF model no longer
    needs to sit in VRAM. Dropping the cached instances is meant to make
    room for other models (FLUX poster, Cohere ASR) on the same GPU.

    Safe to call outside ``@spaces.GPU`` context — any error raised by the
    CUDA clean-up is swallowed.
    """
    cleared = len(_model_cache)
    # Clear in place rather than rebinding, so every other name bound to
    # this dict object observes the now-empty cache as well.
    _model_cache.clear()
    if cleared:
        print(f"[nemotron] Cleared {cleared} cached model(s) from memory")
    # Force a garbage collection so the dropped model objects are finalised now.
    gc.collect()
    # torch.cuda.is_available() can crash outside @spaces.GPU on ZeroGPU,
    # hence the broad guard around the whole CUDA clean-up.
    try:
        import torch

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
            free, total = torch.cuda.mem_get_info()
            print(f"[nemotron] GPU memory freed — {free / 1e9:.1f} GB / {total / 1e9:.1f} GB available")
    except Exception:
        pass  # CUDA not available or outside GPU context — skip silently
# ── Prompt building ──────────────────────────────────────────────────────
def _format_hide_and_seek_details(ex: dict) -> list[str]:
    """Return prompt lines for an example's hiding zones, play area and seeker strategy."""
    lines = ["- Hiding Zones:\n"]
    for zone in ex['hiding_zones_summary'][:2]:
        lines.append(
            f" • {zone.get('zone_id')}: {(zone.get('description') or '')[:80]} "
            f"[concealment: {zone.get('concealment_rating')}]\n"
        )
    play_area = ex.get('play_area_summary') or {}
    if play_area.get('boundary_description'):
        lines.append(f"- Play Area: {play_area['boundary_description'][:100]}...\n")
    # NOTE(review): indentation was lost in the reviewed source; the size line is
    # assumed to be emitted regardless of the boundary description — confirm.
    lines.append(f"- Boundary Size: {play_area.get('boundary_size_tier', 'medium')}\n")
    if ex.get('seeker_strategy'):
        lines.append(f"- Seeker Strategy: {ex['seeker_strategy'][:120]}...\n")
    return lines


def _format_tag_details(ex: dict) -> list[str]:
    """Return prompt lines for an example's arena, safe zones and movement features."""
    lines = []
    arena = ex.get('arena_summary') or {}
    if arena.get('boundary_description'):
        lines.append(f"- Arena: {arena['boundary_description'][:100]}...\n")
    # NOTE(review): assumed unconditional within the tag branch (see the
    # matching note in the hide-and-seek formatter) — confirm.
    lines.append(f"- Arena Size: {arena.get('arena_size_tier', 'medium')}\n")
    lines.append(
        f"- Variant: {ex.get('tag_variant', 'classic')} · "
        f"'It' Players: {ex.get('it_count', 1)} · "
        f"Rounds: {ex.get('round_count', 1)}\n"
    )
    safe_zones = ex.get('safe_zones_summary') or []
    if safe_zones:
        lines.append("- Safe Zones:\n")
        for zone in safe_zones[:2]:
            lines.append(f" • {zone.get('zone_id')}: {(zone.get('description') or '')[:80]}\n")
    chokepoints = ex.get('chokepoints') or []
    if chokepoints:
        lines.append(f"- Chokepoints: {'; '.join(chokepoints[:2])}\n")
    open_zones = ex.get('open_zones') or []
    if open_zones:
        lines.append(f"- Open Zones: {'; '.join(open_zones[:2])}\n")
    if ex.get('tag_mechanic'):
        lines.append(f"- Tag Mechanic: {ex['tag_mechanic'][:100]}...\n")
    return lines


def _format_task_details(ex: dict) -> list[str]:
    """Return prompt lines for an example's task patterns (the default layout)."""
    lines = []
    task_patterns = ex.get('task_patterns') or []
    if task_patterns:
        lines.append("- Tasks:\n")
        for task in task_patterns[:2]:
            entry = (
                f" • {task.get('task_id')}: {task.get('title', '')} "
                f"({task.get('points', '?')} pts, {task.get('time_limit', '?')} min, "
                f"{task.get('difficulty', '')})"
            )
            tags = task.get('landscape_tags_used') or []
            if tags:
                entry += f" [{', '.join(tags)}]"
            lines.append(entry + "\n")
    # NOTE(review): assumed to belong to the task-pattern branch; the reviewed
    # source had no indentation to confirm the nesting.
    if ex.get('dataset_source') == 'scavenger_hunt':
        lines.append(f"- Notes: {(ex.get('notes') or '')[:80]}\n")
    return lines


def _format_retrieved_examples(game_type: str, retrieved_examples: list[dict]) -> str:
    """Render at most three retrieved examples as a markdown section ('' when none)."""
    if not retrieved_examples:
        return ""
    parts = ["\n## Retrieved Similar Examples:\n"]
    for idx, ex in enumerate(retrieved_examples[:3], 1):
        parts.append(f"\n### Example {idx}: {ex.get('id')}\n")
        parts.append(f"- Type: {ex.get('game_type')}\n")
        parts.append(f"- City: {ex.get('city')} · Area: {ex.get('area')}\n")
        parts.append(
            f"- Difficulty: {ex.get('difficulty')} · Age: {ex.get('age_group')} · "
            f"Duration: {ex.get('duration_minutes')} min\n"
        )
        parts.append(
            f"- Theme: {ex.get('theme', 'general')} · Mobility: {ex.get('mobility', 'standard')}\n"
        )
        parts.append(f"- Landscape Tags: {', '.join(ex.get('landscape_tags') or [])}\n")
        rules = ex.get('rules_summary') or []
        if rules:
            parts.append(f"- Rules: {', '.join(rules[:2])}\n")
        # Pick the layout from the *requested* game type, but only when the
        # example actually carries the matching summary; otherwise fall back
        # to the generic task-pattern layout.
        if game_type == 'hide_and_seek' and ex.get('hiding_zones_summary'):
            parts.extend(_format_hide_and_seek_details(ex))
        elif game_type == 'tag' and ex.get('arena_summary'):
            parts.extend(_format_tag_details(ex))
        else:
            parts.extend(_format_task_details(ex))
    return "".join(parts)


def build_generation_prompt(config: dict, retrieved_examples: list[dict]) -> str:
    """Build the game generation prompt with context and examples.

    The prompt adapts its example formatting to the requested game type:
    - hide_and_seek: hiding zones, concealment ratings, play area, seeker strategy
    - tag: arena, variant, safe zones, chokepoints, open zones, tag mechanic
    - anything else (or an example lacking the type-specific summary):
      task patterns with points, time limit, difficulty and landscape tags

    The template is read from ``app/prompts/game_generation.txt`` and the
    output schema from ``app/schemas/game_schema.json``, both relative to the
    current working directory; a minimal built-in template and an empty
    schema are used when the files are absent.

    Args:
        config: Game configuration from user
        retrieved_examples: Retrieved similar games for grounding

    Returns:
        Formatted prompt string
    """
    game_type = config.get('game_type', 'scavenger_hunt')

    # ── Format retrieved examples (game-type-aware) ──────────────────
    examples_str = _format_retrieved_examples(game_type, retrieved_examples)

    # ── Live city context via Wikipedia ──────────────────────────────
    city = config.get('city', 'Paris')
    city_context_str = ""
    try:
        # Imported lazily so a missing or broken service never blocks prompt building.
        from app.services.city_context import build_city_section
        city_context_str = build_city_section(city)
    except Exception as e:
        print(f"[prompt] Wikipedia city context unavailable: {e}")

    # ── Load prompt template ─────────────────────────────────────────
    template_path = Path("app/prompts/game_generation.txt")
    if template_path.exists():
        template = template_path.read_text(encoding='utf-8')
    else:
        template = "Generate a location-based game in strict JSON format.\n{output_schema}"

    # ── Load output schema ───────────────────────────────────────────
    schema_path = Path("app/schemas/game_schema.json")
    schema_str = ""
    if schema_path.exists():
        with open(schema_path, 'r', encoding='utf-8') as f:
            # Round-trip through json to normalise the indentation.
            schema_str = json.dumps(json.load(f), indent=2)

    # ── Build prompt ─────────────────────────────────────────────────
    # str.format ignores keyword arguments the template does not reference.
    return template.format(
        city=city,
        area=config.get('area', 'downtown'),
        game_type=game_type,
        duration_minutes=config.get('duration_minutes', 45),
        num_players=config.get('num_players', 4),
        difficulty=config.get('difficulty', 'medium'),
        age_group=config.get('age_group', 'adults'),
        location_type=config.get('location_type', 'mixed'),
        retrieved_examples=examples_str,
        city_context=city_context_str,
        output_schema=schema_str,
    )
# ── JSON extraction from model output ────────────────────────────────────
def extract_json(text: str) -> Optional[str]:
    """Extract the first complete JSON object from generated text.

    Scans from the first ``{`` and tracks brace depth. Braces that appear
    inside double-quoted JSON strings (including strings containing escaped
    quotes) are ignored, so a value such as ``"}"`` does not end the object
    early.

    Args:
        text: Generated text that may contain JSON

    Returns:
        JSON string, or None if no balanced object is found
    """
    start_idx = text.find('{')
    if start_idx == -1:
        return None

    depth = 0
    in_string = False
    escaped = False
    for pos, ch in enumerate(text[start_idx:], start_idx):
        if in_string:
            if escaped:
                escaped = False  # this character was escaped; take it literally
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                raw = text[start_idx:pos + 1]
                # Normalize double braces from prompt echoing ({{ -> {)
                if raw.startswith('{{') and raw.endswith('}}'):
                    raw = raw[1:-1]
                return raw
    return None
# ── Model-based generation with llama.cpp ────────────────────────────────
def generate_game_with_model(
    prompt: str,
    model_path: Optional[str] = None,
    model_name: str = "nemotron",
) -> Optional[str]:
    """Generate game JSON using NVIDIA Nemotron 3 Nano 4B via llama.cpp.

    Uses llama-cpp-python with a GGUF-quantized model.

    Important — HF Spaces Zero GPU pattern:
    * The 2.84 GB GGUF file is lazily downloaded inside ``@spaces.GPU``
      (if not already cached on disk from a previous run). ``hf_hub_download``
      uses the local Hugging Face cache so subsequent calls are instant.
    * ``Llama(model_path=...)`` initialisation happens here — inside the GPU
      context where CUDA is available.

    The loaded model is kept in ``_model_cache`` under a key derived from
    ``model_path`` and reused by later calls.

    Args:
        prompt: Generation prompt
        model_path: Path to a local GGUF file (optional — auto-downloaded
            if omitted).
        model_name: Model identifier (unused, kept for API compat).

    Returns:
        The JSON object extracted from the model output, or None if the
        model is unavailable, generation fails, or no JSON object is found.
    """
    try:
        from llama_cpp import Llama

        cache_key = f"llama_cpp_{model_path or 'module_default'}"
        if cache_key in _model_cache:
            llm = _model_cache[cache_key]
        else:
            resolved = model_path or _resolve_model_path()
            if not resolved:
                print("[nemotron] No model path available — fall back to mock")
                return None
            n_gpu_layers = _get_n_gpu_layers()
            gpu_info = "GPU" if n_gpu_layers < 0 else "CPU"
            print(f"[nemotron] Initialising llama.cpp from: {resolved} ({gpu_info})")
            llm = Llama(
                model_path=resolved,
                verbose=False,
                n_gpu_layers=n_gpu_layers,
                n_ctx=8192,
            )
            _model_cache[cache_key] = llm

        # Use create_chat_completion — this model uses a Nemotron chat template
        messages = [
            {"role": "system", "content": "You output only valid JSON. No other text."},
            {"role": "user", "content": prompt},
        ]
        result = llm.create_chat_completion(
            messages=messages,
            max_tokens=8192,
            temperature=0.3,
            top_p=0.9,
            stop=["```"],
        )
        # The message content may be None; treat that as empty output so it is
        # reported as an extraction failure rather than an AttributeError.
        generated_text = (result["choices"][0]["message"]["content"] or "").strip()
        print(f"[nemotron] Generated {len(generated_text)} chars")

        json_str = extract_json(generated_text)
        if not json_str:
            print(f"[nemotron] JSON extraction failed on output (len={len(generated_text)})")
            print(f"[nemotron] Preview: {generated_text[:300]}...")
        return json_str
    except ImportError:
        print("[nemotron] llama-cpp-python not available. Install with: pip install llama-cpp-python")
        return None
    except Exception as e:
        # Boundary handler: any failure here makes the caller fall back to mock.
        print(f"[nemotron] llama.cpp generation failed: {type(e).__name__}: {e}")
        return None
# ── Mock generation (fallback) ───────────────────────────────────────────
def generate_game_mock(config: dict, retrieved_examples: list[dict]) -> dict:
    """Generate a deterministic-shaped mock game for use without a model.

    Builds the game purely from ``config`` (with defaults for missing keys).
    Between two and five tasks are produced, one per 15 minutes of the
    requested duration.

    Args:
        config: Game configuration
        retrieved_examples: Retrieved similar games; currently unused and
            accepted only so the signature mirrors ``generate_game``.

    Returns:
        Mock game dict with the same top-level keys the real generator is
        expected to produce (game_id, title, setup, tasks, safety, ...).
    """
    duration = config.get('duration_minutes', 45)
    area = config.get('area', 'downtown')

    proof_types = ["photo", "observation", "text"]
    locations = ["main square", "city center", "park area", "landmark district", "historic district"]

    # One task per 15 minutes, clamped to the range 2..5.
    num_tasks = min(max(2, duration // 15), 5)
    tasks = []
    for i in range(num_tasks):
        location = locations[i % len(locations)]
        tasks.append({
            "task_id": f"t{i + 1}",
            "title": f"Task {i + 1}: Explore the {location}",
            "description": f"Find and document something interesting in the {location}",
            "location_hint": f"Navigate to the {location} and look for distinctive features",
            "points": 15 + (i * 5),
            "time_limit_minutes": 8 + (i * 2),
            "proof_type": proof_types[i % len(proof_types)],
            "hint": f"Look for signs or landmarks in the {location}",
            "safety_note": "Stay on public paths and avoid restricted areas",
        })

    # Game types are identifiers such as "hide_and_seek"; swap underscores for
    # spaces before title-casing so the title reads "Hide And Seek".
    display_type = config.get('game_type', 'scavenger hunt').replace('_', ' ').title()

    return {
        "game_id": f"mock-{uuid.uuid4().hex[:8]}",
        "game_type": config.get('game_type', 'scavenger_hunt'),
        "title": f"{display_type} in {config.get('area', 'the city')}",
        "theme": f"{config.get('difficulty', 'medium').lower()} adventure",
        "setup": {
            "city": config.get('city', 'Paris'),
            "area": area,
            "meeting_point": f"Main entrance of {area}",
            "duration_minutes": duration,
            "num_players": config.get('num_players', 4),
        },
        "rules": [
            f"Complete as many tasks as possible within {duration} minutes",
            "Take photos or notes as proof of completion",
            "Stay within the designated area at all times",
            "No entering private buildings or restricted areas",
            f"This game is suitable for {config.get('age_group', 'all ages')}",
        ],
        "tasks": tasks,
        "global_hints": [
            "Explore systematically from the meeting point outward",
            "Ask locals for directions if needed",
            "Time management is key - don't spend too long on any single task",
        ],
        "score_rules": [
            "Each task completed: full points",
            "Early completion: +1 bonus point per minute under limit",
            "Hints used: -5 points per hint",
            "Late arrival at meeting point: -10 points per minute",
        ],
        "tie_breaker": "Winner is the player with the most points when time expires. Ties broken by earliest completion time.",
        "safety": {
            "allowed_zone": area,
            "forbidden_behaviors": [
                "Entering buildings without permission",
                "Crossing busy streets recklessly",
                "Approaching strangers",
                "Leaving the designated area",
            ],
            # Only the explicit 'kids' / 'teens' groups require supervision.
            "adult_supervision": config.get('age_group') in ['kids', 'teens'],
            "stop_conditions": [
                "If a player feels unsafe, the game stops immediately",
                "If weather becomes severe, relocate to shelter",
                "If anyone is injured, call emergency services",
            ],
        },
        "story_seed": {
            "tone": "playful",
            "motifs": ["exploration", "discovery", "teamwork"],
            "recap_style": "episode_recap",
        },
    }
# ── Main game generation (wrapper) ───────────────────────────────────────
def generate_game(config: dict, retrieved_examples: list[dict]) -> dict:
    """Produce a game for the given configuration, preferring the real model.

    The prompt is built from ``config`` and ``retrieved_examples`` and sent
    to Nemotron via llama.cpp. The model's answer is accepted only when it
    parses as JSON and carries every required top-level field; in all other
    cases the mock generator supplies the result.

    Args:
        config: Game configuration (game_type, city, duration, etc.)
        retrieved_examples: List of similar example games for grounding

    Returns:
        Generated game JSON matching the game schema
    """
    required_fields = ("game_id", "title", "setup", "tasks", "safety")

    raw_json = generate_game_with_model(
        build_generation_prompt(config, retrieved_examples),
        model_name="nemotron",
    )

    if raw_json:
        try:
            candidate = json.loads(raw_json)
        except json.JSONDecodeError:
            print("[gen] Failed to parse generated JSON, using mock")
        else:
            # Accept the model output only if no required field is missing.
            if not any(name not in candidate for name in required_fields):
                print(f"[gen] Generated game via Nemotron: {candidate.get('game_id')}")
                return candidate

    print("[gen] Using mock generation (model unavailable or generation failed)")
    return generate_game_mock(config, retrieved_examples)