Spaces:

build-small-hackathon
/

cq-test

Running on Zero

BhargavMN

fix: record selected game_type on the generated game

e44426f 23 days ago

20.9 kB

	"""Game generation module using NVIDIA Nemotron 3 Nano 4B via llama.cpp."""

	import gc
	import json
	import uuid
	from typing import Optional
	from pathlib import Path


	# Model initialization cache (exported for reuse by story.py recap).
	# Keys are the strings built in generate_game_with_model()
	# (f"llama_cpp_{path or 'module_default'}"); values are the loaded models.
	_model_cache = {}
	# Alias of the SAME dict object (not a copy): entries added or removed
	# through either name are visible through both.
	NEMOTRON_MODEL_CACHE = _model_cache

	# Model configuration: Hugging Face repo id and the GGUF file inside it,
	# both passed to hf_hub_download() by _resolve_model_path().
	NEMOTRON_MODEL_ID = "nvidia/NVIDIA-Nemotron-3-Nano-4B-GGUF"
	NEMOTRON_GGUF_FILE = "NVIDIA-Nemotron3-Nano-4B-Q4_K_M.gguf"


	# ── Lazy model path resolution ──────────────────────────────────────────
	# The model is a 2.84 GB GGUF file. We do NOT download at import time
	# because that would block HF Space startup and cause OCI kill (exit 128).
	# Instead the download happens lazily inside the @spaces.GPU-decorated
	# function, where we have ample duration. hf_hub_download caches to
	# ~/.cache/huggingface/hub/ so subsequent calls are instant.
	#
	# _model_path: local path of the downloaded file; stays None until a
	# download succeeds.
	# _model_download_attempted: set to True by the first call to
	# _resolve_model_path(). Later calls return _model_path unchanged, so a
	# failed download is not retried unless this flag is reset.
	_model_path: Optional[str] = None
	_model_download_attempted = False


	def _resolve_model_path() -> Optional[str]:
	    """Download the GGUF model to a local cache and return its path.

	    Uses ``hf_hub_download``, which caches to ``~/.cache/huggingface/hub/``
	    so a file that is already on disk is not downloaded again.

	    Call this lazily inside ``@spaces.GPU`` so the 2.84 GB download does
	    not block container startup.

	    The download is attempted at most once per process: the first call
	    records that an attempt was made, and every later call returns the
	    cached result (which is ``None`` if that first attempt failed).

	    Returns:
	        Path to the GGUF file as returned by ``hf_hub_download``, or
	        ``None`` on failure.
	    """
	    global _model_path, _model_download_attempted
	    if _model_download_attempted:
	        return _model_path
	    # Mark the attempt before doing any work so a failure is cached too.
	    _model_download_attempted = True

	    try:
	        import importlib.util
	        import os

	        # Opt in to hf_transfer only when the package is importable.
	        # Setting HF_HUB_ENABLE_HF_TRANSFER=1 without it installed makes
	        # huggingface_hub releases that honour the flag refuse to download,
	        # which would permanently push this process onto the mock path.
	        try:
	            has_hf_transfer = importlib.util.find_spec("hf_transfer") is not None
	        except (ImportError, ValueError):
	            has_hf_transfer = False
	        if has_hf_transfer:
	            # setdefault: an explicit value from the environment wins.
	            os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

	        # Imported after the environment tweak so the library sees it.
	        from huggingface_hub import hf_hub_download

	        print(f"[model] Downloading {NEMOTRON_MODEL_ID}/{NEMOTRON_GGUF_FILE} …")
	        _model_path = hf_hub_download(
	            repo_id=NEMOTRON_MODEL_ID,
	            filename=NEMOTRON_GGUF_FILE,
	        )
	        print(f"[model] Downloaded → {_model_path}")
	    except Exception as e:
	        # Best-effort: callers treat None as "model unavailable".
	        print(f"[model] Download failed: {type(e).__name__}: {e}")
	    return _model_path


	# NOTE: _resolve_model_path() is NOT called here — see docstring above.


	# ── GPU detection for llama.cpp ──────────────────────────────────────────

	def _get_n_gpu_layers() -> int:
	"""Auto-detect GPU availability for llama.cpp inference.

	Returns:
	-1 if CUDA/GPU available (use all layers on GPU), 0 for CPU-only
	"""
	try:
	import torch
	if torch.cuda.is_available():
	return -1 # All layers on GPU
	except ImportError:
	pass
	return 0 # CPU only


	def unload_nemotron() -> None:
	"""Unload the Nemotron llama.cpp model to free GPU memory.

	After game generation is complete, the 2.84 GB GGUF model no longer
	needs to sit in VRAM. Calling this frees ~3 GB so that other models
	(FLUX poster, Cohere ASR) can load on the same GPU.

	Every entry is removed from ``_model_cache`` (and therefore from its
	alias ``NEMOTRON_MODEL_CACHE``), a garbage collection is forced, and a
	best-effort CUDA cache flush is attempted.

	Safe to call outside ``@spaces.GPU`` context — any exception raised by
	the CUDA cleanup (torch missing, no GPU, wrong context) is swallowed.

	Returns:
	None. Progress is reported with ``print``.
	"""
	# NOTE(review): neither global is assigned in this function, so the cached
	# download path and the "attempted" flag survive an unload — confirm
	# whether a reset was intended here.
	global _model_path, _model_download_attempted
	cleared = 0
	# Iterate over a snapshot of the keys so entries can be popped in the loop.
	for key in list(_model_cache.keys()):
	obj = _model_cache.pop(key, None)
	# Drop the local reference as well so the model can be collected.
	del obj
	cleared += 1
	if cleared:
	print(f"[nemotron] Cleared {cleared} cached model(s) from memory")
	# Force a garbage collection
	gc.collect()
	# Only attempt CUDA cleanup if CUDA was actually used —
	# torch.cuda.is_available() can crash outside @spaces.GPU on ZeroGPU.
	try:
	import torch
	if torch.cuda.is_available():
	torch.cuda.empty_cache()
	torch.cuda.synchronize()
	# mem_get_info() returns (free, total) in bytes; reported below in GB.
	free, total = torch.cuda.mem_get_info()
	print(f"[nemotron] GPU memory freed — {free / 1e9:.1f} GB / {total / 1e9:.1f} GB available")
	except Exception:
	pass # CUDA not available or outside GPU context — skip silently


	# ── Prompt building ──────────────────────────────────────────────────────

	def build_generation_prompt(config: dict, retrieved_examples: list[dict]) -> str:
	"""Build the game generation prompt with context and examples.

	The prompt adapts its example formatting to the requested game type:
	- For hide_and_seek (when the example has ``hiding_zones_summary``):
	shows hiding zones, concealment ratings, play area, seeker strategy
	- For tag (when the example has ``arena_summary``): shows arena, variant,
	'it'/round counts, safe zones, chokepoints, open zones, tag mechanic
	- Otherwise (scavenger_hunt, or an example lacking the summary above):
	shows task patterns with points, time limit, difficulty and tags

	Only the first three retrieved examples are rendered, and at most two
	rules, zones, chokepoints, open zones or tasks per example.

	City context is fetched via ``app.services.city_context``; if that
	import or call fails, the failure is printed and the section is left
	empty. The template is read from ``app/prompts/game_generation.txt``
	(a short built-in template is used when the file is missing) and the
	schema from ``app/schemas/game_schema.json`` (empty string when
	missing). Both paths are relative to the current working directory.

	Args:
	config: Game configuration from user. Keys read here: ``game_type``,
	``city``, ``area``, ``duration_minutes``, ``num_players``,
	``difficulty``, ``age_group``, ``location_type`` (all optional,
	each with a default).
	retrieved_examples: Retrieved similar games for grounding

	Returns:
	Formatted prompt string

	Raises:
	Errors from ``json.load`` on a malformed schema file and from
	``str.format`` on the template are not caught here.
	"""
	game_type = config.get('game_type', 'scavenger_hunt')

	# ── Format retrieved examples (game-type-aware) ──────────────────
	examples_str = ""
	if retrieved_examples:
	examples_str = "\n## Retrieved Similar Examples:\n"
	# Cap at three examples; numbering in the prompt starts at 1.
	for i, ex in enumerate(retrieved_examples[:3], 1):
	# Header lines common to every game type.
	examples_str += f"\n### Example {i}: {ex.get('id')}\n"
	examples_str += f"- Type: {ex.get('game_type')}\n"
	examples_str += f"- City: {ex.get('city')} · Area: {ex.get('area')}\n"
	examples_str += f"- Difficulty: {ex.get('difficulty')} · Age: {ex.get('age_group')} · Duration: {ex.get('duration_minutes')} min\n"
	examples_str += f"- Theme: {ex.get('theme', 'general')} · Mobility: {ex.get('mobility', 'standard')}\n"
	examples_str += f"- Landscape Tags: {', '.join(ex.get('landscape_tags', []))}\n"

	# Rules (first two only)
	rules = ex.get('rules_summary', [])
	if rules:
	examples_str += f"- Rules: {', '.join(rules[:2])}\n"

	# The branch is chosen by the REQUESTED game type plus the presence of
	# the matching summary on the example, not by the example's own type.
	if game_type == 'hide_and_seek' and ex.get('hiding_zones_summary'):
	# Hide & seek: format hiding zones + play area + seeker strategy
	examples_str += "- Hiding Zones:\n"
	for z in ex['hiding_zones_summary'][:2]:
	# Zone descriptions are truncated to 80 characters.
	examples_str += f" • {z.get('zone_id')}: {z.get('description', '')[:80]} "
	examples_str += f"[concealment: {z.get('concealment_rating')}]\n"
	pa = ex.get('play_area_summary', {})
	if pa.get('boundary_description'):
	examples_str += f"- Play Area: {pa['boundary_description'][:100]}...\n"
	examples_str += f"- Boundary Size: {pa.get('boundary_size_tier', 'medium')}\n"
	if ex.get('seeker_strategy'):
	examples_str += f"- Seeker Strategy: {ex['seeker_strategy'][:120]}...\n"

	elif game_type == 'tag' and ex.get('arena_summary'):
	# Tag: format arena, safe zones, movement features
	ar = ex.get('arena_summary', {})
	if ar.get('boundary_description'):
	examples_str += f"- Arena: {ar['boundary_description'][:100]}...\n"
	examples_str += f"- Arena Size: {ar.get('arena_size_tier', 'medium')}\n"
	examples_str += f"- Variant: {ex.get('tag_variant', 'classic')} · "
	examples_str += f"'It' Players: {ex.get('it_count', 1)} · "
	examples_str += f"Rounds: {ex.get('round_count', 1)}\n"
	sz = ex.get('safe_zones_summary', [])
	if sz:
	examples_str += "- Safe Zones:\n"
	for z in sz[:2]:
	examples_str += f" • {z.get('zone_id')}: {z.get('description', '')[:80]}\n"
	cp = ex.get('chokepoints', [])
	if cp:
	examples_str += f"- Chokepoints: {'; '.join(cp[:2])}\n"
	oz = ex.get('open_zones', [])
	if oz:
	examples_str += f"- Open Zones: {'; '.join(oz[:2])}\n"
	if ex.get('tag_mechanic'):
	examples_str += f"- Tag Mechanic: {ex['tag_mechanic'][:100]}...\n"

	else:
	# Scavenger hunt, or a hide_and_seek/tag request whose example lacks
	# the type-specific summary: format task patterns
	task_patterns = ex.get('task_patterns', [])
	if task_patterns:
	examples_str += "- Tasks:\n"
	for task in task_patterns[:2]:
	# '?' stands in for a missing points / time-limit value.
	pts = task.get('points', '?')
	tl = task.get('time_limit', '?')
	# NOTE(review): tt is read but never used below.
	tt = task.get('task_type', '')
	diff = task.get('difficulty', '')
	tags = task.get('landscape_tags_used', [])
	examples_str += f" • {task.get('task_id')}: {task.get('title', '')} "
	examples_str += f"({pts} pts, {tl} min, {diff})"
	if tags:
	examples_str += f" [{', '.join(tags)}]"
	examples_str += "\n"
	# Notes are only shown for examples sourced from the scavenger_hunt dataset.
	if ex.get('dataset_source') in ('scavenger_hunt',):
	examples_str += f"- Notes: {ex.get('notes', '')[:80]}\n"

	# ── Live city context via Wikipedia ─────────────────────────────────
	city = config.get('city', 'Paris')
	city_context_str = ""
	# Best-effort: the import is local so a missing or failing service only
	# costs the city section, not the whole prompt.
	try:
	from app.services.city_context import build_city_section
	city_context_str = build_city_section(city)
	except Exception as e:
	print(f"[prompt] Wikipedia city context unavailable: {e}")

	# ── Load prompt template ──────────────────────────────────────────
	template_path = Path("app/prompts/game_generation.txt")
	if template_path.exists():
	with open(template_path, 'r', encoding='utf-8') as f:
	template = f.read()
	else:
	# Minimal fallback that only consumes the output_schema placeholder.
	template = "Generate a location-based game in strict JSON format.\n{output_schema}"

	# ── Load output schema ───────────────────────────────────────────
	schema_path = Path("app/schemas/game_schema.json")
	schema_str = ""
	if schema_path.exists():
	with open(schema_path, 'r', encoding='utf-8') as f:
	schema_obj = json.load(f)
	# Re-serialised so the schema is embedded with a consistent 2-space indent.
	schema_str = json.dumps(schema_obj, indent=2)

	# ── Build prompt ─────────────────────────────────────────────────
	# str.format ignores keyword arguments the template does not reference,
	# so the fallback template works with the same call.
	prompt = template.format(
	city=city,
	area=config.get('area', 'downtown'),
	game_type=game_type,
	duration_minutes=config.get('duration_minutes', 45),
	num_players=config.get('num_players', 4),
	difficulty=config.get('difficulty', 'medium'),
	age_group=config.get('age_group', 'adults'),
	location_type=config.get('location_type', 'mixed'),
	retrieved_examples=examples_str,
	city_context=city_context_str,
	output_schema=schema_str,
	)

	return prompt


	# ── JSON extraction from model output ────────────────────────────────────

	def extract_json(text: str) -> Optional[str]:
	    """Extract the first brace-balanced JSON object from generated text.

	    Scanning starts at the first ``{`` and tracks brace depth until it
	    returns to zero. Braces that appear inside double-quoted JSON strings
	    (including after backslash escapes such as ``\\"``) are ignored, so a
	    value like ``"look for } here"`` does not end the object early.

	    The returned text is not validated as JSON; callers are expected to
	    parse it.

	    Args:
	        text: Generated text that may contain JSON

	    Returns:
	        JSON string or None if no ``{`` is present or the braces never
	        balance
	    """
	    start_idx = text.find('{')
	    if start_idx == -1:
	        return None

	    depth = 0
	    in_string = False   # currently inside a "..." literal
	    escaped = False     # previous character was a backslash inside a string
	    for i in range(start_idx, len(text)):
	        ch = text[i]

	        if in_string:
	            if escaped:
	                # Whatever follows a backslash is literal string content.
	                escaped = False
	            elif ch == '\\':
	                escaped = True
	            elif ch == '"':
	                in_string = False
	            continue

	        if ch == '"':
	            in_string = True
	        elif ch == '{':
	            depth += 1
	        elif ch == '}':
	            depth -= 1
	            if depth == 0:
	                raw = text[start_idx:i + 1]
	                # Normalize double braces from prompt echoing ({{ -> {)
	                if raw.startswith('{{') and raw.endswith('}}'):
	                    raw = raw[1:-1]
	                return raw

	    # Ran out of text before the opening brace was closed.
	    return None


	# ── Model-based generation with llama.cpp ───────────────────────────────

	def generate_game_with_model(
	    prompt: str,
	    model_path: Optional[str] = None,
	    model_name: str = "nemotron",
	) -> Optional[str]:
	    """Run the prompt through Nemotron 3 Nano 4B (llama.cpp) and return JSON text.

	    The model is loaded with llama-cpp-python from a GGUF file and kept in
	    the module-level cache, so only the first call per model path pays the
	    load cost.

	    HF Spaces Zero GPU pattern:
	        * When no ``model_path`` is given, the GGUF file is resolved (and,
	          if needed, downloaded) lazily here rather than at import time.
	        * ``Llama(model_path=...)`` is constructed here, i.e. inside the
	          GPU context where CUDA is available.

	    Args:
	        prompt: Generation prompt, sent as the user message.
	        model_path: Path to a local GGUF file (optional — resolved
	            automatically if omitted).
	        model_name: Model identifier (unused, kept for API compat).

	    Returns:
	        The JSON object text extracted from the reply, or None when the
	        library or model is unavailable, generation raises, or no JSON
	        object can be extracted.
	    """
	    try:
	        from llama_cpp import Llama

	        cache_key = f"llama_cpp_{model_path or 'module_default'}"
	        llm = _model_cache.get(cache_key)
	        if llm is None:
	            # First use of this model path: locate the file and load it.
	            gguf_path = model_path or _resolve_model_path()
	            if not gguf_path:
	                print("[nemotron] No model path available — fall back to mock")
	                return None

	            layers = _get_n_gpu_layers()
	            device = "CPU" if layers >= 0 else "GPU"
	            print(f"[nemotron] Initialising llama.cpp from: {gguf_path} ({device})")
	            llm = Llama(
	                model_path=gguf_path,
	                n_ctx=8192,
	                n_gpu_layers=layers,
	                verbose=False,
	            )
	            _model_cache[cache_key] = llm

	        # Chat completion is used so the model's own chat template applies.
	        # NOTE(review): with stop=["```"], a reply that opens with a code
	        # fence would end immediately — confirm that is acceptable.
	        completion = llm.create_chat_completion(
	            messages=[
	                {"role": "system", "content": "You output only valid JSON. No other text."},
	                {"role": "user", "content": prompt},
	            ],
	            max_tokens=8192,
	            temperature=0.3,
	            top_p=0.9,
	            stop=["```"],
	        )

	        reply = completion["choices"][0]["message"]["content"].strip()
	        print(f"[nemotron] Generated {len(reply)} chars")

	        payload = extract_json(reply)
	        if not payload:
	            print(f"[nemotron] JSON extraction failed on output (len={len(reply)})")
	            print(f"[nemotron] Preview: {reply[:300]}...")
	        return payload

	    except ImportError:
	        print("[nemotron] llama-cpp-python not available. Install with: pip install llama-cpp-python")
	        return None
	    except Exception as e:
	        # Any load or inference failure degrades to "no model output".
	        print(f"[nemotron] llama.cpp generation failed: {type(e).__name__}: {e}")
	        return None


	# ── Mock generation (fallback) ───────────────────────────────────────────

	def generate_game_mock(config: dict, retrieved_examples: list[dict]) -> dict:
	    """Generate a deterministic-shaped mock game for use without a model.

	    The game is built from ``config`` alone. ``retrieved_examples`` is
	    accepted for signature parity with the model-backed path but is not
	    read.

	    The number of tasks is one per 15 minutes of ``duration_minutes``,
	    clamped to the range 2–5. Points, time limits, proof types and
	    locations follow fixed progressions by task index.

	    Args:
	        config: Game configuration. Keys read: ``game_type``, ``city``,
	            ``area``, ``duration_minutes``, ``num_players``,
	            ``difficulty``, ``age_group`` (all optional).
	        retrieved_examples: Retrieved similar games (currently unused)

	    Returns:
	        Game dict with ``game_id``, ``game_type``, ``title``, ``theme``,
	        ``setup``, ``rules``, ``tasks``, ``global_hints``, ``score_rules``,
	        ``tie_breaker``, ``safety`` and ``story_seed`` keys.
	    """
	    game_id = f"mock-{uuid.uuid4().hex[:8]}"

	    duration = config.get('duration_minutes', 45)
	    area = config.get('area', 'downtown')

	    # One task per 15 minutes, never fewer than 2 nor more than 5.
	    num_tasks = min(max(2, duration // 15), 5)

	    proof_types = ["photo", "observation", "text"]
	    locations = ["main square", "city center", "park area", "landmark district", "historic district"]

	    tasks = []
	    for i in range(num_tasks):
	        location = locations[i % len(locations)]
	        tasks.append({
	            "task_id": f"t{i+1}",
	            "title": f"Task {i+1}: Explore the {location}",
	            "description": f"Find and document something interesting in the {location}",
	            "location_hint": f"Navigate to the {location} and look for distinctive features",
	            # Later tasks are worth more and allow more time.
	            "points": 15 + (i * 5),
	            "time_limit_minutes": 8 + (i * 2),
	            "proof_type": proof_types[i % len(proof_types)],
	            "hint": f"Look for signs or landmarks in the {location}",
	            "safety_note": "Stay on public paths and avoid restricted areas",
	        })

	    # game_type values are identifiers such as "hide_and_seek"; turn the
	    # underscores into spaces so the title reads "Hide And Seek", not
	    # "Hide_And_Seek".
	    type_label = config.get('game_type', 'scavenger hunt').replace('_', ' ').title()

	    game = {
	        "game_id": game_id,
	        "game_type": config.get('game_type', 'scavenger_hunt'),
	        "title": f"{type_label} in {config.get('area', 'the city')}",
	        "theme": f"{config.get('difficulty', 'medium').lower()} adventure",
	        "setup": {
	            "city": config.get('city', 'Paris'),
	            "area": area,
	            "meeting_point": f"Main entrance of {area}",
	            "duration_minutes": duration,
	            "num_players": config.get('num_players', 4),
	        },
	        "rules": [
	            f"Complete as many tasks as possible within {duration} minutes",
	            "Take photos or notes as proof of completion",
	            "Stay within the designated area at all times",
	            "No entering private buildings or restricted areas",
	            f"This game is suitable for {config.get('age_group', 'all ages')}",
	        ],
	        "tasks": tasks,
	        "global_hints": [
	            "Explore systematically from the meeting point outward",
	            "Ask locals for directions if needed",
	            "Time management is key - don't spend too long on any single task",
	        ],
	        "score_rules": [
	            "Each task completed: full points",
	            "Early completion: +1 bonus point per minute under limit",
	            "Hints used: -5 points per hint",
	            "Late arrival at meeting point: -10 points per minute",
	        ],
	        "tie_breaker": "Winner is the player with the most points when time expires. Ties broken by earliest completion time.",
	        "safety": {
	            "allowed_zone": area,
	            "forbidden_behaviors": [
	                "Entering buildings without permission",
	                "Crossing busy streets recklessly",
	                "Approaching strangers",
	                "Leaving the designated area",
	            ],
	            # Supervision is only flagged for the two younger age groups.
	            "adult_supervision": config.get('age_group') in ['kids', 'teens'],
	            "stop_conditions": [
	                "If a player feels unsafe, the game stops immediately",
	                "If weather becomes severe, relocate to shelter",
	                "If anyone is injured, call emergency services",
	            ],
	        },
	        "story_seed": {
	            "tone": "playful",
	            "motifs": ["exploration", "discovery", "teamwork"],
	            "recap_style": "episode_recap",
	        },
	    }

	    return game


	# ── Main game generation (wrapper) ───────────────────────────────────────

	def generate_game(config: dict, retrieved_examples: list[dict]) -> dict:
	    """Produce a game for the given config, preferring the model over the mock.

	    A prompt is built from the config and examples and sent to Nemotron 3
	    Nano 4B via llama.cpp. The reply is accepted only if it parses as JSON
	    and carries every required top-level field; in every other case the
	    mock generator supplies the result.

	    Args:
	        config: Game configuration (game_type, city, duration, etc.)
	        retrieved_examples: List of similar example games for grounding

	    Returns:
	        The model's game dict when it is usable, otherwise a mock game.
	    """
	    required_fields = ("game_id", "title", "setup", "tasks", "safety")

	    raw_json = generate_game_with_model(
	        build_generation_prompt(config, retrieved_examples),
	        model_name="nemotron",
	    )

	    if raw_json:
	        try:
	            candidate = json.loads(raw_json)
	        except json.JSONDecodeError:
	            print("[gen] Failed to parse generated JSON, using mock")
	        else:
	            # Accept the model output only when nothing required is missing.
	            if all(name in candidate for name in required_fields):
	                print(f"[gen] Generated game via Nemotron: {candidate.get('game_id')}")
	                return candidate

	    print("[gen] Using mock generation (model unavailable or generation failed)")
	    return generate_game_mock(config, retrieved_examples)