"""
Inspect and verify the games dataset loader and normalizer.

Run this script to:
1. Load the dataset
2. Normalize records
3. Print one normalized record per game type
4. Verify schema consistency
"""

import json
import sys
from collections import Counter
from pathlib import Path

from app.services.retrieval import load_games_dataset, normalize_game_record

# Location of the dataset, relative to the directory the script is run from.
DATASET_PATH = Path("app/data/games_dataset.json")

# Width of the "=====" rules that frame each report section.
RULE_WIDTH = 80

# Status glyphs written as escapes so the source file stays pure ASCII and
# cannot be corrupted by a wrong-encoding round trip.
CHECK_MARK = "\u2713"  # CHECK MARK
CROSS_MARK = "\u2717"  # BALLOT X

# Keys every normalized record is expected to carry.
REQUIRED_FIELDS = (
    'id', 'game_type', 'city', 'area', 'location_type',
    'duration_minutes', 'num_players', 'difficulty', 'age_group',
    'num_tasks', 'task_ids', 'num_rules', 'num_hints',
    'is_safe', 'quality_score',
)

# Maximum number of description characters shown in the task preview.
DESCRIPTION_PREVIEW_CHARS = 80


def _print_header(title, *, leading_newline=True):
    """Print a section title framed by two horizontal rules."""
    rule = "=" * RULE_WIDTH
    print(f"\n{rule}" if leading_newline else rule)
    print(title)
    print(rule)


def _load_records(dataset_path):
    """Load the raw records, exiting with status 1 if the file is missing."""
    print(f"Loading dataset from: {dataset_path}")
    try:
        raw_records = load_games_dataset(str(dataset_path))
    except FileNotFoundError:
        print(f"ERROR: Dataset not found at {dataset_path}")
        sys.exit(1)
    print(f"\n{CHECK_MARK} Loaded {len(raw_records)} game records\n")
    return raw_records


def _normalize_records(raw_records):
    """Normalize every record, reporting and skipping the ones that fail."""
    normalized_records = []
    for record in raw_records:
        try:
            normalized_records.append(normalize_game_record(record))
        except Exception as e:  # best-effort: one bad record must not stop the run
            # A malformed record may not be a dict at all; guard the lookup so
            # the error report itself cannot raise.
            record_id = record.get('id') if isinstance(record, dict) else None
            print(f"ERROR normalizing record {record_id}: {e}")
    print(f"{CHECK_MARK} Normalized {len(normalized_records)} records\n")
    return normalized_records


def _print_summary(normalized_records):
    """Print per-type counts and the distinct difficulties, ages and locations."""
    _print_header("DATASET SUMMARY", leading_newline=False)

    game_types = Counter()
    difficulties = set()
    age_groups = set()
    locations = set()

    # NOTE: keys are indexed directly, so a record lacking one of them raises
    # KeyError here, before the schema validation section is reached.
    for norm in normalized_records:
        game_types[norm['game_type']] += 1
        difficulties.add(norm['difficulty'])
        age_groups.add(norm['age_group'])
        locations.add((norm['city'], norm['area']))

    print(f"\nGame Types: {dict(game_types)}")
    print(f"Difficulties: {sorted(difficulties)}")
    print(f"Age Groups: {sorted(age_groups)}")
    print(f"Unique Locations: {len(locations)}")
    for city, area in sorted(locations):
        print(f" - {city}: {area}")


def _print_first_task(task):
    """Print a short preview of a single task entry."""
    # A task without a description yields None from .get(); fall back to an
    # empty string so the slice below cannot raise TypeError.
    description = str(task.get('description') or '')
    print("\nFirst Task Example:")
    print(f" Task ID: {task.get('task_id')}")
    print(f" Description: {description[:DESCRIPTION_PREVIEW_CHARS]}...")
    print(f" Points: {task.get('points')} | Time: {task.get('time_limit_minutes')} min")


def _print_samples(normalized_records):
    """Print the first normalized record encountered for each game type."""
    _print_header("SAMPLE NORMALIZED RECORDS (one per game type)")

    printed = set()
    for norm in normalized_records:
        gt = norm['game_type']
        if gt in printed:
            continue
        printed.add(gt)

        print(f"\n--- {gt.upper()} (ID: {norm['id']}) ---")
        print(f"City: {norm['city']}")
        print(f"Area: {norm['area']}")
        print(f"Duration: {norm['duration_minutes']} min | Players: {norm['num_players']}")
        print(f"Difficulty: {norm['difficulty']} | Age Group: {norm['age_group']}")
        print(f"Tasks: {norm['num_tasks']} | Rules: {norm['num_rules']} | Hints: {norm['num_hints']}")
        print(f"Location Type: {norm['location_type']}")
        print(f"Safety: {norm['is_safe']} | Quality Score: {norm['quality_score']}")
        print(f"Notes: {norm['notes']}")

        # Show the first task as an example, when the record has any.
        if norm['tasks']:
            _print_first_task(norm['tasks'][0])


def _validate_schema(normalized_records):
    """Report every missing required field; return True when none are missing."""
    _print_header("SCHEMA VALIDATION")

    all_valid = True
    for norm in normalized_records:
        for field in REQUIRED_FIELDS:
            if field not in norm:
                print(f"{CROSS_MARK} Record {norm.get('id')} missing field: {field}")
                all_valid = False

    if all_valid:
        print(f"{CHECK_MARK} All records have required fields")
    return all_valid


def main():
    """Load, normalize, summarize and schema-check the games dataset.

    Prints a human-readable report to stdout. Exits with status 1 when the
    dataset file cannot be found; records that fail normalization are
    reported and skipped.
    """
    raw_records = _load_records(DATASET_PATH)
    normalized_records = _normalize_records(raw_records)

    _print_summary(normalized_records)
    _print_samples(normalized_records)
    _validate_schema(normalized_records)

    _print_header("INSPECTION COMPLETE")


if __name__ == "__main__":
    main()