from pathlib import Path

import gamemaster_copilot.catalog as catalog
from gamemaster_copilot.catalog import ScrapedDocument, build_catalog_index, get_catalog


def test_catalog_entries_have_required_legal_metadata() -> None:
    """Check that the catalog is non-empty and each entry has its legal fields.

    Every entry must have a truthy id, license, and attribution, a permission
    drawn from the known set, and a source URL that starts with ``https://``.
    """
    allowed_permissions = {"permissive", "sharealike_open", "noncommercial_open"}

    catalog_entries = get_catalog()
    assert catalog_entries

    for item in catalog_entries:
        assert item.id
        assert item.license
        assert item.permission in allowed_permissions
        assert item.attribution
        assert item.source_url.startswith("https://")


def test_build_catalog_index_with_mocked_scraper(tmp_path: Path, monkeypatch) -> None:
    """Build an index with the scraper stubbed out and inspect the manifest.

    ``catalog.scrape_catalog_entry`` is replaced by a stub that returns one
    document derived from the entry plus an empty second list, so the test
    exercises ``build_catalog_index`` without calling the real scraper.
    """
    source_id = "wikipedia_game_design"
    index_dir = tmp_path / "index"

    def fake_scrape(entry, *, max_docs=30, user_agent=catalog.DEFAULT_USER_AGENT, timeout_seconds=20):
        # Keyword names mirror the ones the original stub accepted so that any
        # keyword arguments passed by build_catalog_index are still accepted.
        document = ScrapedDocument(
            source_id=f"{entry.id}.sample",
            title=f"{entry.label} Sample",
            text="Level design teaches mechanics by sequencing pressure, safety, discovery, and feedback.",
            url=entry.source_url,
            license=entry.license,
            attribution=entry.attribution,
            tags=entry.tags,
        )
        # NOTE(review): the second element is presumably a list of scrape
        # errors/warnings; confirm against scrape_catalog_entry.
        return [document], []

    monkeypatch.setattr(catalog, "scrape_catalog_entry", fake_scrape)

    manifest = build_catalog_index(
        selected_ids=[source_id],
        index_dir=index_dir,
        embedding_backend="hash",
        embedding_model="unused",
        embedding_dimensions=384,
        max_docs_per_source=2,
        chunk_words=12,
        overlap_words=2,
    )

    assert manifest["scraped_document_count"] == 1
    assert manifest["chunk_count"] > 0
    assert manifest["selected_catalog_source_ids"] == [source_id]

    manifest_path = index_dir / "catalog-manifest.json"
    assert manifest_path.exists()