from pathlib import Path

import gamemaster_copilot.catalog as catalog
from gamemaster_copilot.catalog import ScrapedDocument, build_catalog_index, get_catalog


def test_catalog_entries_have_required_legal_metadata() -> None:
    """Check that each catalog entry exposes non-empty legal metadata.

    The catalog itself must be truthy, and every entry needs a truthy
    ``id``, ``license`` and ``attribution``, a ``permission`` drawn from the
    set accepted by this test, and a ``source_url`` starting with
    ``https://``.
    """
    allowed_permissions = {"permissive", "sharealike_open", "noncommercial_open"}
    catalog_entries = get_catalog()

    # Guard against an empty catalog, which would let the loop pass vacuously.
    assert catalog_entries

    for item in catalog_entries:
        assert item.id
        assert item.license
        assert item.permission in allowed_permissions
        assert item.attribution
        assert item.source_url.startswith("https://")


def test_build_catalog_index_with_mocked_scraper(tmp_path: Path, monkeypatch) -> None:
    """Build a catalog index with the scraper stubbed out and check the manifest.

    ``catalog.scrape_catalog_entry`` is replaced by a stub that yields one
    synthetic document per entry, so the assertions below depend only on
    that canned document rather than on real scraped content.
    """
    source_id = "wikipedia_game_design"
    index_dir = tmp_path / "index"

    def stub_scrape(entry, *, max_docs=30, user_agent=catalog.DEFAULT_USER_AGENT, timeout_seconds=20):
        # Keyword parameters mirror the names used by the original stub so the
        # code under test can pass them by keyword; their values are ignored.
        document = ScrapedDocument(
            source_id=f"{entry.id}.sample",
            title=f"{entry.label} Sample",
            text="Level design teaches mechanics by sequencing pressure, safety, discovery, and feedback.",
            url=entry.source_url,
            license=entry.license,
            attribution=entry.attribution,
            tags=entry.tags,
        )
        # Second element is an empty list, matching the two-item return shape
        # of the stub being replaced (presumably errors/warnings - confirm
        # against scrape_catalog_entry).
        return [document], []

    monkeypatch.setattr(catalog, "scrape_catalog_entry", stub_scrape)

    build_options = {
        "selected_ids": [source_id],
        "index_dir": index_dir,
        "embedding_backend": "hash",
        "embedding_model": "unused",
        "embedding_dimensions": 384,
        "max_docs_per_source": 2,
        "chunk_words": 12,
        "overlap_words": 2,
    }
    manifest = build_catalog_index(**build_options)

    # One selected source and one stub document per source.
    assert manifest["scraped_document_count"] == 1
    assert manifest["chunk_count"] > 0
    assert manifest["selected_catalog_source_ids"] == [source_id]

    manifest_file = index_dir / "catalog-manifest.json"
    assert manifest_file.exists()