"""Evaluate outfit composition diversity across anchors.

For each pairable anchor bucket, sample N anchors, compose an outfit, and
report:

    distinct_ratio  = unique complement urls / total complement slots
                      (1.0 = every anchor gets a unique complement; lower = repeats)
    mean_anchor_sim = mean FashionSigLIP cos sim between anchor and chosen
                      complements (higher = anchor-conditioning is firing)

Acceptance gate: ``distinct_ratio >= 0.18`` overall. The Step-6 multi-axis
scorer + Step-7 type-aware projection scaffold are designed to keep this
above the floor; if a future tweak drops it, this script exits non-zero so
it shows up in CI / commit hooks.

Usage::

    uv run --no-project --with numpy scripts/eval_outfit_diversity.py [N]

``N`` is the number of anchors sampled per bucket, given as a bare integer
(default: 20).
"""

# /// script
# requires-python = ">=3.11"
# dependencies = ["numpy", "scikit-image>=0.22,<0.25"]
# ///

import random
import sqlite3
import sys
from collections import Counter, defaultdict
from pathlib import Path

import numpy as np

sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

from taste.outfits import PAIRING_RULES, compose_outfit  # noqa: E402
from taste.store import _blob_to_emb  # noqa: E402

# SQLite database read by this script, located under the user's home directory.
DB_PATH = Path.home().joinpath(".taste", "taste.db")

# Anchor categories to sample; buckets are processed and reported in this order.
SAMPLE_BUCKETS = [
    "dress",
    "top",
    "bottom",
    "outerwear",
]

# AC2 — diversity gate: the overall distinct_ratio must not fall below this.
DISTINCT_FLOOR = 0.18


def _complement_embedding(conn: sqlite3.Connection, url: str):
    """Return the decoded taste embedding stored for *url*, or ``None``.

    ``None`` is returned when the product row is missing or its
    ``taste_embedding`` column is NULL/empty, so callers can skip the
    similarity computation instead of crashing on a missing row.
    Expects ``conn.row_factory`` to be ``sqlite3.Row`` (set by ``main``).
    """
    row = conn.execute(
        "SELECT taste_embedding FROM products WHERE url = ?",
        (url,),
    ).fetchone()
    if row is None or not row["taste_embedding"]:
        return None
    return _blob_to_emb(row["taste_embedding"])


def _evaluate(conn: sqlite3.Connection, n_per_bucket: int, rng: random.Random) -> int:
    """Compose outfits for sampled anchors, print the report, return the exit code.

    Returns 0 when the overall distinct ratio meets ``DISTINCT_FLOOR`` and 1
    when it falls below the floor or no outfit could be composed at all.
    """
    print(f"Sampling {n_per_bucket} anchors per bucket from {DB_PATH}\n")

    overall_complements: list[str] = []

    for bucket in SAMPLE_BUCKETS:
        # Only buckets that have at least one pairing rule can anchor an outfit.
        if not PAIRING_RULES.get(bucket):
            continue
        # Candidate anchors: the 200 best-scored in-stock products with an embedding.
        rows = conn.execute(
            """
            SELECT url, taste_embedding FROM products
            WHERE canonical_category = ? AND in_stock = 1 AND score IS NOT NULL
              AND taste_embedding IS NOT NULL
            ORDER BY score DESC
            LIMIT 200
            """,
            (bucket,),
        ).fetchall()
        if not rows:
            continue
        anchors = rng.sample(rows, min(n_per_bucket, len(rows)))

        per_slot: dict[str, list[str]] = defaultdict(list)
        sims: list[float] = []
        slots = 0

        for anchor in anchors:
            anchor_emb = _blob_to_emb(anchor["taste_embedding"])
            outfit = compose_outfit(anchor["url"], conn)
            if not outfit:
                continue
            for comp in outfit["complements"]:
                comp_url = comp["product"]["url"]
                slots += 1
                per_slot[comp["bucket"]].append(comp_url)
                overall_complements.append(comp_url)
                cand_emb = _complement_embedding(conn, comp_url)
                if anchor_emb is not None and cand_emb is not None:
                    # Dot product of the two embeddings; this is a cosine
                    # similarity only if the stored vectors are unit-normalised
                    # (presumably done upstream — TODO confirm).
                    sims.append(float(np.dot(anchor_emb, cand_emb)))

        print(f"=== anchor bucket: {bucket} ({len(anchors)} anchors, {slots} slots) ===")
        for slot_bucket, urls in per_slot.items():
            # Lists in per_slot are created on first append, so never empty.
            uniq = len(set(urls))
            _, top_count = Counter(urls).most_common(1)[0]
            ratio = uniq / len(urls)
            print(
                f"  {slot_bucket:10s}  unique={uniq:3d}/{len(urls):3d} "
                f"({ratio:.2f})  most-frequent×{top_count}"
            )
        if sims:
            print(f"  mean anchor↔complement cos_sim: {np.mean(sims):.3f}\n")

    overall_slots = len(overall_complements)
    if not overall_slots:
        print("No outfits composed — nothing to evaluate.")
        return 1

    distinct = len(set(overall_complements))
    ratio = distinct / overall_slots
    print(f"\nOVERALL distinct_ratio = {distinct}/{overall_slots} = {ratio:.3f}")
    print(f"Floor: {DISTINCT_FLOOR:.2f}")
    if ratio < DISTINCT_FLOOR:
        print(f"FAIL — diversity below floor by {DISTINCT_FLOOR - ratio:.3f}")
        return 1
    print(f"PASS — diversity ≥ {DISTINCT_FLOOR:.2f}")
    return 0


def main() -> int:
    """Run the outfit-diversity evaluation and return a process exit code.

    Reads the optional first command-line argument as the number of anchors
    to sample per bucket (default 20), using a fixed RNG seed so repeated
    runs sample the same anchors from the same data.

    Returns:
        0 if the overall distinct ratio is at least ``DISTINCT_FLOOR``;
        1 if it is below the floor, no outfit was composed, or the database
        file does not exist.

    Raises:
        ValueError: if the command-line argument is not an integer.
    """
    n_per_bucket = int(sys.argv[1]) if len(sys.argv) > 1 else 20
    rng = random.Random(42)

    # sqlite3.connect() would silently create an empty database file here,
    # and the run would then fail later with a confusing "no such table".
    if not DB_PATH.exists():
        print(f"Database not found at {DB_PATH} — nothing to evaluate.")
        return 1

    conn = sqlite3.connect(str(DB_PATH))
    conn.row_factory = sqlite3.Row
    try:
        return _evaluate(conn, n_per_bucket, rng)
    finally:
        # Always release the database handle, even if evaluation raises.
        conn.close()


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())