"""Evaluate outfit composition diversity across anchors.

For each pairable anchor bucket, sample N anchors, compose an outfit, and
report:

  distinct_ratio  = unique complement urls / total complement slots
                    (1.0 = every anchor gets a unique complement;
                    lower = repeats)
  mean_anchor_sim = mean FashionSigLIP cos sim between anchor and chosen
                    complements (higher = anchor-conditioning is firing)

Acceptance gate: ``distinct_ratio >= 0.18`` overall. The Step-6 multi-axis
scorer + Step-7 type-aware projection scaffold are designed to keep this
above the floor; if a future tweak drops it, this script exits non-zero so
it shows up in CI / commit hooks.

Usage::

    uv run --no-project --with numpy scripts/eval_outfit_diversity.py [N=20]
"""
# /// script
# requires-python = ">=3.11"
# dependencies = ["numpy", "scikit-image>=0.22,<0.25"]
# ///

import random
import sqlite3
import sys
from collections import Counter, defaultdict
from contextlib import closing
from pathlib import Path

import numpy as np

# Make the repository root importable so ``taste`` resolves when this file is
# run directly as a script (parents[1] is the directory above this file's own).
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

from taste.outfits import PAIRING_RULES, compose_outfit  # noqa: E402
from taste.store import _blob_to_emb  # noqa: E402

DB_PATH = Path.home() / ".taste/taste.db"
SAMPLE_BUCKETS = ["dress", "top", "bottom", "outerwear"]
DISTINCT_FLOOR = 0.18  # AC2 — diversity gate


def _complement_embedding(conn: sqlite3.Connection, url: str):
    """Return the decoded taste embedding stored for *url*, or ``None``.

    ``None`` is returned when no ``products`` row exists for *url*; the
    original code indexed the result of ``fetchone()`` unconditionally and
    raised ``TypeError`` in that case. A NULL blob is passed to
    ``_blob_to_emb`` as ``b""``, exactly as before.
    NOTE(review): what ``_blob_to_emb`` returns for ``b""`` is not visible
    here — the caller treats ``None`` as "no embedding"; confirm.
    """
    row = conn.execute(
        "SELECT taste_embedding FROM products WHERE url = ?",
        (url,),
    ).fetchone()
    if row is None:
        return None
    return _blob_to_emb(row["taste_embedding"] or b"")


def _evaluate(conn: sqlite3.Connection, rng: random.Random, n_per_bucket: int) -> int:
    """Sample anchors per bucket, print diversity stats, return the exit code.

    Args:
        conn: Open connection whose ``row_factory`` is ``sqlite3.Row``.
        rng: Random source used to pick anchors from the candidate rows.
        n_per_bucket: Maximum number of anchors sampled per bucket.

    Returns:
        ``0`` when the overall distinct ratio meets ``DISTINCT_FLOOR``;
        ``1`` when it is below the floor or no complement slot was produced.
    """
    print(f"Sampling {n_per_bucket} anchors per bucket from {DB_PATH}\n")

    overall_complements: list[str] = []
    overall_slots = 0

    for bucket in SAMPLE_BUCKETS:
        # Buckets with no (or empty) pairing rules are skipped.
        if bucket not in PAIRING_RULES or not PAIRING_RULES[bucket]:
            continue

        rows = conn.execute(
            """
            SELECT url, taste_embedding FROM products
            WHERE canonical_category = ?
              AND in_stock = 1
              AND score IS NOT NULL
              AND taste_embedding IS NOT NULL
            ORDER BY score DESC
            LIMIT 200
            """,
            (bucket,),
        ).fetchall()
        if not rows:
            continue

        anchors = rng.sample(rows, min(n_per_bucket, len(rows)))
        per_slot: dict[str, list[str]] = defaultdict(list)
        sims: list[float] = []
        slots = 0

        for anchor in anchors:
            anchor_emb = _blob_to_emb(anchor["taste_embedding"])
            outfit = compose_outfit(anchor["url"], conn)
            if not outfit:
                continue
            for comp in outfit["complements"]:
                comp_url = comp["product"]["url"]
                slots += 1
                per_slot[comp["bucket"]].append(comp_url)
                overall_complements.append(comp_url)
                overall_slots += 1

                cand_emb = _complement_embedding(conn, comp_url)
                if anchor_emb is not None and cand_emb is not None:
                    # Plain dot product; the module docstring reports it as
                    # a cosine similarity.
                    # NOTE(review): that presumes unit-length embeddings —
                    # confirm in taste.store.
                    sims.append(float(np.dot(anchor_emb, cand_emb)))

        print(f"=== anchor bucket: {bucket} ({len(anchors)} anchors, {slots} slots) ===")
        for slot_bucket, urls in per_slot.items():
            # Lists in per_slot are created by an append, so never empty.
            uniq = len(set(urls))
            top = Counter(urls).most_common(1)[0]
            ratio = uniq / len(urls)
            print(
                f" {slot_bucket:10s} unique={uniq:3d}/{len(urls):3d} "
                f"({ratio:.2f}) most-frequent×{top[1]}"
            )
        if sims:
            print(f" mean anchor↔complement cos_sim: {np.mean(sims):.3f}\n")

    if not overall_slots:
        print("No outfits composed — nothing to evaluate.")
        return 1

    distinct = len(set(overall_complements))
    ratio = distinct / overall_slots
    print(f"\nOVERALL distinct_ratio = {distinct}/{overall_slots} = {ratio:.3f}")
    print(f"Floor: {DISTINCT_FLOOR:.2f}")
    if ratio < DISTINCT_FLOOR:
        print(f"FAIL — diversity below floor by {DISTINCT_FLOOR - ratio:.3f}")
        return 1
    print(f"PASS — diversity ≥ {DISTINCT_FLOOR:.2f}")
    return 0


def main() -> int:
    """Run the diversity evaluation and return a process exit code.

    The optional first command-line argument is the number of anchors to
    sample per bucket (default 20). Sampling uses a fixed seed (42).

    Returns:
        ``0`` on PASS, ``1`` on FAIL or when nothing could be evaluated.
    """
    n_per_bucket = int(sys.argv[1]) if len(sys.argv) > 1 else 20
    rng = random.Random(42)
    # ``with conn:`` on a sqlite3 connection only manages the transaction;
    # ``closing`` is what actually releases the handle on every exit path.
    with closing(sqlite3.connect(str(DB_PATH))) as conn:
        conn.row_factory = sqlite3.Row
        return _evaluate(conn, rng, n_per_bucket)


if __name__ == "__main__":
    sys.exit(main())