"""Shot grammar for the 4-koma stance comics: named shots -> deterministic camera,
computed purely from the skeleton frame (bbox + hip heading). The LLM never sees any
of this; it only ever picks captions. Cameras are plain pinhole (eye/target/fov/roll)
so the same parameters port 1:1 to the three.js viewer later."""
import numpy as np

# Parent joint index for each of the 22 skeleton joints; -1 marks the root.
# NOTE(review): the layout looks like the common SMPL-style 22-joint body
# (three chains leaving the pelvis, three leaving joint 9) — confirm against
# whatever produces the frames.
PARENTS = [
    -1,        # 0  (root, see PEL)
    0, 0, 0,   # 1-3
    1, 2, 3,   # 4-6
    4, 5, 6,   # 7-9
    7, 8, 9,   # 10-12
    9, 9,      # 13-14
    12,        # 15 (see HEAD)
    13, 14,    # 16-17
    16, 17,    # 18-19
    18, 19,    # 20-21
]

# Named joint indices (pelvis, left hip, right hip, head).
PEL = 0
LHIP = 1
RHIP = 2
HEAD = 15

# Joint indices grouped by body side (left / right), going by the names.
LEFTJ = {1, 4, 7, 10, 13, 16, 18, 20}
RIGHTJ = {2, 5, 8, 11, 14, 17, 19, 21}

# Named shot presets. All angles are in degrees and are converted to radians
# by make_camera.
#   az     horizontal camera angle relative to the character's facing
#          direction (0 = head-on).
#   el     camera elevation (negative = camera low, looking up).
#   margin how much air to leave around the joint bbox (scales the camera
#          distance).
#   roll   dutch tilt.
#   fov    field of view.
#   aim_up lifts the look-at point above the bbox centre, as a fraction of the
#          pose radius, so the figure sits LOW in the frame — the upper strip
#          belongs to the speech bubble.
SHOTS = {
    "establishing": {
        "az": 38.0, "el": 10.0, "margin": 1.06,
        "roll": 0.0, "fov": 40.0, "aim_up": 0.18,
    },
    "action": {
        "az": 55.0, "el": 4.0, "margin": 1.08,
        "roll": -4.0, "fov": 46.0, "aim_up": 0.18,
    },
    "gag": {
        "az": 22.0, "el": 16.0, "margin": 0.98,
        "roll": 13.0, "fov": 50.0, "aim_up": 0.10,
    },
    "hero": {
        "az": 32.0, "el": -16.0, "margin": 1.08,
        "roll": 0.0, "fov": 38.0, "aim_up": 0.16,
    },
}


def heading(Pf):
    """Return the character's facing angle about Y, in radians (0 = facing +Z).

    The angle is read off the hip line (right hip minus left hip), using only
    its X and Z components. The original note says this follows the same
    convention as _build_stances (not visible in this file).
    """
    hip_line = Pf[RHIP] - Pf[LHIP]
    across_x, across_z = hip_line[0], hip_line[2]
    # With the hip line pointing along -X the result is 0, i.e. facing +Z.
    return float(np.arctan2(across_z, -across_x))


def make_camera(shot_name, Pf):
    """Build the camera dict for one skeleton frame and a named shot.

    shot_name selects an entry of SHOTS (an unknown name raises KeyError).
    Pf is the frame's joint positions, an array of shape [22, 3].

    The camera looks at the joint bounding box from a direction given by the
    character's heading plus the shot's azimuth/elevation, at a distance scaled
    by the shot's margin. Returns dict(eye, target, fov, roll) with fov and
    roll in radians.
    """
    shot = SHOTS[shot_name]

    # Bounding-box centre of the joints and the farthest joint from it.
    bbox_min = Pf.min(axis=0)
    bbox_max = Pf.max(axis=0)
    center = (bbox_min + bbox_max) / 2.0
    reach = float(np.linalg.norm(Pf - center, axis=1).max())

    # Stand-off distance from the pose radius, the shot margin and the fov.
    fov_rad = np.radians(shot["fov"])
    standoff = shot["margin"] * reach / np.tan(fov_rad / 2.0)

    # Unit vector from the bbox centre towards the camera.
    yaw = heading(Pf) + np.radians(shot["az"])
    pitch = np.radians(shot["el"])
    cos_pitch = np.cos(pitch)
    toward_cam = np.array([
        np.sin(yaw) * cos_pitch,
        np.sin(pitch),
        np.cos(yaw) * cos_pitch,
    ])
    eye = center + toward_cam * standoff

    # Don't let the camera dip below the floor (hero shots on grounded poses).
    if eye[1] < 0.12:
        eye[1] = 0.12

    # Aim slightly above the bbox centre so the figure sits low in the frame.
    look_at = center.copy()
    look_at[1] += shot.get("aim_up", 0.0) * reach

    return dict(eye=eye, target=look_at, fov=fov_rad, roll=np.radians(shot["roll"]))


def project(points, cam, W, H):
    """Pinhole-project [N,3] world points -> ([N,2] pixel coords, [N] view depth).

    cam is a dict with "eye" and "target" (3-vectors; arrays or plain
    sequences), "fov" (radians, applied to the image height H) and an optional
    "roll" (radians, default 0). Pixel x grows to the right and pixel y grows
    downward, with the look-at point at the image centre (W/2, H/2).

    Depth is measured along the view axis and clamped to 1e-4, so points at or
    behind the camera plane do not divide by zero or flip sign.

    Degenerate cameras stay finite instead of collapsing the image:
    - eye == target: the view direction falls back to -Z.
    - view direction parallel to world Y (looking straight up or down): world
      Z replaces Y as the reference for building the right/up basis.
    """
    pts = np.asarray(points, np.float64).reshape(-1, 3)
    # Coerce so cameras that went through plain lists (e.g. JSON) still work.
    eye = np.asarray(cam["eye"], np.float64)
    target = np.asarray(cam["target"], np.float64)

    fwd = target - eye
    fwd_len = np.linalg.norm(fwd)
    if fwd_len < 1e-9:
        # No view direction to speak of; pick a fixed one rather than emit NaNs.
        fwd = np.array([0.0, 0.0, -1.0])
    else:
        fwd = fwd / fwd_len

    right = np.cross(fwd, np.array([0.0, 1.0, 0.0]))
    right_len = np.linalg.norm(right)
    if right_len < 1e-6:
        # Looking along world Y: cross(fwd, Y) vanishes, so use Z as reference.
        right = np.cross(fwd, np.array([0.0, 0.0, 1.0]))
        right_len = np.linalg.norm(right)
    right = right / right_len
    up = np.cross(right, fwd)

    r = cam.get("roll", 0.0)
    if abs(r) > 1e-6:  # dutch tilt: rotate the basis about the view axis
        c, s = np.cos(r), np.sin(r)
        right, up = c * right + s * up, -s * right + c * up

    rel = pts - eye
    x = rel @ right
    y = rel @ up
    z = np.maximum(rel @ fwd, 1e-4)

    # Focal length in pixels, derived from the fov across the image height.
    f = (H / 2.0) / np.tan(cam["fov"] / 2.0)
    px = W / 2.0 + f * x / z
    py = H / 2.0 - f * y / z
    return np.stack([px, py], axis=1), z


def ground_grid(center, span=2.6, step=0.65):
    """Return the 3D line segments of a floor grid (y=0) around the character.

    The grid is a square of half-width span centred on the X/Z of center, with
    lines every step units starting from the -span edge. Each segment is a pair
    of (x, y, z) tuples; for every offset the line running along X comes first,
    then the line running along Z. Used to give the scene some depth.
    """
    mid_x = float(center[0])
    mid_z = float(center[2])
    x_lo, x_hi = mid_x - span, mid_x + span
    z_lo, z_hi = mid_z - span, mid_z + span

    lines = []
    # The small epsilon keeps the far edge when step divides span exactly.
    for offset in np.arange(-span, span + 1e-6, step):
        lines += [
            ((x_lo, 0.0, mid_z + offset), (x_hi, 0.0, mid_z + offset)),
            ((mid_x + offset, 0.0, z_lo), (mid_x + offset, 0.0, z_hi)),
        ]
    return lines