import gradio as gr
import numpy as np
import onnxruntime as ort
from PIL import Image
from torchvision import transforms
from torchvision.transforms import functional as F


# Path of the ONNX model file handed to the onnxruntime inference session.
MODEL_PATH = "oink0.onnx"
# NSFW probabilities strictly above this value are reported as "NSFW (block)".
# predict() also applies a fixed 0.20 lower cut-off for "Borderline (review)",
# so setting this at or below 0.20 leaves that branch unreachable.
NSFW_THRESHOLD = 0.30  # adjust if needed


class Letterbox224:
    """Resize a PIL image to fit a square canvas, then pad it to that size.

    The aspect ratio is preserved: the longer side is scaled to ``size``
    pixels and the remaining space is split between the two opposite edges
    (the extra pixel, if any, goes to the right/bottom) and filled with
    ``fill``.

    Args:
        size: Side length of the square output in pixels. Defaults to 224.
        fill: Pixel value used for the padding. Defaults to 0.
    """

    def __init__(self, size: int = 224, fill: int = 0) -> None:
        self.size = size
        self.fill = fill

    def _scaled_size(self, w: int, h: int) -> tuple:
        """Return the ``(width, height)`` the image is resized to before padding.

        Raises:
            ValueError: If either input dimension is not positive.
        """
        if w <= 0 or h <= 0:
            raise ValueError(f"cannot letterbox an image of size {w}x{h}")
        longest = max(w, h)
        # Integer floor division keeps the longer side at exactly ``size``;
        # scaling through a float ratio can truncate it to ``size - 1``.
        # The clamp keeps very thin images from collapsing to a zero-pixel
        # side, which PIL's resize rejects.
        new_w = max(1, w * self.size // longest)
        new_h = max(1, h * self.size // longest)
        return new_w, new_h

    def __call__(self, img):
        """Letterbox ``img`` (a PIL image) and return the padded PIL image."""
        new_w, new_h = self._scaled_size(*img.size)
        img = img.resize((new_w, new_h))
        pad_w, pad_h = self.size - new_w, self.size - new_h
        left, top = pad_w // 2, pad_h // 2
        # Padding order is (left, top, right, bottom).
        return F.pad(
            img,
            (left, top, pad_w - left, pad_h - top),
            fill=self.fill,
        )

# Preprocessing pipeline applied to each input image: letterbox it with
# Letterbox224, then convert it to a tensor with ToTensor(). No mean/std
# normalisation step is part of this pipeline.
transform = transforms.Compose([
    Letterbox224(),
    transforms.ToTensor(),
])


# ONNX Runtime inference session for MODEL_PATH, created once when this module
# is executed and reused by predict(). Only the CPU execution provider is
# requested.
session = ort.InferenceSession(
    MODEL_PATH,
    providers=["CPUExecutionProvider"]
)


def _softmax(logits):
    """Return the softmax of *logits* taken along axis 1."""
    # Subtracting the per-row maximum does not change the result but keeps
    # exp() from overflowing to inf, which would turn the ratio into NaN.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=1, keepdims=True)


def predict(image: "Image.Image"):
    """Classify an image and return a moderation decision.

    Args:
        image: PIL image to classify, or ``None`` when no image is present.

    Returns:
        A ``(decision, nsfw_prob)`` tuple. ``decision`` is one of
        ``"NSFW (block)"``, ``"Borderline (review)"``, ``"Safe"`` or
        ``"No image"``; ``nsfw_prob`` is the softmax probability read from
        column 1 of the model output (``0.0`` when there is no image).
    """
    if image is None:
        return "No image", 0.0

    img = image.convert("RGB")
    # Add a leading batch dimension before handing the array to onnxruntime.
    x = transform(img).unsqueeze(0).numpy()

    # The model input is addressed by the name "input"; the first output is
    # treated as the class logits.
    logits = session.run(None, {"input": x})[0]
    probs = _softmax(logits)

    # Column 1 is read as the NSFW class — TODO confirm against the model's
    # label order.
    nsfw_prob = float(probs[0, 1])

    # Lower cut-off for sending an image to manual review.
    borderline_threshold = 0.20

    if nsfw_prob > NSFW_THRESHOLD:
        decision = "NSFW (block)"
    elif nsfw_prob > borderline_threshold:
        decision = "Borderline (review)"
    else:
        decision = "Safe"

    return decision, nsfw_prob


# Gradio UI: an image input next to a result label, with the raw NSFW
# probability shown underneath.
with gr.Blocks() as demo:
    gr.Markdown("# Oink0 Demo (ONNX)")
    gr.Markdown(
        "Upload a Roblox avatar image. Model outputs an NSFW probability "
        "and a moderation decision."
    )

    with gr.Row():
        image_input = gr.Image(label="Avatar Image", type="pil")
        label_output = gr.Label(num_top_classes=1, label="Result")

    prob_output = gr.Number(precision=4, label="NSFW probability")

    # Run predict() whenever the image component changes; its two return
    # values fill the label and the number, in that order.
    image_input.change(
        predict,
        inputs=image_input,
        outputs=[label_output, prob_output],
    )

# Start the Gradio server for the demo.
demo.launch()