import torch
from diffusers import FluxImg2ImgPipeline
from PIL import Image
import sys
import spaces

# Tested with FLUX.1-schnell

@spaces.GPU
def process_image(
    image,
    mask_image,
    prompt="a person",
    model_id="black-forest-labs/FLUX.1-schnell",
    strength=0.75,
    seed=0,
    num_inference_steps=4,
):
    """Run a FLUX image-to-image pass over ``image`` and return the result.

    The pipeline is loaded from ``model_id`` in bfloat16 and moved to CUDA on
    every call, so a CUDA device is required.

    Args:
        image: Input PIL image, or ``None``. Images that are not already in
            RGB mode are converted to RGB before being passed to the pipeline.
        mask_image: Accepted for interface compatibility only. It is not read
            anywhere in this function; mask support is not implemented yet.
        prompt: Text prompt forwarded to the pipeline.
        model_id: Model identifier handed to
            ``FluxImg2ImgPipeline.from_pretrained``.
        strength: Forwarded unchanged as the pipeline's ``strength`` argument.
        seed: Seed for the CUDA random generator. Coerced with ``int()`` so
            whole numbers delivered as floats (e.g. ``0.0``) are accepted.
        num_inference_steps: Forwarded as the pipeline's step count. Coerced
            with ``int()`` for the same reason as ``seed``.

    Returns:
        The first image in the pipeline output, or ``None`` when ``image`` is
        ``None``.
    """
    print("Starting process_image")
    if image is None:
        print("Empty input image returned.")
        return None

    # Ensure the image is in RGB mode (this handles formats like WebP and JFIF)
    if image.mode != "RGB":
        image = image.convert("RGB")

    # torch.Generator.manual_seed only accepts an int; callers such as UI
    # number widgets may hand over floats, so normalise before use.
    seed = int(seed)
    num_inference_steps = int(num_inference_steps)

    # If the model is gated, add token="YOUR_TOKEN" in from_pretrained below
    # (older diffusers releases named this argument use_auth_token).
    pipe = FluxImg2ImgPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
    ).to("cuda")

    generator = torch.Generator("cuda").manual_seed(seed)
    print(prompt)
    output = pipe(
        prompt=prompt,
        image=image,
        generator=generator,
        strength=strength,
        guidance_scale=0,
        num_inference_steps=num_inference_steps,
        max_sequence_length=256,
    )

    # TODO: Add mask support if needed (mask_image is currently ignored).
    return output.images[0]

if __name__ == "__main__":
    # Usage: python flux1_img2img.py input-image input-mask output
    # Fail with a readable usage message instead of an IndexError traceback
    # when arguments are missing. Extra arguments are ignored, as before.
    if len(sys.argv) < 4:
        sys.exit(f"Usage: python {sys.argv[0]} input-image input-mask output")

    input_path, mask_path, output_path = sys.argv[1:4]

    # Keep both files open only for the duration of the processing call so the
    # handles are released even if process_image raises.
    with Image.open(input_path) as image, Image.open(mask_path) as mask:
        output = process_image(image, mask)

    # process_image returns None when it is given no input image; guard so the
    # failure is reported clearly rather than as an AttributeError on .save.
    if output is None:
        sys.exit("No output image was produced.")
    output.save(output_path)