"""
models/inpainter_sd.py
-----------------------
Stable Diffusion inpainting via diffusers.

Produces high-quality, context-aware fills for complex backgrounds.
Requires a CUDA GPU with 8 GB VRAM for reasonable speed.
CPU inference is supported but slow (~5 min per image).
"""

import gc
import os
import sys
from typing import Optional

import numpy as np
import torch
from PIL import Image

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import (
    DEVICE,
    DTYPE,
    SD_INPAINT_MODEL,
    SD_INPAINT_STEPS,
    SD_INPAINT_GUIDANCE,
    SD_INPAINT_PROMPT,
)

# Target length (in pixels) of the image's short side during diffusion.
# Both processing dimensions are then rounded up to a multiple of 64.
_SD_SIZE = 512


def _fit_to_multiple(value: int, multiple: int = 64) -> int:
    """Round ``value`` up to the nearest multiple of ``multiple``."""
    return ((value + multiple - 1) // multiple) * multiple


class SDInpainter:
    """
    Stable Diffusion inpainting wrapper using diffusers.

    The pipeline is loaded lazily on first use.
    """

    def __init__(self) -> None:
        # Populated by _load() on the first inpaint() call.
        self._pipe = None

    def _load(self) -> None:
        """
        Load and configure the inpainting pipeline if not already loaded.

        Raises:
            RuntimeError: if an ImportError occurs while importing or
                setting up diffusers (the original error is chained).
        """
        if self._pipe is not None:
            return

        print(" Loading Stable Diffusion inpainting pipeline ...")
        try:
            from diffusers import (
                AutoPipelineForInpainting,
                StableDiffusionInpaintPipeline,
            )

            # SD 2.x uses a different pipeline; try auto-detect first
            try:
                pipe = AutoPipelineForInpainting.from_pretrained(
                    SD_INPAINT_MODEL,
                    torch_dtype=DTYPE,
                )
            except Exception as auto_err:
                # Deliberate best-effort fallback, but say why it happened.
                print(
                    " AutoPipelineForInpainting failed "
                    f"({auto_err}); falling back to StableDiffusionInpaintPipeline ..."
                )
                pipe = StableDiffusionInpaintPipeline.from_pretrained(
                    SD_INPAINT_MODEL,
                    torch_dtype=DTYPE,
                    safety_checker=None,
                )

            # Enable optimisations.
            # Model CPU offload manages device placement itself and needs a
            # CUDA device, so only use it for CUDA targets and do not move
            # the pipeline to the GPU beforehand. Other targets (e.g. CPU)
            # get a plain .to().
            target = torch.device(DEVICE)
            if target.type == "cuda":
                if target.index is None:
                    pipe.enable_model_cpu_offload()
                else:
                    pipe.enable_model_cpu_offload(gpu_id=target.index)
            else:
                pipe = pipe.to(DEVICE)
            pipe.enable_attention_slicing(1)
        except ImportError as e:
            raise RuntimeError(
                "diffusers is not installed.\n"
                "Run: pip install diffusers accelerate\n"
                f"Original error: {e}"
            ) from e

        # Publish only a fully configured pipeline so that a failed load is
        # retried on the next call instead of reusing a half-set-up object.
        self._pipe = pipe

    def inpaint(
        self,
        image_pil: Image.Image,
        mask: np.ndarray,
        prompt: str = SD_INPAINT_PROMPT,
        num_steps: int = SD_INPAINT_STEPS,
        guidance: float = SD_INPAINT_GUIDANCE,
    ) -> Image.Image:
        """
        Fill in the masked region using Stable Diffusion inpainting.

        Args:
            image_pil: PIL image; converted to RGB if it is in another mode.
            mask:      2-D mask with the same height/width as the image.
                       Any non-zero pixel is inpainted (e.g. uint8, 255).
            prompt:    Text prompt guiding the fill. An empty prompt falls
                       back to "background, clean, seamless".
            num_steps: Diffusion steps.
            guidance:  Classifier-free guidance scale.

        Returns:
            Inpainted RGB PIL image at the original resolution. Pixels
            outside the (slightly feathered) mask are taken from the input.

        Raises:
            ValueError: if ``mask`` is not 2-D or does not match the image size.
        """
        # Imported up front so a missing OpenCV fails before the slow
        # diffusion run rather than after it.
        import cv2

        if image_pil.mode != "RGB":
            image_pil = image_pil.convert("RGB")

        original_size = image_pil.size  # (W, H)
        orig_w, orig_h = original_size

        mask = np.asarray(mask)
        if mask.ndim != 2 or mask.shape != (orig_h, orig_w):
            raise ValueError(
                f"mask shape {mask.shape} does not match image size "
                f"(H={orig_h}, W={orig_w})"
            )

        # Binarise once so the pipeline and the final blend agree on
        # exactly which pixels are masked.
        hard_mask = (mask > 0).astype(np.uint8)

        # Release unused memory before the pipeline allocates its buffers.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        self._load()

        # Work at _SD_SIZE on the short side, with both dimensions rounded
        # up to a multiple of 64.
        scale = _SD_SIZE / min(orig_w, orig_h)
        proc_w = _fit_to_multiple(int(orig_w * scale))
        proc_h = _fit_to_multiple(int(orig_h * scale))

        image_resized = image_pil.resize((proc_w, proc_h), Image.LANCZOS)
        mask_pil = (
            Image.fromarray(hard_mask * 255)
            .convert("L")
            .resize((proc_w, proc_h), Image.NEAREST)
        )

        print(f" SD inpainting at {proc_w}x{proc_h} ({num_steps} steps) ...")
        out = self._pipe(
            prompt=prompt or "background, clean, seamless",
            image=image_resized,
            mask_image=mask_pil,
            num_inference_steps=num_steps,
            guidance_scale=guidance,
            height=proc_h,
            width=proc_w,
        ).images[0]

        # Restore to original resolution
        result = out.resize(original_size, Image.LANCZOS)

        result_np = np.array(result)
        orig_np = np.array(image_pil)

        # Hard mask for the interior, feathered only near the boundary:
        # the eroded mask pins alpha to 1 inside the region, while the
        # blurred mask supplies a soft ramp across the edge.
        feather_size = 3
        kernel = feather_size * 2 + 1
        feathered = cv2.GaussianBlur(
            hard_mask.astype(np.float32), (kernel, kernel), 0
        )
        interior = cv2.erode(hard_mask, np.ones((5, 5), np.uint8)).astype(np.float32)
        alpha = np.clip(feathered + interior, 0, 1)
        alpha_3 = alpha[..., np.newaxis]  # broadcast over the colour channels

        blended = (result_np * alpha_3 + orig_np * (1 - alpha_3)).astype(np.uint8)
        return Image.fromarray(blended)