""" Pipeline configuration - edit these settings to control behaviour. """ import os import torch # -- Device -------------------------------------------------------------------- DEVICE = "cuda" if torch.cuda.is_available() else "cpu" DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32 # -- Paths --------------------------------------------------------------------- BASE_DIR = os.path.dirname(os.path.abspath(__file__)) INPUT_SCENE_DIR = os.path.join(BASE_DIR, "input", "scene") INPUT_OBJ_DIR = os.path.join(BASE_DIR, "input", "objects") OUTPUT_DIR = os.path.join(BASE_DIR, "output") CHECKPOINT_DIR = os.path.join(BASE_DIR, "checkpoints") # -- GroundingDINO ------------------------------------------------------------- GDINO_CONFIG = os.path.join( CHECKPOINT_DIR, "GroundingDINO_SwinT_OGC.py" ) GDINO_WEIGHTS = os.path.join( CHECKPOINT_DIR, "groundingdino_swint_ogc.pth" ) GDINO_CONFIG_URL = ( "https://raw.githubusercontent.com/IDEA-Research/GroundingDINO/main/" "groundingdino/config/GroundingDINO_SwinT_OGC.py" ) GDINO_WEIGHTS_URL = ( "https://github.com/IDEA-Research/GroundingDINO/releases/download/" "v0.1.0-alpha/groundingdino_swint_ogc.pth" ) # -- SAM ----------------------------------------------------------------------- SAM_CHECKPOINT = os.path.join(CHECKPOINT_DIR, "sam_vit_h_4b8939.pth") SAM_MODEL_TYPE = "vit_h" SAM_CHECKPOINT_URL = ( "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth" ) # -- CLIP (matching) ----------------------------------------------------------- CLIP_MODEL = "openai/clip-vit-large-patch14" # -- Inpainting ---------------------------------------------------------------- # Options: "lama" | "sd_inpaint" INPAINT_METHOD = "sd_inpaint" # Stable Diffusion inpainting model (used when INPAINT_METHOD == "sd_inpaint") SD_INPAINT_MODEL = "stabilityai/stable-diffusion-2-inpainting" SD_INPAINT_STEPS = 30 SD_INPAINT_GUIDANCE = 7.5 SD_INPAINT_PROMPT = "clean background, sharp, photorealistic, 8k, no blur, seamless" # LaMa checkpoint (auto-downloaded if missing) LAMA_CHECKPOINT = os.path.join(CHECKPOINT_DIR, "big-lama") LAMA_URL = ( "https://huggingface.co/smartywu/big-lama/resolve/main/big-lama.zip" ) # -- Detection / Matching thresholds ------------------------------------------- # GroundingDINO - box + text confidence GDINO_BOX_THRESHOLD = 0.30 GDINO_TEXT_THRESHOLD = 0.25 # CLIP cosine similarity threshold for accepting a match CLIP_SIMILARITY_THRESHOLD = 0.30 # Mask dilation (pixels) applied before inpainting to cover object edges MASK_DILATION_PX = 25 # -- Visualisation ------------------------------------------------------------- SAVE_DEBUG_IMAGES = True # saves intermediate masks and detections