TeleStyleV2

Running on Zero

App Files Files Community

witcherderivia committed on May 25

Commit

1b95aae

verified ·

1 Parent(s): 10e235d

Update app.py

Browse files

Files changed (1) hide show

app.py +205 -33

app.py CHANGED Viewed

@@ -5,16 +5,29 @@ import torch
 import spaces
 from PIL import Image
-from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig
 import os
 from huggingface_hub import  hf_hub_download
 pipe = QwenImagePipeline.from_pretrained(
     torch_dtype=torch.bfloat16,
     device="cuda",
@@ -31,16 +44,32 @@ pipe = QwenImagePipeline.from_pretrained(
     processor_config=ModelConfig(model_id="Qwen/Qwen-Image-Edit-2509",
     download_source='huggingface',origin_file_pattern="processor/"),
 )
 speedup = hf_hub_download(repo_id="witcherderivia/Qwen-Image-Style-Transfer", filename="diffsynth_Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16.safetensors")
-qwenstyle= hf_hub_download(repo_id="witcherderivia/Qwen-Image-Style-Transfer", filename="diffsynth_Qwen-Image-Edit-2509-Style-Transfer-V1.safetensors")
-pipe.load_lora(pipe.dit, qwenstyle)
-pipe.load_lora(pipe.dit,speedup)
@@ -53,7 +82,9 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 MAX_SEED = np.iinfo(np.int32).max
-@spaces.GPU
 def infer(
     content_ref,
     style_ref,
@@ -64,6 +95,7 @@ def infer(
     num_inference_steps=4,
     minedge=1024,
     progress=gr.Progress(track_tqdm=True),
 ):
@@ -71,51 +103,186 @@ def infer(
-    content_ref=Image.fromarray(content_ref)
-    style_ref=Image.fromarray(style_ref)
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    w,h=content_ref.size
-    #minedge=1024
-    if w>h:
-        r=w/h
-        h=minedge
-        w=int(h*r)-int(h*r)%16
-    else:
-        r=h/w
-        w=minedge
-        h=int(w*r)-int(w*r)%16
-    print(f"Calling pipeline with prompt: '{prompt}'")
-    print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}, Size: {w}x{h}")
-    images = [
-        content_ref.resize((w, h)),
-        style_ref.resize((minedge, minedge)) ,
-    ]
-    # Generate the image
-    image = pipe(prompt, edit_image=images, seed=seed, num_inference_steps=num_inference_steps, height=h, width=w,edit_image_auto_resize=False,cfg_scale=true_guidance_scale)#ligtning
-    return image, seed
 # --- Examples and UI Layout ---
 examples = []
@@ -130,7 +297,7 @@ _HEADER_ = '''
 <p style="font-size: 1rem; margin-bottom: 1.5rem;">Paper: <a href='https://openreview.net/forum?id=Cgb7JpOA5Q&referrer=%5Bthe%20profile%20of%20Shiwen%20Zhang%5D(%2Fprofile%3Fid%3D~Shiwen_Zhang1)' target='_blank'>QwenStyle: Content-Preserving Style Transfer with Qwen-Image-Edit</a> | Codes: <a href='https://github.com/witcherofresearch/Qwen-Image-Style-Transfer' target='_blank'>GitHub</a></p>
-<p style="font-size: 1rem; margin-bottom: 1.5rem;">If you encounter an Error with this demo, the most possible reason is ZeroGPU out-of-memory and the solution is to decrease the Min Edge of the generated image from 1024 to a lower value. This is because ZeroGPU has a memory limit of 70GB, while all the examples are tested with 80GB H100 GPUs.  </p>
 '''
 with gr.Blocks() as demo:
@@ -227,8 +394,9 @@ with gr.Blocks() as demo:
                     randomize_seed,
                     true_guidance_scale,
                     num_inference_steps,
-                    minedge,],
-                outputs=[result, seed],
                 fn=infer,
                 cache_examples=False
                 )
@@ -251,10 +419,14 @@ with gr.Blocks() as demo:
             true_guidance_scale,
             num_inference_steps,
             minedge,
         ],
-        outputs=[result, seed],
     )
 if __name__ == "__main__":
     demo.launch(server_name='0.0.0.0')

 import spaces
 from PIL import Image
+#from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig
+from pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
+from qwen_vl_utils import process_vision_info
 import os
 from huggingface_hub import  hf_hub_download
+def update_textbox(selected_items):
+    """Return the strings in `selected_items` joined into one ", "-separated string."""
+    # Join the selected list of strings into a comma-separated string
+    return ", ".join(selected_items)
+# Build the editing pipeline from the "Qwen/Qwen-Image-Edit-2509" checkpoint in bfloat16
+# at import time, then move it to the CUDA device.
+pipe = QwenImageEditPlusPipeline.from_pretrained("Qwen/Qwen-Image-Edit-2509", torch_dtype=torch.bfloat16)
+print("pipeline loaded")
+pipe.to('cuda')
+pipe.set_progress_bar_config(disable=None)
+# The bare string literal below holds the previous diffsynth-based pipeline construction;
+# as a string expression statement it is never executed.
+'''
 pipe = QwenImagePipeline.from_pretrained(
     torch_dtype=torch.bfloat16,
     device="cuda",
     processor_config=ModelConfig(model_id="Qwen/Qwen-Image-Edit-2509",
     download_source='huggingface',origin_file_pattern="processor/"),
 )
+'''
+# Download the two LoRA checkpoints from the Hugging Face Hub repository.
+# NOTE(review): the style LoRA now uses a "diffusers_"-prefixed file while the Lightning
+# LoRA is still the "diffsynth_"-prefixed file -- confirm load_lora_weights accepts it.
 speedup = hf_hub_download(repo_id="witcherderivia/Qwen-Image-Style-Transfer", filename="diffsynth_Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16.safetensors")
+qwenstyle= hf_hub_download(repo_id="witcherderivia/Qwen-Image-Style-Transfer", filename="diffusers_Qwen-Image-Edit-2509-Style-Transfer-V1.safetensors")
+# Register each checkpoint under its own adapter name ('style' and 'dmd').
+pipe.load_lora_weights(
+    qwenstyle,adapter_name='style'
+)
+pipe.load_lora_weights(
+    speedup,adapter_name='dmd'
+)
+# Enable both adapters with weight 1.0, fuse them with lora_scale=1.0, then unload the
+# LoRA weights (presumably so only the fused base weights remain -- verify against diffusers docs).
+pipe.set_adapters(["style", "dmd",], adapter_weights=[1.0, 1.0])
+pipe.fuse_lora(adapter_names=["style", "dmd"], lora_scale=1.0)
+pipe.unload_lora_weights()
+# Largest signed 32-bit integer; used as the upper bound when a random seed is drawn.
 MAX_SEED = np.iinfo(np.int32).max
+@spaces.GPU(size="xlarge")
 def infer(
     content_ref,
     style_ref,
     num_inference_steps=4,
     minedge=1024,
     progress=gr.Progress(track_tqdm=True),
+    checkbox=None,
 ):
+    """Caption the reference images, assemble the prompt and run the edit pipeline.
+
+    Args:
+        content_ref: Content image as an array accepted by ``Image.fromarray``, or None.
+        style_ref: Style image as an array accepted by ``Image.fromarray``, or None.
+        num_inference_steps: Number of denoising steps passed to the pipeline.
+        minedge: Length in pixels given to the shorter edge of each resized image;
+            the longer edge keeps the aspect ratio, rounded down to a multiple of 16.
+        progress: Gradio progress tracker.
+        checkbox: Selected option labels; "infer with content prompt" and
+            "infer with style prompt" control whether the generated captions are
+            appended to (or removed from) the prompt. None is treated as no selection.
+
+    ``prompt``, ``seed``, ``randomize_seed`` and ``true_guidance_scale`` are declared in
+    the part of the signature that this diff view elides; the body uses them as the base
+    prompt, the generator seed, the switch for drawing a random seed, and the CFG scale.
+
+    Returns:
+        Tuple ``(image, seed, content_prompt, style_prompt, prompt)``: the first
+        generated image, the seed actually used, both generated captions ('' when the
+        corresponding image is missing) and the final prompt sent to the pipeline.
+
+    Raises:
+        gr.Error: If neither a content nor a style image is supplied.
+    """
+    # Avoid a shared mutable default list; None means "nothing selected".
+    checkbox = [] if checkbox is None else checkbox
+    # Without any image the `images` list below would never be bound, so fail early
+    # with a message the UI can display.
+    if content_ref is None and style_ref is None:
+        raise gr.Error("Please provide a content image, a style image, or both.")
+    content_text_input='describe main objects (fewer than 3) with separated words, each word is separated by comma,  the total number of words is strictly fewer than 3'
+    style_text_input='describe only the artistic style, material and stroke in 5 words, not objects.'
+    #pipe.text_encoder.eval()
+    content_prompt=''
+    style_prompt=''
+    if content_ref is not None:
+        content_ref=Image.fromarray(content_ref)
+        content_messages = [
+        {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image",
+                        "image": content_ref,
+                    },
+                    {"type": "text", "text": content_text_input},
+                ],
+            }
+        ]
+        content_text = pipe.processor.apply_chat_template(
+            content_messages, tokenize=False, add_generation_prompt=True
+        )
+        image_inputs, video_inputs = process_vision_info(content_messages)
+        inputs = pipe.processor(
+            text=[content_text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt",
+        )
+        inputs = inputs.to(device)
+        # Inference: Generation of the output
+        generated_ids = pipe.text_encoder.generate(**inputs, max_new_tokens=1024)
+        # Drop the echoed input tokens so only the newly generated caption is decoded.
+        generated_ids_trimmed = [
+            out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+        content_prompt = pipe.processor.batch_decode(
+            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )[0]
+        print(f"content_prompt={content_prompt}")
+    if style_ref is not None:
+        style_ref=Image.fromarray(style_ref)
+        style_messages = [
+        {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image",
+                        "image": style_ref,
+                    },
+                    {"type": "text", "text": style_text_input},
+                ],
+            }
+        ]
+        style_text = pipe.processor.apply_chat_template(
+            style_messages, tokenize=False, add_generation_prompt=True
+        )
+        image_inputs, video_inputs = process_vision_info(style_messages)
+        inputs = pipe.processor(
+            text=[style_text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt",
+        )
+        inputs = inputs.to(device)
+        # Inference: Generation of the output
+        generated_ids = pipe.text_encoder.generate(**inputs, max_new_tokens=1024)
+        # Drop the echoed input tokens so only the newly generated caption is decoded.
+        generated_ids_trimmed = [
+            out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+        style_prompt = pipe.processor.batch_decode(
+            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )[0]
+        print(f"style_prompt={style_prompt}")
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    sw,sh,w,h=0,0,0,0
+    if content_ref:
+        w,h=content_ref.size
+        #minedge=1024
+        # Shorter edge becomes `minedge`; longer edge is scaled and floored to a multiple of 16.
+        if w>h:
+            r=w/h
+            h=minedge
+            w=int(h*r)-int(h*r)%16
+        else:
+            r=h/w
+            w=minedge
+            h=int(w*r)-int(w*r)%16
+    if style_ref:
+        sw,sh=style_ref.size
+        if sw>sh:
+            r=sw/sh
+            sh=minedge
+            sw=int(sh*r)-int(sh*r)%16
+        else:
+            r=sh/sw
+            sw=minedge
+            sh=int(sw*r)-int(sw*r)%16
+    print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale},")
+    if content_ref and style_ref:
+        images = [
+            content_ref.resize((w, h)),
+            style_ref.resize((sw, sh)) ,
+            #style_ref.resize((minedge, minedge)) ,
+        ]
+    elif content_ref:
+        images = [
+            content_ref.resize((w, h)),
+            #style_ref.resize((sw, sh)) ,
+            #style_ref.resize((minedge, minedge)) ,
+        ]
+    elif style_ref:
+        images = [
+            #content_ref.resize((w, h)),
+            style_ref.resize((sw, sh)) ,
+            #style_ref.resize((minedge, minedge)) ,
+        ]
+    # Append a generated caption when its option is ticked, strip it when it is not.
+    if "infer with content prompt" in checkbox and content_prompt not in prompt:
+        prompt=','.join([prompt,content_prompt])
+    if "infer with style prompt" in checkbox and style_prompt not in prompt:
+        prompt=','.join([prompt,style_prompt])
+    if "infer with content prompt" not in checkbox and content_prompt in prompt:
+        prompt=prompt.replace(content_prompt.strip(','),'')
+    if "infer with style prompt" not in checkbox and style_prompt in prompt:
+        prompt=prompt.replace(style_prompt.strip(),'')
+    prompt=prompt.strip(',')
+    print(f"Calling pipeline with prompt: '{prompt}'")
+    inputs = {
+        "image": images,
+        "prompt": prompt,
+        "generator": torch.manual_seed(seed),
+        "true_cfg_scale": true_guidance_scale,
+        "negative_prompt": " ",
+        "num_inference_steps": num_inference_steps,
+        # Was the undefined name `true_g` (NameError on every call).
+        # NOTE(review): assumed to mean `true_guidance_scale` -- confirm the intended value.
+        "guidance_scale": true_guidance_scale,
+        "num_images_per_prompt": 1,
+        # Output size follows the content image when present, otherwise the style image.
+        "width": w or sw,
+        "height": h or sh,
+    }
+    with torch.inference_mode():
+        image = pipe(**inputs)
+    image = image.images[0]
+    return image, seed, content_prompt, style_prompt, prompt
 # --- Examples and UI Layout ---
 examples = []
 <p style="font-size: 1rem; margin-bottom: 1.5rem;">Paper: <a href='https://openreview.net/forum?id=Cgb7JpOA5Q&referrer=%5Bthe%20profile%20of%20Shiwen%20Zhang%5D(%2Fprofile%3Fid%3D~Shiwen_Zhang1)' target='_blank'>QwenStyle: Content-Preserving Style Transfer with Qwen-Image-Edit</a> | Codes: <a href='https://github.com/witcherofresearch/Qwen-Image-Style-Transfer' target='_blank'>GitHub</a></p>
+<p style="font-size: 1rem; margin-bottom: 1.5rem;">If you encounter an Error with this demo, the most possible reason is ZeroGPU out-of-memory and the solution is to decrease the Min Edge of the generated image from 1024 to a lower value.  </p>
 '''
 with gr.Blocks() as demo:
                     randomize_seed,
                     true_guidance_scale,
                     num_inference_steps,
+                    minedge,
+                    ],
+                # infer returns (image, seed, content_prompt, style_prompt, prompt), so the
+                # outputs mirror the click handler below; the stray extra "]" that made
+                # this line a syntax error is removed.
+                outputs=[result, seed, content_prompt, style_prompt,prompt],
                 fn=infer,
                 cache_examples=False
                 )
             true_guidance_scale,
             num_inference_steps,
             minedge,
+            checkbox,
         ],
+        outputs=[result, seed, content_prompt, style_prompt,prompt],
     )
+# Start the Gradio app only when this file is run as a script; server_name='0.0.0.0'
+# asks the server to listen on all network interfaces.
 if __name__ == "__main__":
     demo.launch(server_name='0.0.0.0')