Spaces:

rootlocalghost
/

Model-Quantizer

Running

App Files Files Community

rootlocalghost commited on about 1 month ago

Commit

f239fbd

verified ·

1 Parent(s): a6158dc

Create app.py

Browse files

Files changed (1) hide show

app.py +215 -0

app.py ADDED Viewed

	@@ -0,0 +1,215 @@

+import os
+import gc
+import torch
+import shutil
+import uuid
+import gradio as gr
+from huggingface_hub import HfApi, hf_hub_download
+from safetensors.torch import load_file, save_file
+# --- ARCHITECTURE PROFILES ---
+# Defines which sensitive layers must stay in BF16 during INT8 quantization to prevent precision collapse.
+ARCH_PROFILES = {
+    "FLUX / Generic Rectified Flow": ["norm", "ln_", "embed", "time_in", "vector_in", "guidance_in", "txt_in", "img_in"],
+    "Z-Image / DiT Core": ["t_embedder", "cap_embedder", "all_x_embedder", "all_final_layer", "rope_embedder", "embed_tokens", "norm", "ln_", "shared"],
+    "Stable Diffusion (SDXL/SD3)": ["time_embed", "label_emb", "norm", "ln_", "out."]
+}
+def convert_and_upload(token, source_repo, target_repo, precision, target_components, arch_profile):
+    if not token:
+        yield "❌ Error: Please provide a valid Hugging Face Write Token."
+        return
+    if not target_repo.strip() or "/" not in target_repo:
+        yield "❌ Error: Target Repository must be in format 'username/repo-name'."
+        return
+    if not target_components:
+        yield "❌ Error: Please select at least one component to quantize."
+        return
+    # Map precision
+    target_dtype = None
+    is_int8 = precision == "INT8"
+    if precision == "FP8": target_dtype = torch.float8_e4m3fn
+    elif precision == "FP16": target_dtype = torch.float16
+    elif precision == "BF16": target_dtype = torch.bfloat16
+    api = HfApi(token=token)
+    yield f"🔄 Verifying target repo: {target_repo}..."
+    try:
+        api.create_repo(repo_id=target_repo, exist_ok=True, private=False)
+    except Exception as e:
+        yield f"❌ Error creating repo: {str(e)}"
+        return
+    yield f"📋 Fetching files from {source_repo}..."
+    try:
+        files = api.list_repo_files(source_repo)
+    except Exception as e:
+        yield f"❌ Error fetching files: {str(e)}"
+        return
+    cache_dir = f"./hf_cache_{uuid.uuid4().hex[:8]}"
+    success_count, error_count = 0, 0
+    exclude_prefixes = ARCH_PROFILES.get(arch_profile, [])
+    for file in files:
+        if "/" not in file and file.endswith(".safetensors"):
+            yield f"🗑️ Auto-skipping massive root model: {file}..."
+            continue
+        yield f"⏳ Processing {file}..."
+        try:
+            os.makedirs(cache_dir, exist_ok=True)
+            local_path = hf_hub_download(repo_id=source_repo, filename=file, cache_dir=cache_dir, token=token)
+            in_target_component = any(f"{comp}/" in file for comp in target_components)
+            if file.endswith(".safetensors") and in_target_component:
+                yield f"🧠 Quantizing {file} to {precision}..."
+                tensors = load_file(local_path)
+                new_tensors = {}
+                for k, v in tensors.items():
+                    if is_int8:
+                        is_2d_weight = "weight" in k and len(v.shape) == 2
+                        is_excluded = any(ex in k for ex in exclude_prefixes)
+                        if is_2d_weight and not is_excluded:
+                            if v.dtype == torch.float8_e4m3fn: v = v.to(torch.bfloat16)
+                            scale = v.abs().max(dim=1, keepdim=True)[0] / 127.0
+                            scale = scale.clamp(min=1e-8)
+                            new_tensors[f"{k.rsplit('.', 1)[0]}.weight_int8"] = torch.round(v / scale).clamp(-127, 127).to(torch.int8)
+                            new_tensors[f"{k.rsplit('.', 1)[0]}.weight_scale"] = scale.to(torch.bfloat16)
+                        else:
+                            new_tensors[k] = v.to(torch.bfloat16) if v.is_floating_point() else v
+                    else:
+                        new_tensors[k] = v.to(target_dtype) if v.is_floating_point() else v
+                converted_path = "converted.safetensors"
+                save_file(new_tensors, converted_path)
+                del tensors, new_tensors
+                gc.collect()
+                yield f"☁️ Uploading {precision} version of {file}..."
+                api.upload_file(path_or_fileobj=converted_path, path_in_repo=file, repo_id=target_repo)
+                os.remove(converted_path)
+            else:
+                yield f"☁️ Copying {file} as-is..."
+                api.upload_file(path_or_fileobj=local_path, path_in_repo=file, repo_id=target_repo)
+            success_count += 1
+            if os.path.exists(cache_dir): shutil.rmtree(cache_dir)
+            gc.collect()
+        except Exception as e:
+            error_count += 1
+            yield f"⚠️ Error processing {file}: {str(e)}\nSkipping..."
+    if os.path.exists(cache_dir): shutil.rmtree(cache_dir)
+    yield f"✅ Finished! Processed: {success_count} | Errors: {error_count}."
+# --- UI LOGIC ---
+def generate_target_repo(source, precision):
+    model_name = source.split("/")[-1] if "/" in source else source
+    return f"your-username/{model_name}-{precision}"
+def toggle_int8_warning(precision):
+    return gr.update(visible=(precision == "INT8"))
+# --- GUI ---
+with gr.Blocks(theme=gr.themes.Base(primary_hue="blue", neutral_hue="slate")) as demo:
+    gr.Markdown(
+        """
+        # ⚡ Universal Model Quantizer Hub
+        Convert massive diffusion and transformer models directly on the Hugging Face hub.
+        Engineered with aggressive cache-clearing to prevent storage crashes on free-tier Spaces.
+        """
+    )
+    with gr.Row():
+        # LEFT COLUMN: Configuration
+        with gr.Column(scale=5):
+            with gr.Tabs():
+                with gr.TabItem("1. Authentication & Source"):
+                    hf_token = gr.Textbox(label="HF Access Token (Write)", type="password", placeholder="hf_...")
+                    source_repo = gr.Textbox(
+                        label="Source Repository",
+                        placeholder="e.g., black-forest-labs/FLUX.1-dev",
+                        info="Paste any Hugging Face model repository ID."
+                    )
+                    gr.Markdown("### Popular Presets")
+                    with gr.Row():
+                        preset_flux = gr.Button("FLUX.2-klein-9B", size="sm")
+                        preset_zimage = gr.Button("Z-Image-Turbo", size="sm")
+                        preset_sd3 = gr.Button("SD3.5-Large", size="sm")
+                with gr.TabItem("2. Quantization Rules"):
+                    arch_profile = gr.Radio(
+                        choices=list(ARCH_PROFILES.keys()),
+                        value="FLUX / Generic Rectified Flow",
+                        label="Architecture Profile",
+                        info="Crucial for INT8: Selects which layers to protect from precision loss."
+                    )
+                    target_components = gr.CheckboxGroup(
+                        choices=["transformer", "text_encoder", "text_encoder_2", "vae"],
+                        value=["transformer"],
+                        label="Folders to Quantize",
+                        info="Unselected folders will be copied to the new repo unchanged."
+                    )
+                with gr.TabItem("3. Output Settings"):
+                    precision = gr.Dropdown(
+                        choices=["FP8", "FP16", "BF16", "INT8"],
+                        value="INT8",
+                        label="Target Precision"
+                    )
+                    int8_warning = gr.Markdown(
+                        "⚠️ **INT8 Selected:** Keys will be split into `weight_int8` and `weight_scale`. "
+                        "Requires custom XPU/CUDA native linear classes to execute.",
+                        visible=True
+                    )
+                    target_repo = gr.Textbox(
+                        label="Target Repository",
+                        placeholder="your-username/model-name",
+                        interactive=True
+                    )
+            start_btn = gr.Button("🚀 Start Cloud Quantization", variant="primary", size="lg")
+        # RIGHT COLUMN: Logs
+        with gr.Column(scale=4):
+            output_log = gr.Textbox(
+                label="Terminal Output",
+                lines=24,
+                interactive=False,
+                max_lines=30,
+                show_copy_button=True
+            )
+    # --- WIRING ---
+    # Presets
+    preset_flux.click(lambda: ("black-forest-labs/FLUX.2-klein-9B", "FLUX / Generic Rectified Flow"), outputs=[source_repo, arch_profile])
+    preset_zimage.click(lambda: ("your-username/Z-Image-Turbo", "Z-Image / DiT Core"), outputs=[source_repo, arch_profile])
+    preset_sd3.click(lambda: ("stabilityai/stable-diffusion-3.5-large", "Stable Diffusion (SDXL/SD3)"), outputs=[source_repo, arch_profile])
+    # Dynamic Updates
+    source_repo.change(fn=generate_target_repo, inputs=[source_repo, precision], outputs=[target_repo])
+    precision.change(fn=generate_target_repo, inputs=[source_repo, precision], outputs=[target_repo])
+    precision.change(fn=toggle_int8_warning, inputs=[precision], outputs=[int8_warning])
+    # Execution
+    start_btn.click(
+        fn=convert_and_upload,
+        inputs=[hf_token, source_repo, target_repo, precision, target_components, arch_profile],
+        outputs=[output_log]
+    )
+if __name__ == "__main__":
+    demo.launch()