Add sharded SDNQ int8 quantized Anima transformer

Browse files

Files changed (9) hide show

README.md +33 -0
config.json +74 -0
diffusion_pytorch_model-00001-of-00005.safetensors +3 -0
diffusion_pytorch_model-00002-of-00005.safetensors +3 -0
diffusion_pytorch_model-00003-of-00005.safetensors +3 -0
diffusion_pytorch_model-00004-of-00005.safetensors +3 -0
diffusion_pytorch_model-00005-of-00005.safetensors +3 -0
diffusion_pytorch_model.safetensors.index.json +0 -0
quantization_config.json +37 -0

README.md ADDED Viewed

	@@ -0,0 +1,33 @@

+---
+license: other
+base_model: circlestone-labs/Anima
+tags:
+- sdnq
+- int8
+- diffusion-single-file
+- text-to-image
+- cosmos
+---
+# Anima SDNQ INT8 Diffusers Transformer
+SDNQ INT8 quantization of the `circlestone-labs/Anima` Preview3 diffusion transformer checkpoint (`split_files/diffusion_models/anima-preview3-base.safetensors`).
+Quantization was performed with `sdnq==0.1.8` using static INT8 weight quantization with quantized matmul enabled. The resulting transformer weights are saved as sharded safetensors files in the standard diffusers component format, together with `quantization_config.json`.
+**Important:** the upstream Anima checkpoint also contains `llm_adapter.*` weights that are used by the native ComfyUI loader. The current `diffusers.CosmosTransformer3DModel.from_single_file` ignores those adapter weights, so this repo is a diffusers-compatible SDNQ transformer artifact and not a drop-in replacement for the full ComfyUI-native split checkpoint.
+## Load Test
+```python
+import torch
+import sdnq  # noqa: F401 - required: importing sdnq registers the SDNQ quantizer with diffusers
+from diffusers import CosmosTransformer3DModel
+transformer = CosmosTransformer3DModel.from_pretrained(
+    "WaveCut/Anima-SDNQ-int8",
+    torch_dtype=torch.bfloat16,
+)
+```
+Original model: https://huggingface.co/circlestone-labs/Anima

config.json ADDED Viewed

	@@ -0,0 +1,74 @@

+{
+  "_class_name": "CosmosTransformer3DModel",
+  "_diffusers_version": "0.38.0",
+  "_name_or_path": "/workspace/Anima-SDNQ-int8-diffusers",
+  "adaln_lora_dim": 256,
+  "attention_head_dim": 128,
+  "concat_padding_mask": true,
+  "controlnet_block_every_n": null,
+  "crossattn_proj_in_channels": 1024,
+  "encoder_hidden_states_channels": 1024,
+  "extra_pos_embed_type": null,
+  "img_context_dim_in": null,
+  "img_context_dim_out": 2048,
+  "img_context_num_tokens": 256,
+  "in_channels": 16,
+  "max_size": [
+    128,
+    240,
+    240
+  ],
+  "mlp_ratio": 4.0,
+  "num_attention_heads": 16,
+  "num_layers": 28,
+  "out_channels": 16,
+  "patch_size": [
+    1,
+    2,
+    2
+  ],
+  "quantization_config": {
+    "add_skip_keys": false,
+    "dequantize_fp32": false,
+    "dynamic_loss_threshold": null,
+    "group_size": 0,
+    "is_integer": true,
+    "is_training": false,
+    "modules_dtype_dict": {},
+    "modules_quant_config": {},
+    "modules_to_not_convert": [
+      "patch_embed",
+      "learnable_pos_embed",
+      "time_embed",
+      "norm_out",
+      "crossattn_proj",
+      "transformer_blocks.0.norm*",
+      "proj_out"
+    ],
+    "non_blocking": false,
+    "quant_conv": false,
+    "quant_embedding": false,
+    "quant_method": "sdnq",
+    "quantization_device": null,
+    "quantized_matmul_dtype": null,
+    "return_device": null,
+    "sdnq_version": "0.1.8",
+    "svd_rank": 32,
+    "svd_steps": 8,
+    "use_dynamic_quantization": false,
+    "use_grad_ckpt": true,
+    "use_quantized_matmul": true,
+    "use_quantized_matmul_conv": false,
+    "use_static_quantization": true,
+    "use_stochastic_rounding": false,
+    "use_svd": false,
+    "weights_dtype": "int8"
+  },
+  "rope_scale": [
+    1.0,
+    4.0,
+    4.0
+  ],
+  "text_embed_dim": 1024,
+  "use_crossattn_projection": false
+}

diffusion_pytorch_model-00001-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b14bddbfaa2e6523001883e7cf21d308959f449dd021a607e8999c81f8ec8c0b
+size 491633632

diffusion_pytorch_model-00002-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db1cc0a6c5680c29afbc866b5c9418952b3cb9f14f2900988bcb050c06258042
+size 485118640

diffusion_pytorch_model-00003-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81dfcd37a9fcc75d5c72cd9d194c792272562cee5905b108f77cad8fbe2fdc13
+size 485118792

diffusion_pytorch_model-00004-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:950d8b222ce75c88b60e4b6d291794c95b81fb707d1e5b970d5c8820df8d6a37
+size 485118792

diffusion_pytorch_model-00005-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3f2879bb3845c3fee39553d5a55b6d326ed4498034e0fd05d2a0ea9dbbd3205f
+size 36983544

diffusion_pytorch_model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

quantization_config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "add_skip_keys": false,
+  "dequantize_fp32": false,
+  "dynamic_loss_threshold": null,
+  "group_size": 0,
+  "is_integer": true,
+  "is_training": false,
+  "modules_dtype_dict": {},
+  "modules_quant_config": {},
+  "modules_to_not_convert": [
+    "patch_embed",
+    "learnable_pos_embed",
+    "time_embed",
+    "norm_out",
+    "crossattn_proj",
+    "transformer_blocks.0.norm*",
+    "proj_out"
+  ],
+  "non_blocking": false,
+  "quant_conv": false,
+  "quant_embedding": false,
+  "quant_method": "sdnq",
+  "quantization_device": null,
+  "quantized_matmul_dtype": null,
+  "return_device": null,
+  "sdnq_version": "0.1.8",
+  "svd_rank": 32,
+  "svd_steps": 8,
+  "use_dynamic_quantization": false,
+  "use_grad_ckpt": true,
+  "use_quantized_matmul": true,
+  "use_quantized_matmul_conv": false,
+  "use_static_quantization": true,
+  "use_stochastic_rounding": false,
+  "use_svd": false,
+  "weights_dtype": "int8"
+}