WaveCut committed on
Commit
d937520
·
verified ·
1 Parent(s): 5e207b3

Add sharded SDNQ int8 quantized Anima transformer

Browse files
README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ base_model: circlestone-labs/Anima
4
+ tags:
5
+ - sdnq
6
+ - int8
7
+ - diffusion-single-file
8
+ - text-to-image
9
+ - cosmos
10
+ ---
11
+
12
+ # Anima SDNQ INT8 Diffusers Transformer
13
+
14
+ SDNQ INT8 quantization of the `circlestone-labs/Anima` Preview3 diffusion transformer checkpoint (`split_files/diffusion_models/anima-preview3-base.safetensors`).
15
+
16
+ Quantization was performed with `sdnq==0.1.8` using static INT8 weight quantization with quantized matmul enabled. The resulting transformer weights are saved in the standard sharded diffusers component format, together with `quantization_config.json`.
17
+
18
+ Important: the upstream Anima checkpoint contains additional `llm_adapter.*` weights used by the native ComfyUI loader. Current `diffusers.CosmosTransformer3DModel.from_single_file` ignores those adapter weights, so this repo is a diffusers-compatible SDNQ transformer artifact, not a full ComfyUI-native split checkpoint replacement.
19
+
20
+ ## Load Test
21
+
22
+ ```python
23
+ import torch
24
+ import sdnq  # required: importing sdnq registers the SDNQ quantizer with diffusers
25
+ from diffusers import CosmosTransformer3DModel
26
+
27
+ transformer = CosmosTransformer3DModel.from_pretrained(
28
+ "WaveCut/Anima-SDNQ-int8",
29
+ torch_dtype=torch.bfloat16,
30
+ )
31
+ ```
32
+
33
+ Original model: https://huggingface.co/circlestone-labs/Anima
config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "CosmosTransformer3DModel",
3
+ "_diffusers_version": "0.38.0",
4
+ "_name_or_path": "/workspace/Anima-SDNQ-int8-diffusers",
5
+ "adaln_lora_dim": 256,
6
+ "attention_head_dim": 128,
7
+ "concat_padding_mask": true,
8
+ "controlnet_block_every_n": null,
9
+ "crossattn_proj_in_channels": 1024,
10
+ "encoder_hidden_states_channels": 1024,
11
+ "extra_pos_embed_type": null,
12
+ "img_context_dim_in": null,
13
+ "img_context_dim_out": 2048,
14
+ "img_context_num_tokens": 256,
15
+ "in_channels": 16,
16
+ "max_size": [
17
+ 128,
18
+ 240,
19
+ 240
20
+ ],
21
+ "mlp_ratio": 4.0,
22
+ "num_attention_heads": 16,
23
+ "num_layers": 28,
24
+ "out_channels": 16,
25
+ "patch_size": [
26
+ 1,
27
+ 2,
28
+ 2
29
+ ],
30
+ "quantization_config": {
31
+ "add_skip_keys": false,
32
+ "dequantize_fp32": false,
33
+ "dynamic_loss_threshold": null,
34
+ "group_size": 0,
35
+ "is_integer": true,
36
+ "is_training": false,
37
+ "modules_dtype_dict": {},
38
+ "modules_quant_config": {},
39
+ "modules_to_not_convert": [
40
+ "patch_embed",
41
+ "learnable_pos_embed",
42
+ "time_embed",
43
+ "norm_out",
44
+ "crossattn_proj",
45
+ "transformer_blocks.0.norm*",
46
+ "proj_out"
47
+ ],
48
+ "non_blocking": false,
49
+ "quant_conv": false,
50
+ "quant_embedding": false,
51
+ "quant_method": "sdnq",
52
+ "quantization_device": null,
53
+ "quantized_matmul_dtype": null,
54
+ "return_device": null,
55
+ "sdnq_version": "0.1.8",
56
+ "svd_rank": 32,
57
+ "svd_steps": 8,
58
+ "use_dynamic_quantization": false,
59
+ "use_grad_ckpt": true,
60
+ "use_quantized_matmul": true,
61
+ "use_quantized_matmul_conv": false,
62
+ "use_static_quantization": true,
63
+ "use_stochastic_rounding": false,
64
+ "use_svd": false,
65
+ "weights_dtype": "int8"
66
+ },
67
+ "rope_scale": [
68
+ 1.0,
69
+ 4.0,
70
+ 4.0
71
+ ],
72
+ "text_embed_dim": 1024,
73
+ "use_crossattn_projection": false
74
+ }
diffusion_pytorch_model-00001-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b14bddbfaa2e6523001883e7cf21d308959f449dd021a607e8999c81f8ec8c0b
3
+ size 491633632
diffusion_pytorch_model-00002-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db1cc0a6c5680c29afbc866b5c9418952b3cb9f14f2900988bcb050c06258042
3
+ size 485118640
diffusion_pytorch_model-00003-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81dfcd37a9fcc75d5c72cd9d194c792272562cee5905b108f77cad8fbe2fdc13
3
+ size 485118792
diffusion_pytorch_model-00004-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:950d8b222ce75c88b60e4b6d291794c95b81fb707d1e5b970d5c8820df8d6a37
3
+ size 485118792
diffusion_pytorch_model-00005-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f2879bb3845c3fee39553d5a55b6d326ed4498034e0fd05d2a0ea9dbbd3205f
3
+ size 36983544
diffusion_pytorch_model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
quantization_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_skip_keys": false,
3
+ "dequantize_fp32": false,
4
+ "dynamic_loss_threshold": null,
5
+ "group_size": 0,
6
+ "is_integer": true,
7
+ "is_training": false,
8
+ "modules_dtype_dict": {},
9
+ "modules_quant_config": {},
10
+ "modules_to_not_convert": [
11
+ "patch_embed",
12
+ "learnable_pos_embed",
13
+ "time_embed",
14
+ "norm_out",
15
+ "crossattn_proj",
16
+ "transformer_blocks.0.norm*",
17
+ "proj_out"
18
+ ],
19
+ "non_blocking": false,
20
+ "quant_conv": false,
21
+ "quant_embedding": false,
22
+ "quant_method": "sdnq",
23
+ "quantization_device": null,
24
+ "quantized_matmul_dtype": null,
25
+ "return_device": null,
26
+ "sdnq_version": "0.1.8",
27
+ "svd_rank": 32,
28
+ "svd_steps": 8,
29
+ "use_dynamic_quantization": false,
30
+ "use_grad_ckpt": true,
31
+ "use_quantized_matmul": true,
32
+ "use_quantized_matmul_conv": false,
33
+ "use_static_quantization": true,
34
+ "use_stochastic_rounding": false,
35
+ "use_svd": false,
36
+ "weights_dtype": "int8"
37
+ }