| quant_stage: | |
| quant_modifiers: | |
| QuantizationModifier: | |
| config_groups: | |
| group_0: | |
| targets: ['re:.*self_attn.q_proj.*', 're:.*self_attn.k_proj.*', 're:.*self_attn.v_proj.*', | |
| 're:.*self_attn.o_proj.*', 're:.*linear_attn.in_proj_qkv.*', 're:.*linear_attn.in_proj_z.*', | |
| 're:.*linear_attn.out_proj.*', 're:.*shared_expert.gate_proj.*', 're:.*shared_expert.up_proj.*', | |
| 're:.*shared_expert.down_proj.*'] | |
| weights: | |
| num_bits: 8 | |
| type: float | |
| symmetric: true | |
| group_size: null | |
| strategy: block | |
| block_structure: [128, 128] | |
| dynamic: false | |
| actorder: null | |
| scale_dtype: null | |
| zp_dtype: null | |
| observer: memoryless_minmax | |
| observer_kwargs: {} | |
| input_activations: | |
| num_bits: 8 | |
| type: float | |
| symmetric: true | |
| group_size: 128 | |
| strategy: group | |
| block_structure: null | |
| dynamic: true | |
| actorder: null | |
| scale_dtype: null | |
| zp_dtype: null | |
| observer: null | |
| observer_kwargs: {} | |
| output_activations: null | |
| format: null | |
| group_1: | |
| targets: ['re:.*mlp.experts.*gate_proj.*', 're:.*mlp.experts.*up_proj.*', 're:.*mlp.experts.*down_proj.*'] | |
| weights: | |
| num_bits: 4 | |
| type: float | |
| symmetric: true | |
| group_size: 16 | |
| strategy: tensor_group | |
| block_structure: null | |
| dynamic: false | |
| actorder: null | |
| scale_dtype: torch.float8_e4m3fn | |
| zp_dtype: null | |
| observer: memoryless_minmax | |
| observer_kwargs: {} | |
| input_activations: null | |
| output_activations: null | |
| format: null | |
| targets: [Linear] | |
| ignore: ['re:.*lm_head', 're:.*embed_tokens', 're:visual.*', 're:model.visual.*', 're:.*mlp.gate$', | |
| 're:.*shared_expert_gate$', 're:.*linear_attn.in_proj_a', 're:.*linear_attn.in_proj_b', | |
| 're:.*linear_attn.conv1d', 're:^mtp\..*'] | |
| bypass_divisibility_checks: false | |