File size: 348 Bytes
445d8c8
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
# Recipe with a single stage (`default_stage`) that configures one
# QuantizationModifier.
default_stage:
  default_modifiers:
    QuantizationModifier:
      # Entries the modifier is pointed at.
      # NOTE(review): presumably module class names - confirm against the
      # consuming tool.
      targets:
        - Linear
      # Entries excluded from the targets above. Items starting with 're:'
      # look like regular expressions (presumably matched against module
      # names - confirm). They are single-quoted so the backslashes stay
      # literal.
      ignore:
        - lm_head
        - 're:^mtp\..*'
        - 're:^model\.visual\..*'
        - 're:.*linear_attn\.in_proj_qkv$'
        - 're:.*linear_attn\.in_proj_z$'
        - 're:.*linear_attn\.in_proj_a$'
        - 're:.*linear_attn\.in_proj_b$'
      # Name of the quantization scheme to apply.
      scheme: NVFP4
      # NOTE(review): going by the key name, `false` keeps divisibility
      # checks enabled - confirm the exact semantics with the consumer.
      bypass_divisibility_checks: false