{ "alpha": 64, "architectures": [ "ParameterGenerator" ], "d_model": 4096, "dim_accumulation": 4, "dtype": "float32", "head_dim": 128, "input_dim": 4096, "model_type": "parameter_generator", "num_base_model_layers": 32, "num_pg_layers": 24, "output_dim": 1024, "pg_mapping": { "mlp.experts": { "num_experts": 64, "sub_weights": { "w1": { "lora_A_dim": 4096, "lora_B_dim": 3072 }, "w2": { "lora_A_dim": 3072, "lora_B_dim": 4096 }, "w3": { "lora_A_dim": 4096, "lora_B_dim": 3072 } }, "type": "grouped" }, "mlp.shared_mlp.down_proj": { "lora_A_dim": 3072, "lora_B_dim": 4096 }, "mlp.shared_mlp.gate_and_up_proj": { "lora_A_dim": 4096, "lora_B_dim": 6144 }, "self_attn.o_proj": { "lora_A_dim": 4096, "lora_B_dim": 4096 }, "self_attn.qkv_proj": { "lora_A_dim": 4096, "lora_B_dim": 6144 } }, "prefix": "model.layers.", "rank": 16, "token_dim": 1024, "transformers_version": "4.57.1" }