{
  "_entry_class": "SingleModelCacheEntry",
  "_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
  "_task": "text-generation",
  "attention_bias": false,
  "attention_chunk_size": 8192,
  "attention_dropout": 0.0,
  "attn_scale": 0.1,
  "attn_temperature_tuning": true,
  "dtype": "bfloat16",
  "floor_scale": 8192,
  "for_llm_compressor": false,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "interleave_moe_layer_step": 2,
  "intermediate_size": 8192,
  "intermediate_size_mlp": 16384,
  "layer_types": [
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention",
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention",
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention",
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention",
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention",
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention",
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention",
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention",
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention",
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention",
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention",
    "chunked_attention",
    "chunked_attention",
    "chunked_attention",
    "full_attention"
  ],
  "max_position_embeddings": 1048576,
  "model_type": "llama4_text",
  "moe_layers": [
    1,
    3,
    5,
    7,
    9,
    11,
    13,
    15,
    17,
    19,
    21,
    23,
    25,
    27,
    29,
    31,
    33,
    35,
    37,
    39,
    41,
    43,
    45,
    47
  ],
  "neuron": {
    "_serialized_key": "NxDNeuronConfig",
    "batch_size": 32,
    "capacity_factor": null,
    "checkpoint_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
    "checkpoint_revision": "73d14711bcc77c16df3470856949c3764056b617",
    "continuous_batching": true,
    "ep_degree": 1,
    "fused_qkv": false,
    "glu_mlp": true,
    "local_ranks_size": 64,
    "max_batch_size": 32,
    "max_context_length": 4096,
    "max_topk": 256,
    "n_active_tokens": 4096,
    "neuronxcc_version": "2.21.33363.0+82129205",
    "on_device_sampling": true,
    "optimum_neuron_version": "0.4.5.dev1",
    "output_logits": false,
    "pp_degree": 1,
    "sequence_length": 4096,
    "speculation_length": 0,
    "start_rank_id": 0,
    "target": "trn2",
    "torch_dtype": "bfloat16",
    "tp_degree": 64
  },
  "no_rope_layers": [
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    0,
    1,
    1,
    1,
    0
  ],
  "num_attention_heads": 40,
  "num_experts_per_tok": 1,
  "num_hidden_layers": 48,
  "num_key_value_heads": 8,
  "num_local_experts": 128,
  "output_router_logits": false,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "router_aux_loss_coef": 0.001,
  "router_jitter_noise": 0.0,
  "tie_word_embeddings": false,
  "use_cache": true,
  "use_qk_norm": false,
  "vocab_size": 202048
}